diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 2228476..81a3016 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -170,10 +170,40 @@ jobs:
             echo "No buf module found on main branch, skipping breaking change check"
           fi
 
+  proto-sync:
+    name: Proto Sync Check
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: bufbuild/buf-setup-action@v1
+      - name: Verify protos match pinned BSR version
+        run: |
+          # Named export_dir, not TMPDIR, so tools that honour TMPDIR are unaffected.
+          export_dir=$(mktemp -d)
+          trap 'rm -rf "$export_dir"' EXIT
+          buf export buf.build/multiagentcoordinationprotocol/macp -o "$export_dir"
+          drift=0
+          for proto in \
+            macp/v1/envelope.proto \
+            macp/v1/core.proto \
+            macp/modes/decision/v1/decision.proto; do
+            if ! diff -q "$export_dir/$proto" "proto/$proto" > /dev/null 2>&1; then
+              echo "DRIFT: $proto"
+              diff -u "$export_dir/$proto" "proto/$proto" || true
+              drift=1
+            fi
+          done
+
+          if [ "$drift" -ne 0 ]; then
+            echo "Proto files don't match BSR. Run 'make sync-protos'."
+            exit 1
+          fi
+          echo "All proto files match BSR."
+
   ci-pass:
     name: All Checks Passed
     runs-on: ubuntu-latest
-    needs: [check, fmt, clippy, test, build, lint-protobuf]
+    needs: [check, fmt, clippy, test, build, lint-protobuf, proto-sync]
     steps:
       - name: Summary
@@ -185,3 +215,4 @@ jobs:
           echo " - cargo test"
           echo " - cargo build --release"
           echo " - protobuf lint"
+          echo " - proto sync check"
diff --git a/Makefile b/Makefile
index e8d4253..f90d9eb 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,7 @@
-.PHONY: setup build test fmt clippy check
+.PHONY: setup build test fmt clippy check sync-protos sync-protos-local check-protos
+
+SPEC_PROTO_DIR := ../multiagentcoordinationprotocol/schemas/proto
+PROTO_FILES := macp/v1/envelope.proto macp/v1/core.proto macp/modes/decision/v1/decision.proto
 
 ## First-time setup: configure git hooks
 setup:
@@ -18,3 +21,44 @@ clippy:
 	cargo clippy --all-targets -- -D warnings
 
 check: fmt clippy test
+
+## Pull latest proto files from BSR
+sync-protos:
+	buf export buf.build/multiagentcoordinationprotocol/macp -o proto
+	@echo "Done. Run 'git diff proto/' to review changes."
+
+## Sync from local sibling checkout (for development before BSR publish)
+# A failed mkdir or cp aborts the recipe instead of being reported as copied.
+sync-protos-local:
+	@if [ ! -d "$(SPEC_PROTO_DIR)" ]; then \
+		echo "Error: Spec repo not found at $(SPEC_PROTO_DIR)"; \
+		echo "Use 'make sync-protos' to sync from BSR instead."; \
+		exit 1; \
+	fi
+	@for f in $(PROTO_FILES); do \
+		mkdir -p "proto/$$(dirname "$$f")" && \
+		cp "$(SPEC_PROTO_DIR)/$$f" "proto/$$f" && \
+		echo " Copied $$f" || exit 1; \
+	done
+	@echo "Done. Run 'git diff proto/' to review changes."
+
+## Check if local protos match BSR
+# The export goes to a scratch dir that the EXIT trap removes on every path.
+# It is named export_dir, not TMPDIR, so tools that honour TMPDIR are unaffected.
+check-protos:
+	@export_dir=$$(mktemp -d) || exit 1; \
+	trap 'rm -rf "$$export_dir"' EXIT; \
+	buf export buf.build/multiagentcoordinationprotocol/macp -o "$$export_dir" || exit 1; \
+	drift=0; \
+	for f in $(PROTO_FILES); do \
+		if ! diff -q "$$export_dir/$$f" "proto/$$f" > /dev/null 2>&1; then \
+			echo "DRIFT: $$f"; \
+			drift=1; \
+		fi; \
+	done; \
+	if [ "$$drift" -eq 0 ]; then \
+		echo "All proto files match BSR."; \
+	else \
+		echo "Run 'make sync-protos' to update."; \
+		exit 1; \
+	fi
diff --git a/README.md b/README.md
index a0f0159..52225a0 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,128 @@
-# macp-runtime v0.1
+# macp-runtime v0.2
 
-Minimal Coordination Runtime (MCR)
+**Minimal Coordination Runtime (MCR)** — an RFC-0001-compliant gRPC server implementing the Multi-Agent Coordination Protocol (MACP).
 
-## Run
+The MACP Runtime provides session-based message coordination between autonomous agents. It manages session lifecycles, enforces protocol invariants, routes messages through a pluggable Mode system, and ensures deterministic state transitions — so that agents can focus on coordination logic rather than infrastructure plumbing.
 
-Install protoc first.
+## Features
 
-Then:
+- **RFC-0001 Compliant Protocol** — Structured protobuf schema with versioned envelope, typed errors, and capability negotiation
+- **Initialize Handshake** — Protocol version negotiation and capability discovery before any session work begins
+- **Pluggable Mode System** — Coordination logic is decoupled from runtime physics; ship new modes without touching the kernel
+- **Decision Mode (RFC Lifecycle)** — Full Proposal → Evaluation → Objection → Vote → Commitment workflow with phase tracking
+- **Multi-Round Convergence Mode** — Participant-based `all_equal` convergence strategy with automatic resolution
+- **Session Cancellation** — Explicit `CancelSession` RPC to terminate sessions with a recorded reason
+- **Message Deduplication** — Idempotent message handling via `seen_message_ids` tracking
+- **Participant Validation** — Sender membership enforcement when a participant list is configured
+- **Signal Messages** — Ambient, session-less messages for out-of-band coordination signals
+- 
**Bidirectional Streaming** — `StreamSession` RPC for real-time session event streaming +- **Mode & Manifest Discovery** — `ListModes` and `GetManifest` RPCs for runtime introspection +- **Structured Errors** — `MACPError` with RFC error codes, session/message correlation, and detail payloads +- **Append-Only Audit Log** — Log-before-mutate ordering for every session event +- **CI/CD Pipeline** — GitHub Actions workflow with formatting, linting, and test gates - cargo build - cargo run +## Prerequisites -Server runs on 127.0.0.1:50051 +- [Rust](https://www.rust-lang.org/tools/install) (stable toolchain) +- [Protocol Buffers compiler (`protoc`)](https://grpc.io/docs/protoc-installation/) -Send SessionStart, then Message. -If payload == "resolve", session transitions to RESOLVED. +## Quick Start + +```bash +# Build the project +cargo build + +# Run the server (listens on 127.0.0.1:50051) +cargo run + +# Run test clients (server must be running in another terminal) +cargo run --bin client # basic decision mode demo +cargo run --bin fuzz_client # all error paths + multi-round + new RPCs +cargo run --bin multi_round_client # multi-round convergence demo +``` + +## Build & Development Commands + +```bash +cargo build # compile the project +cargo run # start the runtime server +cargo test # run the test suite +cargo check # type-check without building +cargo fmt # format all code +cargo clippy # run the linter + +# Or use the Makefile: +make setup # configure git hooks +make build # cargo build +make test # cargo test +make fmt # cargo fmt +make clippy # cargo clippy with -D warnings +make check # fmt + clippy + test +``` + +## Project Structure + +``` +runtime/ +├── proto/ +│ ├── buf.yaml # Buf linter configuration +│ └── macp/ +│ ├── v1/ +│ │ ├── envelope.proto # Envelope, Ack, MACPError, SessionState +│ │ └── core.proto # Full service definition + all message types +│ └── modes/ +│ └── decision/ +│ └── v1/ +│ └── decision.proto # Decision mode payload types +├── src/ 
+│ ├── main.rs # Entry point — wires Runtime + gRPC server +│ ├── lib.rs # Library root — proto modules + re-exports +│ ├── server.rs # gRPC adapter (MacpRuntimeService impl) +│ ├── error.rs # MacpError enum + RFC error codes +│ ├── session.rs # Session struct, SessionState, TTL parsing +│ ├── registry.rs # SessionRegistry (thread-safe session store) +│ ├── log_store.rs # Append-only LogStore for audit trails +│ ├── runtime.rs # Runtime kernel (dispatch + apply ModeResponse) +│ ├── mode/ +│ │ ├── mod.rs # Mode trait + ModeResponse enum +│ │ ├── decision.rs # DecisionMode (RFC lifecycle) +│ │ └── multi_round.rs # MultiRoundMode (convergence) +│ └── bin/ +│ ├── client.rs # Basic decision mode demo client +│ ├── fuzz_client.rs # Comprehensive error-path test client +│ └── multi_round_client.rs # Multi-round convergence demo client +├── build.rs # tonic-build proto compilation +├── Cargo.toml # Dependencies and project config +├── Makefile # Development shortcuts +└── .github/ + └── workflows/ + └── ci.yml # CI/CD pipeline +``` + +## gRPC Service + +The runtime exposes `MACPRuntimeService` on `127.0.0.1:50051` with the following RPCs: + +| RPC | Description | +|-----|-------------| +| `Initialize` | Protocol version negotiation and capability exchange | +| `Send` | Send an Envelope, receive an Ack | +| `StreamSession` | Bidirectional streaming for session events | +| `GetSession` | Query session metadata by ID | +| `CancelSession` | Cancel an active session with a reason | +| `GetManifest` | Retrieve agent manifest and supported modes | +| `ListModes` | Discover registered mode descriptors | +| `ListRoots` | List resource roots | +| `WatchModeRegistry` | Stream mode registry change notifications | +| `WatchRoots` | Stream root change notifications | + +## Documentation + +- **[docs/README.md](./docs/README.md)** — Getting started guide and key concepts +- **[docs/protocol.md](./docs/protocol.md)** — Full MACP v1.0 protocol specification +- 
**[docs/architecture.md](./docs/architecture.md)** — Internal architecture and design principles +- **[docs/examples.md](./docs/examples.md)** — Step-by-step usage examples and common patterns + +## License + +See the repository root for license information. diff --git a/docs/README.md b/docs/README.md index e645943..0154ad3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,119 +1,207 @@ # MACP Runtime Documentation -Welcome to the Multi-Agent Coordination Protocol (MACP) Runtime documentation. This guide explains everything about the system in plain language, even if you don't know Rust. +Welcome to the Multi-Agent Coordination Protocol (MACP) Runtime documentation. This guide explains everything about the system in plain language, whether you are an experienced distributed-systems engineer or someone encountering multi-agent coordination for the first time. + +--- ## What Is This Project? -The MACP Runtime (also called Minimal Coordination Runtime or MCR) is a **server** that helps multiple AI agents or programs coordinate with each other. Think of it as a traffic controller for conversations between different agents. +The MACP Runtime — also called the **Minimal Coordination Runtime (MCR)** — is a **gRPC server** that helps multiple AI agents or programs coordinate with each other. Think of it as a traffic controller for structured conversations between autonomous agents: it manages who can speak, tracks the state of each conversation, enforces time limits, and determines when a conversation has reached its conclusion. + +Version **0.2** of the runtime implements **RFC-0001**, introducing a formal protocol handshake, structured error reporting, a rich Decision Mode lifecycle, session cancellation, message deduplication, participant validation, and a host of new RPCs for runtime introspection. ### Real-World Analogy -Imagine you're organizing a meeting: -1. Someone starts the meeting (SessionStart) -2. People send messages back and forth -3. 
Eventually, the meeting reaches a decision (Resolved state) -4. Once resolved, no more messages can be sent +Imagine you are chairing a formal committee meeting: + +1. **Someone opens the meeting** — a `SessionStart` message creates a new coordination session. +2. **The chair announces the rules** — the `Initialize` handshake negotiates which protocol version everyone speaks and what capabilities the runtime supports. +3. **Participants discuss and propose** — agents send `Proposal`, `Evaluation`, `Objection`, and `Vote` messages through the Decision Mode, or `Contribute` messages through the Multi-Round Mode. +4. **The committee reaches a decision** — when the mode's convergence criteria are met, the session transitions to **Resolved** and the resolution is recorded. +5. **After the gavel falls, no more motions are accepted** — once a session is resolved or expired, no further messages can be sent to it. +6. **The chair can also adjourn early** — a `CancelSession` call terminates the session before natural resolution. -The MACP Runtime manages this entire lifecycle automatically and enforces the rules. +The MACP Runtime manages this entire lifecycle automatically and enforces the rules at every step. + +--- ## What Problem Does It Solve? When multiple AI agents or programs need to work together, they need a way to: -- **Start a conversation** (create a session) -- **Exchange messages** safely -- **Track the state** of the conversation -- **Know when it's done** (resolved) -- **Prevent messages after it's done** (enforce invariants) -Without a coordination runtime, each agent would need to implement all this logic themselves, leading to bugs and inconsistencies. +- **Negotiate a common protocol** — agree on version, capabilities, and supported modes before any real work begins. +- **Start a conversation** — create a session with a declared intent, participant list, and time-to-live. 
+- **Exchange messages safely** — with deduplication, participant validation, and ordered logging. +- **Track the state** of the conversation — know whether it is open, resolved, or expired at any moment. +- **Reach a decision** — through a structured lifecycle (proposals, evaluations, votes, commitments) or through iterative convergence. +- **Know when it is done** — terminal states are enforced; resolved or expired sessions reject further messages. +- **Cancel gracefully** — terminate sessions explicitly with a recorded reason. +- **Discover capabilities** — query which modes are available, inspect manifests, and watch for registry changes. + +Without a coordination runtime, each agent would need to implement all of this logic independently, leading to subtle bugs, inconsistent state machines, and fragile integrations. The MACP Runtime centralizes these concerns so that agents can focus on their domain logic. + +--- ## Key Concepts +### Protocol Version + +The current protocol version is **`1.0`**. Every `Envelope` must carry `macp_version: "1.0"` or the message will be rejected with `UNSUPPORTED_PROTOCOL_VERSION`. Before sending any session messages, clients should call the `Initialize` RPC to negotiate the protocol version and discover runtime capabilities. + ### Sessions -A **session** is like a conversation thread. Each session has: -- A unique ID -- A current state (Open, Resolved, or Expired) -- A time-to-live (TTL) - how long before it expires -- Optional resolution data (the final outcome) - -### Messages -**Messages** are sent within a session. Each message includes: -- Which session it belongs to -- A unique message ID -- Who sent it -- When it was sent -- The actual content (payload) - -### States -Sessions go through different **states**: -1. **Open** - Active, accepting messages -2. **Resolved** - Decision made, no more messages allowed -3. 
**Expired** - TTL expired (planned feature) - -### The Protocol -The **MACP protocol** defines the rules for: -- How to format messages -- What fields are required -- How sessions transition between states -- What errors can occur + +A **session** is a bounded coordination context — like a conversation thread with rules. Each session has: + +- A unique **session ID** chosen by the creator. +- A **mode** that defines the coordination logic (e.g., `macp.mode.decision.v1` or `macp.mode.multi_round.v1`). +- A current **state**: `Open`, `Resolved`, or `Expired`. +- A **time-to-live (TTL)** — how long the session remains open before automatic expiry (default 60 seconds, max 24 hours). +- An optional **participant list** — if provided, only listed senders may contribute. +- An optional **resolution** — the final outcome, recorded when the mode resolves the session. +- **Version metadata** — intent, mode_version, configuration_version, and policy_version carried from the `SessionStartPayload`. + +### Messages (Envelopes) + +Every message is wrapped in an **Envelope** — a structured protobuf container that carries: + +- **macp_version** — protocol version (`"1.0"`). +- **mode** — which coordination mode handles this message. +- **message_type** — the semantic type (`SessionStart`, `Message`, `Proposal`, `Vote`, `Contribute`, `Signal`, etc.). +- **message_id** — a unique identifier for deduplication and tracing. +- **session_id** — which session this belongs to (may be empty for `Signal` messages). +- **sender** — who is sending the message. +- **timestamp_unix_ms** — informational timestamp. +- **payload** — the actual content (protobuf-encoded or JSON, depending on the mode and message type). + +### Acknowledgments (Ack) + +Every `Send` call returns an **Ack** — a structured response that tells you: + +- **ok** — `true` if accepted, `false` if rejected. +- **duplicate** — `true` if this was an idempotent replay of a previously accepted message. 
+- **message_id** and **session_id** — echoed back for correlation. +- **accepted_at_unix_ms** — server-side acceptance timestamp. +- **session_state** — the session's state after processing (OPEN, RESOLVED, EXPIRED). +- **error** — a structured `MACPError` with an RFC error code, human-readable message, and optional details. + +### Session States + +Sessions follow a strict state machine with three states: + +| State | Can receive messages? | Transitions to | +|-------|----------------------|----------------| +| **Open** | Yes | Resolved, Expired | +| **Resolved** | No (terminal) | — | +| **Expired** | No (terminal) | — | + +- **Open** — the session is active and accepting messages. This is the initial state after `SessionStart`. +- **Resolved** — a mode returned a `Resolve` or `PersistAndResolve` response, recording the final outcome. No further messages are accepted. +- **Expired** — the session's TTL elapsed (detected lazily on the next message), or the session was explicitly cancelled via `CancelSession`. No further messages are accepted. + +### Modes + +**Modes** are pluggable coordination strategies. The runtime provides the "physics" — session invariants, logging, TTL enforcement, routing — while modes provide the "coordination logic" — when to resolve, what state to track, and what convergence criteria to apply. + +Two modes are built in: + +| Mode Name | Aliases | Description | +|-----------|---------|-------------| +| `macp.mode.decision.v1` | `decision` | RFC-compliant decision lifecycle: Proposal → Evaluation → Objection → Vote → Commitment | +| `macp.mode.multi_round.v1` | `multi_round` | Participant-based convergence using `all_equal` strategy | + +An empty `mode` field defaults to `macp.mode.decision.v1` for backward compatibility. + +### Signals + +**Signal** messages are ambient, session-less messages. They can be sent with an empty `session_id` and do not create or modify any session. 
They are useful for out-of-band coordination hints, heartbeats, or cross-session correlation. + +--- ## How It Works (High Level) ``` -Client MACP Runtime - | | - |--SessionStart("s1")------------->| - |<-------Ack(accepted=true)--------| - | | - |--Message("hello")--------------->| - |<-------Ack(accepted=true)--------| - | | - |--Message("resolve")------------->| (session now RESOLVED) - |<-------Ack(accepted=true)--------| - | | - |--Message("more")---------------->| - |<---Ack(accepted=false, ----------| - | error="SessionNotOpen") | +Client MACP Runtime + | | + |--- Initialize(["1.0"]) ------------>| + |<-- InitializeResponse(v=1.0) -------| (handshake complete) + | | + |--- Send(SessionStart, s1) --------->| + |<-- Ack(ok=true, state=OPEN) --------| (session created) + | | + |--- Send(Proposal, s1) ------------->| + |<-- Ack(ok=true, state=OPEN) --------| (proposal recorded) + | | + |--- Send(Vote, s1) ----------------->| + |<-- Ack(ok=true, state=OPEN) --------| (vote recorded) + | | + |--- Send(Commitment, s1) ----------->| + |<-- Ack(ok=true, state=RESOLVED) ----| (session resolved) + | | + |--- Send(Message, s1) -------------->| + |<-- Ack(ok=false, SESSION_NOT_OPEN) -| (rejected: terminal) + | | + |--- GetSession(s1) ----------------->| + |<-- SessionMetadata(RESOLVED) -------| (query state) ``` -## What's Built With +--- + +## What Is Built With + +- **gRPC over HTTP/2** — high-performance, type-safe RPC framework with streaming support. +- **Protocol Buffers (protobuf)** — binary serialization for efficient, schema-enforced message exchange. +- **Rust** — memory-safe, concurrent systems language with zero-cost abstractions. +- **Tonic** — Rust's async gRPC framework built on Tokio. +- **Buf** — protobuf linting and breaking-change detection. 
-- **gRPC**: A high-performance communication protocol (like HTTP but faster) -- **Protocol Buffers**: A way to define structured data (like JSON but more efficient) -- **Rust**: A programming language known for safety and speed +You do not need to know Rust to understand the protocol or use the runtime — any language with a gRPC client can connect. -You don't need to know Rust to understand the concepts - the documentation explains everything in plain language. +--- ## Components This runtime consists of: -1. **Server** (`macp-runtime`) - The main runtime that manages sessions -2. **Client** (`client`) - A test client demonstrating basic usage -3. **Fuzz Client** (`fuzz_client`) - A test client that tries to break the rules +1. **Runtime Server** (`macp-runtime`) — the main coordination server managing sessions, modes, and protocol enforcement. +2. **Basic Client** (`client`) — a demo client exercising the happy path: Initialize, ListModes, SessionStart, Message, Resolve, GetSession. +3. **Fuzz Client** (`fuzz_client`) — a comprehensive test client exercising every error path, every new RPC, participant validation, signal messages, cancellation, and multi-round convergence. +4. **Multi-Round Client** (`multi_round_client`) — a focused demo of multi-round convergence with two participants reaching agreement. 
+ +--- ## Documentation Structure -- **[architecture.md](./architecture.md)** - How the system is designed internally -- **[protocol.md](./protocol.md)** - The MACP protocol specification -- **[examples.md](./examples.md)** - Step-by-step usage examples +| Document | What It Covers | +|----------|---------------| +| **[protocol.md](./protocol.md)** | Full MACP v1.0 protocol specification — message types, validation rules, error codes, mode specifications | +| **[architecture.md](./architecture.md)** | Internal architecture — component design, data flow, concurrency model, design principles | +| **[examples.md](./examples.md)** | Step-by-step usage examples — client walkthroughs, common patterns, FAQ | + +--- ## Quick Start -**Terminal 1** - Start the server: +**Terminal 1** — Start the server: ```bash cargo run ``` -**Terminal 2** - Run a test client: +You should see: +``` +macp-runtime v0.2 (RFC-0001) listening on 127.0.0.1:50051 +``` + +**Terminal 2** — Run a test client: ```bash cargo run --bin client ``` -You'll see the client send messages and the server respond with acknowledgments. +You will see the client negotiate the protocol version, discover modes, create a session, send messages, resolve the session, and verify the final state. + +--- ## Next Steps -1. Read [protocol.md](./protocol.md) to understand the MACP protocol -2. Read [architecture.md](./architecture.md) to understand how it's built -3. Read [examples.md](./examples.md) to see practical usage +1. Read **[protocol.md](./protocol.md)** to understand the full MACP v1.0 protocol specification. +2. Read **[architecture.md](./architecture.md)** to understand how the runtime is built internally. +3. Read **[examples.md](./examples.md)** for practical, step-by-step usage examples. 
diff --git a/docs/architecture.md b/docs/architecture.md index 281ad0c..606f826 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -1,167 +1,421 @@ # Architecture -This document explains how the MACP Runtime is built internally. We'll explain each component in plain language, without assuming Rust knowledge. +This document explains how the MACP Runtime v0.2 is built internally. It walks through every component, every data structure, every flow, and every design decision in narrative detail. You do not need to know Rust to follow along — the documentation explains concepts in plain language, with code excerpts for precision where it matters. + +--- + +## Table of Contents + +1. [System Overview](#system-overview) +2. [Protobuf Schema Layer](#protobuf-schema-layer) +3. [Build System](#build-system) +4. [Entry Point (main.rs)](#entry-point-mainrs) +5. [Library Root (lib.rs)](#library-root-librs) +6. [Error Types (error.rs)](#error-types-errorrs) +7. [Session Types (session.rs)](#session-types-sessionrs) +8. [Session Registry (registry.rs)](#session-registry-registryrs) +9. [Log Store (log_store.rs)](#log-store-log_storers) +10. [Mode System (mode/)](#mode-system-mode) +11. [Runtime Kernel (runtime.rs)](#runtime-kernel-runtimers) +12. [gRPC Server Adapter (server.rs)](#grpc-server-adapter-serverrs) +13. [Data Flow: Complete Message Processing](#data-flow-complete-message-processing) +14. [Data Flow: Session Cancellation](#data-flow-session-cancellation) +15. [Data Flow: Initialize Handshake](#data-flow-initialize-handshake) +16. [Concurrency Model](#concurrency-model) +17. [File Structure](#file-structure) +18. [Build Process](#build-process) +19. [CI/CD Pipeline](#cicd-pipeline) +20. 
[Design Principles](#design-principles) + +--- ## System Overview ``` -┌─────────────────────────────────────────────────┐ -│ Clients │ -│ (Other programs/agents wanting to coordinate) │ -└────────────┬────────────────────┬────────────────┘ - │ │ - │ gRPC calls │ gRPC calls - │ │ - ▼ ▼ -┌─────────────────────────────────────────────────┐ -│ MACP Runtime Server │ -│ ┌───────────────────────────────────────────┐ │ -│ │ MacpServer (gRPC Adapter Layer) │ │ -│ │ - Receives messages │ │ -│ │ - Validates transport-level fields │ │ -│ │ - Delegates to Runtime kernel │ │ -│ └──────────────┬────────────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌───────────────────────────────────────────┐ │ -│ │ Runtime (Kernel) │ │ -│ │ - Resolves mode │ │ -│ │ - Enforces TTL / session invariants │ │ -│ │ - Dispatches to Mode implementations │ │ -│ │ - Applies ModeResponse │ │ -│ └──────┬───────────────┬────────────────────┘ │ -│ │ │ │ -│ ▼ ▼ │ -│ ┌─────────────┐ ┌─────────────────────┐ │ -│ │ Mode │ │ Mode │ │ -│ │ Dispatcher │ │ Implementations │ │ -│ │ │ │ - DecisionMode │ │ -│ │ │ │ - MultiRoundMode │ │ -│ └─────────────┘ └─────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌───────────────────────────────────────────┐ │ -│ │ SessionRegistry LogStore │ │ -│ │ (Session State) (Event Log) │ │ -│ │ HashMap: id->Session HashMap: id->Vec │ │ -│ └───────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────┘ +┌──────────────────────────────────────────────────────────────┐ +│ Clients │ +│ (AI agents, test programs, any gRPC-capable application) │ +└─────────┬──────────────────┬──────────────────┬──────────────┘ + │ Initialize │ Send / Stream │ GetSession / + │ │ │ CancelSession / + │ │ │ ListModes / ... 
+ ▼ ▼ ▼ +┌──────────────────────────────────────────────────────────────┐ +│ MACP Runtime Server (v0.2) │ +│ │ +│ ┌────────────────────────────────────────────────────────┐ │ +│ │ MacpServer (gRPC Adapter Layer) │ │ +│ │ - Implements MACPRuntimeService (10 RPCs) │ │ +│ │ - Validates transport-level fields (version, IDs) │ │ +│ │ - Builds structured Ack responses with MACPError │ │ +│ │ - Delegates all coordination logic to Runtime │ │ +│ └───────────────────────┬────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌────────────────────────────────────────────────────────┐ │ +│ │ Runtime (Coordination Kernel) │ │ +│ │ - Routes messages by type (SessionStart/Signal/other) │ │ +│ │ - Resolves mode names to implementations │ │ +│ │ - Enforces TTL, session state, participant validation │ │ +│ │ - Handles message deduplication │ │ +│ │ - Dispatches to Mode implementations │ │ +│ │ - Applies ModeResponse as single mutation point │ │ +│ │ - Manages session cancellation │ │ +│ └──────┬────────────────────────────┬────────────────────┘ │ +│ │ │ │ +│ ▼ ▼ │ +│ ┌──────────────────┐ ┌──────────────────────────────┐ │ +│ │ Mode Registry │ │ Mode Implementations │ │ +│ │ HashMap> │ │ DecisionMode │ │ +│ │ │ │ - RFC lifecycle │ │ +│ │ 4 entries: │ │ - Proposal/Eval/Vote/Commit │ │ +│ │ decision (x2) │ │ - Phase tracking │ │ +│ │ multi_round(x2) │ │ │ │ +│ │ │ │ MultiRoundMode │ │ +│ │ │ │ - Convergence checking │ │ +│ │ │ │ - Round counting │ │ +│ └──────────────────┘ └──────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌────────────────────────────────────────────────────────┐ │ +│ │ SessionRegistry LogStore │ │ +│ │ RwLock> String, Vec>>│ │ +│ │ │ │ +│ │ Thread-safe session Append-only per-session │ │ +│ │ storage with read/write event log with Incoming │ │ +│ │ locking and Internal entries │ │ +│ └────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────┘ ``` -## Core Components +The architecture follows a strict 
layered design: -### 1. Protocol Definitions (proto/macp.proto) +1. **Transport layer** (`MacpServer`) — handles gRPC protocol concerns, validates transport-level fields, builds structured responses. +2. **Coordination layer** (`Runtime`) — enforces protocol invariants, routes messages, manages session lifecycle, dispatches to modes. +3. **Logic layer** (`Mode` implementations) — provides coordination-specific behavior, returns declarative `ModeResponse` values. +4. **Storage layer** (`SessionRegistry`, `LogStore`) — provides thread-safe state persistence. -This file defines the "language" that clients and server use to communicate. It's like a contract that both sides agree to follow. +Each layer has a single responsibility and communicates through well-defined interfaces. -**What's defined:** -- **Envelope**: The wrapper for every message - - Contains metadata (who, when, which session, which mode) - - Contains the actual payload (the message content) -- **Ack**: The response the server sends back - - `accepted`: true if the message was accepted, false if rejected - - `error`: explanation if rejected -- **SessionQuery / SessionInfo**: For querying session state -- **MACPService**: The service interface - - `SendMessage`: Send an Envelope, get an Ack - - `GetSession`: Query session state by ID +--- -**Why Protocol Buffers?** -Instead of JSON (text-based), Protocol Buffers use a binary format that's: -- Faster to send/receive -- Smaller in size -- Type-safe (can't accidentally send wrong data types) +## Protobuf Schema Layer -### 2. Generated Code (build.rs + target/debug/build/) +### Schema Organization -The `build.rs` script runs before compilation and automatically generates Rust code from the `.proto` file. This generated code handles all the low-level serialization/deserialization. 
+The protocol schema has been restructured from a single `macp.proto` file (v0.1) into a modular, concern-separated layout: -**What you need to know:** -- You never edit the generated code -- Changes to `.proto` automatically update the generated code -- The generated code appears as the `pb` module (protocol buffers) +``` +proto/ +├── buf.yaml # Linting and breaking-change config +└── macp/ + ├── v1/ + │ ├── envelope.proto # Foundational types + │ └── core.proto # Service + all message types + └── modes/ + └── decision/ + └── v1/ + └── decision.proto # Decision mode payloads +``` + +**`envelope.proto`** defines the four foundational types that everything builds on: + +- **`Envelope`** — the universal message wrapper with 8 fields (macp_version, mode, message_type, message_id, session_id, sender, timestamp_unix_ms, payload). +- **`Ack`** — the structured acknowledgment with 7 fields (ok, duplicate, message_id, session_id, accepted_at_unix_ms, session_state, error). +- **`MACPError`** — the structured error type with 5 fields (code, message, session_id, message_id, details). +- **`SessionState`** — the enum with 4 values (UNSPECIFIED, OPEN, RESOLVED, EXPIRED). + +**`core.proto`** imports `envelope.proto` and defines everything else: + +- **Capability messages** — `ClientInfo`, `RuntimeInfo`, `Capabilities` (with sub-capabilities for sessions, cancellation, progress, manifest, mode registry, roots, and experimental features). +- **Initialize** — `InitializeRequest` and `InitializeResponse` for protocol handshake. +- **Session payloads** — `SessionStartPayload` (with intent, participants, versions, ttl_ms, context, roots), `SessionCancelPayload`, `CommitmentPayload`. +- **Coordination payloads** — `SignalPayload`, `ProgressPayload`. +- **Session metadata** — `SessionMetadata` with typed state and version fields. +- **Introspection** — `AgentManifest`, `ModeDescriptor`. 
+- **Request/Response wrappers** — `SendRequest`/`SendResponse`, `GetSessionRequest`/`GetSessionResponse`, `CancelSessionRequest`/`CancelSessionResponse`, etc. +- **Streaming types** — `StreamSessionRequest`/`StreamSessionResponse`. +- **Watch types** — `RegistryChanged`, `RootsChanged`. +- **Service definition** — `MACPRuntimeService` with 10 RPCs. + +**`decision.proto`** defines mode-specific payload types: + +- **`ProposalPayload`** — proposal_id, option, rationale, supporting_data. +- **`EvaluationPayload`** — proposal_id, recommendation, confidence, reason. +- **`ObjectionPayload`** — proposal_id, reason, severity. +- **`VotePayload`** — proposal_id, vote, reason. + +These types are not referenced by the core proto — they exist as domain schemas for clients and the Decision Mode implementation. The `CommitmentPayload` is defined in `core.proto` because it is reused across modes. + +### Buf Configuration + +The `buf.yaml` file configures: + +- **Lint rules:** `STANDARD` — enforces naming conventions, field numbering, and other best practices. +- **Breaking-change detection:** `FILE` — detects breaking changes at the file level, ensuring backward compatibility as the schema evolves. + +--- + +## Build System + +### build.rs + +The `build.rs` script runs before compilation and uses `tonic-build` to generate Rust code from the protobuf files: + +```rust +fn main() -> Result<(), Box> { + tonic_build::configure().build_server(true).compile( + &[ + "macp/v1/envelope.proto", + "macp/v1/core.proto", + "macp/modes/decision/v1/decision.proto", + ], + &["proto"], + )?; + Ok(()) +} +``` + +This generates two Rust modules: +- `macp.v1` — all core types and the gRPC service server/client stubs. +- `macp.modes.decision.v1` — decision mode payload types. + +The generated code is included in the binary via `tonic::include_proto!()` macros in `lib.rs`. 
+ +### Cargo.toml Dependencies + +| Dependency | Purpose | +|------------|---------| +| `tokio` | Async runtime (full features) | +| `tonic` | gRPC framework | +| `prost` | Protobuf serialization/deserialization | +| `prost-types` | Well-known protobuf types | +| `uuid` | UUID generation (v4) | +| `thiserror` | Ergonomic error type derivation | +| `chrono` | Date/time handling | +| `serde` + `serde_json` | JSON serialization for mode state and payloads | +| `tokio-stream` | Stream utilities for async streaming | +| `futures-core` | Core future/stream traits | +| `async-stream` | Macro for creating async streams | + +### Makefile + +The `Makefile` provides development shortcuts: + +```makefile +setup: # Configure git hooks (points to .githooks/) +build: # cargo build +test: # cargo test +fmt: # cargo fmt --all +clippy: # cargo clippy --all-targets -- -D warnings +check: # fmt + clippy + test (full CI check locally) +``` + +--- + +## Entry Point (main.rs) + +The `main.rs` file is deliberately minimal — it wires up the three core components and starts the gRPC server: + +```rust +#[tokio::main] +async fn main() -> Result<(), Box<dyn std::error::Error>> { + let addr = "127.0.0.1:50051".parse()?; + let registry = Arc::new(SessionRegistry::new()); + let log_store = Arc::new(LogStore::new()); + let runtime = Arc::new(Runtime::new(registry, log_store)); + let svc = MacpServer::new(runtime); + + println!("macp-runtime v0.2 (RFC-0001) listening on {}", addr); + + Server::builder() + .add_service(pb::macp_runtime_service_server::MacpRuntimeServiceServer::new(svc)) + .serve(addr) + .await?; + + Ok(()) +} +``` + +**What happens on startup:** + +1. The address `127.0.0.1:50051` is parsed. +2. A `SessionRegistry` is created — an empty, thread-safe hashmap for storing sessions. +3. A `LogStore` is created — an empty, thread-safe hashmap for storing per-session event logs. +4. 
A `Runtime` is created — it takes ownership of the registry and log store (via `Arc`), and registers the built-in modes (DecisionMode and MultiRoundMode with both RFC names and backward-compatible aliases). +5. A `MacpServer` is created — the gRPC adapter wrapping the runtime. +6. Tonic's gRPC `Server` is started, listening on the configured address. + +The `Arc` wrapper allows the runtime to be shared across all async tasks spawned by the Tokio runtime — each gRPC handler receives a clone of the `Arc`. + +--- + +## Library Root (lib.rs) + +```rust +pub mod pb { + tonic::include_proto!("macp.v1"); +} + +pub mod decision_pb { + tonic::include_proto!("macp.modes.decision.v1"); +} + +pub mod error; +pub mod log_store; +pub mod mode; +pub mod registry; +pub mod runtime; +pub mod session; +``` + +The library root serves two purposes: + +1. **Proto module inclusion** — The `pb` module contains all generated code from `envelope.proto` and `core.proto`. The `decision_pb` module contains generated code from `decision.proto`. These are available to both the server binary and the client binaries. -### 3. Main Server (src/main.rs) +2. **Module re-exports** — All internal modules are made public so that client binaries (in `src/bin/`) can import types like `SessionStartPayload`, `Envelope`, etc. -This is the entry point — where the program starts. +--- -**What it does:** -1. Creates a `SessionRegistry` (session state storage) -2. Creates a `LogStore` (event log storage) -3. Creates a `Runtime` (coordination kernel with registered modes) -4. Creates a `MacpServer` (gRPC adapter wrapping the runtime) -5. Starts a gRPC server on `127.0.0.1:50051` -6. Waits for incoming connections +## Error Types (error.rs) -### 4. Error Types (src/error.rs) +The error system is designed to provide both internal precision (distinct Rust error variants for each failure mode) and external clarity (RFC-compliant error codes for clients). 
-The system defines specific errors for each problem: +### MacpError Enum ```rust pub enum MacpError { - InvalidMacpVersion, // Version != "v1" - InvalidEnvelope, // Missing required fields or invalid TTL payload + InvalidMacpVersion, // Protocol version mismatch + InvalidEnvelope, // Missing required fields or invalid encoding DuplicateSession, // SessionStart for existing session UnknownSession, // Message for non-existent session - SessionNotOpen, // Message sent to resolved/expired session + SessionNotOpen, // Message to resolved/expired session TtlExpired, // Session TTL has elapsed - InvalidTtl, // TTL value out of range (<=0 or >24h) - UnknownMode, // Mode not registered in runtime - InvalidModeState, // Mode state bytes can't be deserialized - InvalidPayload, // Payload doesn't match mode's expected format + InvalidTtl, // TTL value out of range + UnknownMode, // Mode not registered + InvalidModeState, // Mode state deserialization failure + InvalidPayload, // Payload doesn't match mode's expectations + Forbidden, // Operation not permitted + Unauthenticated, // Authentication required + DuplicateMessage, // Explicit duplicate detection + PayloadTooLarge, // Payload exceeds size limits + RateLimited, // Too many requests } ``` -**Error conversion:** -Errors are never "panics" (crashes). 
They're converted to Ack responses: +### RFC Error Code Mapping + +Each variant maps to an RFC-compliant string code via the `error_code()` method: + +| Variant | RFC Code | +|---------|----------| +| `InvalidMacpVersion` | `"UNSUPPORTED_PROTOCOL_VERSION"` | +| `InvalidEnvelope` | `"INVALID_ENVELOPE"` | +| `DuplicateSession` | `"INVALID_ENVELOPE"` | +| `UnknownSession` | `"SESSION_NOT_FOUND"` | +| `SessionNotOpen` | `"SESSION_NOT_OPEN"` | +| `TtlExpired` | `"SESSION_NOT_OPEN"` | +| `InvalidTtl` | `"INVALID_ENVELOPE"` | +| `UnknownMode` | `"MODE_NOT_SUPPORTED"` | +| `InvalidModeState` | `"INVALID_ENVELOPE"` | +| `InvalidPayload` | `"INVALID_ENVELOPE"` | +| `Forbidden` | `"FORBIDDEN"` | +| `Unauthenticated` | `"UNAUTHENTICATED"` | +| `DuplicateMessage` | `"DUPLICATE_MESSAGE"` | +| `PayloadTooLarge` | `"PAYLOAD_TOO_LARGE"` | +| `RateLimited` | `"RATE_LIMITED"` | + +**Design rationale:** Multiple internal variants map to `INVALID_ENVELOPE` because, from a client's perspective, these are all "your request was malformed." The distinct internal variants allow for precise logging, metrics, and debugging. The `Display` trait implementation uses the variant names directly (e.g., `"InvalidMacpVersion"`), providing human-readable error messages in log output. + +--- + +## Session Types (session.rs) + +### SessionState Enum + ```rust -Ack { - accepted: false, - error: "SessionNotOpen" +pub enum SessionState { + Open, // Active — accepting messages + Resolved, // Terminal — mode resolved the session + Expired, // Terminal — TTL elapsed or cancelled } ``` -### 5. 
Session Types (src/session.rs) - -Each **Session** stores information about one coordination: +### Session Struct ```rust pub struct Session { - pub session_id: String, // e.g., "s1" - pub state: SessionState, // Open, Resolved, or Expired - pub ttl_expiry: i64, // Unix timestamp (milliseconds) - pub resolution: Option<Vec<u8>>, // Final result (if resolved) - pub mode: String, // Mode name (e.g., "decision", "multi_round") - pub mode_state: Vec<u8>, // Mode-specific state (opaque bytes) - pub participants: Vec<String>, // Participant list (for multi_round) + pub session_id: String, + pub state: SessionState, + pub ttl_expiry: i64, // Unix ms when session expires + pub started_at_unix_ms: i64, // Unix ms when session was created + pub resolution: Option<Vec<u8>>, // Final outcome (if Resolved) + pub mode: String, // Mode name + pub mode_state: Vec<u8>, // Mode-specific serialized state + pub participants: Vec<String>, // Allowed senders (empty = open) + pub seen_message_ids: HashSet<String>, // For deduplication + + // RFC version fields from SessionStartPayload + pub intent: String, + pub mode_version: String, + pub configuration_version: String, + pub policy_version: String, } ``` -**Session States:** -- **Open**: Session is active, can receive messages -- **Resolved**: Decision made (via mode), no more messages allowed -- **Expired**: TTL has elapsed, enforced on next message receipt +**Key fields explained:** -### 6. Session Registry (src/registry.rs) +- **`mode_state`** — Opaque bytes owned by the mode. The runtime never inspects these; it simply stores whatever the mode returns in `PersistState`. Each mode serializes/deserializes its own state format (JSON for both built-in modes). -The **SessionRegistry** is like a database in memory. It stores all active sessions. +- **`seen_message_ids`** — A `HashSet<String>` tracking every `message_id` that has been accepted for this session. 
Used for deduplication — if a message arrives with a `message_id` already in this set, it is returned as a duplicate without re-processing. -**Data structure:** -``` -SessionRegistry { - sessions: HashMap +- **`participants`** — If non-empty, only senders in this list may send messages to the session. This is populated from `SessionStartPayload.participants`. + +- **`started_at_unix_ms`** — Records when the session was created (server-side timestamp). Used in `GetSession` responses. + +- **Version fields** (`intent`, `mode_version`, `configuration_version`, `policy_version`) — Carried from the `SessionStartPayload` and returned in `GetSession` responses. The runtime does not interpret these; they exist for client-side versioning and policy tracking. + +### TTL Parsing + +Two functions handle TTL extraction from the protobuf payload: + +**`parse_session_start_payload(payload: &[u8])`** — Decodes the raw bytes as a protobuf `SessionStartPayload`. If the payload is empty, returns a default `SessionStartPayload` (all fields at their protobuf defaults). + +**`extract_ttl_ms(payload: &SessionStartPayload)`** — Returns the `ttl_ms` field, or the default `60,000 ms` if the field is `0`. Validates that the value is in range `[1, 86,400,000]`. Returns `Err(MacpError::InvalidTtl)` if out of range. + +**Constants:** +- `DEFAULT_TTL_MS: i64 = 60_000` (60 seconds) +- `MAX_TTL_MS: i64 = 86_400_000` (24 hours) + +--- + +## Session Registry (registry.rs) + +```rust +pub struct SessionRegistry { + pub(crate) sessions: RwLock>, } ``` -**Thread-safety:** -Multiple clients can connect at the same time. The `RwLock` ensures: -- Multiple readers can read simultaneously (efficient) -- Only one writer at a time (prevents corruption) -- Readers wait while someone is writing +The `SessionRegistry` is the in-memory session store. It wraps a `HashMap` in a Tokio `RwLock` for thread-safe concurrent access. + +**Methods:** -### 7. 
Log Store (src/log_store.rs) +- **`new()`** — Creates an empty registry. +- **`get_session(session_id: &str) -> Option<Session>`** — Acquires a read lock and returns a clone of the session if found. +- **`insert_session_for_test(session: Session)`** — Acquires a write lock and inserts a session. Used only in tests. -The **LogStore** maintains an append-only event log per session: +**Important note:** The registry's `RwLock` is also directly accessed by the `Runtime` for atomic read-modify-write operations. During `process_session_start()` and `process_message()`, the runtime acquires a **write lock** on the registry and holds it for the entire processing sequence (including mode dispatch). This ensures atomicity but creates a potential concurrency bottleneck for high-throughput scenarios. + +--- + +## Log Store (log_store.rs) + +The `LogStore` maintains an append-only audit log per session, providing a complete history of every event that occurred within a session. + +### LogEntry ```rust pub struct LogEntry { @@ -170,17 +424,46 @@ pub struct LogEntry { pub sender: String, pub message_type: String, pub raw_payload: Vec<u8>, - pub entry_kind: EntryKind, // Incoming or Internal + pub entry_kind: EntryKind, +} + +pub enum EntryKind { + Incoming, // Message from a client + Internal, // Runtime-generated event } ``` -- **Incoming** entries: messages received from clients -- **Internal** entries: runtime-generated events (e.g., TtlExpired) -- Entries are always appended before state mutation (log-before-mutate ordering) +**Entry kinds:** -### 8. Mode System (src/mode/) +- **Incoming** — Every client message (SessionStart, Proposal, Vote, Contribute, etc.) is logged as an `Incoming` entry before the mode processes it. This is the "log-before-mutate" guarantee. +- **Internal** — Runtime-generated events like `TtlExpired` (when TTL expiry is detected) and `SessionCancel` (when `CancelSession` is called) are logged as `Internal` entries. 
These have synthetic `message_id` values like `"__ttl_expired__"` or `"__session_cancel__"`. -The **Mode** trait defines the interface for coordination logic: +### LogStore Structure + +```rust +pub struct LogStore { + logs: RwLock<HashMap<String, Vec<LogEntry>>>, +} +``` + +**Methods:** + +- **`new()`** — Creates an empty store. +- **`create_session_log(session_id: &str)`** — Creates an empty log vector for a session. Idempotent — if the log already exists, this is a no-op. +- **`append(session_id: &str, entry: LogEntry)`** — Appends an entry to the session's log. If no log exists for the session, one is auto-created. +- **`get_log(session_id: &str) -> Option<Vec<LogEntry>>`** — Returns a cloned copy of the session's log entries. + +**Design properties:** +- Entries are never deleted or modified — the log is strictly append-only. +- Entries are appended in strict chronological order per session. +- The log persists for the lifetime of the server process (no cleanup). +- Future extensions (replay engine, GetSessionLog RPC) will build on this foundation. + +--- + +## Mode System (mode/) + +### Mode Trait (mode/mod.rs) ```rust pub trait Mode: Send + Sync { @@ -191,24 +474,121 @@ pub trait Mode: Send + Sync { } ``` -Modes receive **immutable** session state and return a `ModeResponse`: -- `NoOp` — no state change -- `PersistState(bytes)` — update mode-specific state -- `Resolve(bytes)` — resolve the session -- `PersistAndResolve{state, resolution}` — both at once +The `Mode` trait is the extension point for coordination logic. It is designed around three principles: + +1. **Immutability** — Modes receive `&Session` (immutable reference). They cannot directly mutate state. +2. **Declarative responses** — Modes return `ModeResponse` values that describe *what should change*, not *how to change it*. +3. **Thread safety** — The `Send + Sync` bounds ensure modes can be shared across async tasks. 
+ +### ModeResponse (mode/mod.rs) + +```rust +pub enum ModeResponse { + NoOp, + PersistState(Vec), + Resolve(Vec), + PersistAndResolve { state: Vec, resolution: Vec }, +} +``` + +The runtime's `apply_mode_response()` is the single mutation point that interprets these responses: + +- **`NoOp`** — Nothing happens. The message was accepted but produced no state change. +- **`PersistState(bytes)`** — `session.mode_state = bytes`. The mode's internal state is updated. +- **`Resolve(bytes)`** — `session.state = Resolved` and `session.resolution = Some(bytes)`. The session terminates with a resolution. +- **`PersistAndResolve { state, resolution }`** — Both of the above in a single atomic operation. + +### DecisionMode (mode/decision.rs) + +The Decision Mode implements the RFC-0001 decision lifecycle. It maintains a `DecisionState` serialized as JSON in `session.mode_state`. + +**Internal state:** + +```rust +pub struct DecisionState { + pub proposals: HashMap, + pub evaluations: Vec, + pub objections: Vec, + pub votes: HashMap, // sender → Vote + pub phase: DecisionPhase, +} + +pub enum DecisionPhase { + Proposal, // Initial — waiting for proposals + Evaluation, // At least one proposal exists + Voting, // Votes being cast + Committed, // Terminal — decision finalized +} +``` + +**Message routing in `on_message()`:** + +The mode inspects `envelope.message_type` and dispatches accordingly: -**DecisionMode** (`src/mode/decision.rs`): -- `on_session_start()` → `NoOp` -- `on_message()` → if `payload == b"resolve"` then `Resolve` else `NoOp` +| message_type | Handler | Returns | +|-------------|---------|---------| +| `"Proposal"` | Parse `ProposalPayload`, validate `proposal_id`, store proposal, advance to `Evaluation` | `PersistState` | +| `"Evaluation"` | Parse `EvaluationPayload`, validate `proposal_id` exists, append evaluation | `PersistState` | +| `"Objection"` | Parse `ObjectionPayload`, validate `proposal_id` exists, append objection | `PersistState` | +| `"Vote"` | 
Parse `VotePayload`, validate proposals exist, store vote (keyed by sender — overwrites), advance to `Voting` | `PersistState` | +| `"Commitment"` | Parse `CommitmentPayload`, validate votes exist, advance to `Committed` | `PersistAndResolve` | +| `"Message"` with payload `b"resolve"` | Legacy backward compatibility | `Resolve(b"resolve")` | +| Anything else | Ignored | `NoOp` | -**MultiRoundMode** (`src/mode/multi_round.rs`): -- Tracks participants, contributions per round, and convergence -- `on_session_start()` → parses config, returns `PersistState` with initial state -- `on_message()` → updates contributions, checks convergence, returns `PersistState` or `PersistAndResolve` +**Key behaviors:** -### 9. Runtime Kernel (src/runtime.rs) +- Proposals are stored in a `HashMap` keyed by `proposal_id`. Submitting a new proposal with the same ID overwrites the previous one. +- Votes are stored in a `HashMap` keyed by sender. If the same sender votes again, the previous vote is replaced. +- Phase transitions are one-way: `Proposal → Evaluation → Voting → Committed`. +- The `Commitment` message is the terminal message — it resolves the session. -The **Runtime** is the coordination kernel that orchestrates everything: +### MultiRoundMode (mode/multi_round.rs) + +The Multi-Round Mode implements participant-based convergence. It maintains a `MultiRoundState` serialized as JSON in `session.mode_state`. + +**Internal state:** + +```rust +pub struct MultiRoundState { + pub round: u64, + pub participants: Vec, + pub contributions: BTreeMap, // sender → value + pub convergence_type: String, // "all_equal" +} +``` + +The `BTreeMap` is used instead of `HashMap` for **deterministic serialization ordering** — this ensures that the same state always produces the same JSON bytes, enabling reliable comparison and replay. + +**`on_session_start()` flow:** + +1. Reads `session.participants` (populated from `SessionStartPayload.participants`). +2. 
If participants is empty, returns `Err(MacpError::InvalidPayload)` — multi-round mode requires at least one participant. +3. Creates initial `MultiRoundState` with `round: 0`, the participant list, empty contributions, and `convergence_type: "all_equal"`. +4. Serializes and returns `PersistState`. + +**`on_message()` flow for `Contribute` messages:** + +1. Deserializes `mode_state` into `MultiRoundState`. +2. Parses the JSON payload `{"value": ""}`. +3. Checks if the sender's value has changed: + - **New contribution** (sender not in `contributions`) → insert and increment round. + - **Changed value** (sender's previous value differs) → update and increment round. + - **Same value** (sender resubmits identical value) → update without incrementing round. +4. Checks convergence: + - All participants have contributed (every participant in the list has an entry in `contributions`). + - All contribution values are identical. +5. If converged → `PersistAndResolve` with resolution `{"converged_value": "...", "round": N, "final_values": {...}}`. +6. If not converged → `PersistState` with updated state. + +**Participant extraction note:** The mode tries to read participants from the session first. If the session's participant list is empty (which shouldn't normally happen for multi_round), the runtime attempts to extract participants from the `mode_state` as a fallback. + +--- + +## Runtime Kernel (runtime.rs) + +The `Runtime` is the coordination kernel — the central orchestrator that ties everything together. It holds the session registry, the log store, and the registered modes. 
+ +### Structure ```rust pub struct Runtime { @@ -216,184 +596,484 @@ pub struct Runtime { pub log_store: Arc, modes: HashMap>, } + +pub struct ProcessResult { + pub session_state: SessionState, + pub duplicate: bool, +} ``` -**Processing flow:** +### Mode Registration + +On construction, the runtime registers four entries: + +```rust +modes.insert("macp.mode.decision.v1", DecisionMode); +modes.insert("macp.mode.multi_round.v1", MultiRoundMode); +modes.insert("decision", DecisionMode); // backward-compatible alias +modes.insert("multi_round", MultiRoundMode); // backward-compatible alias ``` -1. Receive Envelope (from MacpServer) -2. For SessionStart: - a. Resolve mode name (empty → "decision") - b. Look up mode implementation → error if unknown - c. Parse TTL from payload - d. Acquire write lock, check for duplicate session - e. Create session log, append Incoming entry - f. Call mode.on_session_start() - g. Insert session, apply ModeResponse -3. For other messages: - a. Acquire write lock, find session - b. TTL check → if expired, log Internal entry, set Expired - c. State check → if not Open, reject - d. Append Incoming log entry - e. Call mode.on_message() - f. Apply ModeResponse -4. Return Ok/Err to MacpServer + +The `mode_names()` method returns the canonical mode names (the RFC-compliant ones, not the aliases). + +### Message Routing: process() + +The `process()` method is the main entry point. 
It inspects `envelope.message_type` and routes to the appropriate handler: + +```rust +pub async fn process(&self, env: &Envelope) -> Result<ProcessResult, MacpError> { + match env.message_type.as_str() { + "SessionStart" => self.process_session_start(env).await, + "Signal" => self.process_signal(env).await, + _ => self.process_message(env).await, + } +} ``` -**`apply_mode_response()`** is the single mutation point: -- `NoOp` → nothing -- `PersistState(s)` → `session.mode_state = s` -- `Resolve(r)` → `session.state = Resolved, session.resolution = Some(r)` -- `PersistAndResolve{s,r}` → both +### process_session_start() + +This is the most complex handler. Here is the complete flow: + +1. **Resolve mode** — Empty mode field → `"macp.mode.decision.v1"`. Look up mode in registry → `MODE_NOT_SUPPORTED` if not found. +2. **Parse payload** — Decode bytes as protobuf `SessionStartPayload` → `INVALID_ENVELOPE` if decode fails. +3. **Validate TTL** — Extract `ttl_ms`, validate range → `INVALID_ENVELOPE` if out of range. +4. **Compute TTL expiry** — `current_time + ttl_ms`. +5. **Acquire write lock** on session registry. +6. **Check for duplicate session:** + - If session exists and `message_id` is in `seen_message_ids` → return `ProcessResult { state, duplicate: true }`. + - If session exists with different `message_id` → return `Err(MacpError::DuplicateSession)`. +7. **Create session log** — `log_store.create_session_log(session_id)`. +8. **Log incoming entry** — Append `Incoming` entry with the SessionStart details. +9. **Create Session object** — state=Open, computed TTL expiry, participants, version metadata, `message_id` in `seen_message_ids`. +10. **Call mode.on_session_start()** — Mode may return `PersistState` with initial state. +11. **Apply ModeResponse** — Mutate session according to the response. +12. **Insert session** into registry. +13. **Return ProcessResult** — state=Open (or Resolved if mode immediately resolved), duplicate=false. 
+ +### process_message() + +Handles all non-SessionStart, non-Signal messages: + +1. **Acquire write lock** on session registry. +2. **Find session** → `SESSION_NOT_FOUND` if not found. +3. **Deduplication check** — If `message_id` in `seen_message_ids` → return `ProcessResult { state, duplicate: true }`. +4. **TTL check** — If session is Open and `now > ttl_expiry`: + - Log internal `TtlExpired` entry. + - Transition session to `Expired`. + - Return `Err(MacpError::TtlExpired)`. +5. **State check** — If session is not `Open` → `Err(MacpError::SessionNotOpen)`. +6. **Participant check** — If `participants` is non-empty and `sender` not in list → `Err(MacpError::InvalidEnvelope)`. +7. **Record message_id** in `seen_message_ids`. +8. **Log incoming entry**. +9. **Look up mode** → `MODE_NOT_SUPPORTED` if not found (should not happen for valid sessions). +10. **Participant extraction fallback** — For multi_round mode, if session participants is empty, try to extract from mode_state. +11. **Call mode.on_message()**. +12. **Apply ModeResponse**. +13. **Return ProcessResult** with current session state. + +### process_signal() + +Signal handling is deliberately simple: + +1. Accept the signal (no session lookup, no state mutation). +2. Return `ProcessResult { state: Open, duplicate: false }`. + +### apply_mode_response() + +The single mutation point for all session state changes: -### 10. 
MacpServer (src/server.rs) +```rust +fn apply_mode_response(session: &mut Session, response: ModeResponse) { + match response { + ModeResponse::NoOp => {} + ModeResponse::PersistState(s) => { + session.mode_state = s; + } + ModeResponse::Resolve(r) => { + session.state = SessionState::Resolved; + session.resolution = Some(r); + } + ModeResponse::PersistAndResolve { state, resolution } => { + session.mode_state = state; + session.state = SessionState::Resolved; + session.resolution = Some(resolution); + } + } +} +``` + +### cancel_session() + +Session cancellation flow: -The **MacpServer** is now a thin gRPC adapter: +1. **Acquire write lock** on session registry. +2. **Find session** → `Err(MacpError::UnknownSession)` if not found. +3. **Check state:** + - If already `Resolved` or `Expired` → idempotent, return `Ok(())`. + - If `Open` → log internal `SessionCancel` entry, transition to `Expired`. +4. **Return `Ok(())`**. -**Responsibilities:** -1. Validate transport-level fields (version, required fields) -2. Delegate to `Runtime::process()` -3. Convert results to `Ack` responses -4. Handle `GetSession` queries +--- -All coordination logic lives in the Runtime and Mode implementations. +## gRPC Server Adapter (server.rs) -## Data Flow Example +The `MacpServer` struct implements the `MACPRuntimeService` gRPC trait generated by tonic. It is a thin adapter layer that translates between gRPC types and the runtime's internal types. 
-Let's trace what happens when a client sends a multi-round convergence message: +### Structure -### Step 1: Client sends SessionStart +```rust +pub struct MacpServer { + runtime: Arc<Runtime>, +} ``` -Client → gRPC → MacpServer::send_message() - → validate() - → Runtime::process() - → resolve mode = "multi_round" - → parse TTL - → create session log - → MultiRoundMode::on_session_start() - → PersistState(initial_state) - → insert session with mode_state - ← Ack(accepted=true) + +### Key Methods + +**`validate(env: &Envelope)`** — Transport-level validation: +- Checks `macp_version == "1.0"` → `UNSUPPORTED_PROTOCOL_VERSION`. +- For non-Signal messages: checks `session_id` and `message_id` are non-empty → `INVALID_ENVELOPE`. +- For Signal messages: checks only `message_id` is non-empty. + +**`session_state_to_pb(state: &SessionState) -> i32`** — Maps internal session state to protobuf enum values. + +**`make_error_ack(err: &MacpError, env: &Envelope) -> Ack`** — Constructs a structured `Ack` with: +- `ok: false` +- `message_id` and `session_id` from the envelope +- `accepted_at_unix_ms` set to current time +- `error` containing a `MACPError` with the RFC error code and the error's display string + +### RPC Implementations + +| RPC | Behavior | +|-----|----------| +| **Initialize** | Checks `supported_protocol_versions` for `"1.0"`, returns `RuntimeInfo`, `Capabilities`, supported modes, and instructions. Returns `INVALID_ARGUMENT` gRPC status if no supported version. | +| **Send** | Validates envelope, delegates to `runtime.process()`, constructs `Ack` with ok/duplicate/session_state/error. All protocol errors are in-band (gRPC status is always OK). | +| **GetSession** | Looks up session in registry, returns `SessionMetadata` with state, timestamps, and version fields. Returns `NOT_FOUND` gRPC status if session doesn't exist. | +| **CancelSession** | Delegates to `runtime.cancel_session()`, returns `Ack`. 
| +| **GetManifest** | Returns `AgentManifest` with runtime identity and supported modes. | +| **ListModes** | Returns two `ModeDescriptor` entries (decision and multi_round) with message types, determinism class, and participant model. | +| **StreamSession** | Bidirectional streaming — processes each incoming envelope and echoes an envelope with updated message_type. | +| **ListRoots** | Returns empty roots list. | +| **WatchModeRegistry** | Returns `UNIMPLEMENTED`. | +| **WatchRoots** | Returns `UNIMPLEMENTED`. | + +--- + +## Data Flow: Complete Message Processing + +Let's trace the complete path of a `Vote` message through the system: + +``` +1. Client sends SendRequest { envelope: Vote message } + │ + ▼ +2. MacpServer::send() receives the gRPC request + │ + ▼ +3. MacpServer::validate(&envelope) + - Checks macp_version == "1.0" ✓ + - Checks session_id non-empty ✓ + - Checks message_id non-empty ✓ + │ + ▼ +4. runtime.process(&envelope) + │ + ▼ +5. message_type == "Vote" → process_message() + │ + ▼ +6. Acquire write lock on session registry + │ + ▼ +7. Find session by session_id ✓ found + │ + ▼ +8. Check seen_message_ids for message_id ✓ not a duplicate + │ + ▼ +9. Check TTL: now < ttl_expiry ✓ not expired + │ + ▼ +10. Check state == Open ✓ + │ + ▼ +11. Check sender in participants ✓ (or empty list) + │ + ▼ +12. Add message_id to seen_message_ids + │ + ▼ +13. log_store.append(session_id, Incoming entry) + │ + ▼ +14. Look up mode by session.mode → DecisionMode + │ + ▼ +15. DecisionMode::on_message(&session, &envelope) + - Deserialize mode_state → DecisionState + - Parse payload as VotePayload + - Validate: proposals exist, phase != Committed + - Store vote in state.votes (keyed by sender) + - Advance phase to Voting + - Serialize updated state + - Return PersistState(serialized_state) + │ + ▼ +16. apply_mode_response(&mut session, PersistState(bytes)) + - session.mode_state = bytes + │ + ▼ +17. Return ProcessResult { state: Open, duplicate: false } + │ + ▼ +18. 
MacpServer builds Ack { + ok: true, + duplicate: false, + message_id: "...", + session_id: "...", + accepted_at_unix_ms: , + session_state: SESSION_STATE_OPEN, + error: None + } + │ + ▼ +19. Client receives SendResponse { ack } ``` -### Step 2: Client sends Contribute +--- + +## Data Flow: Session Cancellation + ``` -Client → gRPC → MacpServer::send_message() - → validate() - → Runtime::process() - → find session, check TTL, check Open - → append Incoming log entry - → MultiRoundMode::on_message() - → update contributions, check convergence - → PersistState(updated_state) or PersistAndResolve - → apply response - ← Ack(accepted=true) +1. Client sends CancelSessionRequest { session_id, reason } + │ + ▼ +2. MacpServer::cancel_session() receives the gRPC request + │ + ▼ +3. runtime.cancel_session(session_id, reason) + │ + ▼ +4. Acquire write lock on session registry + │ + ▼ +5. Find session by session_id + - Not found → return Err(UnknownSession) + - Found, state == Resolved or Expired → idempotent, return Ok + - Found, state == Open → continue + │ + ▼ +6. log_store.append(session_id, Internal { + message_id: "__session_cancel__", + message_type: "SessionCancel", + sender: "", + raw_payload: reason.as_bytes() + }) + │ + ▼ +7. session.state = Expired + │ + ▼ +8. Return Ok(()) + │ + ▼ +9. MacpServer builds Ack { ok: true, ... } ``` -### Step 3: Client queries state +--- + +## Data Flow: Initialize Handshake + ``` -Client → gRPC → MacpServer::get_session() - → registry.get_session(id) - ← SessionInfo(state, mode, resolution, ...) +1. Client sends InitializeRequest { + supported_protocol_versions: ["1.0"], + client_info: { name: "my-agent", ... }, + capabilities: { ... } + } + │ + ▼ +2. MacpServer::initialize() receives the request + │ + ▼ +3. Check if "1.0" is in supported_protocol_versions + - Not found → return INVALID_ARGUMENT gRPC status + - Found → continue + │ + ▼ +4. 
Build InitializeResponse { + selected_protocol_version: "1.0", + runtime_info: { name: "macp-runtime", version: "0.2.0", ... }, + capabilities: { sessions, cancellation, progress, manifest, ... }, + supported_modes: ["macp.mode.decision.v1", "macp.mode.multi_round.v1"], + instructions: "MACP Runtime v0.2 ..." + } + │ + ▼ +5. Client receives the response, caches capabilities and modes ``` +--- + ## Concurrency Model -**Question:** What happens if two clients send messages at the same time? +**Question:** What happens when two clients send messages to the same session simultaneously? -**Answer:** They're handled safely: -1. Both enter `send_message()` simultaneously -2. Both validate independently (no conflicts here) -3. First one acquires the write lock -4. First one processes through the mode, modifies the registry -5. First one releases the lock -6. Second one acquires the write lock -7. Second one processes through the mode, modifies the registry -8. Second one releases the lock +**Answer:** They are serialized safely through the `RwLock`: -The `RwLock` ensures they don't interfere with each other. +1. Both gRPC handlers enter `send()` simultaneously. +2. Both call `validate()` independently — no shared state here. +3. Both call `runtime.process()` which attempts to acquire the write lock. +4. **First request** acquires the write lock. +5. First request processes through the full pipeline (dedup check, TTL check, state check, participant check, logging, mode dispatch, apply response). +6. First request releases the write lock. +7. **Second request** acquires the write lock. +8. Second request processes through the same pipeline, but now sees the state changes from the first request (e.g., if the first request resolved the session, the second will get `SessionNotOpen`). + +**Read concurrency:** The `RwLock` allows multiple concurrent readers. `GetSession` calls acquire read locks and can execute simultaneously without blocking each other. 
+ +**Write serialization:** All write operations (SessionStart, regular messages, CancelSession) acquire exclusive write locks. This ensures atomicity but means that high write throughput to the same registry will be serialized. In practice, this is acceptable for the coordination use case where message rates are modest. + +**No background tasks:** There are no background cleanup threads. Expired sessions remain in memory until the server restarts. This is a deliberate simplification — the coordination use case typically involves a bounded number of sessions, and memory pressure from expired sessions is negligible. + +--- ## File Structure ``` runtime/ ├── proto/ -│ └── macp.proto # Protocol definition (Envelope, Ack, SessionQuery, SessionInfo) +│ ├── buf.yaml # Buf linter configuration +│ └── macp/ +│ ├── v1/ +│ │ ├── envelope.proto # Envelope, Ack, MACPError, SessionState +│ │ └── core.proto # Service + all message types +│ └── modes/ +│ └── decision/ +│ └── v1/ +│ └── decision.proto # Decision mode payload types ├── src/ -│ ├── main.rs # Entry point, wires up Runtime + gRPC server -│ ├── lib.rs # Shared library (pb module + public module exports) -│ ├── server.rs # Thin gRPC adapter delegating to Runtime -│ ├── error.rs # MacpError enum (all error variants) -│ ├── session.rs # Session, SessionState, TTL parsing -│ ├── registry.rs # SessionRegistry (thread-safe session store) -│ ├── log_store.rs # Append-only LogStore for session event logs -│ ├── runtime.rs # Runtime kernel (dispatch + apply ModeResponse) +│ ├── main.rs # Entry point — server startup +│ ├── lib.rs # Library root — proto modules + exports +│ ├── server.rs # gRPC adapter (MacpRuntimeService) +│ ├── error.rs # MacpError enum + RFC error codes +│ ├── session.rs # Session struct, SessionState, TTL parsing +│ ├── registry.rs # SessionRegistry (RwLock) +│ ├── log_store.rs # Append-only LogStore +│ ├── runtime.rs # Runtime kernel │ ├── mode/ -│ │ ├── mod.rs # Mode trait + ModeResponse enum -│ │ ├── 
decision.rs # DecisionMode (payload=="resolve" → Resolve) -│ │ └── multi_round.rs # MultiRoundMode (convergence-based resolution) +│ │ ├── mod.rs # Mode trait + ModeResponse +│ │ ├── decision.rs # DecisionMode (RFC lifecycle) +│ │ └── multi_round.rs # MultiRoundMode (convergence) │ └── bin/ -│ ├── client.rs # Test client (happy path) -│ ├── fuzz_client.rs # Test client (error paths + multi-round) -│ └── multi_round_client.rs # Multi-round convergence demo -├── build.rs # Generates code from .proto -├── Cargo.toml # Dependencies and project config -└── target/ # Build output (binaries, generated code) +│ ├── client.rs # Basic demo client +│ ├── fuzz_client.rs # Comprehensive error-path client +│ └── multi_round_client.rs # Multi-round convergence demo +├── build.rs # tonic-build proto compilation +├── Cargo.toml # Dependencies +├── Makefile # Development shortcuts +├── .githooks/ +│ └── pre-commit # Pre-commit hook (fmt + clippy) +├── .github/ +│ ├── workflows/ +│ │ └── ci.yml # CI/CD pipeline +│ ├── ISSUE_TEMPLATE/ +│ │ ├── bug_report.yml # Bug report template +│ │ └── rfc_proposal.yml # RFC proposal template +│ └── PULL_REQUEST_TEMPLATE.md # PR template +└── docs/ + ├── README.md # Getting started guide + ├── protocol.md # Protocol specification + ├── architecture.md # This document + └── examples.md # Usage examples ``` +--- + ## Build Process -1. `build.rs` runs first - - Reads `proto/macp.proto` - - Generates Rust code in `target/debug/build/*/out/macp.v1.rs` +1. **`build.rs` runs first** — reads the three `.proto` files from the `proto/` directory and generates Rust code via `tonic-build`. The generated code appears in `target/debug/build/macp-runtime-*/out/`. + +2. **Rust compiler compiles:** + - `src/lib.rs` — the library crate with all modules. + - `src/main.rs` — the server binary. + - `src/bin/client.rs` — the basic demo client. + - `src/bin/fuzz_client.rs` — the comprehensive test client. + - `src/bin/multi_round_client.rs` — the convergence demo client. 
-2. Rust compiler compiles: - - `src/lib.rs` (library with all modules) - - `src/main.rs` (server binary) - - `src/bin/client.rs` (client binary) - - `src/bin/fuzz_client.rs` (fuzz binary) - - `src/bin/multi_round_client.rs` (multi-round demo binary) +3. **Output binaries:** + - `target/debug/macp-runtime` — the server. + - `target/debug/client` — the basic client. + - `target/debug/fuzz_client` — the test client. + - `target/debug/multi_round_client` — the convergence demo. -3. Output binaries: - - `target/debug/macp-runtime` (server) - - `target/debug/client` - - `target/debug/fuzz_client` - - `target/debug/multi_round_client` +--- + +## CI/CD Pipeline + +The GitHub Actions workflow (`.github/workflows/ci.yml`) runs on every push and pull request: + +1. **Checkout** — fetches the code. +2. **Install protoc** — installs the Protocol Buffers compiler. +3. **Install Buf** — installs the Buf CLI for proto linting. +4. **Buf lint** — lints the proto files against the `STANDARD` rules. +5. **Cargo fmt** — checks that all code is formatted. +6. **Cargo clippy** — runs the linter with `-D warnings` (warnings are errors). +7. **Cargo test** — runs the full test suite. +8. **Cargo build** — verifies the project compiles cleanly. + +--- ## Design Principles ### 1. Separation of Concerns -- **Protocol definition** (`.proto`) separate from implementation -- **State management** (`SessionRegistry`) separate from coordination logic -- **Mode logic** separate from runtime kernel -- **Validation** happens before state mutation -- **Logging** happens before mode dispatch + +Each layer has exactly one job: +- **Proto schema** — defines the wire format. +- **MacpServer** — handles gRPC transport concerns. +- **Runtime** — enforces protocol invariants. +- **Modes** — provide coordination logic. +- **Registry/LogStore** — provide storage. ### 2. 
Pluggable Coordination -- Runtime provides "physics" (invariants, TTL, logging, routing) -- Modes provide "coordination logic" (when to resolve, what state to track) -- New modes can be added without modifying the runtime kernel - -### 3. Fail-Safe -- Invalid messages are rejected, not ignored -- No partial state updates (atomic operations via single mutation point) -- Errors are explicit, not silent - -### 4. Minimal Coordination -- Server doesn't interpret payloads (except through Mode implementations) -- Sessions are independent (no cross-session coordination) -- Modes receive immutable state and return responses - -### 5. Structural Invariants -The system enforces structural rules: -- Can't start a session twice -- Can't send to non-existent session -- Can't send to resolved/expired session -- Must use correct version -- Must reference registered mode + +The runtime provides "physics" (invariants, TTL, logging, routing, deduplication, participant validation). Modes provide "coordination logic" (when to resolve, what state to track). New modes can be added by implementing the `Mode` trait and registering them in the runtime — no changes to the kernel or transport layer required. + +### 3. Fail-Safe Design + +- Invalid messages are rejected, never ignored. +- No partial state updates — `apply_mode_response()` is atomic. +- Errors are explicit and structured — every failure has an RFC error code. +- Validation occurs before state mutation — the system never enters an inconsistent state. +- Log-before-mutate ordering — events are logged before the mode processes them. + +### 4. Idempotent Operations + +- Duplicate messages (same `message_id`) are safely handled as no-ops. +- Duplicate SessionStart with the same `message_id` returns success without re-creating. +- CancelSession on an already-terminal session is idempotent. + +### 5. Minimal Coordination + +- The runtime does not interpret payload contents (that's the mode's job). 
+- Sessions are independent — no cross-session coordination. +- Modes receive immutable state and return declarative responses. +- The server is stateless except for the in-memory registry and log store. + +### 6. Structural Invariants + +The system enforces protocol-level rules that cannot be violated: +- Cannot start a session twice (with a different message_id). +- Cannot send to a non-existent session. +- Cannot send to a resolved or expired session. +- Must use the correct protocol version. +- Must reference a registered mode. +- Must be a listed participant (if participant list is configured). These are **protocol-level** invariants, not domain-specific business rules. +--- + ## Next Steps -- Read [protocol.md](./protocol.md) for the full protocol specification -- Read [examples.md](./examples.md) for practical usage examples +- Read **[protocol.md](./protocol.md)** for the full protocol specification. +- Read **[examples.md](./examples.md)** for practical usage examples with the new v0.2 RPCs. diff --git a/docs/examples.md b/docs/examples.md index 872db9d..27a78b0 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -1,6 +1,27 @@ # Examples and Usage -This document provides step-by-step examples of using the MACP Runtime. Even if you don't know Rust, you can follow along and understand what's happening. +This document provides step-by-step examples of using the MACP Runtime v0.2. It covers the full lifecycle — from protocol handshake to session creation, decision-making, convergence, cancellation, and error handling — with detailed explanations of what happens at each step. + +--- + +## Table of Contents + +1. [Quick Start](#quick-start) +2. [Example 1: Basic Decision Mode Client](#example-1-basic-decision-mode-client) +3. [Example 2: Full Decision Mode Lifecycle](#example-2-full-decision-mode-lifecycle) +4. [Example 3: Multi-Round Convergence](#example-3-multi-round-convergence) +5. 
[Example 4: Fuzz Client — Testing Every Error Path](#example-4-fuzz-client--testing-every-error-path) +6. [Example 5: Using the New RPCs](#example-5-using-the-new-rpcs) +7. [Example 6: Session Cancellation](#example-6-session-cancellation) +8. [Example 7: Message Deduplication](#example-7-message-deduplication) +9. [Example 8: Participant Validation](#example-8-participant-validation) +10. [Example 9: Signal Messages](#example-9-signal-messages) +11. [Example 10: Session with Custom TTL](#example-10-session-with-custom-ttl) +12. [Example 11: Multi-Agent Scenario](#example-11-multi-agent-scenario) +13. [Common Patterns](#common-patterns) +14. [Common Questions](#common-questions) + +--- ## Quick Start @@ -14,52 +35,87 @@ cargo run You should see: ``` -macp-runtime v0.1 listening on 127.0.0.1:50051 +macp-runtime v0.2 (RFC-0001) listening on 127.0.0.1:50051 ``` -The server is now ready to accept connections. +The server is now ready to accept connections on port 50051. -### Running the Test Client +### Running the Test Clients -**Terminal 2:** +**Terminal 2** — Basic demo: ```bash cargo run --bin client ``` -You should see output like: +**Terminal 2** — Comprehensive error testing: +```bash +cargo run --bin fuzz_client ``` -SessionStart ack: accepted=true error='' -Message ack: accepted=true error='' -Resolve ack: accepted=true error='' -After-resolve ack: accepted=false error='SessionNotOpen' + +**Terminal 2** — Multi-round convergence: +```bash +cargo run --bin multi_round_client ``` -**What happened?** -1. Client created a session (decision mode) -2. Client sent a normal message -3. Client sent a "resolve" message (session transitions to Resolved) -4. Client tried to send another message (rejected because session is Resolved) +--- -## Example 1: Basic Client Walkthrough +## Example 1: Basic Decision Mode Client -Let's walk through the client code step by step (src/bin/client.rs). 
+The basic client (`src/bin/client.rs`) demonstrates the core happy path: Initialize, ListModes, SessionStart, Message, Resolve, post-resolve rejection, and GetSession. ### Step 1: Connect to the Server ```rust -let mut client = MacpServiceClient::connect("http://127.0.0.1:50051").await?; +let mut client = MacpRuntimeServiceClient::connect("http://127.0.0.1:50051").await?; ``` -**What this does:** -- Creates a gRPC client -- Connects to the server at `127.0.0.1:50051` -- If server isn't running, this will fail +This creates a gRPC client and connects to the runtime. If the server isn't running, this will fail with a connection error. -### Step 2: Create an Envelope +### Step 2: Initialize — Negotiate Protocol Version + +```rust +let init_resp = client + .initialize(InitializeRequest { + supported_protocol_versions: vec!["1.0".into()], + client_info: None, + capabilities: None, + }) + .await? + .into_inner(); +println!( + "Initialize: version={} runtime={}", + init_resp.selected_protocol_version, + init_resp.runtime_info.as_ref().map(|r| r.name.as_str()).unwrap_or("?") +); +``` + +**What happens:** The client proposes protocol version `"1.0"`. The server confirms it and returns runtime info (name, version), capabilities, and the list of supported modes. 
+ +**Expected output:** +``` +Initialize: version=1.0 runtime=macp-runtime +``` + +### Step 3: Discover Available Modes + +```rust +let modes_resp = client.list_modes(ListModesRequest {}).await?.into_inner(); +println!( + "ListModes: {:?}", + modes_resp.modes.iter().map(|m| &m.mode).collect::<Vec<_>>() +); +``` + +**Expected output:** +``` +ListModes: ["macp.mode.decision.v1", "macp.mode.multi_round.v1"] +``` + +### Step 4: Create a Session (SessionStart) ```rust let start = Envelope { - macp_version: "v1".into(), + macp_version: "1.0".into(), mode: "decision".into(), message_type: "SessionStart".into(), message_id: "m1".into(), @@ -68,56 +124,252 @@ let start = Envelope { timestamp_unix_ms: 1_700_000_000_000, payload: vec![], }; + +let ack = client + .send(SendRequest { envelope: Some(start) }) + .await? + .into_inner() + .ack + .unwrap(); +println!("SessionStart ack: ok={} error={:?}", ack.ok, ack.error.as_ref().map(|e| &e.code)); ``` **What each field means:** -- `macp_version`: Protocol version (must be "v1") -- `mode`: Coordination mode ("decision" for simple resolve, "multi_round" for convergence) -- `message_type`: "SessionStart" to create a session -- `message_id`: Unique ID for this message ("m1") -- `session_id`: Which session ("s1") -- `sender`: Who's sending it ("ajit") -- `timestamp_unix_ms`: When sent (Unix timestamp in milliseconds) -- `payload`: Message content (empty for basic SessionStart) +- `macp_version: "1.0"` — Protocol version (must be exactly `"1.0"`). +- `mode: "decision"` — Use the Decision Mode (alias for `"macp.mode.decision.v1"`). +- `message_type: "SessionStart"` — This creates a new session. +- `message_id: "m1"` — Unique ID for this message. +- `session_id: "s1"` — The session ID we're creating. +- `sender: "ajit"` — Who is sending this message. +- `payload: vec![]` — Empty payload means default TTL (60s), no participants.
-### Step 3: Send and Receive Ack +**Expected output:** +``` +SessionStart ack: ok=true error=None +``` + +### Step 5: Send a Normal Message ```rust -let ack = client.send_message(start).await?.into_inner(); -println!("SessionStart ack: accepted={} error='{}'", ack.accepted, ack.error); +let msg = Envelope { + macp_version: "1.0".into(), + mode: "decision".into(), + message_type: "Message".into(), + message_id: "m2".into(), + session_id: "s1".into(), + sender: "ajit".into(), + timestamp_unix_ms: 1_700_000_000_001, + payload: b"hello".to_vec(), +}; ``` -### Step 4: Resolve the Session +In the Decision Mode, a `Message` with a non-`"resolve"` payload returns `NoOp` — the message is accepted but produces no state change. This is the backward-compatible behavior from v0.1. + +**Expected output:** +``` +Message ack: ok=true error=None +``` + +### Step 6: Resolve the Session (Legacy) ```rust let resolve = Envelope { // ... - payload: b"resolve".to_vec(), // Magic payload for decision mode + message_type: "Message".into(), + message_id: "m3".into(), + payload: b"resolve".to_vec(), // Legacy resolve trigger +}; +``` + +When the Decision Mode sees `message_type: "Message"` with `payload == b"resolve"`, it resolves the session immediately. This is the backward-compatible resolution mechanism from v0.1. + +**Expected output:** +``` +Resolve ack: ok=true error=None +``` + +### Step 7: Attempt Message After Resolution + +```rust +let after = Envelope { + // ... + message_type: "Message".into(), + message_id: "m4".into(), + payload: b"should-fail".to_vec(), +}; +``` + +The session is now `Resolved` (terminal state). Any further message is rejected with `SESSION_NOT_OPEN`. + +**Expected output:** +``` +After-resolve ack: ok=false error=Some("SESSION_NOT_OPEN") +``` + +### Step 8: Verify State with GetSession + +```rust +let resp = client + .get_session(GetSessionRequest { session_id: "s1".into() }) + .await? 
+ .into_inner(); +let meta = resp.metadata.unwrap(); +println!("GetSession: state={} mode={}", meta.state, meta.mode); +``` + +**Expected output:** +``` +GetSession: state=2 mode=decision +``` + +(State `2` is `SESSION_STATE_RESOLVED` in the protobuf enum.) + +--- + +## Example 2: Full Decision Mode Lifecycle + +The Decision Mode in v0.2 supports a rich lifecycle: Proposal, Evaluation, Objection, Vote, and Commitment. Here is how a complete decision process flows: + +### Step 1: Create a Session + +```rust +let start = Envelope { + macp_version: "1.0".into(), + mode: "macp.mode.decision.v1".into(), // RFC-compliant name + message_type: "SessionStart".into(), + message_id: uuid::Uuid::new_v4().to_string(), + session_id: "decision-001".into(), + sender: "coordinator".into(), + timestamp_unix_ms: chrono::Utc::now().timestamp_millis(), + payload: vec![], +}; +``` + +### Step 2: Submit a Proposal + +```rust +let proposal = Envelope { + macp_version: "1.0".into(), + mode: "macp.mode.decision.v1".into(), + message_type: "Proposal".into(), + message_id: uuid::Uuid::new_v4().to_string(), + session_id: "decision-001".into(), + sender: "agent-alpha".into(), + timestamp_unix_ms: chrono::Utc::now().timestamp_millis(), + payload: serde_json::to_vec(&serde_json::json!({ + "proposal_id": "p1", + "option": "Deploy v2.1 to production", + "rationale": "All integration tests pass, staging validation complete", + "supporting_data": "" + })).unwrap(), +}; +``` + +After this message, the Decision Mode's phase advances from `Proposal` to `Evaluation`. + +### Step 3: Submit an Evaluation + +```rust +let evaluation = Envelope { + // ... + message_type: "Evaluation".into(), + sender: "agent-beta".into(), + payload: serde_json::to_vec(&serde_json::json!({ + "proposal_id": "p1", + "recommendation": "APPROVE", + "confidence": 0.92, + "reason": "Performance metrics look excellent" + })).unwrap(), +}; +``` + +Evaluations are appended to the state — multiple agents can evaluate the same proposal. 
+ +### Step 4: Raise an Objection (Optional) + +```rust +let objection = Envelope { + // ... + message_type: "Objection".into(), + sender: "agent-gamma".into(), + payload: serde_json::to_vec(&serde_json::json!({ + "proposal_id": "p1", + "reason": "Security audit pending for the new auth module", + "severity": "medium" + })).unwrap(), }; ``` -In decision mode, payload `"resolve"` triggers session resolution. +Objections are recorded but do not block the decision process — they are informational. -## Example 2: Multi-Round Convergence +### Step 5: Cast Votes -The multi-round mode enables participant-based convergence. Run the demo: +```rust +let vote = Envelope { + // ... + message_type: "Vote".into(), + sender: "agent-alpha".into(), + payload: serde_json::to_vec(&serde_json::json!({ + "proposal_id": "p1", + "vote": "approve", + "reason": "Objection addressed in patch v2.1.1" + })).unwrap(), +}; +``` + +Votes are keyed by sender — if the same sender votes again, the previous vote is overwritten. The phase advances to `Voting`. + +### Step 6: Commit the Decision + +```rust +let commitment = Envelope { + // ... + message_type: "Commitment".into(), + sender: "coordinator".into(), + payload: serde_json::to_vec(&serde_json::json!({ + "commitment_id": "c1", + "action": "deploy-v2.1.1", + "authority_scope": "team-alpha", + "reason": "Unanimous approval with addressed objection" + })).unwrap(), +}; +``` + +The `Commitment` message finalizes the decision: +- The phase advances to `Committed`. +- The session resolves with the commitment payload as the resolution. +- No further messages are accepted. + +### Full Phase Progression + +``` +Proposal → Evaluation → Voting → Committed (resolved) +``` + +--- + +## Example 3: Multi-Round Convergence + +The multi-round client (`src/bin/multi_round_client.rs`) demonstrates participant-based convergence. 
+ +### Run the Demo -**Terminal 2:** ```bash cargo run --bin multi_round_client ``` -**Expected output:** +### Expected Output + ``` === Multi-Round Convergence Demo === -[session_start] accepted=true error='' -[alice_contributes_a] accepted=true error='' -[bob_contributes_b] accepted=true error='' -[get_session] state=Open mode=multi_round participants=["alice", "bob"] -[bob_revises_to_a] accepted=true error='' -[get_session] state=Resolved resolution={"converged_value":"option_a","round":3,"final":{"alice":"option_a","bob":"option_a"}} -[after_convergence] accepted=false error='SessionNotOpen' +[session_start] ok=true error='' +[alice_contributes_a] ok=true error='' +[bob_contributes_b] ok=true error='' +[get_session] state=1 mode=multi_round +[bob_revises_to_a] ok=true error='' +[get_session] state=2 mode_version= +[after_convergence] ok=false error='SESSION_NOT_OPEN' === Demo Complete === ``` @@ -127,371 +379,766 @@ cargo run --bin multi_round_client #### 1. Create a Multi-Round Session ```rust -let payload = serde_json::json!({ - "participants": ["alice", "bob"], - "convergence": {"type": "all_equal"}, - "ttl_ms": 60000 -}); +let start_payload = SessionStartPayload { + intent: "convergence test".into(), + ttl_ms: 60000, + participants: vec!["alice".into(), "bob".into()], + mode_version: String::new(), + configuration_version: String::new(), + policy_version: String::new(), + context: vec![], + roots: vec![], +}; let start = Envelope { - macp_version: "v1".into(), + macp_version: "1.0".into(), mode: "multi_round".into(), message_type: "SessionStart".into(), message_id: "m0".into(), session_id: "mr1".into(), sender: "coordinator".into(), - timestamp_unix_ms: Utc::now().timestamp_millis(), - payload: payload.to_string().into_bytes(), + timestamp_unix_ms: chrono::Utc::now().timestamp_millis(), + payload: start_payload.encode_to_vec(), // Protobuf-encoded }; ``` -**SessionStart payload for multi_round mode:** -- `participants`: List of participant IDs who will 
contribute -- `convergence.type`: "all_equal" — resolve when all participants submit the same value -- `ttl_ms`: Optional TTL override +**Key points:** +- The `participants` field declares that `alice` and `bob` are the expected contributors. +- The payload is protobuf-encoded (using `prost::Message::encode_to_vec()`), not JSON. +- The `intent` field provides a human-readable description. -#### 2. Submit Contributions +#### 2. Alice Contributes "option_a" ```rust let contribute = Envelope { - macp_version: "v1".into(), + macp_version: "1.0".into(), mode: "multi_round".into(), message_type: "Contribute".into(), message_id: "m1".into(), session_id: "mr1".into(), sender: "alice".into(), - timestamp_unix_ms: Utc::now().timestamp_millis(), + timestamp_unix_ms: chrono::Utc::now().timestamp_millis(), payload: br#"{"value":"option_a"}"#.to_vec(), }; ``` -Each participant sends a `Contribute` message with `{"value": "..."}` payload. +**State after:** Round 1. Contributions: `{alice: "option_a"}`. Bob hasn't contributed yet — no convergence. + +#### 3. Bob Contributes "option_b" (Divergence) + +```rust +let contribute = Envelope { + // ... + sender: "bob".into(), + payload: br#"{"value":"option_b"}"#.to_vec(), +}; +``` + +**State after:** Round 2. Contributions: `{alice: "option_a", bob: "option_b"}`. All participants have contributed but values differ — no convergence. -#### 3. Convergence +#### 4. Query Session State — Still Open + +```rust +let resp = client.get_session(GetSessionRequest { session_id: "mr1".into() }).await?; +// state=1 (SESSION_STATE_OPEN) +``` + +#### 5. Bob Revises to "option_a" (Convergence!) + +```rust +let contribute = Envelope { + // ... + sender: "bob".into(), + payload: br#"{"value":"option_a"}"#.to_vec(), +}; +``` + +**State after:** Round 3. Contributions: `{alice: "option_a", bob: "option_a"}`. 
All participants have contributed and all values are identical — **convergence reached!** The session auto-resolves with: -When all participants have contributed and all values are identical, the session auto-resolves with: ```json { "converged_value": "option_a", "round": 3, - "final": { + "final_values": { "alice": "option_a", "bob": "option_a" } } ``` -#### 4. Revisions +#### 6. Attempt After Convergence — Rejected -Participants can revise their contributions. Each new value increments the round counter. Re-submitting the same value does not increment the round. +```rust +let contribute = Envelope { + // ... + sender: "alice".into(), + payload: br#"{"value":"option_c"}"#.to_vec(), +}; +// Ack: ok=false, error="SESSION_NOT_OPEN" +``` -## Example 3: Fuzz Client (Testing Error Paths) +The session is resolved — no further contributions are accepted. -The fuzz client tests all the ways things can go wrong, including multi-round scenarios: +### Why Round 3? -**Terminal 2:** -```bash -cargo run --bin fuzz_client +- Round 0: Session starts with no contributions. +- Round 1: Alice contributes "option_a" (new contribution). +- Round 2: Bob contributes "option_b" (new contribution). +- Round 3: Bob revises to "option_a" (value changed from "option_b"). + +Re-submitting the same value does **not** increment the round. Only substantive changes count. + +--- + +## Example 4: Fuzz Client — Testing Every Error Path + +The fuzz client (`src/bin/fuzz_client.rs`) is a comprehensive test that exercises every error code, every new RPC, and every edge case. Here's what it tests and what each test proves: + +### Expected Output + +``` +[initialize] version=1.0 +[initialize_bad_version] error: ...INVALID_ARGUMENT... 
+[wrong_version] ok=false duplicate=false error='UNSUPPORTED_PROTOCOL_VERSION' +[missing_fields] ok=false duplicate=false error='INVALID_ENVELOPE' +[unknown_session_message] ok=false duplicate=false error='SESSION_NOT_FOUND' +[session_start_ok] ok=true duplicate=false error='' +[session_start_duplicate] ok=false duplicate=false error='INVALID_ENVELOPE' +[session_start_idempotent] ok=true duplicate=true error='' +[message_ok] ok=true duplicate=false error='' +[message_duplicate] ok=true duplicate=true error='' +[resolve] ok=true duplicate=false error='' +[after_resolve] ok=false duplicate=false error='SESSION_NOT_OPEN' +[ttl_session_start] ok=true duplicate=false error='' +[ttl_expired_message] ok=false duplicate=false error='SESSION_NOT_OPEN' +[invalid_ttl_negative] ok=false duplicate=false error='INVALID_ENVELOPE' +[invalid_ttl_exceeds_max] ok=false duplicate=false error='INVALID_ENVELOPE' +[multi_round_start] ok=true duplicate=false error='' +[multi_round_alice] ok=true duplicate=false error='' +[multi_round_bob_diff] ok=true duplicate=false error='' +[multi_round_bob_converge] ok=true duplicate=false error='' +[multi_round_after_resolve] ok=false duplicate=false error='SESSION_NOT_OPEN' +[cancel_session_start] ok=true duplicate=false error='' +[cancel_session] ok=true +[after_cancel] ok=false duplicate=false error='SESSION_NOT_OPEN' +[participant_session_start] ok=true duplicate=false error='' +[unauthorized_sender] ok=false duplicate=false error='INVALID_ENVELOPE' +[authorized_sender] ok=true duplicate=false error='' +[signal] ok=true duplicate=false error='' +[get_session] state=2 mode=decision +[list_modes] count=2 modes=["macp.mode.decision.v1", "macp.mode.multi_round.v1"] +[get_manifest] agent_id=macp-runtime modes=["macp.mode.decision.v1", "macp.mode.multi_round.v1"] +[list_roots] count=0 ``` -**Expected output:** +### What Each Test Proves + +| Test | What It Proves | +|------|---------------| +| `initialize` | Protocol handshake with version "1.0" 
succeeds | +| `initialize_bad_version` | Unsupported version "2.0" returns gRPC INVALID_ARGUMENT | +| `wrong_version` | Envelope with `macp_version: "v0"` is rejected | +| `missing_fields` | Empty `message_id` is rejected | +| `unknown_session_message` | Message to non-existent session returns SESSION_NOT_FOUND | +| `session_start_ok` | Valid SessionStart creates a session | +| `session_start_duplicate` | Second SessionStart with different message_id is rejected | +| `session_start_idempotent` | Same SessionStart with same message_id is idempotent (duplicate=true) | +| `message_ok` | Valid message to open session succeeds | +| `message_duplicate` | Same message_id is idempotent (duplicate=true) | +| `resolve` | Legacy `payload="resolve"` resolves the session | +| `after_resolve` | Message to resolved session returns SESSION_NOT_OPEN | +| `ttl_session_start` | Session with 1-second TTL is created | +| `ttl_expired_message` | Message after TTL expiry returns SESSION_NOT_OPEN | +| `invalid_ttl_negative` | Negative TTL is rejected | +| `invalid_ttl_exceeds_max` | TTL > 24h is rejected | +| `multi_round_*` | Full multi-round convergence cycle works | +| `cancel_session` | CancelSession transitions session to Expired | +| `after_cancel` | Message to cancelled session returns SESSION_NOT_OPEN | +| `participant_*` | Unauthorized sender is rejected, authorized sender succeeds | +| `signal` | Signal message with empty session_id succeeds | +| `get_session` | GetSession returns correct state | +| `list_modes` | ListModes returns both registered modes | +| `get_manifest` | GetManifest returns runtime identity and modes | +| `list_roots` | ListRoots returns empty list | + +--- + +## Example 5: Using the New RPCs + +### Initialize + +```rust +let init_resp = client + .initialize(InitializeRequest { + supported_protocol_versions: vec!["1.0".into()], + client_info: Some(ClientInfo { + name: "my-agent".into(), + title: "My Coordination Agent".into(), + version: "1.0.0".into(), + 
description: "An agent that coordinates deployments".into(), + website_url: String::new(), + }), + capabilities: None, + }) + .await? + .into_inner(); + +println!("Selected version: {}", init_resp.selected_protocol_version); +println!("Runtime: {:?}", init_resp.runtime_info); +println!("Supported modes: {:?}", init_resp.supported_modes); ``` -[wrong_version] accepted=false error='InvalidMacpVersion' -[missing_fields] accepted=false error='InvalidEnvelope' -[unknown_session_message] accepted=false error='UnknownSession' -[session_start_ok] accepted=true error='' -[session_start_duplicate] accepted=false error='DuplicateSession' -[message_ok] accepted=true error='' -[resolve] accepted=true error='' -[after_resolve] accepted=false error='SessionNotOpen' -[ttl_session_start] accepted=true error='' -[ttl_expired_message] accepted=false error='TtlExpired' -[invalid_ttl_zero] accepted=false error='InvalidTtl' -[invalid_ttl_negative] accepted=false error='InvalidTtl' -[invalid_ttl_exceeds_max] accepted=false error='InvalidTtl' -[multi_round_start] accepted=true error='' -[multi_round_alice] accepted=true error='' -[multi_round_bob_diff] accepted=true error='' -[multi_round_bob_converge] accepted=true error='' -[multi_round_after_resolve] accepted=false error='SessionNotOpen' -``` - -The last 5 lines show the multi-round scenario: -1. Create multi-round session with alice and bob -2. Alice contributes "option_a" -3. Bob contributes "option_b" (values differ, no convergence) -4. Bob revises to "option_a" (all equal → convergence → auto-resolved) -5. 
Alice tries to contribute after resolution → `SessionNotOpen` - -## Example 4: Using GetSession - -Query session state at any time: - -```rust -use macp_runtime::pb::SessionQuery; - -let info = client.get_session(SessionQuery { - session_id: "s1".into(), -}).await?.into_inner(); -println!("Session {} is in state {}", info.session_id, info.state); -println!("Mode: {}", info.mode); -println!("Participants: {:?}", info.participants); +### ListModes -if !info.resolution.is_empty() { - println!("Resolution: {}", String::from_utf8_lossy(&info.resolution)); +```rust +let modes = client.list_modes(ListModesRequest {}).await?.into_inner().modes; +for mode in &modes { + println!("Mode: {} (v{})", mode.mode, mode.mode_version); + println!(" Title: {}", mode.title); + println!(" Message types: {:?}", mode.message_types); + println!(" Terminal types: {:?}", mode.terminal_message_types); + println!(" Determinism: {}", mode.determinism_class); + println!(" Participant model: {}", mode.participant_model); } ``` -**Response fields:** -- `session_id`, `mode`, `state` ("Open"/"Resolved"/"Expired") -- `ttl_expiry` (Unix ms timestamp) -- `resolution` (bytes, empty if not resolved) -- `mode_state` (bytes, mode-specific internal state) -- `participants` (list of participant IDs) +### GetManifest + +```rust +let manifest = client + .get_manifest(GetManifestRequest { agent_id: String::new() }) + .await? + .into_inner() + .manifest + .unwrap(); + +println!("Runtime: {} - {}", manifest.agent_id, manifest.description); +println!("Supported modes: {:?}", manifest.supported_modes); +``` -If the session doesn't exist, the RPC returns a gRPC `NOT_FOUND` status. +### GetSession -## Example 5: Common Patterns +```rust +let metadata = client + .get_session(GetSessionRequest { session_id: "s1".into() }) + .await? 
+ .into_inner() + .metadata + .unwrap(); + +println!("Session: {}", metadata.session_id); +println!("Mode: {}", metadata.mode); +println!("State: {} (1=Open, 2=Resolved, 3=Expired)", metadata.state); +println!("Started at: {}", metadata.started_at_unix_ms); +println!("Expires at: {}", metadata.expires_at_unix_ms); +println!("Mode version: {}", metadata.mode_version); +``` -### Pattern 1: Error Handling +--- -Always check the Ack: +## Example 6: Session Cancellation ```rust -let ack = client.send_message(envelope).await?.into_inner(); - -if ack.accepted { - println!("Success!"); -} else { - match ack.error.as_str() { - "InvalidMacpVersion" => println!("Use version v1"), - "InvalidEnvelope" => println!("Check required fields"), - "DuplicateSession" => println!("Session already exists"), - "UnknownSession" => println!("Session doesn't exist"), - "SessionNotOpen" => println!("Session is resolved/expired"), - "TtlExpired" => println!("Session TTL has elapsed, create a new session"), - "InvalidTtl" => println!("TTL must be 1..=86400000 ms"), - "UnknownMode" => println!("Use 'decision' or 'multi_round'"), - "InvalidModeState" => println!("Internal mode state error"), - "InvalidPayload" => println!("Check mode-specific payload format"), - _ => println!("Unknown error: {}", ack.error), - } -} +// Create a session +let start = Envelope { + macp_version: "1.0".into(), + mode: "decision".into(), + message_type: "SessionStart".into(), + message_id: "m_c1".into(), + session_id: "s_cancel".into(), + sender: "ajit".into(), + timestamp_unix_ms: chrono::Utc::now().timestamp_millis(), + payload: vec![], +}; +client.send(SendRequest { envelope: Some(start) }).await?; + +// Cancel the session +let cancel_resp = client + .cancel_session(CancelSessionRequest { + session_id: "s_cancel".into(), + reason: "User requested cancellation".into(), + }) + .await? 
+ .into_inner(); + +let ack = cancel_resp.ack.unwrap(); +println!("Cancel: ok={}", ack.ok); // ok=true + +// Try to send a message — rejected +let msg = Envelope { + // ... + session_id: "s_cancel".into(), + message_id: "m_c2".into(), + payload: b"should-fail".to_vec(), + // ... +}; +let ack = client.send(SendRequest { envelope: Some(msg) }).await?.into_inner().ack.unwrap(); +println!("After cancel: ok={} error={}", ack.ok, ack.error.unwrap().code); +// ok=false error=SESSION_NOT_OPEN ``` -### Pattern 2: Unique Message IDs +**Key behaviors:** +- Cancelling an open session transitions it to `Expired` and logs the reason. +- Cancelling an already resolved or expired session is idempotent — returns `ok: true`. +- Messages to a cancelled session are rejected with `SESSION_NOT_OPEN`. -Use UUIDs for message IDs: +--- + +## Example 7: Message Deduplication + +The runtime deduplicates messages by `message_id` within a session: ```rust -use uuid::Uuid; +// Send a message +let msg = Envelope { + macp_version: "1.0".into(), + mode: "decision".into(), + message_type: "Message".into(), + message_id: "m_dedup".into(), + session_id: "s1".into(), + sender: "ajit".into(), + timestamp_unix_ms: chrono::Utc::now().timestamp_millis(), + payload: b"hello".to_vec(), +}; -let message_id = Uuid::new_v4().to_string(); +// First send — accepted normally +let ack1 = client.send(SendRequest { envelope: Some(msg.clone()) }).await?.into_inner().ack.unwrap(); +println!("First: ok={} duplicate={}", ack1.ok, ack1.duplicate); +// ok=true duplicate=false -let envelope = Envelope { - message_id: message_id, - // ... other fields +// Second send (same message_id) — idempotent duplicate +let ack2 = client.send(SendRequest { envelope: Some(msg.clone()) }).await?.into_inner().ack.unwrap(); +println!("Second: ok={} duplicate={}", ack2.ok, ack2.duplicate); +// ok=true duplicate=true +``` + +**Why this matters:** Network retries are safe. 
If a client isn't sure whether a message was received (e.g., timeout on the response), it can safely resend with the same `message_id`. The runtime will recognize it as a duplicate and return success without re-processing. + +This also works for `SessionStart`: + +```rust +// Same SessionStart with same message_id is idempotent +let start = Envelope { + message_type: "SessionStart".into(), + message_id: "m1".into(), + session_id: "s1".into(), + // ... }; + +// If s1 was already created with message_id "m1": +let ack = client.send(SendRequest { envelope: Some(start) }).await?.into_inner().ack.unwrap(); +// ok=true, duplicate=true ``` -### Pattern 3: Current Timestamp +--- -Use the current time for timestamps: +## Example 8: Participant Validation + +Sessions can restrict which senders are allowed: ```rust -use chrono::Utc; +// Create a session with a participant list +let start_payload = SessionStartPayload { + participants: vec!["alice".into(), "bob".into()], + ttl_ms: 60000, + ..Default::default() +}; + +let start = Envelope { + macp_version: "1.0".into(), + mode: "decision".into(), + message_type: "SessionStart".into(), + message_id: "m_p1".into(), + session_id: "s_participant".into(), + sender: "alice".into(), + timestamp_unix_ms: chrono::Utc::now().timestamp_millis(), + payload: start_payload.encode_to_vec(), +}; +client.send(SendRequest { envelope: Some(start) }).await?; -let timestamp = Utc::now().timestamp_millis(); +// Charlie tries to send — REJECTED (not a participant) +let msg = Envelope { + // ... + session_id: "s_participant".into(), + sender: "charlie".into(), + message_id: "m_p2".into(), + // ... +}; +let ack = client.send(SendRequest { envelope: Some(msg) }).await?.into_inner().ack.unwrap(); +println!("Charlie: ok={}", ack.ok); // ok=false (INVALID_ENVELOPE) -let envelope = Envelope { - timestamp_unix_ms: timestamp, - // ... other fields +// Alice sends — ACCEPTED (is a participant) +let msg = Envelope { + // ... 
+ session_id: "s_participant".into(), + sender: "alice".into(), + message_id: "m_p3".into(), + // ... }; +let ack = client.send(SendRequest { envelope: Some(msg) }).await?.into_inner().ack.unwrap(); +println!("Alice: ok={}", ack.ok); // ok=true ``` -### Pattern 4: Helper Function +--- -Create a helper to build envelopes: +## Example 9: Signal Messages + +Signal messages are ambient, session-less messages: ```rust -fn create_envelope( - mode: &str, - message_type: &str, - session_id: &str, - sender: &str, - payload: &[u8], -) -> Envelope { - Envelope { - macp_version: "v1".into(), - mode: mode.into(), - message_type: message_type.into(), - message_id: Uuid::new_v4().to_string(), - session_id: session_id.into(), - sender: sender.into(), - timestamp_unix_ms: Utc::now().timestamp_millis(), - payload: payload.to_vec(), - } -} +let signal = Envelope { + macp_version: "1.0".into(), + mode: String::new(), // mode is optional for signals + message_type: "Signal".into(), + message_id: "sig1".into(), + session_id: String::new(), // session_id can be empty! + sender: "alice".into(), + timestamp_unix_ms: chrono::Utc::now().timestamp_millis(), + payload: vec![], +}; -// Usage: -let start = create_envelope("decision", "SessionStart", "s1", "alice", b""); -let msg = create_envelope("decision", "Message", "s1", "alice", b"hello"); -let contribute = create_envelope("multi_round", "Contribute", "mr1", "alice", - br#"{"value":"option_a"}"#); +let ack = client.send(SendRequest { envelope: Some(signal) }).await?.into_inner().ack.unwrap(); +println!("Signal: ok={}", ack.ok); // ok=true ``` -## Example 6: Multi-Agent Scenario +**Key points:** +- `session_id` may be empty — signals don't belong to any session. +- No session is created or modified. +- The runtime simply acknowledges receipt. +- Useful for heartbeats, coordination hints, or cross-session correlation. 
-Imagine two agents coordinating via multi-round convergence: +--- -### Coordinator starts the session +## Example 10: Session with Custom TTL ```rust -let payload = serde_json::json!({ - "participants": ["alpha", "beta"], - "convergence": {"type": "all_equal"} -}); +// Create a session with a 1-second TTL +let start_payload = SessionStartPayload { + ttl_ms: 1000, // 1 second + ..Default::default() +}; -let start = create_envelope( - "multi_round", "SessionStart", "decision-001", "coordinator", - payload.to_string().as_bytes() -); -client.send_message(start).await?; +let start = Envelope { + macp_version: "1.0".into(), + mode: "decision".into(), + message_type: "SessionStart".into(), + message_id: "m_ttl1".into(), + session_id: "s_ttl".into(), + sender: "ajit".into(), + timestamp_unix_ms: chrono::Utc::now().timestamp_millis(), + payload: start_payload.encode_to_vec(), +}; + +client.send(SendRequest { envelope: Some(start) }).await?; + +// Wait for TTL to expire +tokio::time::sleep(Duration::from_millis(1200)).await; + +// This message will be rejected — session expired +let msg = Envelope { + macp_version: "1.0".into(), + mode: "decision".into(), + message_type: "Message".into(), + message_id: "m_ttl2".into(), + session_id: "s_ttl".into(), + sender: "ajit".into(), + timestamp_unix_ms: chrono::Utc::now().timestamp_millis(), + payload: b"too-late".to_vec(), +}; + +let ack = client.send(SendRequest { envelope: Some(msg) }).await?.into_inner().ack.unwrap(); +println!("After TTL: ok={} error={}", ack.ok, ack.error.unwrap().code); +// ok=false error=SESSION_NOT_OPEN ``` -### Agent Alpha contributes +**TTL rules:** +- `ttl_ms: 0` (or absent) → default 60 seconds. +- `ttl_ms: 1` to `86,400,000` → custom TTL. +- Negative or > 24h → rejected with `INVALID_ENVELOPE`. +- TTL is enforced lazily on the next message — no background cleanup. 
+ +--- + +## Example 11: Multi-Agent Scenario + +Imagine three agents coordinating a deployment decision using the full Decision Mode lifecycle: + +### Phase 1: Setup ```rust -let contribute = create_envelope( - "multi_round", "Contribute", "decision-001", "alpha", - br#"{"value":"option_a"}"# -); -client.send_message(contribute).await?; +// Coordinator creates a decision session with participant list +let start_payload = SessionStartPayload { + intent: "Decide on v3.0 release strategy".into(), + participants: vec!["lead".into(), "security".into(), "ops".into()], + ttl_ms: 300000, // 5 minutes + ..Default::default() +}; + +// ... send SessionStart ... ``` -### Agent Beta contributes (different value) +### Phase 2: Proposal ```rust -let contribute = create_envelope( - "multi_round", "Contribute", "decision-001", "beta", - br#"{"value":"option_b"}"# -); -client.send_message(contribute).await?; -// Session still Open — values differ +// Lead agent proposes a deployment strategy +let proposal = Envelope { + message_type: "Proposal".into(), + sender: "lead".into(), + payload: serde_json::to_vec(&serde_json::json!({ + "proposal_id": "release-v3", + "option": "Blue-green deployment with 10% canary", + "rationale": "Minimizes risk while allowing quick rollback" + })).unwrap(), + // ... other fields ... +}; ``` -### Agent Beta revises +### Phase 3: Evaluation + Objection ```rust -let contribute = create_envelope( - "multi_round", "Contribute", "decision-001", "beta", - br#"{"value":"option_a"}"# -); -client.send_message(contribute).await?; -// Session auto-resolved! All participants agreed on "option_a" +// Security agent evaluates +let eval = Envelope { + message_type: "Evaluation".into(), + sender: "security".into(), + payload: serde_json::to_vec(&serde_json::json!({ + "proposal_id": "release-v3", + "recommendation": "REVIEW", + "confidence": 0.75, + "reason": "Need to verify WAF rules for new endpoints" + })).unwrap(), + // ... 
+}; + +// Security agent raises objection +let objection = Envelope { + message_type: "Objection".into(), + sender: "security".into(), + payload: serde_json::to_vec(&serde_json::json!({ + "proposal_id": "release-v3", + "reason": "New /admin endpoint lacks rate limiting", + "severity": "high" + })).unwrap(), + // ... +}; ``` -### Check resolution +### Phase 4: Voting ```rust -let info = client.get_session(SessionQuery { - session_id: "decision-001".into(), -}).await?.into_inner(); +// All agents vote after objection is addressed +for (sender, vote) in [("lead", "approve"), ("security", "approve"), ("ops", "approve")] { + let vote_msg = Envelope { + message_type: "Vote".into(), + sender: sender.into(), + payload: serde_json::to_vec(&serde_json::json!({ + "proposal_id": "release-v3", + "vote": vote, + "reason": "Objection addressed in hotfix" + })).unwrap(), + // ... + }; + client.send(SendRequest { envelope: Some(vote_msg) }).await?; +} +``` + +### Phase 5: Commitment -assert_eq!(info.state, "Resolved"); -// info.resolution contains {"converged_value":"option_a","round":3,"final":{...}} +```rust +// Lead commits the decision — session resolves +let commitment = Envelope { + message_type: "Commitment".into(), + sender: "lead".into(), + payload: serde_json::to_vec(&serde_json::json!({ + "commitment_id": "release-v3-commit", + "action": "deploy-blue-green-canary", + "authority_scope": "release-team", + "reason": "Unanimous approval after security review" + })).unwrap(), + // ... +}; ``` -## Example 7: Session with Custom TTL +The session is now `Resolved` with the commitment as the resolution. + +--- + +## Common Patterns + +### Pattern 1: Structured Error Handling + +```rust +let ack = client.send(SendRequest { envelope: Some(env) }).await?.into_inner().ack.unwrap(); + +if ack.ok { + if ack.duplicate { + println!("Idempotent duplicate — already processed"); + } else { + println!("Success! 
Session state: {:?}", ack.session_state); + } +} else { + let err = ack.error.unwrap(); + match err.code.as_str() { + "UNSUPPORTED_PROTOCOL_VERSION" => println!("Use macp_version: 1.0"), + "INVALID_ENVELOPE" => println!("Check required fields and payload format"), + "SESSION_NOT_FOUND" => println!("Session doesn't exist — send SessionStart first"), + "SESSION_NOT_OPEN" => println!("Session is resolved or expired"), + "MODE_NOT_SUPPORTED" => println!("Use a registered mode name"), + code => println!("Error {}: {}", code, err.message), + } +} +``` -You can configure a session's time-to-live by providing a JSON payload in `SessionStart`: +### Pattern 2: Protobuf-Encoded SessionStart Payload ```rust -// Start a session with a 5-second TTL +use macp_runtime::pb::SessionStartPayload; +use prost::Message; + +let payload = SessionStartPayload { + intent: "My coordination task".into(), + participants: vec!["agent-a".into(), "agent-b".into()], + ttl_ms: 120000, // 2 minutes + mode_version: "1.0.0".into(), + configuration_version: String::new(), + policy_version: String::new(), + context: vec![], + roots: vec![], +}; + let start = Envelope { - macp_version: "v1".into(), - mode: "decision".into(), - message_type: "SessionStart".into(), - message_id: "m1".into(), - session_id: "s_ttl_demo".into(), - sender: "agent-1".into(), - timestamp_unix_ms: Utc::now().timestamp_millis(), - payload: br#"{"ttl_ms": 5000}"#.to_vec(), + // ... + payload: payload.encode_to_vec(), }; +``` -let ack = client.send_message(start).await?.into_inner(); -assert!(ack.accepted); // Session created with 5s TTL +### Pattern 3: Unique Message IDs with UUIDs -// Wait for TTL to expire -tokio::time::sleep(Duration::from_secs(6)).await; +```rust +use uuid::Uuid; -// This message will be rejected with TtlExpired -let late_msg = Envelope { - // ... - payload: b"too late".to_vec(), +let envelope = Envelope { + message_id: Uuid::new_v4().to_string(), + // ... 
other fields +}; +``` + +### Pattern 4: Current Timestamp + +```rust +use chrono::Utc; + +let envelope = Envelope { + timestamp_unix_ms: Utc::now().timestamp_millis(), + // ... other fields }; +``` + +### Pattern 5: Helper Function + +```rust +fn create_envelope( + mode: &str, + message_type: &str, + session_id: &str, + sender: &str, + payload: &[u8], +) -> Envelope { + Envelope { + macp_version: "1.0".into(), + mode: mode.into(), + message_type: message_type.into(), + message_id: uuid::Uuid::new_v4().to_string(), + session_id: session_id.into(), + sender: sender.into(), + timestamp_unix_ms: chrono::Utc::now().timestamp_millis(), + payload: payload.to_vec(), + } +} -let ack = client.send_message(late_msg).await?.into_inner(); -assert!(!ack.accepted); -assert_eq!(ack.error, "TtlExpired"); +// Usage: +let start = create_envelope("decision", "SessionStart", "s1", "alice", b""); +let vote = create_envelope("decision", "Vote", "s1", "alice", + &serde_json::to_vec(&serde_json::json!({ + "proposal_id": "p1", "vote": "approve", "reason": "LGTM" + })).unwrap() +); +let contribute = create_envelope("multi_round", "Contribute", "mr1", "alice", + br#"{"value":"option_a"}"# +); ``` -**TTL rules:** -- Empty payload → default 60-second TTL -- `{"ttl_ms": 5000}` → 5-second TTL -- `ttl_ms` must be between 1 and 86,400,000 (24 hours) -- Values outside this range are rejected with `InvalidTtl` +--- ## Common Questions +### Q: What protocol version should I use? + +**A:** Use `macp_version: "1.0"`. This is the only supported version in v0.2. Always call `Initialize` first to confirm. + +### Q: How do I encode the SessionStart payload? + +**A:** Use protobuf encoding. In Rust, create a `SessionStartPayload` and call `.encode_to_vec()`. In other languages, use the generated protobuf code for `macp.v1.SessionStartPayload`. An empty payload (zero bytes) is also valid and uses all defaults. + +### Q: What's the difference between "decision" and "macp.mode.decision.v1"? 
+ +**A:** They refer to the same mode. `"decision"` is a backward-compatible alias; `"macp.mode.decision.v1"` is the RFC-compliant canonical name. Both work identically. The same applies to `"multi_round"` and `"macp.mode.multi_round.v1"`. + ### Q: Can I send messages from different senders to the same session? -**A:** Yes! The `sender` field is informational. Any sender can send to any session. +**A:** Yes, if the session has no participant list (open participation). If the session has a participant list, only listed senders can send. ### Q: What if I use the same message_id twice? -**A:** The server doesn't currently check for duplicate message IDs. This is a validation you'd add in your client. +**A:** The runtime treats it as an idempotent duplicate — it returns `ok: true, duplicate: true` without re-processing. This is by design for safe retries. ### Q: Can I create multiple sessions with the same ID? -**A:** No. The second SessionStart will be rejected with `DuplicateSession`. +**A:** No. The second SessionStart (with a different message_id) will be rejected with `INVALID_ENVELOPE`. However, resending the same SessionStart (same message_id) is idempotent and returns success. ### Q: How long do sessions last? -**A:** Sessions have a configurable TTL (default 60 seconds, max 24 hours). Specify a custom TTL by including `{"ttl_ms": <milliseconds>}` in the `SessionStart` payload. +**A:** Sessions have a configurable TTL (default 60 seconds, max 24 hours). Set `ttl_ms` in the `SessionStartPayload`. + +### Q: Can I cancel a session? + +**A:** Yes. Use the `CancelSession` RPC with a session_id and reason. The session transitions to Expired and the reason is logged. ### Q: Can I "unresolve" a session? -**A:** No. Resolved is a terminal state. You'd need to create a new session. +**A:** No. Resolved and Expired are terminal states. Create a new session if you need to continue coordination. ### Q: What happens if the server crashes? 
-**A:** All session state is lost (it's in-memory only). Clients would need to reconnect and restart sessions. +**A:** All session state is lost (it's in-memory only). Clients would need to reconnect and restart sessions. Future versions may add persistent storage. ### Q: What modes are available? -**A:** Currently two modes: -- `decision` (default): Simple resolve-on-payload mode -- `multi_round`: Multi-round convergence with participant tracking +**A:** Two modes: +- `macp.mode.decision.v1` (alias: `decision`) — RFC lifecycle with Proposal/Evaluation/Objection/Vote/Commitment. +- `macp.mode.multi_round.v1` (alias: `multi_round`) — Participant-based convergence. + +Use `ListModes` to discover them at runtime. + +### Q: How do Signal messages work? -### Q: What happens if I use an empty mode field? +**A:** Signals are fire-and-forget messages that don't require a session_id. They don't create or modify sessions. They're useful for heartbeats, hints, or cross-session correlation. -**A:** It defaults to `"decision"` for backward compatibility. +### Q: What's the difference between the old "resolve" payload and the new Commitment message? -### Q: Can non-participants contribute in multi_round mode? +**A:** The old mechanism (sending `payload: b"resolve"` with `message_type: "Message"`) still works for backward compatibility. The new `Commitment` message type is richer — it carries a `CommitmentPayload` with fields like `commitment_id`, `action`, `authority_scope`, and `reason`. Both resolve the session, but the new mechanism provides much more context in the resolution. -**A:** Currently yes — participant membership gating is planned for a future release. +--- ## Next Steps Now that you've seen examples, you can: -1. **Modify the test clients** - Try different scenarios -2. **Build your own client** - In Python, JavaScript, Go, etc. -3. **Add business logic** - Interpret payloads for your use case -4. **Extend the protocol** - Add new modes with the Mode trait +1. 
**Build your own client** — in Python, JavaScript, Go, or any language with a gRPC client. Use the `.proto` files to generate client code. +2. **Explore the Decision Mode lifecycle** — try building a multi-agent voting system. +3. **Implement convergence scenarios** — use Multi-Round Mode for consensus-building. +4. **Extend the protocol** — add new modes by implementing the `Mode` trait. +5. **Integrate with your agent framework** — use the `Initialize` and `ListModes` RPCs for dynamic discovery. For deeper understanding: -- Read [architecture.md](./architecture.md) to see how it's implemented -- Read [protocol.md](./protocol.md) for the complete specification +- Read **[architecture.md](./architecture.md)** to see how it's implemented internally. +- Read **[protocol.md](./protocol.md)** for the complete protocol specification. diff --git a/docs/protocol.md b/docs/protocol.md index 3f9a834..0c781c0 100644 --- a/docs/protocol.md +++ b/docs/protocol.md @@ -1,505 +1,1159 @@ -# MACP Protocol Specification - -This document describes the Multi-Agent Coordination Protocol (MACP) in detail. We explain what each field means, what rules apply, and why they exist. +# MACP Protocol Specification (v1.0 — RFC-0001) + +This document is the authoritative specification of the Multi-Agent Coordination Protocol (MACP) as implemented by `macp-runtime` v0.2. It describes every message type, every field, every validation rule, every error code, and every behavioral guarantee in narrative detail. Whether you are building a client, implementing a new mode, or auditing the protocol for correctness, this document is your reference. + +--- + +## Table of Contents + +1. [Protocol Version](#protocol-version) +2. [Core Concepts](#core-concepts) +3. [Protobuf Schema Organization](#protobuf-schema-organization) +4. [The Envelope](#the-envelope) +5. [The Ack Response](#the-ack-response) +6. [Structured Errors (MACPError)](#structured-errors-macperror) +7. 
[Session State Enum](#session-state-enum) +8. [gRPC Service Definition](#grpc-service-definition) +9. [RPC: Initialize](#rpc-initialize) +10. [RPC: Send](#rpc-send) +11. [RPC: GetSession](#rpc-getsession) +12. [RPC: CancelSession](#rpc-cancelsession) +13. [RPC: GetManifest](#rpc-getmanifest) +14. [RPC: ListModes](#rpc-listmodes) +15. [RPC: StreamSession](#rpc-streamsession) +16. [RPC: ListRoots, WatchModeRegistry, WatchRoots](#rpc-listrootswatchmoderegistrywatchroots) +17. [Message Type: SessionStart](#message-type-sessionstart) +18. [Message Type: Regular Message](#message-type-regular-message) +19. [Message Type: Signal](#message-type-signal) +20. [TTL Configuration](#ttl-configuration) +21. [Message Deduplication](#message-deduplication) +22. [Participant Validation](#participant-validation) +23. [Session State Machine](#session-state-machine) +24. [Mode System](#mode-system) +25. [Decision Mode Specification](#decision-mode-specification) +26. [Multi-Round Mode Specification](#multi-round-mode-specification) +27. [Validation Rules (Complete)](#validation-rules-complete) +28. [Error Codes (Complete)](#error-codes-complete) +29. [Transport](#transport) +30. [Best Practices](#best-practices) +31. [Future Extensions](#future-extensions) + +--- ## Protocol Version -Current version: **v1** +Current version: **`1.0`** + +All messages must carry `macp_version: "1.0"`. The `Initialize` RPC is the mechanism by which client and server agree on a protocol version. The server currently supports only `"1.0"` — if the client proposes only unsupported versions, the `Initialize` call returns a gRPC `INVALID_ARGUMENT` error. + +> **Migration note from v0.1:** The previous protocol used `macp_version: "v1"`. Version 0.2 uses `"1.0"`. Old clients sending `"v1"` will receive `UNSUPPORTED_PROTOCOL_VERSION`. -All messages must specify `macp_version: "v1"` or they will be rejected. +--- ## Core Concepts -### What is a Protocol? +### What Is a Protocol? 
+ +A protocol is a set of rules that all participants agree to follow. Just as HTTP defines how browsers and servers exchange web pages, the MACP protocol defines how agents exchange coordination messages: what information must be included, in what order things must happen, what is allowed, and what is forbidden. + +### Why a Formal Protocol? -A protocol is a set of rules that everyone agrees to follow. Like how English grammar has rules (subject-verb-object), the MACP protocol has rules for: -- What information must be included in each message -- What order things must happen -- What's allowed and what's forbidden +Without a formal protocol, different agents might format messages differently, state transitions could be inconsistent, errors would be ambiguous, and debugging would be nearly impossible. With MACP: -### Why Have a Protocol? +- Everyone speaks the same structured language. +- Behavior is predictable and deterministic. +- Tools and clients can be built for any MACP-compliant runtime. +- Audit logs are meaningful because every event follows a known schema. 
-Without a protocol: -- Different agents might format messages differently -- State transitions could be inconsistent -- Errors would be ambiguous -- Debugging would be impossible +--- -With a protocol: -- Everyone speaks the same "language" -- Behavior is predictable -- Tools can be built to work with any MACP-compliant system +## Protobuf Schema Organization -## Message Types +The protocol is defined across three protobuf files, organized by concern: -### Envelope +``` +proto/ +├── buf.yaml # Buf linter config (STANDARD lint, FILE breaking) +└── macp/ + ├── v1/ + │ ├── envelope.proto # Envelope, Ack, MACPError, SessionState enum + │ └── core.proto # Service definition, all request/response types, + │ # capability messages, session payloads, manifests, + │ # mode descriptors, streaming types + └── modes/ + └── decision/ + └── v1/ + └── decision.proto # ProposalPayload, EvaluationPayload, + # ObjectionPayload, VotePayload +``` -Every message sent to the server is wrapped in an **Envelope**. Think of it as an addressed package. +**`envelope.proto`** contains the foundational types that every message touches: the `Envelope` wrapper, the `Ack` acknowledgment, the `MACPError` structured error, and the `SessionState` enum. These are imported by `core.proto`. -**Fields:** +**`core.proto`** contains everything else: the `MACPRuntimeService` definition with all ten RPCs, the request/response wrappers, capability negotiation messages (`ClientInfo`, `RuntimeInfo`, `Capabilities` and its sub-capabilities), session lifecycle payloads (`SessionStartPayload`, `SessionCancelPayload`, `CommitmentPayload`), introspection types (`AgentManifest`, `ModeDescriptor`), and streaming types. 
-| Field | Type | Required | Description | -|-------|------|----------|-------------| -| `macp_version` | string | Yes | Protocol version (must be "v1") | -| `mode` | string | Yes | Coordination mode (e.g., "decision", "multi_round") | -| `message_type` | string | Yes | Type of message ("SessionStart", "Message", "Contribute", etc.) | -| `message_id` | string | Yes | Unique ID for this message | -| `session_id` | string | Yes | Which session this belongs to | -| `sender` | string | Yes | Who is sending this message | -| `timestamp_unix_ms` | int64 | Yes | When sent (Unix timestamp in milliseconds) | -| `payload` | bytes | No | The actual message content | +**`decision.proto`** contains the mode-specific payload types for the Decision Mode: `ProposalPayload`, `EvaluationPayload`, `ObjectionPayload`, and `VotePayload`. These are not referenced by the core proto — they are domain-level schemas that clients use to structure their payloads. -**Example (JSON representation for readability):** -```json -{ - "macp_version": "v1", - "mode": "decision", - "message_type": "SessionStart", - "message_id": "m1", - "session_id": "s1", - "sender": "agent-alpha", - "timestamp_unix_ms": 1700000000000, - "payload": "" +The `buf.yaml` file configures the Buf linter with `STANDARD` lint rules and `FILE`-level breaking-change detection, ensuring the proto schema evolves safely. + +--- + +## The Envelope + +Every message sent through the `Send` or `StreamSession` RPC is wrapped in an **Envelope**. The Envelope is the universal container — it carries both the routing metadata and the actual payload. + +```protobuf +message Envelope { + string macp_version = 1; // Must be "1.0" + string mode = 2; // Coordination mode (e.g., "decision", "macp.mode.decision.v1") + string message_type = 3; // Semantic type: "SessionStart", "Message", "Proposal", etc. 
+ string message_id = 4; // Unique ID for this message (used for deduplication) + string session_id = 5; // Session this belongs to (empty for Signal messages) + string sender = 6; // Who is sending + int64 timestamp_unix_ms = 7; // Informational client-side timestamp + bytes payload = 8; // The actual content } ``` -### Ack (Acknowledgment) +**Field-by-field narrative:** -Every message receives an **Ack** response. It tells you if the message was accepted or rejected. +- **`macp_version`** — The protocol version. The server checks this first. If it is not `"1.0"`, the message is immediately rejected with `UNSUPPORTED_PROTOCOL_VERSION`. This is a hard gate — no further processing occurs. -**Fields:** +- **`mode`** — The name of the coordination mode that should handle this message. Accepted values include RFC-compliant names (`macp.mode.decision.v1`, `macp.mode.multi_round.v1`) and backward-compatible aliases (`decision`, `multi_round`). An empty string defaults to `macp.mode.decision.v1`. If the name does not match any registered mode, the message is rejected with `MODE_NOT_SUPPORTED`. -| Field | Type | Description | -|-------|------|-------------| -| `accepted` | bool | `true` if accepted, `false` if rejected | -| `error` | string | Empty if accepted, error name if rejected | +- **`message_type`** — Determines how the runtime routes the message. Three routing categories exist: + - `"SessionStart"` — creates a new session. + - `"Signal"` — ambient message that does not require a session. + - Everything else (`"Message"`, `"Proposal"`, `"Evaluation"`, `"Objection"`, `"Vote"`, `"Commitment"`, `"Contribute"`, etc.) — dispatched to the mode's `on_message()` handler within an existing session. -**Success example:** -```json -{ - "accepted": true, - "error": "" +- **`message_id`** — A client-chosen unique identifier. 
The runtime uses this for deduplication: if a message with the same `message_id` has already been accepted for a given session, the runtime returns `ok: true, duplicate: true` without re-processing. Clients should use UUIDs or similarly unique values. + +- **`session_id`** — Identifies the session. Required for all message types except `Signal`. For `SessionStart`, this becomes the ID of the newly created session. For subsequent messages, the runtime looks up this session in the registry. + +- **`sender`** — Identifies who is sending the message. If the session has a non-empty participant list, the sender must be a member of that list or the message is rejected with `INVALID_ENVELOPE`. + +- **`timestamp_unix_ms`** — An informational timestamp set by the client. The runtime does not use this for any logic — it records its own `accepted_at_unix_ms` in the Ack. This field exists for client-side tracing and ordering. + +- **`payload`** — The actual content of the message, encoded as raw bytes. The interpretation depends on the `message_type` and the mode: + - For `SessionStart`: a protobuf-encoded `SessionStartPayload` (or empty bytes for defaults). + - For Decision Mode messages: JSON-encoded payloads matching `ProposalPayload`, `EvaluationPayload`, etc. + - For Multi-Round `Contribute` messages: JSON `{"value": ""}`. + - For `Signal`: arbitrary bytes. + +--- + +## The Ack Response + +Every `Send` call returns an `Ack` — a structured acknowledgment that provides complete information about what happened. 
+ +```protobuf +message Ack { + bool ok = 1; // true if accepted + bool duplicate = 2; // true if this was an idempotent replay + string message_id = 3; // echoed from the request + string session_id = 4; // echoed from the request + int64 accepted_at_unix_ms = 5; // server-side timestamp + SessionState session_state = 6; // session state after processing + MACPError error = 7; // structured error (if ok == false) } ``` -**Error example:** -```json -{ - "accepted": false, - "error": "SessionNotOpen" +**Understanding the Ack fields:** + +- **`ok`** — The primary success indicator. `true` means the message was accepted and processed. `false` means it was rejected — consult the `error` field for details. + +- **`duplicate`** — Set to `true` when the runtime recognizes a previously-accepted `message_id` for the same session. The message is not reprocessed; the Ack simply confirms idempotent acceptance. This allows clients to safely retry without side effects. + +- **`message_id`** and **`session_id`** — Echoed back from the request for client-side correlation, especially useful in asynchronous or batched workflows. + +- **`accepted_at_unix_ms`** — The server-side timestamp (milliseconds since Unix epoch) at the moment the message was accepted. This is authoritative — clients should use this rather than their own `timestamp_unix_ms` for ordering guarantees. + +- **`session_state`** — The session's state *after* the message was processed. This tells the client whether the session is still `OPEN`, has been `RESOLVED` (e.g., after a `Commitment` message in Decision Mode), or has `EXPIRED`. For messages that don't touch a session (e.g., `Signal`), this is `SESSION_STATE_OPEN`. + +- **`error`** — A structured `MACPError` object present when `ok == false`. Contains the RFC error code, a human-readable message, and optional correlation fields. See the next section for details. + +> **Migration note from v0.1:** The old `Ack` had only `accepted: bool` and `error: string`. 
The new Ack is significantly richer — clients should update to read the structured `error` field and the `duplicate` and `session_state` fields. + +--- + +## Structured Errors (MACPError) + +When a message is rejected, the `Ack.error` field contains a structured error: + +```protobuf +message MACPError { + string code = 1; // RFC error code (e.g., "INVALID_ENVELOPE") + string message = 2; // Human-readable description + string session_id = 3; // Correlated session (if applicable) + string message_id = 4; // Correlated message (if applicable) + bytes details = 5; // Optional additional detail payload } ``` -### SessionQuery / SessionInfo +The `code` field uses a fixed vocabulary of RFC-compliant error codes (see [Error Codes](#error-codes-complete) below). The `message` field provides a human-readable explanation. The `session_id` and `message_id` fields echo back the relevant identifiers for correlation. The `details` field is reserved for future use (e.g., structured error payloads for specific modes). -The `GetSession` RPC allows querying session state: +--- -**SessionQuery:** +## Session State Enum -| Field | Type | Description | -|-------|------|-------------| -| `session_id` | string | The session to query | +Session state is represented as a protobuf enum: -**SessionInfo:** +```protobuf +enum SessionState { + SESSION_STATE_UNSPECIFIED = 0; + SESSION_STATE_OPEN = 1; + SESSION_STATE_RESOLVED = 2; + SESSION_STATE_EXPIRED = 3; +} +``` -| Field | Type | Description | -|-------|------|-------------| -| `session_id` | string | Session identifier | -| `mode` | string | Coordination mode | -| `state` | string | "Open", "Resolved", or "Expired" | -| `ttl_expiry` | int64 | TTL expiry timestamp (Unix ms) | -| `resolution` | bytes | Resolution data (if resolved) | -| `mode_state` | bytes | Current mode-specific state | -| `participants` | repeated string | Session participants | +The `UNSPECIFIED` value is the protobuf default and should not be set intentionally. 
The runtime maps its internal `SessionState` enum (`Open`, `Resolved`, `Expired`) to these wire values. -## Mode Dispatcher +--- -The runtime uses a **Mode Dispatcher** to route messages to the appropriate coordination mode. The `mode` field in the Envelope determines which Mode handles the message. +## gRPC Service Definition -### Available Modes +The `MACPRuntimeService` is the single gRPC service exposed by the runtime: -| Mode | Description | Default? | -|------|-------------|----------| -| `decision` | Simple resolve-on-payload mode | Yes (empty mode field defaults here) | -| `multi_round` | Multi-round convergence with participant tracking | No | +```protobuf +service MACPRuntimeService { + rpc Initialize(InitializeRequest) returns (InitializeResponse); + rpc Send(SendRequest) returns (SendResponse); + rpc StreamSession(stream StreamSessionRequest) returns (stream StreamSessionResponse); + rpc GetSession(GetSessionRequest) returns (GetSessionResponse); + rpc CancelSession(CancelSessionRequest) returns (CancelSessionResponse); + rpc GetManifest(GetManifestRequest) returns (GetManifestResponse); + rpc ListModes(ListModesRequest) returns (ListModesResponse); + rpc WatchModeRegistry(WatchModeRegistryRequest) returns (stream WatchModeRegistryResponse); + rpc ListRoots(ListRootsRequest) returns (ListRootsResponse); + rpc WatchRoots(WatchRootsRequest) returns (stream WatchRootsResponse); +} +``` -### How Modes Work +> **Migration note from v0.1:** The old service was named `MACPService` with only two RPCs (`SendMessage` and `GetSession`). The v0.2 service is `MACPRuntimeService` with ten RPCs. The `SendMessage` RPC has been replaced by `Send` (which wraps the `Envelope` in a `SendRequest`). -1. Runtime receives an Envelope -2. Resolves the mode name (empty string → "decision") -3. Looks up the registered Mode implementation -4. Calls `mode.on_session_start()` or `mode.on_message()` -5. 
Mode returns a `ModeResponse` (`NoOp`, `PersistState`, `Resolve`, `PersistAndResolve`) -6. Runtime applies the response to mutate session state +--- -Modes are **pure logic** — they receive immutable session state and return a response. The runtime kernel handles all mutation. +## RPC: Initialize -## Message Types +The `Initialize` RPC is a protocol handshake that should be called before any session work begins. It negotiates the protocol version and exchanges capability information. -### SessionStart +**Request:** +```protobuf +message InitializeRequest { + repeated string supported_protocol_versions = 1; // e.g., ["1.0"] + ClientInfo client_info = 2; // optional client metadata + Capabilities capabilities = 3; // optional client capabilities +} +``` -Creates a new session. This must be the first message for any session. - -**Requirements:** -- `message_type` must be `"SessionStart"` -- `session_id` must be unique (not already exist) -- `mode` must reference a registered mode (or be empty for default) -- All required Envelope fields must be present - -**What happens:** -1. Server resolves mode (empty → "decision") -2. If mode is unknown → reject with `UnknownMode` -3. Server parses TTL from payload (see TTL Configuration below) -4. If TTL is invalid → reject with `InvalidTtl` -5. Server checks if session already exists → reject with `DuplicateSession` -6. Creates session log, appends incoming entry -7. Calls `mode.on_session_start()` → may return `PersistState` with initial mode state -8. 
Creates new session with state: `Open`, configured TTL, and mode state - -**Example (decision mode, default TTL):** -```json -{ - "macp_version": "v1", - "mode": "decision", - "message_type": "SessionStart", - "message_id": "msg-001", - "session_id": "session-alpha", - "sender": "agent-1", - "timestamp_unix_ms": 1700000000000, - "payload": "" +**Response:** +```protobuf +message InitializeResponse { + string selected_protocol_version = 1; // "1.0" + RuntimeInfo runtime_info = 2; // server name, version, description + Capabilities capabilities = 3; // server capabilities + repeated string supported_modes = 4; // registered mode names + string instructions = 5; // human-readable usage instructions } ``` -**Example (multi_round mode):** -```json -{ - "macp_version": "v1", - "mode": "multi_round", - "message_type": "SessionStart", - "message_id": "msg-001", - "session_id": "session-beta", - "sender": "coordinator", - "timestamp_unix_ms": 1700000000000, - "payload": "{\"participants\":[\"alice\",\"bob\"],\"convergence\":{\"type\":\"all_equal\"},\"ttl_ms\":60000}" +**Behavior:** + +1. The server inspects the client's `supported_protocol_versions` list. +2. If `"1.0"` is in the list, it is selected. If not, the RPC returns a gRPC `INVALID_ARGUMENT` status with a descriptive message. +3. The response includes the runtime's identity (`RuntimeInfo` with name `"macp-runtime"`, version `"0.2.0"`), its capabilities (sessions with streaming, cancellation, progress, manifest, mode registry, and roots), and the list of supported modes. +4. The `instructions` field provides a brief human-readable note about the runtime. 
+ +**Capabilities advertised:** + +| Capability | Value | Description | +|------------|-------|-------------| +| `sessions.stream` | `true` | StreamSession RPC is available | +| `cancellation.cancel_session` | `true` | CancelSession RPC is available | +| `progress.progress` | `true` | Progress tracking is supported | +| `manifest.get_manifest` | `true` | GetManifest RPC is available | +| `mode_registry.list_modes` | `true` | ListModes RPC is available | +| `mode_registry.list_changed` | `true` | WatchModeRegistry RPC is available | +| `roots.list_roots` | `true` | ListRoots RPC is available | +| `roots.list_changed` | `true` | WatchRoots RPC is available | + +--- + +## RPC: Send + +The `Send` RPC is the primary message ingestion point. It accepts a `SendRequest` containing an `Envelope` and returns a `SendResponse` containing an `Ack`. + +**Request:** +```protobuf +message SendRequest { + Envelope envelope = 1; +} +``` + +**Response:** +```protobuf +message SendResponse { + Ack ack = 1; } ``` -### TTL Configuration +**Processing flow:** -The `SessionStart` payload can optionally contain a JSON object to configure session TTL: +1. **Validate the Envelope** — check `macp_version == "1.0"`, check that `session_id` and `message_id` are non-empty (except for `Signal` messages where `session_id` may be empty). +2. **Delegate to the Runtime** — the `Runtime::process()` method routes to `process_session_start()`, `process_signal()`, or `process_message()` based on `message_type`. +3. **Build the Ack** — the server constructs a full `Ack` with `ok`, `duplicate`, echoed IDs, server timestamp, session state, and any error. -```json -{"ttl_ms": 5000} +All errors are returned in the Ack — the gRPC status is always `OK` for protocol-level errors. Only infrastructure-level failures (e.g., missing `Envelope` in the request) return non-OK gRPC statuses. + +--- + +## RPC: GetSession + +Retrieves metadata for a specific session. 
+ +**Request:** +```protobuf +message GetSessionRequest { + string session_id = 1; +} +``` + +**Response:** +```protobuf +message GetSessionResponse { + SessionMetadata metadata = 1; +} + +message SessionMetadata { + string session_id = 1; + string mode = 2; + SessionState state = 3; + int64 started_at_unix_ms = 4; + int64 expires_at_unix_ms = 5; + string mode_version = 6; + string configuration_version = 7; + string policy_version = 8; +} ``` -**Rules:** +**Behavior:** + +If the session exists, its metadata is returned — including mode name, current state (as a `SessionState` enum value), creation timestamp, TTL expiry timestamp, and the version fields from the original `SessionStartPayload`. -| Payload | Behavior | -|---------|----------| -| Empty (`b""`) | Default TTL: 60 seconds | -| `{"ttl_ms": 5000}` | Custom TTL: 5 seconds | -| `{}` or `{"ttl_ms": null}` | Default TTL: 60 seconds | -| `{"ttl_ms": 0}` or negative | Rejected with `InvalidTtl` | -| `{"ttl_ms": 86400001}` (>24h) | Rejected with `InvalidTtl` | -| Invalid JSON or non-UTF-8 | Rejected with `InvalidEnvelope` | +If the session does not exist, the RPC returns a gRPC `NOT_FOUND` status. -**Bounds:** `ttl_ms` must be in range `1..=86,400,000` (1ms to 24 hours). +> **Migration note from v0.1:** The old `GetSession` returned a `SessionInfo` with fields like `state` (as a string), `resolution`, `mode_state`, and `participants`. The new response uses `SessionMetadata` with typed `SessionState` enum and version metadata fields. -For multi_round mode, the TTL is part of the mode-specific payload alongside `participants` and `convergence`. +--- -### Regular Message (Decision Mode) +## RPC: CancelSession -Sends content within an existing decision mode session. +Explicitly cancels an active session, transitioning it to `Expired` state. 
-**Requirements:** -- `message_type` can be anything except `"SessionStart"` -- `session_id` must reference an existing session -- Session must be in `Open` state +**Request:** +```protobuf +message CancelSessionRequest { + string session_id = 1; + string reason = 2; +} +``` -**What happens:** -1. Server finds the session -2. If not found → reject with `UnknownSession` -3. If found, Open, and TTL has expired → log internal entry, transition to `Expired`, reject with `TtlExpired` -4. If found but not Open → reject with `SessionNotOpen` -5. Append incoming log entry -6. Call `mode.on_message()`: - - Decision mode: if payload is `"resolve"` → `Resolve`, else → `NoOp` -7. Apply mode response +**Response:** +```protobuf +message CancelSessionResponse { + Ack ack = 1; +} +``` -### Contribute Message (Multi-Round Mode) +**Behavior:** -Submits a contribution in a multi-round convergence session. +1. If the session does not exist, returns `ok: false` with `SESSION_NOT_FOUND`. +2. If the session is already `Resolved` or `Expired`, the cancellation is idempotent — returns `ok: true` without modification. +3. If the session is `Open`, logs an internal `SessionCancel` entry with the provided reason, transitions the session to `Expired`, and returns `ok: true`. -**Requirements:** -- `message_type` must be `"Contribute"` -- `session_id` must reference an existing multi_round session -- `sender` should be one of the registered participants -- `payload` must be JSON: `{"value": ""}` +The cancellation reason is persisted in the session's audit log, providing a clear record of why the session was terminated. -**What happens:** -1. Mode decodes session's `mode_state` -2. If sender's value changed from previous → increment round counter -3. Check convergence: all participants contributed + all values identical -4. If converged → `PersistAndResolve` with resolution `{"converged_value":"...","round":N,"final":{...}}` -5. 
If not converged → `PersistState` with updated contributions +--- -## Multi-Round Mode Specification +## RPC: GetManifest + +Retrieves the agent manifest — a description of the runtime's identity and capabilities. + +**Request:** +```protobuf +message GetManifestRequest { + string agent_id = 1; // currently unused +} +``` + +**Response:** +```protobuf +message GetManifestResponse { + AgentManifest manifest = 1; +} + +message AgentManifest { + string agent_id = 1; + string title = 2; + string description = 3; + repeated string supported_modes = 4; + repeated string input_content_types = 5; + repeated string output_content_types = 6; + map metadata = 7; +} +``` + +The response includes the runtime's identity (`"macp-runtime"`, `"MACP Coordination Runtime"`), a description, and the list of supported mode names. + +--- + +## RPC: ListModes + +Discovers the coordination modes registered in the runtime. + +**Request:** `ListModesRequest {}` (empty) + +**Response:** +```protobuf +message ListModesResponse { + repeated ModeDescriptor modes = 1; +} + +message ModeDescriptor { + string mode = 1; + string mode_version = 2; + string title = 3; + string description = 4; + string determinism_class = 5; + string participant_model = 6; + repeated string message_types = 7; + repeated string terminal_message_types = 8; + map schema_uris = 9; +} +``` + +**Currently returned descriptors:** + +1. **Decision Mode:** + - `mode`: `"macp.mode.decision.v1"` + - `mode_version`: `"1.0.0"` + - `title`: `"Decision Mode"` + - `determinism_class`: `"deterministic"` + - `participant_model`: `"open"` + - `message_types`: `["Proposal", "Evaluation", "Objection", "Vote", "Commitment"]` + - `terminal_message_types`: `["Commitment"]` + +2. 
**Multi-Round Mode:** + - `mode`: `"macp.mode.multi_round.v1"` + - `mode_version`: `"1.0.0"` + - `title`: `"Multi-Round Convergence Mode"` + - `determinism_class`: `"deterministic"` + - `participant_model`: `"closed"` + - `message_types`: `["Contribute"]` + - `terminal_message_types`: `["Contribute"]` (the final Contribute that triggers convergence) + +--- + +## RPC: StreamSession + +Bidirectional streaming RPC for real-time session interaction. + +```protobuf +rpc StreamSession(stream StreamSessionRequest) returns (stream StreamSessionResponse); +``` + +The client sends a stream of `StreamSessionRequest` messages (each wrapping an `Envelope`), and the server responds with a stream of `StreamSessionResponse` messages (each wrapping an echoed `Envelope` with an updated `message_type` reflecting the processing result). This enables real-time, interactive coordination without polling. + +--- + +## RPC: ListRoots, WatchModeRegistry, WatchRoots + +- **ListRoots** — Returns an empty list of `Root` objects. Reserved for future resource-root discovery. +- **WatchModeRegistry** — Server-streaming RPC for mode registry change notifications. Currently returns `UNIMPLEMENTED`. +- **WatchRoots** — Server-streaming RPC for root change notifications. Currently returns `UNIMPLEMENTED`. + +--- + +## Message Type: SessionStart + +A `SessionStart` message creates a new coordination session. + +**Required fields:** +- `message_type`: `"SessionStart"` +- `session_id`: Must be unique — no session with this ID may already exist. +- `message_id`: Must be non-empty. +- `mode`: Must reference a registered mode (or be empty for the default `macp.mode.decision.v1`). 
+ +**Payload:** + +The payload should be a protobuf-encoded `SessionStartPayload`: + +```protobuf +message SessionStartPayload { + string intent = 1; // human-readable purpose + repeated string participants = 2; // participant IDs (empty = open participation) + string mode_version = 3; // version of the mode to use + string configuration_version = 4; // configuration version identifier + string policy_version = 5; // policy version identifier + int64 ttl_ms = 6; // TTL in milliseconds (0 = default 60s) + bytes context = 7; // arbitrary context data + repeated Root roots = 8; // resource roots +} +``` + +An empty payload (zero bytes) is valid — the runtime uses defaults (60s TTL, no participants, empty version strings). + +**Processing sequence:** + +1. Runtime resolves the mode name (empty → `"macp.mode.decision.v1"`). +2. Looks up the mode in the registry — rejects with `MODE_NOT_SUPPORTED` if not found. +3. Decodes the payload as a protobuf `SessionStartPayload` — rejects with `INVALID_ENVELOPE` if decoding fails. +4. Extracts and validates TTL — rejects with `INVALID_ENVELOPE` if out of range (see [TTL Configuration](#ttl-configuration)). +5. Acquires write lock on the session registry. +6. Checks for duplicate session ID: + - If the session exists and the `message_id` matches the session's `seen_message_ids`, returns `ok: true, duplicate: true` (idempotent). + - If the session exists with a different `message_id`, rejects with `INVALID_ENVELOPE` (duplicate session). +7. Creates a session log and appends an `Incoming` entry. +8. Calls `mode.on_session_start()` — the mode may return `PersistState` with initial mode state. +9. Creates a `Session` object with state `Open`, computed TTL expiry, participants, version metadata, and the message_id recorded in `seen_message_ids`. +10. Applies the `ModeResponse` to mutate the session (e.g., storing initial mode state). +11. Inserts the session into the registry. 
+ +--- + +## Message Type: Regular Message + +Any message with a `message_type` other than `"SessionStart"` or `"Signal"` is treated as a regular message dispatched to the session's mode. + +**Required fields:** +- `session_id`: Must reference an existing session. +- `message_id`: Must be non-empty. + +**Processing sequence:** + +1. Acquires write lock on the session registry. +2. Finds the session — rejects with `SESSION_NOT_FOUND` if not found. +3. **Deduplication check** — if `message_id` is already in the session's `seen_message_ids`, returns `ok: true, duplicate: true` without re-processing. +4. **TTL check** — if the session is `Open` and the current time exceeds `ttl_expiry`, logs an internal `TtlExpired` entry, transitions the session to `Expired`, and rejects with `SESSION_NOT_OPEN`. +5. **State check** — if the session is not `Open` (already `Resolved` or `Expired`), rejects with `SESSION_NOT_OPEN`. +6. **Participant check** — if the session has a non-empty `participants` list and the `sender` is not in it, rejects with `INVALID_ENVELOPE`. +7. Records `message_id` in `seen_message_ids`. +8. Appends an `Incoming` log entry. +9. Calls `mode.on_message(session, envelope)`. +10. Applies the `ModeResponse` to mutate session state. + +--- + +## Message Type: Signal + +`Signal` messages are ambient, session-less messages. They are fire-and-forget coordination hints. + +**Special rules:** +- `session_id` may be empty. +- `message_id` must be non-empty. +- No session is created, modified, or looked up. +- The runtime simply acknowledges receipt. + +**Use cases:** +- Heartbeats between agents. +- Out-of-band coordination hints. +- Cross-session correlation signals (using the `SignalPayload.correlation_session_id` field). + +--- + +## TTL Configuration + +Session TTL (time-to-live) determines how long a session remains open before it is considered expired. + +**Encoding:** TTL is specified in the `SessionStartPayload.ttl_ms` field (protobuf int64). 
+ +| `ttl_ms` value | Behavior | +|----------------|----------| +| `0` (or field absent) | Default TTL: **60,000 ms** (60 seconds) | +| `1` to `86,400,000` | Custom TTL in milliseconds | +| Negative | Rejected with `INVALID_ENVELOPE` | +| `> 86,400,000` (> 24h) | Rejected with `INVALID_ENVELOPE` | + +**TTL enforcement:** TTL is enforced **lazily** — the runtime checks `current_time > ttl_expiry` on each non-SessionStart message. When expiry is detected: + +1. An internal `TtlExpired` log entry is appended. +2. The session transitions to `Expired`. +3. The message is rejected with error code `SESSION_NOT_OPEN`. + +There is no background cleanup thread — expired sessions remain in memory until the server is restarted. This is a deliberate simplification; future versions may add background eviction. + +> **Migration note from v0.1:** TTL was previously specified as a JSON payload `{"ttl_ms": <milliseconds>}`. It is now a field in the protobuf `SessionStartPayload`. -### SessionStart Payload +--- +## Message Deduplication + +The runtime provides **at-least-once** delivery with idempotent acceptance via message deduplication. + +Each session maintains a `seen_message_ids: HashSet<String>`. When a message arrives: + +1. If `message_id` is already in `seen_message_ids`, the runtime returns `ok: true, duplicate: true` without re-processing the message or calling the mode. +2. If `message_id` is new, it is added to `seen_message_ids` before processing. + +This applies to both `SessionStart` and regular messages: + +- **SessionStart deduplication:** If a `SessionStart` arrives for a session that already exists and the `message_id` matches one in the session's `seen_message_ids`, it is treated as an idempotent retry. If the `message_id` is different, it is rejected as a duplicate session. +- **Regular message deduplication:** If a regular message's `message_id` matches a previously accepted message for that session, it is returned as a duplicate. 
+ +This design allows clients to safely retry failed network requests without causing double-processing. + +--- + +## Participant Validation + +Sessions can optionally restrict which senders are allowed to contribute. + +**Configuration:** The `SessionStartPayload.participants` field is a list of participant identifiers. If this list is non-empty, only senders whose name appears in the list may send messages to the session. + +**Enforcement:** + +- For regular messages (not `SessionStart` or `Signal`), the runtime checks whether `envelope.sender` is in `session.participants`. +- If the participant list is non-empty and the sender is not in it, the message is rejected with error code `INVALID_ENVELOPE`. +- If the participant list is empty, any sender is allowed (open participation). + +**Mode-specific behavior:** + +- In **Multi-Round Mode**, participants are essential — convergence is checked against the participant list. All listed participants must contribute for convergence to trigger. +- In **Decision Mode**, participants are optional — the mode works with or without a restricted participant list. 
+ +--- + +## Session State Machine + +Sessions follow a strict state machine with three states and two terminal transitions: + +``` + SessionStart + │ + ▼ + ┌────────────┐ + │ OPEN │ ← Initial state + └────────────┘ + │ │ + (mode returns (TTL expires or + Resolve or CancelSession) + PersistAndResolve) + │ │ + ▼ ▼ + ┌──────────┐ ┌─────────┐ + │ RESOLVED │ │ EXPIRED │ + └──────────┘ └─────────┘ + (terminal) (terminal) +``` + +**Transition rules:** + +| From | To | Trigger | +|------|----|---------| +| Open | Resolved | Mode returns `ModeResponse::Resolve` or `ModeResponse::PersistAndResolve` | +| Open | Expired | TTL check fails on next message, or `CancelSession` RPC called | +| Resolved | — | Terminal — no transitions allowed | +| Expired | — | Terminal — no transitions allowed | + +Once a session reaches a terminal state, any subsequent new message to that session is rejected with `SESSION_NOT_OPEN`. The one exception is a replay of an already-accepted `message_id`, which is still acknowledged as a duplicate because deduplication runs before the state check. + +--- + +## Mode System + +The Mode system is the heart of MACP's extensibility. The runtime provides "physics" — session invariants, TTL enforcement, logging, routing, participant validation — while Modes provide "coordination logic" — when to resolve, what intermediate state to track, and what convergence criteria to apply. + +### The Mode Trait + +```rust +pub trait Mode: Send + Sync { + fn on_session_start(&self, session: &Session, env: &Envelope) + -> Result<ModeResponse, MacpError>; + fn on_message(&self, session: &Session, env: &Envelope) + -> Result<ModeResponse, MacpError>; +} +``` + +Both methods receive **immutable** references to the session and envelope. They cannot directly mutate state — they return a `ModeResponse` that the runtime applies as a single atomic mutation. + +### ModeResponse + +```rust +pub enum ModeResponse { + NoOp, // No state change + PersistState(Vec<u8>), // Update mode_state bytes + Resolve(Vec<u8>), // Set resolution, transition to Resolved + PersistAndResolve { state: Vec<u8>, resolution: Vec<u8> }, // Both +} +``` + +- **NoOp** — The mode has nothing to do. 
The message is accepted but no state changes. +- **PersistState** — The mode wants to update its internal state (e.g., record a vote, update a contribution). The bytes are stored in `session.mode_state`. +- **Resolve** — The mode has determined that the session should resolve. The resolution bytes are stored in `session.resolution` and the session transitions to `Resolved`. +- **PersistAndResolve** — Both state update and resolution in a single atomic operation. + +### Mode Registration + +The runtime registers modes by name in a `HashMap`: + +| Key | Mode | +|-----|------| +| `"macp.mode.decision.v1"` | `DecisionMode` | +| `"macp.mode.multi_round.v1"` | `MultiRoundMode` | +| `"decision"` | `DecisionMode` (alias) | +| `"multi_round"` | `MultiRoundMode` (alias) | + +An empty `mode` field in the Envelope defaults to `"macp.mode.decision.v1"`. + +--- + +## Decision Mode Specification + +The Decision Mode (`macp.mode.decision.v1`) implements a structured decision-making lifecycle following RFC-0001. It models the flow from initial proposal through evaluation, optional objection, voting, and final commitment. + +### Decision State + +```rust +pub struct DecisionState { + pub proposals: HashMap<String, Proposal>, // proposal_id → Proposal + pub evaluations: Vec<Evaluation>, + pub objections: Vec<Objection>, + pub votes: HashMap<String, Vote>, // sender → Vote (last vote wins) + pub phase: DecisionPhase, +} + +pub enum DecisionPhase { + Proposal, // Initial phase — waiting for proposals + Evaluation, // At least one proposal exists — accepting evaluations + Voting, // Votes are being cast + Committed, // Terminal — commitment recorded +} +``` + +### Message Types and Lifecycle + +The Decision Mode accepts five message types, each with a corresponding protobuf payload type. Four of them (`ProposalPayload`, `EvaluationPayload`, `ObjectionPayload`, `VotePayload`) are defined in `decision.proto`; the fifth, `CommitmentPayload`, is defined in `core.proto` alongside the other session lifecycle payloads: + +#### 1. Proposal + +Creates a new proposal within the session. 
+ +**Payload (JSON-encoded `ProposalPayload`):** ```json { - "participants": ["alice", "bob"], - "convergence": {"type": "all_equal"}, - "ttl_ms": 60000 + "proposal_id": "p1", + "option": "Deploy to production", + "rationale": "All tests pass and staging looks good", + "supporting_data": "" } ``` -- `participants`: Non-empty list of participant identifiers -- `convergence.type`: Must be `"all_equal"` (only supported strategy) -- `ttl_ms`: Optional TTL override +**Validation:** +- `proposal_id` must be non-empty — rejected with `InvalidPayload` if empty. +- A proposal with the same `proposal_id` overwrites the previous one. -### Convergence Strategy: `all_equal` +**Effect:** +- Records the proposal in `state.proposals`. +- Advances the phase to `Evaluation` (enabling evaluations and votes). +- Returns `PersistState` with the updated state. -The session resolves automatically when: -1. All listed participants have submitted a contribution -2. All contribution values are identical +#### 2. Evaluation -### Round Counting +Evaluates an existing proposal with a recommendation. -- Round starts at 0 -- Each time a participant submits a **new or changed** value, the round increments -- Re-submitting the same value does not increment the round +**Payload (JSON-encoded `EvaluationPayload`):** +```json +{ + "proposal_id": "p1", + "recommendation": "APPROVE", + "confidence": 0.95, + "reason": "Implementation looks solid" +} +``` + +**Validation:** +- `proposal_id` must reference an existing proposal — rejected with `InvalidPayload` if not found. + +**Recommendations:** `APPROVE`, `REVIEW`, `BLOCK`, `REJECT` -### Resolution Payload +**Effect:** +- Appends the evaluation to `state.evaluations`. +- Returns `PersistState`. -When convergence is reached, the resolution contains: +#### 3. Objection + +Raises an objection against a proposal. 
+ +**Payload (JSON-encoded `ObjectionPayload`):** ```json { - "converged_value": "option_a", - "round": 3, - "final": { - "alice": "option_a", - "bob": "option_a" - } + "proposal_id": "p1", + "reason": "Security review not completed", + "severity": "high" } ``` -## Session State Machine +**Validation:** +- `proposal_id` must reference an existing proposal — rejected with `InvalidPayload` if not found. + +**Severities:** `low`, `medium`, `high`, `critical` + +**Effect:** +- Appends the objection to `state.objections`. +- Returns `PersistState`. -Sessions follow a strict state machine: +#### 4. Vote +Casts a vote on the current proposals. + +**Payload (JSON-encoded `VotePayload`):** +```json +{ + "proposal_id": "p1", + "vote": "approve", + "reason": "Looks good to me" +} ``` - SessionStart - ↓ - ┌────────┐ - │ OPEN │ ← Initial state - └────────┘ - ↓ - (mode returns Resolve or - PersistAndResolve) - ↓ - ┌──────────┐ - │ RESOLVED │ ← Terminal state - └──────────┘ - (Alternative path) - ↓ - (TTL expires) - ↓ - ┌─────────┐ - │ EXPIRED │ ← Terminal state - └─────────┘ +**Validation:** +- At least one proposal must exist — rejected with `InvalidPayload` if no proposals. +- Cannot vote when phase is `Committed` — rejected with `InvalidPayload`. + +**Votes:** `approve`, `reject`, `abstain` + +**Effect:** +- Records the vote in `state.votes`, keyed by sender. If the same sender votes again, the previous vote is overwritten. +- Advances the phase to `Voting`. +- Returns `PersistState`. + +#### 5. Commitment + +Finalizes the decision and resolves the session. + +**Payload (JSON-encoded `CommitmentPayload`):** +```json +{ + "commitment_id": "c1", + "action": "deploy-v2.1", + "authority_scope": "team-alpha", + "reason": "Unanimous approval" +} +``` + +**Validation:** +- At least one vote must exist — rejected with `InvalidPayload` if no votes. +- Phase must not already be `Committed` — rejected with `InvalidPayload` if so. + +**Effect:** +- Advances the phase to `Committed`. 
+- Returns `PersistAndResolve` with the commitment payload as resolution bytes and the updated state. +- The session transitions to `Resolved`. + +### Backward Compatibility (Legacy Resolve) + +For backward compatibility with v0.1 clients, the Decision Mode also supports the legacy resolution mechanism: if the `message_type` is `"Message"` and the `payload` equals the bytes `b"resolve"`, the session is immediately resolved with `"resolve"` as the resolution payload. This allows old clients to continue working without modification. + +Any other `Message`-type payload returns `NoOp`. + +### Phase Transitions + +``` + Proposal received + │ + ┌──────────┐ ▼ ┌──────────────┐ + │ Proposal │ ──────────────────→│ Evaluation │ + └──────────┘ └──────────────┘ + │ ↑ + Vote │ │ Evaluation/Objection + received │ │ received + ▼ │ + ┌────────┐ + │ Voting │ + └────────┘ + │ + Commitment received + │ + ▼ + ┌───────────┐ + │ Committed │ (terminal) + └───────────┘ ``` -**State descriptions:** +--- + +## Multi-Round Mode Specification + +The Multi-Round Mode (`macp.mode.multi_round.v1`) implements participant-based convergence. A set of named participants each submit contributions, and the session resolves automatically when all participants agree on the same value. + +### Multi-Round State + +```rust +pub struct MultiRoundState { + pub round: u64, // Current round number + pub participants: Vec<String>, // Expected participant IDs + pub contributions: BTreeMap<String, String>, // sender → current value + pub convergence_type: String, // "all_equal" +} +``` + +The `BTreeMap` is used instead of `HashMap` for deterministic serialization ordering. + +### SessionStart + +On `SessionStart`, the mode: + +1. Reads the `participants` list from the session (populated from `SessionStartPayload.participants`). +2. Validates that the participant list is non-empty — returns `InvalidPayload` if empty. +3. Initializes the state with `round: 0`, `convergence_type: "all_equal"`, and empty contributions. +4. 
Returns `PersistState` with the serialized initial state. + +### Contribute Messages + +The mode processes messages with `message_type: "Contribute"`. + +**Payload (JSON):** +```json +{"value": "option_a"} +``` + +**Processing:** + +1. Deserializes the current `mode_state` into `MultiRoundState`. +2. Parses the JSON payload to extract the `value` field. +3. Checks if the sender's value has changed from their previous contribution: + - If this is a new contribution or the value differs from the previous one → **increment the round counter** and update the contribution. + - If the value is identical to the previous one → update without incrementing the round (no change in substance). +4. Checks convergence: **all** listed participants have submitted at least one contribution, **and** all contribution values are identical. +5. If converged → returns `PersistAndResolve` with: + - `state`: the final `MultiRoundState` serialized to JSON. + - `resolution`: a JSON payload containing: + ```json + { + "converged_value": "option_a", + "round": 3, + "final_values": { + "alice": "option_a", + "bob": "option_a" + } + } + ``` +6. If not converged → returns `PersistState` with the updated state. + +Non-`Contribute` messages return `NoOp`. + +### Convergence Strategy: `all_equal` + +The only currently supported convergence strategy. Resolution triggers when: + +1. Every participant in the session's participant list has made at least one contribution. +2. All contribution values are identical. + +If any participant has not contributed, or if any two contributions differ, convergence has not been reached and the session remains open. + +### Round Counting + +- Round starts at `0`. +- Each time a participant submits a **new or changed** value, the round increments by 1. +- Re-submitting the **same** value does not increment the round — this prevents artificial round inflation. 
+- The final round number in the resolution tells you how many substantive value changes occurred across all participants. -| State | Can receive messages? | Can transition to | -|-------|----------------------|-------------------| -| OPEN | Yes | RESOLVED, EXPIRED | -| RESOLVED | No | (none - terminal) | -| EXPIRED | No | (none - terminal) | +--- -Resolution is now **mode-driven** — the runtime applies whatever `ModeResponse` the mode returns, rather than checking for hardcoded payloads. +## Validation Rules (Complete) -## Validation Rules +The following validation rules are applied in order. The first failing rule produces the error; subsequent rules are not checked. -The server validates every message before processing. Here are all the checks: +### 1. Protocol Version -### 1. Version Check ``` -IF macp_version != "v1" -THEN reject with InvalidMacpVersion +IF macp_version != "1.0" +THEN reject with UNSUPPORTED_PROTOCOL_VERSION ``` -### 2. Required Fields Check +This is checked in the gRPC adapter before any runtime processing. + +### 2. Required Fields + ``` -IF session_id is empty OR message_id is empty -THEN reject with InvalidEnvelope +IF message_type != "Signal": + IF session_id is empty OR message_id is empty + THEN reject with INVALID_ENVELOPE + +IF message_type == "Signal": + IF message_id is empty + THEN reject with INVALID_ENVELOPE + (session_id may be empty) ``` -### 3. Mode Check (for SessionStart) +### 3. Mode Resolution (SessionStart) + ``` -IF mode is not registered -THEN reject with UnknownMode +IF message_type == "SessionStart": + Resolve mode name (empty → "macp.mode.decision.v1") + IF mode not in registered modes + THEN reject with MODE_NOT_SUPPORTED ``` -### 4. Session Existence (for SessionStart) +### 4. 
SessionStart Payload Parsing + ``` -IF message_type == "SessionStart" AND session exists -THEN reject with DuplicateSession +IF message_type == "SessionStart": + Decode payload as protobuf SessionStartPayload + IF decode fails THEN reject with INVALID_ENVELOPE + + Extract ttl_ms from payload + IF ttl_ms < 0 THEN reject with INVALID_ENVELOPE + IF ttl_ms > 86,400,000 THEN reject with INVALID_ENVELOPE + IF ttl_ms == 0 THEN use default (60,000 ms) ``` -### 5. Session Existence (for other messages) +### 5. Session Existence (SessionStart) + ``` -IF message_type != "SessionStart" AND session does not exist -THEN reject with UnknownSession +IF message_type == "SessionStart": + IF session already exists: + IF message_id matches existing session's seen_message_ids + THEN return ok=true, duplicate=true (idempotent) + ELSE reject with INVALID_ENVELOPE (duplicate session) ``` -### 6. Session State Check +### 6. Session Existence (Regular Messages) + ``` -IF session exists AND session.state != OPEN -THEN reject with SessionNotOpen +IF message_type is not "SessionStart" and not "Signal": + IF session does not exist + THEN reject with SESSION_NOT_FOUND ``` -### 7. TTL Payload Check (for SessionStart) +### 7. Message Deduplication (Regular Messages) + ``` -IF message_type == "SessionStart" AND payload is non-empty -THEN parse payload as JSON {"ttl_ms": } -IF invalid UTF-8 or invalid JSON THEN reject with InvalidEnvelope -IF ttl_ms <= 0 OR ttl_ms > 86400000 THEN reject with InvalidTtl +IF message_id is in session.seen_message_ids +THEN return ok=true, duplicate=true (idempotent) ``` -### 8. TTL Expiry Check (for non-SessionStart) +### 8. 
TTL Expiry Check + ``` -IF session.state == OPEN AND current_time > session.ttl_expiry -THEN log internal TtlExpired entry, transition session to EXPIRED, reject with TtlExpired +IF session.state == Open AND current_time > session.ttl_expiry: + Log internal TtlExpired entry + Transition session to Expired + reject with SESSION_NOT_OPEN ``` -## Error Codes +### 9. Session State Check -All possible errors: +``` +IF session.state != Open +THEN reject with SESSION_NOT_OPEN +``` -| Error Code | When it occurs | How to fix | -|------------|----------------|------------| -| `InvalidMacpVersion` | `macp_version` is not "v1" | Use `macp_version: "v1"` | -| `InvalidEnvelope` | Missing required fields | Include all required fields | -| `DuplicateSession` | SessionStart for existing session | Use a different `session_id` | -| `UnknownSession` | Message for non-existent session | Send SessionStart first | -| `SessionNotOpen` | Message to resolved/expired session | Can't send more messages | -| `TtlExpired` | Session TTL has elapsed | Create a new session | -| `InvalidTtl` | TTL value out of range (<=0 or >24h) | Use ttl_ms in range 1..=86400000 | -| `UnknownMode` | Mode field references unregistered mode | Use "decision" or "multi_round" | -| `InvalidModeState` | Internal mode state is corrupted | Typically an internal error | -| `InvalidPayload` | Payload doesn't match mode's expected format | Check mode-specific payload requirements | +### 10. Participant Validation -## gRPC Service Definition +``` +IF session.participants is non-empty AND sender not in session.participants +THEN reject with INVALID_ENVELOPE +``` -In Protocol Buffers syntax: +### 11. 
Mode Dispatch -```protobuf -service MACPService { - rpc SendMessage(Envelope) returns (Ack); - rpc GetSession(SessionQuery) returns (SessionInfo); -} ``` +Call mode.on_message(session, envelope) +IF mode returns Err(e) THEN reject with corresponding error code +ELSE apply ModeResponse +``` + +--- + +## Error Codes (Complete) + +| RFC Error Code | Internal Error | When It Occurs | +|----------------|---------------|----------------| +| `UNSUPPORTED_PROTOCOL_VERSION` | `InvalidMacpVersion` | `macp_version` is not `"1.0"` | +| `INVALID_ENVELOPE` | `InvalidEnvelope` | Missing required fields, or invalid payload encoding | +| `INVALID_ENVELOPE` | `DuplicateSession` | SessionStart for existing session (different message_id) | +| `INVALID_ENVELOPE` | `InvalidTtl` | TTL value out of range (< 0 or > 24h) | +| `INVALID_ENVELOPE` | `InvalidModeState` | Internal mode state cannot be deserialized | +| `INVALID_ENVELOPE` | `InvalidPayload` | Payload does not match mode's expected format | +| `SESSION_NOT_FOUND` | `UnknownSession` | Message for non-existent session | +| `SESSION_NOT_OPEN` | `SessionNotOpen` | Message to resolved or expired session | +| `SESSION_NOT_OPEN` | `TtlExpired` | Session TTL has elapsed | +| `MODE_NOT_SUPPORTED` | `UnknownMode` | Mode field references unregistered mode | +| `FORBIDDEN` | `Forbidden` | Operation not permitted | +| `UNAUTHENTICATED` | `Unauthenticated` | Authentication required | +| `DUPLICATE_MESSAGE` | `DuplicateMessage` | Explicit duplicate detection (distinct from idempotent dedup) | +| `PAYLOAD_TOO_LARGE` | `PayloadTooLarge` | Payload exceeds size limits | +| `RATE_LIMITED` | `RateLimited` | Too many requests | + +Note that several internal error variants map to `INVALID_ENVELOPE` — this groups related validation failures under a single client-facing code while preserving distinct internal error variants for logging and debugging. 
-**What this means:** -- Service name: `MACPService` -- Two operations: `SendMessage` and `GetSession` -- `SendMessage`: Takes `Envelope`, returns `Ack` -- `GetSession`: Takes `SessionQuery`, returns `SessionInfo` (or gRPC NOT_FOUND) -- Communication: Synchronous (client waits for response) +--- ## Transport The protocol uses **gRPC over HTTP/2**: -**Advantages:** -- Binary protocol (efficient) -- Type-safe (schema enforcement) -- Streaming support (future extension) -- Wide language support -- Built-in authentication (TLS) +- **Binary protocol** — efficient serialization via protobuf. +- **Type-safe** — schema enforcement at compile time. +- **Streaming support** — bidirectional streaming via `StreamSession`. +- **Wide language support** — gRPC clients available for Python, JavaScript, Go, Java, C++, and more. +- **Built-in TLS** — secure transport via standard gRPC TLS configuration. -**Default address:** `127.0.0.1:50051` +**Default address:** `127.0.0.1:50051` (hardcoded in `src/main.rs`). -## Future Extensions (Planned) +--- -### 1. Background TTL Cleanup -Currently, TTL is enforced on message receipt (lazy expiry). Future versions will: -- Run a background task to periodically remove expired sessions from memory -- Reduce memory footprint for long-running servers +## Best Practices -### 2. Replay Engine -Replay session logs to reconstruct state for debugging and auditing. +### For Clients -### 3. GetSessionLog RPC -New RPC to query session event logs: -```protobuf -rpc GetSessionLog(SessionQuery) returns (SessionLog); -``` +1. **Always call Initialize first** — Negotiate the protocol version and discover capabilities before sending session messages. -### 4. Participant Membership Gating -Enforce that only registered participants can send messages to a session. +2. **Check `Ack.ok` and `Ack.error`** — Don't just check the boolean; inspect the `MACPError.code` for specific error handling. -### 5. 
Additional Convergence Strategies -- `majority` — resolve when a majority of participants agree -- `threshold` — resolve when N participants agree +3. **Use unique message IDs** — UUIDs are recommended. This enables safe retries via the deduplication mechanism. -### 6. Streaming -Support for bidirectional streaming: -```protobuf -rpc StreamMessages(stream Envelope) returns (stream Ack); -``` +4. **Handle duplicates gracefully** — If `Ack.duplicate` is `true`, the message was already processed. Treat this as success. -## Comparison to Other Protocols +5. **Send SessionStart first** — Before any other messages for a session. -### vs HTTP REST -- MACP: Binary, type-safe, generated clients -- REST: Text-based, flexible, manual clients +6. **Respect terminal states** — Once a session is `RESOLVED` or `EXPIRED`, don't send more messages. Cache the state locally. -### vs WebSockets -- MACP: RPC-style (request/response pairs) -- WebSockets: Raw bidirectional streaming +7. **Use CancelSession for cleanup** — Don't let sessions hang until TTL expiry if you know the coordination is over. -### vs Message Queues (RabbitMQ, Kafka) -- MACP: Synchronous acknowledgment, session-oriented -- Message Queues: Asynchronous, topic-oriented +8. **Use ListModes for discovery** — Query available modes and their message types before creating sessions. -## Best Practices +9. **Use GetSession to check state** — Useful for resuming after disconnection or verifying session state. -### For Clients +10. **Declare participants when appropriate** — Use the `participants` field in `SessionStartPayload` to restrict who can contribute, especially for convergence-based modes. + +--- -1. **Always check Ack.accepted** - ```rust - let ack = client.send_message(env).await?.into_inner(); - if !ack.accepted { - println!("Error: {}", ack.error); - } - ``` +## Future Extensions + +### 1. Background TTL Cleanup +Currently, TTL is enforced lazily. Future versions will run a background eviction task. -2. 
**Use unique message IDs** - - UUIDs are recommended - - Helps with debugging and tracing +### 2. Replay Engine +Replay session logs to reconstruct state for debugging and auditing. + +### 3. GetSessionLog RPC +Query session event logs for audit trails. -3. **Send SessionStart first** - - Before any other messages - - Keep track of which sessions you've started +### 4. Additional Convergence Strategies +- `majority` — resolve when a majority of participants agree. +- `threshold` — resolve when N participants agree. +- `weighted` — resolve based on weighted votes. -4. **Handle all error codes** - - Don't just check `accepted` - - Log specific errors for debugging +### 5. Persistent Storage +Durable session state and log storage (e.g., to SQLite or Postgres). -5. **Respect Resolved state** - - Don't send messages after resolve - - Cache the state locally to avoid unnecessary calls +### 6. Authentication and Authorization +Token-based authentication and role-based access control for sessions. -6. **Use GetSession to check state** - - Query session state before sending messages - - Useful for resuming after disconnection +--- ## Next Steps -- Read [architecture.md](./architecture.md) to understand how this is implemented -- Read [examples.md](./examples.md) for practical code examples +- Read **[architecture.md](./architecture.md)** to understand how this is implemented internally. +- Read **[examples.md](./examples.md)** for practical code examples with the new v0.2 RPCs. diff --git a/proto/buf.yaml b/proto/buf.yaml index 0ed83f4..0a714b4 100644 --- a/proto/buf.yaml +++ b/proto/buf.yaml @@ -1,4 +1,4 @@ -version: v1 +version: v2 lint: use: - STANDARD