diff --git a/astro.config.mjs b/astro.config.mjs
index 3fffb88..7083338 100644
--- a/astro.config.mjs
+++ b/astro.config.mjs
@@ -78,6 +78,19 @@ export default defineConfig({
             { label: 'OpenShift Setup', slug: 'deployment/openshift-setup' },
           ],
         },
+        {
+          label: 'MCP',
+          items: [
+            { label: 'Overview', slug: 'mcp' },
+            { label: 'Installation', slug: 'mcp/installation' },
+            { label: 'Quick Start', slug: 'mcp/quickstart' },
+            { label: 'Configuration', slug: 'mcp/configuration' },
+            { label: 'Tool Reference', slug: 'mcp/tools' },
+            { label: 'Resource Reference', slug: 'mcp/resources' },
+            { label: 'Prompt Reference', slug: 'mcp/prompts' },
+            { label: 'Troubleshooting', slug: 'mcp/troubleshooting' },
+          ],
+        },
         {
           label: 'Adapters',
           items: [
diff --git a/src/content/docs/mcp/configuration.md b/src/content/docs/mcp/configuration.md
new file mode 100644
index 0000000..cb5d4ed
--- /dev/null
+++ b/src/content/docs/mcp/configuration.md
@@ -0,0 +1,169 @@
+---
+title: "Configuration"
+---
+
+The EvalHub MCP server can be configured through CLI flags, a YAML configuration file, or environment variables. When multiple sources set the same value, **CLI flags take highest precedence**, followed by the config file, then environment variables.
+
+## CLI Flags
+
+```
+evalhub-mcp [flags]
+```
+
+| Flag | Default | Description |
+|------|---------|-------------|
+| `--transport` | `stdio` | Transport mode: `stdio`, `http`, or `http-sse` |
+| `--host` | `localhost` | Bind address for HTTP transports |
+| `--port` | `3001` | Port for HTTP transports |
+| `--config` | — | Path to YAML configuration file |
+| `--insecure` | `false` | Skip TLS certificate verification for the EvalHub backend |
+| `--tls-cert` | — | Path to TLS certificate file (for HTTPS on the MCP server) |
+| `--tls-key` | — | Path to TLS private key file (for HTTPS on the MCP server) |
+| `--version` | — | Print version and exit |
+
+Both `--tls-cert` and `--tls-key` must be provided together. When set, the HTTP server listens over HTTPS.
+
+## Configuration File
+
+Pass `--config <path>` to load settings from a YAML file:
+
+```yaml
+# evalhub-mcp.yaml
+base_url: https://evalhub.apps.my-cluster.example.com
+token: <your-api-token>
+tenant: my-team
+transport: http
+host: 0.0.0.0
+port: 3001
+insecure: false
+```
+
+## Environment Variables
+
+| Variable | Description |
+|----------|-------------|
+| `EVALHUB_BASE_URL` | EvalHub backend API URL |
+| `EVALHUB_TOKEN` | Authentication token |
+| `EVALHUB_TENANT` | Tenant identifier |
+| `EVALHUB_TRANSPORT` | Transport mode (`stdio`, `http`, `http-sse`) |
+| `EVALHUB_HOST` | HTTP bind address |
+| `EVALHUB_PORT` | HTTP port |
+| `EVALHUB_INSECURE` | Skip TLS verification for EvalHub backend (`true`/`false`) |
+| `EVALHUB_TLS_CERT_FILE` | Path to TLS certificate |
+| `EVALHUB_TLS_KEY_FILE` | Path to TLS private key |
+| `EVALHUB_LIST_PAGE_LIMIT` | Default page size for list resources |
+
+## Precedence
+
+When the same setting is specified in multiple places:
+
+1. **CLI flags** (highest priority)
+2. **YAML config file** (if `--config` is used)
+3. **Environment variables** (lowest priority)
+
+For example, if `EVALHUB_TRANSPORT=http` is set as an environment variable but you run `evalhub-mcp --transport stdio`, the server uses stdio.
+
+## Kubernetes Operator
+
+When EvalHub is deployed via the TrustyAI operator, the MCP server is configured through the `spec.mcp` section of the EvalHub custom resource:
+
+```yaml
+apiVersion: trustyai.opendatahub.io/v1alpha1
+kind: EvalHub
+metadata:
+  name: evalhub
+  namespace: my-namespace
+spec:
+  replicas: 1
+  mcp:
+    enabled: true
+    replicas: 1
+    transport: http
+    image: quay.io/evalhub/evalhub-mcp:latest
+    authSecret: mcp-auth-token
+    resources:
+      requests:
+        cpu: 100m
+        memory: 128Mi
+      limits:
+        cpu: 500m
+        memory: 256Mi
+    env:
+      - name: LOG_LEVEL
+        value: "debug"
+```
+
+### Operator MCP Fields
+
+| Field | Type | Default | Description |
+|-------|------|---------|-------------|
+| `enabled` | bool | `false` | Enable MCP server deployment |
+| `replicas` | int | `1` | Number of MCP server replicas |
+| `transport` | string | `http` | Client-facing transport (`http` or `http-sse`) |
+| `evalHubTransport` | string | `http` | Transport for internal EvalHub API calls |
+| `image` | string | `quay.io/evalhub/evalhub-mcp:latest` | Container image override |
+| `authSecret` | string | — | Kubernetes Secret containing a `token` key for EvalHub API auth |
+| `resources` | ResourceRequirements | 100m/128Mi request, 500m/256Mi limit | Container resource requests and limits |
+| `env` | []EnvVar | — | Additional environment variables |
+
+### What the Operator Creates
+
+When `spec.mcp.enabled` is `true`, the operator automatically creates:
+
+- **Deployment** (`<name>-mcp`): Runs the MCP server container with health checks
+- **Service** (`<name>-mcp`): ClusterIP service on port 8443
+- **ConfigMap** (`<name>-mcp-config`): Server configuration YAML
+- **Route** (OpenShift only, `<name>-mcp`): Edge-terminated TLS route for external access
+
+TLS certificates are automatically provisioned via OpenShift service signing.
+
+### Checking MCP Status
+
+```bash
+kubectl get evalhub <name> -o jsonpath='{.status.mcp}'
+```
+
+The status includes:
+- `phase`: `Pending`, `Ready`, `Error`, or `Disabled`
+- `ready`: Whether the MCP deployment is available
+- `url`: Internal service URL
+
+## Example Configurations
+
+### Local Development
+
+```bash
+export EVALHUB_BASE_URL="http://localhost:8080"
+export EVALHUB_TOKEN="dev-token"
+export EVALHUB_TENANT="default"
+
+evalhub-mcp --transport stdio
+```
+
+### Shared Team Server
+
+```yaml
+# team-mcp.yaml
+base_url: https://evalhub.apps.cluster.example.com
+token: <team-service-account-token>
+tenant: team-a
+transport: http
+host: 0.0.0.0
+port: 3001
+```
+
+```bash
+evalhub-mcp --config team-mcp.yaml
+```
+
+### Secure Production Server
+
+```bash
+evalhub-mcp \
+  --transport http \
+  --host 0.0.0.0 \
+  --port 8443 \
+  --tls-cert /etc/tls/server.crt \
+  --tls-key /etc/tls/server.key \
+  --config /etc/evalhub-mcp/config.yaml
+```
diff --git a/src/content/docs/mcp/index.md b/src/content/docs/mcp/index.md
new file mode 100644
index 0000000..dc94ff9
--- /dev/null
+++ b/src/content/docs/mcp/index.md
@@ -0,0 +1,62 @@
+---
+title: "MCP Overview"
+---
+
+The EvalHub MCP server implements the [Model Context Protocol](https://modelcontextprotocol.io/) (MCP), enabling AI coding assistants such as Claude Code, VS Code with GitHub Copilot, and other MCP-compatible clients to interact with EvalHub directly from a conversation.
+
+## What is MCP?
+
+MCP is an open standard that lets AI assistants connect to external tools and data sources through a unified protocol. Instead of manually copying commands or switching between terminal windows, your AI assistant can submit evaluations, check job status, browse benchmarks, and follow structured evaluation workflows — all through natural language.
+
+## What the EvalHub MCP Server Provides
+
+### Tools
+
+Actions the AI assistant can execute on your behalf:
+
+| Tool | Description |
+|------|-------------|
+| `submit_evaluation` | Submit a new model evaluation job with benchmarks or a collection |
+| `get_job_status` | Check job progress, state, and per-benchmark status |
+| `cancel_job` | Cancel a running or pending evaluation job |
+
+### Resources
+
+Read-only data the assistant can query using `evalhub://` URIs:
+
+| Resource | URI | Description |
+|----------|-----|-------------|
+| Providers | `evalhub://providers` | List evaluation providers and their benchmarks |
+| Benchmarks | `evalhub://benchmarks` | Browse benchmarks, filter by label |
+| Collections | `evalhub://collections` | List pre-defined benchmark collections |
+| Jobs | `evalhub://jobs` | List evaluation jobs, filter by status |
+| Server Version | `evalhub://server/version` | Server build and version metadata |
+
+All list resources support pagination (`?limit=N&offset=N`). Benchmarks support label filtering (`?label=rag&label=safety`). Jobs support status filtering (`?status=running`).
+
+### Prompts
+
+Structured conversation templates that guide the assistant through complex workflows:
+
+| Prompt | Description |
+|--------|-------------|
+| `edd_workflow` | Evaluation-Driven Development cycle: Define, Measure, Iterate |
+| `evaluate_model` | Step-by-step model evaluation from discovery to results |
+| `compare_runs` | Compare metrics across two or more evaluation jobs |
+
+## Transport Modes
+
+The MCP server supports multiple transport modes for different deployment scenarios:
+
+| Mode | Flag | Use Case |
+|------|------|----------|
+| **stdio** | `--transport stdio` | Local development. The AI client launches the server as a subprocess. |
+| **Streamable HTTP** | `--transport http` | Remote or shared deployments. The server runs as a standalone HTTP service. |
+| **Legacy HTTP+SSE** | `--transport http-sse` | Older MCP clients that don't support Streamable HTTP. |
+
+## Next Steps
+
+- [Install the MCP server](/mcp/installation/) on your platform
+- Follow the [Quick Start](/mcp/quickstart/) to connect your AI assistant in five steps
+- Browse the [Tool](/mcp/tools/), [Resource](/mcp/resources/), and [Prompt](/mcp/prompts/) references
+- See [Configuration](/mcp/configuration/) for all available options
diff --git a/src/content/docs/mcp/installation.mdx b/src/content/docs/mcp/installation.mdx
new file mode 100644
index 0000000..a6c7633
--- /dev/null
+++ b/src/content/docs/mcp/installation.mdx
@@ -0,0 +1,108 @@
+---
+title: "Installation"
+---
+
+import { Tabs, TabItem } from '@astrojs/starlight/components';
+
+The `evalhub-mcp` binary is a standalone server that connects AI assistants to EvalHub. It is available for macOS (Intel and Apple Silicon), Linux (amd64 and arm64), and as a container image.
+
+## Prerequisites
+
+- An EvalHub instance (running locally or on a cluster) with a reachable API endpoint
+- An authentication token for your EvalHub tenant
+- An MCP-compatible AI client ([Claude Code](https://docs.anthropic.com/en/docs/claude-code), [VS Code with GitHub Copilot](https://code.visualstudio.com/), or another MCP client)
+
+## Install the Binary
+
+<Tabs>
+<TabItem label="Homebrew (macOS / Linux)">
+
+```bash
+brew install evalhub-mcp
+```
+
+Verify:
+
+```bash
+evalhub-mcp --version
+```
+
+</TabItem>
+<TabItem label="GitHub Releases">
+
+Download the binary for your platform from [GitHub Releases](https://github.com/eval-hub/eval-hub/releases):
+
+```bash
+# macOS (Apple Silicon)
+curl -Lo evalhub-mcp https://github.com/eval-hub/eval-hub/releases/latest/download/evalhub-mcp-darwin-arm64
+
+# macOS (Intel)
+curl -Lo evalhub-mcp https://github.com/eval-hub/eval-hub/releases/latest/download/evalhub-mcp-darwin-amd64
+
+# Linux (amd64)
+curl -Lo evalhub-mcp https://github.com/eval-hub/eval-hub/releases/latest/download/evalhub-mcp-linux-amd64
+
+# Linux (arm64)
+curl -Lo evalhub-mcp https://github.com/eval-hub/eval-hub/releases/latest/download/evalhub-mcp-linux-arm64
+```
+
+Make it executable and move it to your PATH:
+
+```bash
+chmod +x evalhub-mcp
+sudo mv evalhub-mcp /usr/local/bin/
+```
+
+Verify:
+
+```bash
+evalhub-mcp --version
+```
+
+</TabItem>
+<TabItem label="Build from Source">
+
+Requires Go 1.25 or later.
+
+```bash
+git clone https://github.com/eval-hub/eval-hub.git
+cd eval-hub
+make build-mcp
+```
+
+The binary is placed in `./bin/evalhub-mcp`. Move it to your PATH:
+
+```bash
+sudo mv ./bin/evalhub-mcp /usr/local/bin/
+```
+
+</TabItem>
+</Tabs>
+
+## Kubernetes / OpenShift Deployment
+
+If EvalHub is managed by the TrustyAI operator, the MCP server can be deployed alongside it as a separate Deployment by enabling it in the EvalHub custom resource:
+
+```yaml
+apiVersion: trustyai.opendatahub.io/v1alpha1
+kind: EvalHub
+metadata:
+  name: evalhub
+spec:
+  replicas: 1
+  mcp:
+    enabled: true
+    replicas: 1
+```
+
+The operator creates a Deployment, Service, ConfigMap, and (on OpenShift) a Route for the MCP server automatically. See [Configuration](/mcp/configuration/#kubernetes-operator) for all available fields.
+
+## Using the EvalHub CLI as an MCP Server
+
+If you already have the [EvalHub CLI](/guides/cli/) installed and configured, you can use it as an MCP server directly without installing `evalhub-mcp` separately:
+
+```bash
+claude mcp add evalhub -- evalhub --profile <profile-name> mcp
+```
+
+This uses the CLI's built-in `mcp` subcommand with an existing CLI profile for authentication. See the [Quick Start](/mcp/quickstart/) for the full setup flow using either approach.
diff --git a/src/content/docs/mcp/prompts.md b/src/content/docs/mcp/prompts.md
new file mode 100644
index 0000000..fa61854
--- /dev/null
+++ b/src/content/docs/mcp/prompts.md
@@ -0,0 +1,93 @@
+---
+title: "Prompt Reference"
+---
+
+MCP prompts are structured conversation templates that guide the AI assistant through complex evaluation workflows. Each prompt returns a sequence of messages that the assistant uses to drive the interaction.
+
+## edd_workflow
+
+Guides the AI assistant through the Evaluation-Driven Development (EDD) cycle — a structured methodology for building AI applications with continuous evaluation feedback.
+
+The workflow follows three phases:
+1. **Define** — Establish evaluation criteria for the application type
+2. **Measure** — Run benchmarks and collect metrics
+3. **Iterate** — Analyze results and improve
+
+### Arguments
+
+| Argument | Required | Description |
+|----------|----------|-------------|
+| `application_type` | Yes | Type of application being evaluated. One of: `rag`, `agent`, `safety`, `classifier` |
+
+Each application type loads domain-specific guidance. For example, `rag` focuses on retrieval quality and context relevance, while `safety` emphasizes bias detection and harmful content evaluation.
+
+### Example
+
+**Prompt:**
+```
+Use the edd_workflow prompt for a RAG application.
+```
+
+The assistant receives structured guidance for defining RAG-specific evaluation criteria, selecting appropriate benchmarks, running evaluations, and iterating on results.
+
+### Valid Application Types
+
+| Type | Focus |
+|------|-------|
+| `rag` | Retrieval quality, context relevance, answer accuracy |
+| `agent` | Task completion, tool use, multi-step reasoning |
+| `safety` | Bias detection, harmful content, fairness |
+| `classifier` | Classification accuracy, precision, recall |
+
+## evaluate_model
+
+Step-by-step guidance for evaluating a model end-to-end: discover available benchmarks, select appropriate ones, submit an evaluation, and review results.
+
+### Arguments
+
+| Argument | Required | Description |
+|----------|----------|-------------|
+| `model_url` | No | URL of the model inference endpoint. If omitted, the assistant asks for it. |
+| `benchmark_preferences` | No | Preferences for benchmark selection (e.g. "reasoning", "safety", "general"). If omitted, the assistant helps you choose. |
+
+### Example
+
+**With model URL:**
+```
+Use the evaluate_model prompt with model URL https://llama3.example.com/v1.
+```
+
+The assistant guides you through benchmark selection and evaluation submission for the specified model.
+
+**Without model URL:**
+```
+Use the evaluate_model prompt.
+```
+
+The assistant first helps you identify your model endpoint, then proceeds with benchmark discovery and evaluation.
+
+## compare_runs
+
+Guidance for comparing two or more evaluation runs side-by-side: select jobs, fetch results, compare metrics, and summarize findings.
+
+### Arguments
+
+| Argument | Required | Description |
+|----------|----------|-------------|
+| `job_ids` | No | Comma-separated list of evaluation job IDs to compare. If omitted, the assistant helps you select jobs. |
+
+### Example
+
+**With job IDs:**
+```
+Use the compare_runs prompt to compare jobs a1b2c3d4 and e5f6g7h8.
+```
+
+**Without job IDs:**
+```
+Use the compare_runs prompt.
+```
+
+The assistant fetches the list of completed jobs and helps you select which ones to compare.
+
+A comparison requires at least two job IDs, whether they are passed in `job_ids` or selected with the assistant's help.
diff --git a/src/content/docs/mcp/quickstart.mdx b/src/content/docs/mcp/quickstart.mdx
new file mode 100644
index 0000000..b6d52f9
--- /dev/null
+++ b/src/content/docs/mcp/quickstart.mdx
@@ -0,0 +1,259 @@
+---
+title: "Quick Start"
+---
+
+import { Steps, Tabs, TabItem } from '@astrojs/starlight/components';
+
+Connect your AI assistant to EvalHub in five steps.
+
+## Claude Code
+
+<Tabs>
+<TabItem label="stdio (Recommended)">
+
+The stdio transport is simplest for individual use — Claude Code manages the server process automatically.
+
+<Steps>
+
+1. **Install the MCP server**
+
+   ```bash
+   brew install evalhub-mcp
+   ```
+
+   Or [download the binary](/mcp/installation/) for your platform.
+
+2. **Set your EvalHub credentials**
+
+   ```bash
+   export EVALHUB_BASE_URL="https://<your-evalhub-instance>"
+   export EVALHUB_TOKEN="<your-token>"
+   export EVALHUB_TENANT="<your-tenant>"
+   ```
+
+3. **Register the server with Claude Code**
+
+   ```bash
+   claude mcp add evalhub --transport stdio -- evalhub-mcp
+   ```
+
+4. **Verify the connection**
+
+   ```bash
+   claude mcp list
+   ```
+
+   `evalhub` should appear with transport `stdio` and status available.
+
+5. **Start using it**
+
+   Open a Claude Code conversation and try:
+
+   ```
+   List the available evaluation providers from EvalHub.
+   ```
+
+</Steps>
+
+</TabItem>
+<TabItem label="HTTP">
+
+Use HTTP transport when the MCP server should run as a shared service (e.g. on a remote machine or for team use).
+
+<Steps>
+
+1. **Install the MCP server**
+
+   ```bash
+   brew install evalhub-mcp
+   ```
+
+2. **Start the server**
+
+   ```bash
+   export EVALHUB_BASE_URL="https://<your-evalhub-instance>"
+   export EVALHUB_TOKEN="<your-token>"
+   export EVALHUB_TENANT="<your-tenant>"
+
+   evalhub-mcp --transport http --host localhost --port 3001
+   ```
+
+   For dev environments with self-signed certificates:
+
+   ```bash
+   evalhub-mcp --transport http --host localhost --port 3001 --insecure
+   ```
+
+3. **Register with Claude Code** (in a separate terminal)
+
+   ```bash
+   claude mcp add evalhub --transport http http://localhost:3001
+   ```
+
+4. **Verify the connection**
+
+   ```bash
+   claude mcp list
+   ```
+
+5. **Start using it**
+
+   Open a Claude Code conversation and try:
+
+   ```
+   List the available evaluation providers from EvalHub.
+   ```
+
+</Steps>
+
+</TabItem>
+</Tabs>
+
+### Using the EvalHub CLI instead
+
+If you have the [EvalHub CLI](/guides/cli/) installed, you can use it as the MCP server directly:
+
+```bash
+# Configure a CLI profile for the agent
+evalhub --profile agent config set base_url https://<your-evalhub-instance>
+evalhub --profile agent config set token <your-token>
+evalhub --profile agent config set tenant <your-tenant>
+
+# Register with Claude Code
+claude mcp add evalhub -- evalhub --profile agent mcp
+```
+
+## VS Code / GitHub Copilot
+
+<Tabs>
+<TabItem label="stdio (Recommended)">
+
+<Steps>
+
+1. **Install the MCP server**
+
+   ```bash
+   brew install evalhub-mcp
+   ```
+
+2. **Set your EvalHub credentials**
+
+   ```bash
+   export EVALHUB_BASE_URL="https://<your-evalhub-instance>"
+   export EVALHUB_TOKEN="<your-token>"
+   export EVALHUB_TENANT="<your-tenant>"
+   ```
+
+3. **Add to VS Code settings**
+
+   Open your VS Code `settings.json` (Cmd/Ctrl+Shift+P → "Preferences: Open User Settings (JSON)") and add:
+
+   ```json
+   {
+     "mcp": {
+       "servers": {
+         "evalhub": {
+           "command": "evalhub-mcp",
+           "args": [],
+           "env": {
+             "EVALHUB_BASE_URL": "https://<your-evalhub-instance>",
+             "EVALHUB_TOKEN": "<your-token>",
+             "EVALHUB_TENANT": "<your-tenant>"
+           }
+         }
+       }
+     }
+   }
+   ```
+
+4. **Reload VS Code**
+
+   Restart the window or run "Developer: Reload Window" from the command palette.
+
+5. **Start using it**
+
+   In GitHub Copilot Chat, ask:
+
+   ```
+   @evalhub List the available evaluation providers.
+   ```
+
+</Steps>
+
+</TabItem>
+<TabItem label="HTTP">
+
+<Steps>
+
+1. **Install and start the MCP server**
+
+   ```bash
+   brew install evalhub-mcp
+
+   export EVALHUB_BASE_URL="https://<your-evalhub-instance>"
+   export EVALHUB_TOKEN="<your-token>"
+   export EVALHUB_TENANT="<your-tenant>"
+
+   evalhub-mcp --transport http --host localhost --port 3001
+   ```
+
+2. **Add to VS Code settings**
+
+   ```json
+   {
+     "mcp": {
+       "servers": {
+         "evalhub": {
+           "url": "http://localhost:3001"
+         }
+       }
+     }
+   }
+   ```
+
+3. **Reload VS Code**
+
+   Restart the window or run "Developer: Reload Window".
+
+4. **Verify**
+
+   Open GitHub Copilot Chat and ask about available tools.
+
+5. **Start using it**
+
+   ```
+   @evalhub List the available benchmarks.
+   ```
+
+</Steps>
+
+</TabItem>
+</Tabs>
+
+## What to Try Next
+
+Once connected, try these example prompts with your AI assistant:
+
+**Browse what's available:**
+```
+Show me all evaluation providers and their benchmarks.
+```
+
+**Submit an evaluation:**
+```
+Submit an evaluation named "my-first-eval" using the leaderboard-v2 collection
+against my model at http://localhost:11434/v1 named qwen2.5:1.5b.
+```
+
+**Check status:**
+```
+What's the status of my evaluation job?
+```
+
+**Follow a structured workflow:**
+```
+Use the edd_workflow prompt for a RAG application.
+```
+
+For the full list of available tools, resources, and prompts, see the reference pages:
+- [Tools](/mcp/tools/)
+- [Resources](/mcp/resources/)
+- [Prompts](/mcp/prompts/)
diff --git a/src/content/docs/mcp/resources.md b/src/content/docs/mcp/resources.md
new file mode 100644
index 0000000..378ff97
--- /dev/null
+++ b/src/content/docs/mcp/resources.md
@@ -0,0 +1,204 @@
+---
+title: "Resource Reference"
+---
+
+MCP resources are read-only data endpoints that the AI assistant can query. All EvalHub resources use the `evalhub://` URI scheme.
+
+## Providers
+
+### List all providers
+
+| | |
+|---|---|
+| **URI** | `evalhub://providers` |
+| **Description** | List all registered evaluation providers |
+| **Pagination** | `?limit=N&offset=N` |
+
+**Example response:**
+```json
+[
+  {
+    "resource": { "id": "lm_evaluation_harness" },
+    "name": "LM Evaluation Harness",
+    "description": "EleutherAI language model evaluation framework"
+  },
+  {
+    "resource": { "id": "guidellm" },
+    "name": "GuideLLM",
+    "description": "Performance benchmarking"
+  }
+]
+```
+
+### Get a provider by ID
+
+| | |
+|---|---|
+| **URI** | `evalhub://providers/{id}` |
+| **Description** | Get a specific evaluation provider and its details |
+
+**Example:** `evalhub://providers/lm_evaluation_harness`
+
+## Benchmarks
+
+### List all benchmarks
+
+| | |
+|---|---|
+| **URI** | `evalhub://benchmarks` |
+| **Description** | List all benchmarks across all providers |
+| **Pagination** | `?limit=N&offset=N` |
+
+### Filter by label
+
+| | |
+|---|---|
+| **URI** | `evalhub://benchmarks?label=<label>` |
+| **Description** | Filter benchmarks by one or more labels |
+| **Labels** | `rag`, `safety`, `agents`, and others depending on the server |
+
+Multiple labels can be specified: `evalhub://benchmarks?label=rag&label=safety`
+
+**Example response:**
+```json
+[
+  {
+    "resource": { "id": "mmlu" },
+    "name": "Massive Multitask Language Understanding",
+    "provider_id": "lm_evaluation_harness",
+    "labels": ["reasoning", "knowledge"]
+  }
+]
+```
+
+### Get a benchmark by ID
+
+| | |
+|---|---|
+| **URI** | `evalhub://benchmarks/{id}` |
+| **Description** | Get a specific benchmark with full details |
+
+**Example:** `evalhub://benchmarks/mmlu`
+
+## Collections
+
+### List all collections
+
+| | |
+|---|---|
+| **URI** | `evalhub://collections` |
+| **Description** | List all pre-defined benchmark collections |
+| **Pagination** | `?limit=N&offset=N` |
+
+**Example response:**
+```json
+[
+  {
+    "resource": { "id": "leaderboard-v2" },
+    "name": "Leaderboard v2",
+    "description": "Standard leaderboard benchmark collection",
+    "benchmarks": [
+      { "id": "mmlu", "provider_id": "lm_evaluation_harness" },
+      { "id": "hellaswag", "provider_id": "lm_evaluation_harness" }
+    ]
+  }
+]
+```
+
+### Get a collection by ID
+
+| | |
+|---|---|
+| **URI** | `evalhub://collections/{id}` |
+| **Description** | Get a specific collection with its benchmark list |
+
+**Example:** `evalhub://collections/leaderboard-v2`
+
+## Jobs
+
+### List all jobs
+
+| | |
+|---|---|
+| **URI** | `evalhub://jobs` |
+| **Description** | List all evaluation jobs |
+| **Pagination** | `?limit=N&offset=N` |
+
+### Filter by status
+
+| | |
+|---|---|
+| **URI** | `evalhub://jobs?status=<status>` |
+| **Description** | Filter evaluation jobs by status |
+| **Valid statuses** | `pending`, `running`, `completed`, `failed`, `cancelled`, `partially_failed` |
+
+**Example:** `evalhub://jobs?status=running`
+
+### Get a job by ID
+
+| | |
+|---|---|
+| **URI** | `evalhub://jobs/{id}` |
+| **Description** | Get full job details including configuration, per-benchmark progress, results, and MLflow experiment URLs |
+
+**Example response:**
+```json
+{
+  "resource": { "id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890" },
+  "name": "llama3-safety-check",
+  "config": {
+    "model": { "url": "https://llama3.example.com/v1", "name": "llama-3.2-8b" },
+    "collection": { "id": "safety-and-fairness-v1" }
+  },
+  "status": {
+    "state": "completed",
+    "benchmarks": [
+      {
+        "id": "bbq",
+        "provider_id": "lm_evaluation_harness",
+        "status": "completed",
+        "started_at": "2026-03-25T10:00:00Z",
+        "completed_at": "2026-03-25T10:15:00Z"
+      }
+    ]
+  },
+  "results": {
+    "benchmarks": [
+      { "id": "bbq", "metrics": { "acc": 0.82 } }
+    ]
+  }
+}
+```
+
+## Server Version
+
+| | |
+|---|---|
+| **URI** | `evalhub://server/version` |
+| **Description** | Server version and build metadata |
+
+**Example response:**
+```json
+{
+  "version": "0.4.0",
+  "git_hash": "abc1234",
+  "build_date": "2026-03-20T12:00:00Z",
+  "go_version": "go1.25.0",
+  "os": "linux",
+  "arch": "amd64",
+  "mcp_library": "github.com/modelcontextprotocol/go-sdk",
+  "mcp_library_version": "v1.6.0"
+}
+```
+
+## Autocompletion
+
+The MCP server supports autocompletion for parameterized resource URIs. When typing a URI in a compatible client, the server can suggest:
+
+- Provider IDs for `evalhub://providers/{id}`
+- Benchmark IDs for `evalhub://benchmarks/{id}`
+- Collection IDs for `evalhub://collections/{id}`
+- Job IDs for `evalhub://jobs/{id}`
+- Status values for `evalhub://jobs?status=`
+- Labels for `evalhub://benchmarks?label=`
+
+Completion values are cached for 30 seconds to reduce backend calls.
diff --git a/src/content/docs/mcp/tools.md b/src/content/docs/mcp/tools.md
new file mode 100644
index 0000000..b0b0c1d
--- /dev/null
+++ b/src/content/docs/mcp/tools.md
@@ -0,0 +1,156 @@
+---
+title: "Tool Reference"
+---
+
+MCP tools are actions that the AI assistant can execute on your behalf. The EvalHub MCP server exposes three tools for managing evaluation jobs.
+
+## submit_evaluation
+
+Submit a new model evaluation job. Specify benchmarks individually or use a pre-defined collection.
+
+### Parameters
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `name` | string | Yes | Name for the evaluation job |
+| `description` | string | No | Human-readable description of what this evaluation measures |
+| `tags` | string[] | No | Tags for categorizing the evaluation |
+| `model` | object | Yes | Model to evaluate (see below) |
+| `benchmarks` | object[] | No | List of benchmarks to run. Provide `benchmarks` **or** `collection`, not both |
+| `collection` | object | No | Benchmark collection to run. Provide `collection` **or** `benchmarks`, not both |
+| `experiment` | object | No | MLflow experiment tracking configuration |
+
+**`model` object:**
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `url` | string | Yes | URL of the model inference endpoint |
+| `name` | string | Yes | Display name of the model |
+| `auth_secret` | string | No | Kubernetes secret reference for model authentication |
+
+**`benchmarks` array items:**
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `id` | string | Yes | Benchmark identifier |
+| `provider_id` | string | Yes | Evaluation provider that runs this benchmark |
+
+**`collection` object:**
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `id` | string | Yes | Collection identifier (e.g. `leaderboard-v2`) |
+
+**`experiment` object:**
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `name` | string | No | MLflow experiment name |
+| `tags` | object | No | Key-value tags for the MLflow experiment |
+| `artifact_location` | string | No | Storage location for experiment artifacts |
+
+### Response
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `job_id` | string | Unique identifier for the created job |
+| `state` | string | Initial job state (typically `pending`) |
+
+### Example
+
+**Prompt:**
+```
+Submit an evaluation named "llama3-safety-check" using the safety-and-fairness-v1
+collection against my model at https://llama3.example.com/v1 named llama-3.2-8b.
+```
+
+**What the assistant sends:**
+```json
+{
+  "name": "llama3-safety-check",
+  "model": {
+    "url": "https://llama3.example.com/v1",
+    "name": "llama-3.2-8b"
+  },
+  "collection": {
+    "id": "safety-and-fairness-v1"
+  }
+}
+```
+
+**Response:**
+```
+Evaluation job created: a1b2c3d4-e5f6-7890-abcd-ef1234567890 (state: pending)
+```
+
+## get_job_status
+
+Get the current status of an evaluation job including overall state, progress percentage, and per-benchmark status with timestamps. Designed for polling — call repeatedly to monitor a running evaluation.
+
+### Parameters
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `job_id` | string | Yes | ID of the evaluation job to check |
+
+### Response
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `job_id` | string | Job identifier |
+| `state` | string | Current state: `pending`, `running`, `completed`, `failed`, `cancelled`, or `partially_failed` |
+| `progress_percent` | int | Completion percentage (0–100) |
+| `benchmarks` | object[] | Per-benchmark status (see below) |
+| `created_at` | string | ISO 8601 timestamp |
+| `started_at` | string | ISO 8601 timestamp of first benchmark start |
+
+**`benchmarks` array items:**
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `id` | string | Benchmark identifier |
+| `provider_id` | string | Provider running this benchmark |
+| `status` | string | Benchmark-level status |
+| `started_at` | string | ISO 8601 timestamp |
+| `completed_at` | string | ISO 8601 timestamp |
+
+### Example
+
+**Prompt:**
+```
+Check the status of job a1b2c3d4-e5f6-7890-abcd-ef1234567890.
+```
+
+**Response:**
+```
+Job a1b2c3d4-e5f6-7890-abcd-ef1234567890: running (50% complete)
+```
+
+## cancel_job
+
+Cancel a running or pending evaluation job. The job will be stopped and its benchmarks marked as cancelled.
+
+### Parameters
+
+| Parameter | Type | Required | Description |
+|-----------|------|----------|-------------|
+| `job_id` | string | Yes | ID of the evaluation job to cancel |
+
+### Response
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `job_id` | string | Job identifier |
+| `message` | string | Confirmation message |
+
+### Example
+
+**Prompt:**
+```
+Cancel the evaluation job a1b2c3d4-e5f6-7890-abcd-ef1234567890.
+```
+
+**Response:**
+```
+Job a1b2c3d4-e5f6-7890-abcd-ef1234567890 cancelled successfully
+```
diff --git a/src/content/docs/mcp/troubleshooting.md b/src/content/docs/mcp/troubleshooting.md
new file mode 100644
index 0000000..4c33bdd
--- /dev/null
+++ b/src/content/docs/mcp/troubleshooting.md
@@ -0,0 +1,150 @@
+---
+title: "Troubleshooting"
+---
+
+## Server Unreachable / Connection Refused
+
+**Symptoms:** The AI client reports that the MCP server is not available or that the connection was refused.
+
+**For stdio transport:**
+- Verify the `evalhub-mcp` binary is on your `PATH`:
+  ```bash
+  which evalhub-mcp
+  ```
+- Test that the binary runs correctly:
+  ```bash
+  echo '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2024-11-05","capabilities":{},"clientInfo":{"name":"test","version":"0.1"}}}' | evalhub-mcp
+  ```
+  This should return a JSON-RPC response with server capabilities.
+- Re-register the server:
+  ```bash
+  claude mcp remove evalhub
+  claude mcp add evalhub --transport stdio -- evalhub-mcp
+  ```
+
+**For HTTP transport:**
+- Check the server process is running:
+  ```bash
+  curl http://localhost:3001/health
+  ```
+  Expected response: `{"status":"ok"}`
+- Verify the port is not held by a different process (if anything is listening, it should be `evalhub-mcp`):
+  ```bash
+  lsof -i :3001
+  ```
+- Check firewall rules if connecting from another machine.
+
+## Authentication Failure
+
+**Symptoms:** Tools or resources return authentication errors.
+
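+- Verify your token is set (this checks presence only and avoids printing the secret to the terminal):
+  ```bash
+  [ -n "$EVALHUB_TOKEN" ] && echo "EVALHUB_TOKEN is set" || echo "EVALHUB_TOKEN is empty"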
+  ```
+- Generate a fresh token if using Kubernetes ServiceAccount authentication:
+  ```bash
+  export EVALHUB_TOKEN=$(kubectl create token <service-account> -n <namespace>)
+  ```
+- Confirm `EVALHUB_TENANT` matches your assigned tenant.
+- Restart the MCP server after updating credentials.
+
+## Client Not Detecting Server
+
+**Symptoms:** The server doesn't appear in `claude mcp list` or VS Code doesn't recognize it.
+
+**Claude Code (stdio):**
+- Confirm the registration:
+  ```bash
+  claude mcp list
+  ```
+- If the server shows errors, remove and re-add:
+  ```bash
+  claude mcp remove evalhub
+  claude mcp add evalhub --transport stdio -- evalhub-mcp
+  ```
+- Ensure environment variables (`EVALHUB_BASE_URL`, `EVALHUB_TOKEN`, `EVALHUB_TENANT`) are set in the shell where you launched Claude Code.
+
+**Claude Code (HTTP):**
+- Ensure the server is running before registering:
+  ```bash
+  evalhub-mcp --transport http --host localhost --port 3001 &
+  claude mcp add evalhub --transport http http://localhost:3001
+  ```
+
+**VS Code:**
+- Check your `settings.json` MCP configuration for syntax errors.
+- Reload the VS Code window (Cmd/Ctrl+Shift+P → "Developer: Reload Window").
+- Check the VS Code Output panel for MCP-related error messages.
+
+## TLS Certificate Errors
+
+**Symptoms:** Errors mentioning certificate verification, `x509`, or `certificate signed by unknown authority`.
+
+**For self-signed EvalHub backends:**
+- Add the `--insecure` flag to skip TLS verification for the backend connection:
+  ```bash
+  evalhub-mcp --transport http --insecure
+  ```
+  Or set the environment variable:
+  ```bash
+  export EVALHUB_INSECURE=true
+  ```
+
+**For the MCP server's own HTTPS:**
+- Ensure both `--tls-cert` and `--tls-key` point to valid PEM files.
+- On OpenShift clusters, TLS certificates are provisioned automatically by the operator.
+
+The `--insecure` flag only affects the connection from the MCP server **to** the EvalHub backend. It does not affect the MCP server's own TLS configuration.
+
+## EvalHub Backend Unreachable
+
+**Symptoms:** The MCP server starts but tools and resources return connection errors.
+
+The MCP server is designed to start even if the EvalHub backend is unreachable. Verify the backend URL:
+
+```bash
+curl -k $EVALHUB_BASE_URL/api/v1/health
+```
+
+If the backend is down:
+- Check the EvalHub deployment:
+  ```bash
+  kubectl get pods -l app=evalhub
+  ```
+- Verify `EVALHUB_BASE_URL` points to the correct host and port.
+- For Kubernetes deployments, ensure the service is accessible from where the MCP server runs.
+
+## Common Error Messages
+
+| Error | Cause | Resolution |
+|-------|-------|------------|
+| `validation error: provide at least one of 'benchmarks' or 'collection'` | `submit_evaluation` called without specifying what to evaluate | Include either a `benchmarks` array or a `collection` object |
+| `validation error: provide 'benchmarks' or 'collection', not both` | Both `benchmarks` and `collection` specified | Use one or the other, not both |
+| `validation error: 'job_id' is required` | `cancel_job` or `get_job_status` called without a job ID | Pass the job ID returned by `submit_evaluation` |
+| `invalid job status "xyz"` | Invalid status filter on jobs resource | Use: `pending`, `running`, `completed`, `failed`, `cancelled`, or `partially_failed` |
+| `resource not found` | Requested ID does not exist | Check the ID with a list resource first |
+
+## Using MCP Inspector for Debugging
+
+The [MCP Inspector](https://github.com/modelcontextprotocol/inspector) is a visual debugging tool for MCP servers:
+
+```bash
+npx @modelcontextprotocol/inspector
+```
+
+Configure it with:
+- **Command:** `evalhub-mcp`
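+- **Arguments:** leave empty for stdio. For HTTP, start `evalhub-mcp --transport http` separately and point the Inspector at the server URL (for example `http://localhost:3001`) instead of a command.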
+
+The inspector lets you browse available tools, resources, and prompts, and test them interactively.
+
+## Health Check Endpoint
+
+When running in HTTP mode, the server exposes a health endpoint:
+
+```bash
+curl http://localhost:3001/health
+```
+
+A `200 OK` response with `{"status":"ok"}` confirms the server is running and accepting connections.
diff --git a/src/content/docs/reference/mcp.md b/src/content/docs/reference/mcp.md
index 5f3e7c1..b5dc882 100644
--- a/src/content/docs/reference/mcp.md
+++ b/src/content/docs/reference/mcp.md
@@ -2,13 +2,15 @@
 title: "MCP"
 ---
 
-This guide provides reference details for the MCP server to interact with EvalHub.
+For comprehensive MCP documentation, including installation, a quick-start guide, and the full tool, resource, and prompt reference, see the dedicated [MCP section](/mcp/).
 
-## Prerequisites
+## Using the EvalHub CLI as an MCP Server
 
-The following installation steps assumes you want to use a dedicated "agent" ServiceAccount when using [EvalHub multi-tenant](/architecture/multi-tenancy/) deployed on an OpenShift cluster.
+If you already have the [EvalHub CLI](/guides/cli/) installed and configured with profiles, you can use it as the MCP server directly. This is useful when EvalHub is running on a Kubernetes/OpenShift cluster with [multi-tenant](/architecture/multi-tenancy/) RBAC.
 
-Create a `team-a-agent` ServiceAccount:
+### Prerequisites
+
+Create a dedicated ServiceAccount for the agent:
 
 ```sh
 oc apply -f - <<EOF
@@ -20,7 +22,7 @@ metadata:
 EOF
 ```
 
-Grant `team-a-agent` ServiceAccount the required permissions:
+Grant the ServiceAccount the required permissions:
 
 ```sh
 oc apply -f - <<EOF
@@ -49,13 +51,13 @@ roleRef:
 subjects:
   - kind: ServiceAccount
     name: team-a-agent
-    namespace: team-a       # required for ServiceAccount subjects
+    namespace: team-a
 EOF
-``` 
+```
 
-## Installation of MCP in the AI Agent
+### Configure and register
 
-Set "agent" ServiceAccount values in a dedicated profile for the [EvalHub CLI](/guides/cli/):
+Set up an "agent" configuration profile for the CLI:
 
 ```sh
 evalhub --profile agent config set base_url https://evalhub-opendatahub.apps.(...).openshiftapps.com
@@ -63,48 +65,30 @@ evalhub --profile agent config set tenant team-a
 evalhub --profile agent config set token $(oc create token team-a-agent -n team-a --duration=8760h)
 ```
 
-This makes an "agent" configuration profile for the CLI:
+Register MCP with Claude Code:
 
-```yaml
-active_profile: default
-profiles:
-  agent:
-    base_url: https://evalhub-opendatahub.apps.(...).openshiftapps.com
-    tenant: team-a
-    token: ...
+```sh
+claude mcp add evalhub -- evalhub --profile agent mcp
 ```
 
-Then add MCP "evalhub" via `evalhub` CLI (this example assumes Claude as the AI Agent):
+To register the server globally (for all Claude Code projects), make sure the `evalhub` CLI is available system-wide and add `-s user`:
 
 ```sh
-claude mcp add evalhub -- evalhub --profile agent mcp
+claude mcp add -s user evalhub -- evalhub --profile agent mcp
 ```
 
-Please notice this adds the mcp to the current (Claude's) Project, to add globally you need:
-- use `-s user` when adding MCP so to install the MCP globally in `~/.claude.json` for all `projects`
-- you need evalhub CLI available system-wide
-
-## Troubleshooting
+### Troubleshooting
 
-Ensure evalhub "agent" configuration is healthy:
+Verify the EvalHub connection is healthy:
 
 ```sh
 evalhub --profile agent health
 ```
 
-Use evalhub "agent" configuration with MCP Inspector by starting it where the evalhub CLI is available:
+Use MCP Inspector to debug, starting it in a shell where the `evalhub` CLI is available:
 
 ```sh
 npx @modelcontextprotocol/inspector
 ```
 
-Use:
-
-```
-command:
-evalhub
-
-arguments:
---profile agent mcp
-```
-
+In the Inspector, set the command to `evalhub` and the arguments to `--profile agent mcp`.