diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 685778e..f7c31f2 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -19,6 +19,8 @@ on:
       - pyproject.toml
       - mkdocs.yml
       - '**.png'
+      - '**.jpeg'
+      - '**.jpg'
       - '**.svg'
   pull_request:
     branches:
@@ -35,6 +37,8 @@ on:
       - '**.md'
       - mkdocs.yml
       - '**.png'
+      - '**.jpeg'
+      - '**.jpg'
       - '**.svg'
 
 jobs:
@@ -47,7 +51,7 @@ jobs:
       - name: Install uv
         uses: astral-sh/setup-uv@v7.6.0
         with:
-          version: "0.9.11"
+          version: "0.11.15"
           enable-cache: true
 
       - name: Set up Python
diff --git a/docs/api.md b/docs/api.md
deleted file mode 100644
index 3cdae0d..0000000
--- a/docs/api.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# API Reference
-
-## Top Level Module
-
-::: agentfinvqa
-    options:
-      show_root_heading: true
-      show_root_full_path: true
diff --git a/docs/assets/AIXPERTLogo.png b/docs/assets/AIXPERTLogo.png
new file mode 100644
index 0000000..141949e
Binary files /dev/null and b/docs/assets/AIXPERTLogo.png differ
diff --git a/docs/assets/Teaser.jpeg b/docs/assets/Teaser.jpeg
new file mode 100644
index 0000000..b28a063
Binary files /dev/null and b/docs/assets/Teaser.jpeg differ
diff --git a/docs/assets/Teaser_color.jpeg b/docs/assets/Teaser_color.jpeg
new file mode 100644
index 0000000..03afe14
Binary files /dev/null and b/docs/assets/Teaser_color.jpeg differ
diff --git a/docs/assets/VectorLogo.png b/docs/assets/VectorLogo.png
new file mode 100644
index 0000000..4e0ed74
Binary files /dev/null and b/docs/assets/VectorLogo.png differ
diff --git a/docs/assets/VectorLogo_Black.png b/docs/assets/VectorLogo_Black.png
new file mode 100644
index 0000000..a45b2e8
Binary files /dev/null and b/docs/assets/VectorLogo_Black.png differ
diff --git a/docs/index.html b/docs/index.html
new file mode 100644
index 0000000..2319e65
--- /dev/null
+++ b/docs/index.html
@@ -0,0 +1,449 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8" />
+  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+  <title>AgentFinVQA: A Multi-Agent Framework for Financial Chart VQA</title>
+  <meta name="description" content="AgentFinVQA: A multi-agent evaluation framework for visual question answering on financial charts, using a Plan-Inspect-Verify pipeline with traceable Model Evaluation Packets." />
+  <link rel="stylesheet" href="style.css" />
+  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.0/css/all.min.css" />
+  <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css" />
+  <script defer src="main.js"></script>
+  <script type="application/ld+json">
+  {
+    "@context": "https://schema.org",
+    "@type": "ScholarlyArticle",
+    "headline": "AgentFinVQA: A Multi-Agent Framework for Financial Chart VQA",
+    "description": "A multi-agent evaluation framework for visual question answering on financial charts using a Plan-Inspect-Verify pipeline.",
+    "keywords": ["financial VQA", "chart understanding", "multi-agent AI", "visual question answering", "LLM evaluation", "FinMME"],
+    "publisher": { "@type": "Organization", "name": "Vector Institute" },
+    "url": "https://vectorinstitute.github.io/AgentFinVQA"
+  }
+  </script>
+</head>
+<body>
+  <header class="topbar" id="top">
+    <div class="topbar-inner">
+      <div class="topbar-logos">
+        <a href="https://vectorinstitute.ai/" target="_blank" rel="noreferrer">
+          <img src="assets/VectorLogo_Black.png" alt="Vector Institute" class="logo-img" />
+        </a>
+        <a href="https://aixpert-project.eu/" target="_blank" rel="noreferrer">
+          <img src="assets/AIXPERTLogo.png" alt="AIXPERT" class="logo-img" />
+        </a>
+      </div>
+      <a class="brand" href="#top" aria-label="AgentFinVQA Home">
+        <span class="brand-mark">↗</span>
+        <span class="brand-name">AgentFinVQA</span>
+      </a>
+      <button class="nav-toggle" id="navToggle" aria-label="Toggle navigation" aria-expanded="false">
+        <span></span><span></span><span></span>
+      </button>
+      <nav class="nav" id="nav" aria-label="Site navigation">
+        <a href="#abstract">Abstract</a>
+        <a href="#method">Method</a>
+        <a href="#dataset">Dataset</a>
+        <a href="#results">Results</a>
+        <a href="#quickstart">Quick Start</a>
+        <a href="#citation">BibTeX</a>
+      </nav>
+    </div>
+  </header>
+
+  <section class="hero">
+    <div class="container">
+      <div class="hero-content">
+        <div class="hero-header">
+          <h1 class="title">AgentFinVQA: A Multi-Agent Framework for Financial Chart Visual Question Answering</h1>
+
+          <p class="authors">
+            <a href="https://scholar.google.com/citations?user=aravindnarayanan" target="_blank" rel="noreferrer">Aravind Narayanan</a><sup>1</sup>,
+            <a href="https://scholar.google.com/citations?user=chcz7RMAAAAJ" target="_blank" rel="noreferrer">Shaina Raza</a><sup>1</sup>
+          </p>
+
+          <p class="affiliations">
+            <sup>1</sup>Vector Institute, Canada
+          </p>
+
+          <div class="button-row">
+            <a class="btn btn-primary" href="#" target="_blank" rel="noreferrer">
+              <!-- TODO: replace href with arXiv link -->
+              <i class="ai ai-arxiv btn-icon-fa"></i>
+              arXiv
+            </a>
+            <a class="btn btn-primary" href="https://github.com/VectorInstitute/AgentFinVQA" target="_blank" rel="noreferrer">
+              <i class="fab fa-github btn-icon-fa"></i>
+              Code
+            </a>
+            <a class="btn" href="https://huggingface.co/datasets/luojunyu/FinMME" target="_blank" rel="noreferrer">
+              <i class="fas fa-database btn-icon-fa"></i>
+              FinMME Dataset
+            </a>
+            <a class="btn" href="#citation">
+              <i class="fas fa-quote-left btn-icon-fa"></i>
+              BibTeX
+            </a>
+          </div>
+
+          <p class="hero-attribution">Evaluated on <a href="https://huggingface.co/datasets/luojunyu/FinMME" target="_blank" rel="noreferrer">FinMME</a> (Luo et al., 2025) — we do not own this dataset.</p>
+        </div>
+
+        <div class="hero-figure">
+          <figure>
+            <img
+              src="assets/Teaser.jpeg"
+              alt="FinMME example: zero-shot selects one GNPA period; AgentFinVQA recovers the full multi-select answer via per-choice verification"
+              class="teaser-img"
+              width="1200"
+              height="675"
+              loading="eager"
+              decoding="async"
+            />
+            <figcaption>
+              <strong>Figure 1.</strong> AgentFinVQA recovers multi-select answers that zero-shot misses. On a FinMME GNPA chart, zero-shot selects a single wrong period (1HFY23, 5.1%), while AgentFinVQA applies a Plan → OCR → Legend → Vision → Verify pipeline, audits each MCQ option against the 4% threshold, and recovers the full answer (1QFY24 + 9MFY24). The full reasoning trace is stored as a Model Evaluation Packet (MEP).
+            </figcaption>
+          </figure>
+        </div>
+
+        <div class="hero-intro">
+          <p class="lead">
+            AgentFinVQA is a multi-agent framework for evaluating vision-language models on financial chart understanding.
+            It decomposes chart QA into an explicit <strong>Plan → Inspect → Verify</strong> loop,
+            producing fully traceable <strong>Model Evaluation Packets (MEPs)</strong> for reproducible,
+            post-hoc analysis across VLM backends.
+          </p>
+
+          <div class="badge-row">
+            <span class="pill">11,099 FinMME samples</span>
+            <span class="pill">Plan → Inspect → Verify</span>
+            <span class="pill">+7.7 pp over zero-shot</span>
+            <span class="pill">Gemini · GPT-4o · Qwen3.5/3.6 (vLLM)</span>
+          </div>
+        </div>
+      </div>
+    </div>
+  </section>
+
+  <main class="container">
+
+    <section id="abstract" class="section">
+      <h2>Abstract</h2>
+      <div class="abstract-box">
+        <p>
+          Financial chart question answering in regulated settings demands more than accuracy: practitioners
+          must know which answers to trust before acting on them, and many institutions cannot send client
+          data to external model providers. Yet existing chart-QA agents are accuracy-focused and opaque,
+          and most assume proprietary API access; none is both auditable and deployable on-premise.
+        </p>
+        <p>
+          We present <strong>AgentFinVQA</strong>, a multi-agent pipeline that decomposes each query into
+          planning, OCR, legend grounding, visual inspection, and verification, recording every step in a
+          traceable <strong>Model Evaluation Packet (MEP)</strong> per sample. On FinMME, AgentFinVQA
+          improves <strong>+7.68&nbsp;pp</strong> over a model-matched zero-shot baseline with a proprietary
+          backbone (Gemini-3; 71.24% vs.&nbsp;63.56%, McNemar p&nbsp;≈&nbsp;1&nbsp;×&nbsp;10⁻¹⁶), and
+          <strong>+4.84&nbsp;pp</strong> with open-weights Qwen3.6-27B-FP8 served locally on a single A100
+          at roughly one-tenth the cost, so the gains do not depend on a proprietary API.
+        </p>
+        <p>
+          The verifier's verdict also serves as a useful confidence signal (68.2% vs.&nbsp;55.6% exact
+          accuracy on confirmed vs.&nbsp;revised answers), enabling human-in-the-loop review routing.
+          Together these results show that auditable, on-premise financial chart QA is practical and that
+          the open-weights system keeps most of the accuracy gains at a fraction of the cost. We release
+          our code and per-sample MEPs to support reproducible evaluation.
+        </p>
+      </div>
+    </section>
+
+    <section id="method" class="section">
+      <h2>Method</h2>
+      <p class="section-intro">
+        AgentFinVQA coordinates four specialized components in a structured pipeline.
+        Unlike single-pass VLM approaches, each component has a clearly scoped role,
+        and the entire execution trace is captured for post-hoc explainability.
+      </p>
+
+      <div class="pipeline">
+        <div class="pipeline-step">
+          <div class="step-number">1</div>
+          <div class="step-body">
+            <h3>PlannerAgent</h3>
+            <p>Text-only LLM that generates a structured JSON inspection plan — <strong>without seeing the image</strong>.
+            Separates strategic reasoning from visual perception.</p>
+          </div>
+        </div>
+        <div class="pipeline-arrow">↓</div>
+        <div class="pipeline-step">
+          <div class="step-number">2</div>
+          <div class="step-body">
+            <h3>OcrReaderTool</h3>
+            <p>Focused VLM call that transcribes all visible chart text (axes, labels, legend, annotations)
+            into structured JSON, grounding downstream agents in observed text rather than hallucinated values.</p>
+          </div>
+        </div>
+        <div class="pipeline-arrow">↓</div>
+        <div class="pipeline-step">
+          <div class="step-number">3</div>
+          <div class="step-body">
+            <h3>VisionAgent</h3>
+            <p>CrewAI-orchestrated agent that executes the planner's steps using <code>vision_qa_tool</code>
+            to produce a draft answer and explanation.</p>
+          </div>
+        </div>
+        <div class="pipeline-arrow">↓</div>
+        <div class="pipeline-step">
+          <div class="step-number">4</div>
+          <div class="step-body">
+            <h3>VerifierAgent</h3>
+            <p>Second VLM that critically reviews the draft answer against the chart image and issues a
+            <strong>CONFIRM</strong> or <strong>REVISE</strong> verdict with confidence score.</p>
+          </div>
+        </div>
+        <div class="pipeline-arrow">↓</div>
+        <div class="pipeline-step pipeline-step--output">
+          <div class="step-number">✓</div>
+          <div class="step-body">
+            <h3>Model Evaluation Packet (MEP)</h3>
+            <p>Portable JSON artifact capturing the full trace: inspection plan, OCR output, vision reasoning,
+            verifier critique, tool call logs, timestamps, and per-stage errors.
+            Enables reproducible evaluation and model comparison across backends.</p>
+          </div>
+        </div>
+      </div>
+
+      <div class="info-box" style="margin-top: 2rem;">
+        <h3 class="h3">Supported Backends</h3>
+        <p>All agents support swappable VLM backends via a unified interface:</p>
+        <ul class="bullets">
+          <li><strong>Gemini</strong> — Google Gemini 3 Flash / 2.5 Flash (default)</li>
+          <li><strong>OpenAI-compatible</strong> — GPT-4o, or any locally-served model via vLLM (e.g. Qwen3.5 / Qwen3.6)</li>
+          <li>Planner and Verifier can use different backends from the VisionAgent</li>
+        </ul>
+      </div>
+    </section>
+
+    <section id="dataset" class="section">
+      <h2>Dataset</h2>
+      <div class="two-col">
+        <div>
+          <h3 class="h3">FinMME</h3>
+          <p>
+            AgentFinVQA is evaluated on <strong>FinMME</strong>, a challenging financial multimodal benchmark
+            spanning 18 financial domains and 6 asset classes. Samples are sourced from real-world financial
+            research reports and cover three cognitive levels (comprehensive understanding, fine-grained
+            perception, and analysis &amp; reasoning), validated by 20 expert annotators.
+          </p>
+          <ul class="bullets">
+            <li><strong>10 major chart types</strong>, 21 subtypes (bar, line, pie, candlestick, and more)</li>
+            <li><strong>3 cognitive levels:</strong> comprehensive understanding, fine-grained perception, analysis &amp; reasoning</li>
+            <li><strong>3 knowledge domains:</strong> equity research, macroeconomic research, assets &amp; financial products</li>
+            <!-- <li><strong>FinScore</strong> evaluation with hallucination penalties and multi-dimensional assessment</li> -->
+          </ul>
+        </div>
+        <div class="card">
+          <h3 class="h3">Dataset at a Glance</h3>
+          <div class="grid-2">
+            <div class="stat"><div class="stat-value">11,099</div><div class="stat-label">Total samples</div></div>
+            <div class="stat"><div class="stat-value">4,458</div><div class="stat-label">Unique images</div></div>
+            <div class="stat"><div class="stat-value">18</div><div class="stat-label">Financial domains</div></div>
+            <div class="stat"><div class="stat-value">10</div><div class="stat-label">Chart types</div></div>
+          </div>
+        </div>
+      </div>
+    </section>
+
+    <section id="results" class="section">
+      <h2>Results</h2>
+      <p class="section-intro">
+        AgentFinVQA consistently outperforms same-model zero-shot baselines across both evaluated backbones on FinMME.
+        With Gemini-3 Flash the agent gains <strong>+7.68 pp mean accuracy</strong>
+        (p&nbsp;≈&nbsp;1.1&nbsp;×&nbsp;10⁻¹⁶); with the locally-served
+        <strong>Qwen3.6-27B-FP8</strong> (vLLM) it gains <strong>+4.84 pp</strong>
+        (p&nbsp;≈&nbsp;3.0&nbsp;×&nbsp;10⁻⁶), demonstrating backend-agnostic improvements.
+      </p>
+
+      <div class="table-wrap" role="region" aria-label="FinMME results" tabindex="0">
+        <table class="table">
+          <thead>
+            <tr>
+              <th>System</th>
+              <th>Backbone</th>
+              <th class="center">Mean<br/><span class="metric-desc">Acc. ↑</span></th>
+              <th class="center">Exact<br/><span class="metric-desc">Acc. ↑</span></th>
+              <th class="center">MCQ Mean<br/><span class="metric-desc">↑</span></th>
+              <th class="center">Std Mean<br/><span class="metric-desc">↑</span></th>
+            </tr>
+          </thead>
+          <tbody>
+            <tr>
+              <td>Zero-shot</td>
+              <td>Gemini-3 Flash Preview</td>
+              <td class="center">63.56%</td>
+              <td class="center">56.40%</td>
+              <td class="center">64.4%</td>
+              <td class="center">54.0%</td>
+            </tr>
+            <tr class="highlight">
+              <td>AgentFinVQA</td>
+              <td>Gemini-3 Flash Preview</td>
+              <td class="center"><strong>71.24%</strong></td>
+              <td class="center"><strong>65.12%</strong></td>
+              <td class="center"><strong>72.5%</strong></td>
+              <td class="center"><strong>57.0%</strong></td>
+            </tr>
+            <tr class="delta-row">
+              <td>Δ</td>
+              <td></td>
+              <td class="center"><strong>+7.68 pp</strong></td>
+              <td class="center"><strong>+8.72 pp</strong></td>
+              <td class="center"><strong>+8.1 pp</strong></td>
+              <td class="center"><strong>+3.0 pp</strong></td>
+            </tr>
+            <tr style="border-top: 2px solid var(--color-border);">
+              <td>Zero-shot</td>
+              <td>Qwen3.6-27B-FP8</td>
+              <td class="center">61.68%</td>
+              <td class="center">53.52%</td>
+              <td class="center">62.8%</td>
+              <td class="center">49.0%</td>
+            </tr>
+            <tr class="highlight">
+              <td>AgentFinVQA</td>
+              <td>Qwen3.6-27B-FP8</td>
+              <td class="center"><strong>66.52%</strong></td>
+              <td class="center"><strong>60.24%</strong></td>
+              <td class="center"><strong>68.1%</strong></td>
+              <td class="center"><strong>48.0%</strong></td>
+            </tr>
+            <tr class="delta-row">
+              <td>Δ</td>
+              <td></td>
+              <td class="center"><strong>+4.84 pp</strong></td>
+              <td class="center"><strong>+6.72 pp</strong></td>
+              <td class="center"><strong>+5.3 pp</strong></td>
+              <td class="center">−1.0 pp†</td>
+            </tr>
+          </tbody>
+        </table>
+      </div>
+      <p class="table-note">
+        FinMME train split (MCQ n&nbsp;=&nbsp;9,247; Standard n&nbsp;=&nbsp;1,862).
+        McNemar p&nbsp;≈&nbsp;1.1&nbsp;×&nbsp;10⁻¹⁶ (Gemini-3) and p&nbsp;≈&nbsp;3.0&nbsp;×&nbsp;10⁻⁶ (Qwen3.6-FP8).
+        †Standard Δ for Qwen is within noise (CI&nbsp;≈&nbsp;±10&nbsp;pp); treat as exploratory.
+      </p>
+
+      <div class="key-findings" style="margin-top: 2rem;">
+        <h3 class="h3">Key Findings</h3>
+        <ul class="bullets">
+          <li><strong>+7.7 pp over same-model zero-shot</strong> on FinMME (1,250 matched samples, statistically significant at p&nbsp;&lt;&nbsp;10⁻¹⁵).</li>
+          <li><strong>Multi-select MCQ</strong> accuracy improved from 30.2% → 53.5% (+23.3 pp) by giving each agent a dedicated multi-select reasoning path.</li>
+          <li><strong>Verifier adds value:</strong> removing the VerifierAgent drops accuracy by ~1.2 pp; revised samples achieve 55.6% exact accuracy vs 68.2% on confirmed samples, confirming the verifier correctly targets harder items.</li>
+        </ul>
+      </div>
+    </section>
+
+    <section id="quickstart" class="section">
+      <h2>Quick Start</h2>
+
+      <div class="code-section">
+        <h3 class="h3">Installation</h3>
+        <div class="code-card">
+          <button class="code-copy" data-target="code-install" aria-label="Copy installation code">
+            <svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor">
+              <path d="M4 2a2 2 0 0 1 2-2h8a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H6a2 2 0 0 1-2-2V2zm2-1a1 1 0 0 0-1 1v8a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1V2a1 1 0 0 0-1-1H6zM2 5a1 1 0 0 0-1 1v8a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1v-1h1v1a2 2 0 0 1-2 2H2a2 2 0 0 1-2-2V6a2 2 0 0 1 2-2h1v1H2z"/>
+            </svg>
+          </button>
+          <pre id="code-install"><code>git clone https://github.com/VectorInstitute/AgentFinVQA.git
+cd AgentFinVQA
+
+# Core dependencies
+uv sync
+
+# Agentic pipeline (CrewAI, Gemini, Streamlit dashboard)
+uv sync --group agentic-xai-eval
+
+source .venv/bin/activate
+cp .env.example .env  # fill in API keys</code></pre>
+        </div>
+      </div>
+
+      <div class="code-section">
+        <h3 class="h3">Run the Pipeline</h3>
+        <div class="code-card">
+          <button class="code-copy" data-target="code-run" aria-label="Copy run code">
+            <svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor">
+              <path d="M4 2a2 2 0 0 1 2-2h8a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H6a2 2 0 0 1-2-2V2zm2-1a1 1 0 0 0-1 1v8a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1V2a1 1 0 0 0-1-1H6zM2 5a1 1 0 0 0-1 1v8a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1v-1h1v1a2 2 0 0 1-2 2H2a2 2 0 0 1-2-2V6a2 2 0 0 1 2-2h1v1H2z"/>
+            </svg>
+          </button>
+          <pre id="code-run"><code># Generate MEPs for a dataset split
+uv run --env-file .env -m agentfinvqa.runner.run_generate_meps \
+    --dataset finmme \
+    --split test \
+    --n 200 \
+    --config openai_gemini \
+    --workers 8 \
+    --out meps/
+
+# Evaluate and summarize
+uv run -m agentfinvqa.eval.summarize --mep-dir meps/
+
+# Launch interactive dashboard
+uv run streamlit run src/agentfinvqa/eval/dashboard.py -- --mep-dir meps/</code></pre>
+        </div>
+      </div>
+    </section>
+
+    <section id="citation" class="section">
+      <h2>BibTeX</h2>
+      <p class="muted">If you use AgentFinVQA in your research, please cite:</p>
+      <div class="bibtex-section">
+        <button class="btn btn-small btn-copy" id="copyBibtex" type="button" aria-label="Copy BibTeX">
+          <svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor">
+            <path d="M4 2a2 2 0 0 1 2-2h8a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H6a2 2 0 0 1-2-2V2zm2-1a1 1 0 0 0-1 1v8a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1V2a1 1 0 0 0-1-1H6zM2 5a1 1 0 0 0-1 1v8a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1v-1h1v1a2 2 0 0 1-2 2H2a2 2 0 0 1-2-2V6a2 2 0 0 1 2-2h1v1H2z"/>
+          </svg>
+          Copy BibTeX
+        </button>
+        <!-- TODO: fill in real citation details -->
+        <pre class="bibtex" id="bibtexBlock"><code>@article{narayanan2026agentfinvqa,
+  title   = {AgentFinVQA: A Multi-Agent Framework for Financial Chart Visual Question Answering},
+  author  = {Narayanan, Aravind and Raza, Shaina},
+  journal = {arXiv preprint arXiv:XXXX.XXXXX},
+  year    = {2026}
+}</code></pre>
+        <p class="copy-status" id="copyStatus" role="status" aria-live="polite"></p>
+      </div>
+    </section>
+
+    <section id="acknowledgements" class="section">
+      <h2>Acknowledgements</h2>
+      <p>
+        Resources used in preparing this research were provided, in part, by the Province of Ontario
+        and the Government of Canada through CIFAR, as well as companies sponsoring the Vector Institute
+        (<a href="https://www.vectorinstitute.ai/#partners" target="_blank" rel="noreferrer">vectorinstitute.ai/#partners</a>).
+      </p>
+      <p style="margin-top: 1rem;">
+        This research was funded by the European Union's Horizon Europe research and innovation programme
+        under the <a href="https://aixpert-project.eu/" target="_blank" rel="noreferrer">AIXPERT project</a>
+        (Grant Agreement No.&nbsp;101214389), which aims to develop an agentic, multi-layered, GenAI-powered
+        framework for creating explainable, accountable, and transparent AI systems.
+      </p>
+    </section>
+
+  </main>
+
+  <footer class="footer">
+    <div class="container footer-inner">
+      <a href="https://vectorinstitute.ai/" target="_blank" rel="noreferrer">
+        <img src="assets/VectorLogo_Black.png" alt="Vector Institute" class="footer-logo" />
+      </a>
+      <a href="https://aixpert-project.eu/" target="_blank" rel="noreferrer">
+        <img src="assets/AIXPERTLogo.png" alt="AIXPERT" class="footer-logo footer-logo--color" />
+      </a>
+      <p class="footer-text">
+        Built by the AI Engineering team at the <a href="https://vectorinstitute.ai/" target="_blank" rel="noreferrer">Vector Institute</a>
+        &nbsp;·&nbsp; © 2026 &nbsp;·&nbsp;
+        <a href="https://github.com/VectorInstitute/AgentFinVQA" target="_blank" rel="noreferrer">GitHub</a>
+      </p>
+    </div>
+  </footer>
+</body>
+</html>
diff --git a/docs/index.md b/docs/index.md
deleted file mode 100644
index 019f42f..0000000
--- a/docs/index.md
+++ /dev/null
@@ -1,4 +0,0 @@
-# AgentFinVQA
-
-Please check out the user guide page for more detailed information on using this project.
-For existing projects, the [user guide](user_guide.md) can be followed to get started.
diff --git a/docs/main.js b/docs/main.js
new file mode 100644
index 0000000..de1df53
--- /dev/null
+++ b/docs/main.js
@@ -0,0 +1,85 @@
+"use strict";
+
+// Mobile nav toggle: the button flips the "open" class on the nav and mirrors
+// the resulting state in aria-expanded.
+const navToggle = document.getElementById("navToggle");
+const nav = document.getElementById("nav");
+if (navToggle && nav) {
+  navToggle.addEventListener("click", () => {
+    const open = nav.classList.toggle("open");
+    navToggle.setAttribute("aria-expanded", String(open));
+  });
+  // Close on nav link click
+  nav.querySelectorAll("a").forEach((a) => {
+    a.addEventListener("click", () => {
+      nav.classList.remove("open");
+      navToggle.setAttribute("aria-expanded", "false");
+    });
+  });
+}
+
+/**
+ * Write text to the system clipboard.
+ *
+ * @param {string} text - Text to copy.
+ * @returns {Promise<void>} Resolves once the text is written. Rejects, rather
+ *   than throwing synchronously, when navigator.clipboard is unavailable
+ *   (browsers only expose it in secure contexts) or the write is refused.
+ */
+function copyText(text) {
+  if (!navigator.clipboard || typeof navigator.clipboard.writeText !== "function") {
+    return Promise.reject(new Error("Clipboard API unavailable"));
+  }
+  return navigator.clipboard.writeText(text);
+}
+
+// Inline code-copy buttons (data-target points to a <pre> id)
+document.querySelectorAll(".code-copy").forEach((btn) => {
+  let resetTimer;
+  // Swap the icon for a short message, then put the icon back. Any pending
+  // reset is cancelled first so repeated clicks cannot restore the icon early.
+  const flash = (message) => {
+    clearTimeout(resetTimer);
+    btn.textContent = message;
+    resetTimer = setTimeout(() => {
+      btn.innerHTML = copyIconSVG();
+    }, 1800);
+  };
+  btn.addEventListener("click", () => {
+    const target = document.getElementById(btn.dataset.target);
+    if (!target) return;
+    copyText(target.textContent.trim()).then(
+      () => flash("Copied!"),
+      () => flash("Copy failed")
+    );
+  });
+});
+
+// BibTeX copy button
+const copyBibtex = document.getElementById("copyBibtex");
+const bibtexBlock = document.getElementById("bibtexBlock");
+const copyStatus = document.getElementById("copyStatus");
+if (copyBibtex && bibtexBlock) {
+  let statusTimer;
+  // Show a message in the live region (when present) and clear it after 2 s.
+  const showStatus = (message) => {
+    if (!copyStatus) return;
+    clearTimeout(statusTimer);
+    copyStatus.textContent = message;
+    statusTimer = setTimeout(() => { copyStatus.textContent = ""; }, 2000);
+  };
+  copyBibtex.addEventListener("click", () => {
+    copyText(bibtexBlock.textContent.trim()).then(
+      () => showStatus("Copied to clipboard!"),
+      () => showStatus("Copy failed. Select the text and copy it manually.")
+    );
+  });
+}
+
+// Markup for the copy icon; mirrors the inline SVG used by the .code-copy
+// buttons in index.html so the button can be restored after a status message.
+function copyIconSVG() {
+  return `<svg width="16" height="16" viewBox="0 0 16 16" fill="currentColor">
+    <path d="M4 2a2 2 0 0 1 2-2h8a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H6a2 2 0 0 1-2-2V2zm2-1a1 1 0 0 0-1 1v8a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1V2a1 1 0 0 0-1-1H6zM2 5a1 1 0 0 0-1 1v8a1 1 0 0 0 1 1h8a1 1 0 0 0 1-1v-1h1v1a2 2 0 0 1-2 2H2a2 2 0 0 1-2-2V6a2 2 0 0 1 2-2h1v1H2z"/>
+  </svg>`;
+}
diff --git a/docs/style.css b/docs/style.css
new file mode 100644
index 0000000..6477f65
--- /dev/null
+++ b/docs/style.css
@@ -0,0 +1,221 @@
+/* ========== Reset & base ========== */
+*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
+
+:root {
+  --color-bg: #ffffff;
+  --color-surface: #f8f9fa;
+  --color-border: #e2e8f0;
+  --color-text: #1a202c;
+  --color-muted: #64748b;
+  --color-primary: #0e3c5c;   /* Vector dark blue */
+  --color-accent: #1a9fc4;    /* Vector teal */
+  --color-accent-light: #e8f6fb;
+  --color-highlight: #fff8e1;
+  --font-sans: system-ui, -apple-system, "Segoe UI", sans-serif;
+  --font-mono: "JetBrains Mono", "Fira Code", "Cascadia Code", monospace;
+  --radius: 8px;
+  --shadow-sm: 0 1px 3px rgba(0,0,0,.08);
+  --shadow-md: 0 4px 12px rgba(0,0,0,.10);
+  --max-w: 1100px;
+  --nav-h: 56px;
+}
+html { scroll-padding-top: calc(var(--nav-h) + 16px); } /* anchor targets stop below the sticky topbar */
+@media (prefers-reduced-motion: no-preference) { html { scroll-behavior: smooth; } } /* animate in-page jumps only when the user has not asked for reduced motion */
+body { font-family: var(--font-sans); color: var(--color-text); background: var(--color-bg); line-height: 1.65; font-size: 16px; }
+a { color: var(--color-accent); text-decoration: none; }
+a:hover { text-decoration: underline; }
+img { max-width: 100%; height: auto; display: block; }
+code { font-family: var(--font-mono); font-size: .88em; background: var(--color-surface); padding: .1em .35em; border-radius: 4px; }
+pre code { background: none; padding: 0; font-size: .85rem; }
+
+/* ========== Layout ========== */
+.container { max-width: var(--max-w); margin: 0 auto; padding: 0 1.5rem; }
+.section { padding: 4rem 0; border-bottom: 1px solid var(--color-border); }
+.section:last-child { border-bottom: none; }
+.section-intro { font-size: 1.05rem; color: var(--color-muted); margin-bottom: 1.75rem; max-width: 70ch; }
+
+/* ========== Topbar ========== */
+.topbar { /* sticky header; the near-opaque background is what shows where backdrop blur is unsupported */
+  position: sticky; top: 0; z-index: 100;
+  background: rgba(255,255,255,.95); -webkit-backdrop-filter: blur(8px); backdrop-filter: blur(8px);
+  border-bottom: 1px solid var(--color-border);
+  height: var(--nav-h);
+}
+.topbar-inner {
+  max-width: var(--max-w); margin: 0 auto; padding: 0 1.5rem;
+  height: 100%; display: flex; align-items: center; gap: 1rem;
+}
+.topbar-logos { display: flex; align-items: center; gap: .75rem; }
+.logo-img { height: 28px; width: auto; }
+.brand { display: flex; align-items: center; gap: .4rem; font-weight: 700; font-size: 1rem; color: var(--color-primary); margin-left: .5rem; }
+.brand:hover { text-decoration: none; }
+.brand-mark { font-size: .9em; opacity: .7; }
+.nav { display: flex; gap: 1.5rem; margin-left: auto; align-items: center; }
+.nav a { font-size: .9rem; color: var(--color-muted); font-weight: 500; transition: color .15s; }
+.nav a:hover { color: var(--color-accent); text-decoration: none; }
+.nav-toggle { display: none; background: none; border: none; cursor: pointer; padding: .4rem; flex-direction: column; gap: 5px; }
+.nav-toggle span { display: block; width: 22px; height: 2px; background: var(--color-text); border-radius: 2px; transition: .2s; }
+
+/* ========== Hero ========== */
+.hero { background: var(--color-bg); color: var(--color-text); padding: 3rem 0 2.5rem; border-bottom: 1px solid var(--color-border); }
+.hero-content { display: flex; flex-direction: column; gap: 2.5rem; align-items: stretch; }
+.hero-header { text-align: center; max-width: 820px; margin: 0 auto; width: 100%; }
+.hero-intro { text-align: center; max-width: 820px; margin: 0 auto; width: 100%; }
+.title { font-size: clamp(1.5rem, 3vw, 2.25rem); font-weight: 800; line-height: 1.25; margin-bottom: 1rem; color: var(--color-primary); }
+.authors { font-size: .95rem; margin-bottom: .4rem; color: var(--color-text); }
+.authors a { color: var(--color-accent); }
+.authors a:hover { color: var(--color-primary); }
+.affiliations { font-size: .85rem; color: var(--color-muted); margin-bottom: 1.25rem; }
+.lead { font-size: 1rem; color: var(--color-muted); margin-bottom: 1.25rem; line-height: 1.7; }
+.hero-attribution { font-size: .78rem; color: var(--color-muted); margin-bottom: 0; }
+.hero-attribution a { color: var(--color-accent); text-decoration: underline; }
+.lead strong { color: var(--color-text); }
+.hero-header .button-row { justify-content: center; margin-bottom: 1rem; }
+.hero-intro .badge-row { justify-content: center; }
+.hero-figure { width: 100%; max-width: var(--max-w); margin: 0 auto; }
+.hero-figure figure { background: var(--color-surface); border: 1px solid var(--color-border); border-radius: var(--radius); overflow: hidden; box-shadow: var(--shadow-sm); }
+.hero-figure .teaser-img { display: block; width: 100%; height: auto; vertical-align: middle; }
+.hero-figure figcaption { padding: .85rem 1.1rem; font-size: .82rem; color: var(--color-muted); line-height: 1.55; text-align: left; background: var(--color-bg); }
+.hero-figure figcaption code { font-size: .78rem; background: var(--color-surface); padding: .1rem .35rem; border-radius: 4px; }
+
+/* ========== Buttons ========== */
+.button-row { display: flex; flex-wrap: wrap; gap: .6rem; margin-bottom: 1.5rem; }
+.btn {
+  display: inline-flex; align-items: center; gap: .4rem;
+  padding: .45rem 1rem; border-radius: 6px; font-size: .88rem; font-weight: 600;
+  border: 1.5px solid var(--color-border); color: var(--color-text); background: var(--color-bg);
+  transition: background .15s, border-color .15s; cursor: pointer; text-decoration: none;
+}
+.btn:hover { background: var(--color-surface); border-color: var(--color-accent); text-decoration: none; color: var(--color-primary); }
+.btn-primary { background: var(--color-accent); border-color: var(--color-accent); color: #fff; }
+.btn-primary:hover { background: #1589ab; border-color: #1589ab; color: #fff; }
+.btn-small { padding: .3rem .7rem; font-size: .8rem; }
+.btn-copy { border: 1.5px solid var(--color-border); color: var(--color-muted); background: var(--color-surface); }
+.btn-copy:hover { background: var(--color-accent-light); color: var(--color-accent); border-color: var(--color-accent); }
+.btn-icon { width: 16px; height: 16px; flex-shrink: 0; }
+.btn-icon-fa { font-size: 1rem; line-height: 1; }
+
+/* ========== Badges / pills ========== */
+.badge-row { display: flex; flex-wrap: wrap; gap: .5rem; }
+.pill { background: var(--color-accent-light); border: 1px solid #b3e0f0; color: var(--color-primary); border-radius: 20px; padding: .3rem .85rem; font-size: .8rem; font-weight: 500; }
+.pill-small { background: var(--color-accent-light); color: var(--color-accent); border-radius: 20px; padding: .2rem .65rem; font-size: .78rem; font-weight: 600; }
+
+/* ========== Headings ========== */
+h2 { font-size: 1.65rem; font-weight: 800; color: var(--color-primary); margin-bottom: 1.25rem; }
+.h3 { font-size: 1.1rem; font-weight: 700; color: var(--color-text); margin-bottom: .75rem; }
+
+/* ========== Abstract box ========== */
+.abstract-box { background: var(--color-surface); border-left: 4px solid var(--color-accent); border-radius: 0 var(--radius) var(--radius) 0; padding: 1.5rem 1.75rem; }
+.abstract-box p + p { margin-top: .9rem; }
+
+/* ========== Pipeline ========== */
+.pipeline { display: flex; flex-direction: column; align-items: center; gap: 0; max-width: 640px; margin: 0 auto; }
+.pipeline-step {
+  display: flex; align-items: flex-start; gap: 1rem;
+  background: var(--color-surface); border: 1px solid var(--color-border);
+  border-radius: var(--radius); padding: 1rem 1.25rem; width: 100%;
+}
+.pipeline-step--output { background: var(--color-accent-light); border-color: var(--color-accent); }
+.step-number {
+  flex-shrink: 0; width: 32px; height: 32px; border-radius: 50%;
+  background: var(--color-accent); color: #fff; font-weight: 800; font-size: .9rem;
+  display: flex; align-items: center; justify-content: center;
+}
+.pipeline-step--output .step-number { background: var(--color-primary); }
+.step-body h3 { font-size: 1rem; font-weight: 700; margin-bottom: .3rem; }
+.step-body p { font-size: .9rem; color: var(--color-muted); }
+.pipeline-arrow { color: var(--color-accent); font-size: 1.4rem; padding: .2rem 0 .2rem 15px; }
+
+/* ========== Two-col grid ========== */
+.two-col { display: grid; grid-template-columns: 1fr 1fr; gap: 2rem; align-items: start; }
+.grid-2 { display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; }
+.grid-4 { display: grid; grid-template-columns: repeat(4, 1fr); gap: 1rem; margin-bottom: 2rem; }
+
+/* ========== Stat ========== */
+.stat { text-align: center; padding: 1rem; background: var(--color-surface); border-radius: var(--radius); border: 1px solid var(--color-border); }
+.stat-value { font-size: 2rem; font-weight: 800; color: var(--color-primary); line-height: 1; }
+.stat-label { font-size: .8rem; color: var(--color-muted); margin-top: .3rem; }
+
+/* ========== Card ========== */
+.card { background: var(--color-surface); border: 1px solid var(--color-border); border-radius: var(--radius); padding: 1.5rem; }
+.info-box { background: var(--color-accent-light); border: 1px solid #b3e0f0; border-radius: var(--radius); padding: 1.25rem 1.5rem; }
+.info-box .h3 { color: var(--color-primary); }
+
+/* ========== Lists ========== */
+.bullets { list-style: none; padding: 0; }
+.bullets li { padding: .35rem 0 .35rem 1.4rem; position: relative; font-size: .95rem; }
+.bullets li::before { content: "→"; position: absolute; left: 0; color: var(--color-accent); font-weight: 700; }
+.compact { list-style: disc; padding-left: 1.2rem; font-size: .9rem; }
+.compact li { padding: .15rem 0; }
+ol.compact { list-style: decimal; padding-left: 1.4rem; }
+ol.compact li { padding: .2rem 0; font-size: .9rem; }
+
+/* ========== Table ========== */
+.table-wrap { overflow-x: auto; border-radius: var(--radius); border: 1px solid var(--color-border); }
+.table { width: 100%; border-collapse: collapse; font-size: .9rem; }
+.table th { background: var(--color-primary); color: #fff; padding: .65rem 1rem; text-align: left; font-weight: 600; white-space: nowrap; }
+.table td { padding: .6rem 1rem; border-bottom: 1px solid var(--color-border); }
+.table tbody tr:last-child td { border-bottom: none; }
+.table tbody tr:hover td { background: var(--color-surface); }
+.table .highlight td { background: #f0f9ff; font-weight: 600; }
+.table .delta-row td { background: #f0fdf4; font-size: .85rem; color: #166534; border-bottom: 2px solid var(--color-border); }
+.table .center { text-align: center; }
+.metric-desc { font-weight: 400; font-size: .78rem; color: rgba(255,255,255,.75); }
+.table-note { font-size: .82rem; color: var(--color-muted); margin-top: .75rem; }
+
+/* ========== Figure placeholder ========== */
+.figure-placeholder { /* theme colours so the dashed box and its text stay visible on light surfaces */
+  background: var(--color-surface); border: 2px dashed var(--color-border);
+  border-radius: var(--radius); padding: 3rem 1rem;
+  display: flex; flex-direction: column; align-items: center; justify-content: center;
+  gap: .5rem; color: var(--color-muted); text-align: center; min-height: 220px;
+}
+.figure-placeholder span { font-size: 1rem; font-weight: 600; }
+.figure-placeholder p { font-size: .85rem; }
+
+/* ========== Code cards ========== */
+.code-section { margin-bottom: 2rem; }
+.code-card { position: relative; background: #1e293b; border-radius: var(--radius); overflow: hidden; }
+.code-card pre { padding: 1.25rem 1.5rem; overflow-x: auto; }
+.code-card code { color: #e2e8f0; font-size: .85rem; }
+.code-copy {
+  position: absolute; top: .6rem; right: .6rem;
+  background: rgba(255,255,255,.1); border: 1px solid rgba(255,255,255,.15);
+  color: rgba(255,255,255,.7); border-radius: 5px; padding: .3rem .5rem;
+  cursor: pointer; display: flex; align-items: center; transition: background .15s;
+}
+.code-copy:hover { background: rgba(255,255,255,.2); color: #fff; }
+
+/* ========== BibTeX ========== */
+.bibtex-section { position: relative; margin-top: 1rem; }
+.bibtex { background: #1e293b; color: #e2e8f0; border-radius: var(--radius); padding: 1.25rem 1.5rem; overflow-x: auto; font-size: .85rem; margin-top: .75rem; }
+.copy-status { font-size: .82rem; color: var(--color-accent); height: 1.2em; margin-top: .4rem; }
+
+/* ========== Footer ========== */
+.footer { background: var(--color-primary); color: rgba(255,255,255,.7); padding: 2rem 0; margin-top: 2rem; }
+.footer-inner { display: flex; align-items: center; gap: 1.5rem; }
+.footer-logo { height: 28px; filter: brightness(0) invert(1); opacity: .85; }
+.footer-logo--color { filter: none; opacity: .9; }
+.footer-text { font-size: .88rem; }
+.footer-text a { color: rgba(255,255,255,.7); }
+.footer-text a:hover { color: #fff; }
+
+/* ========== Misc ========== */
+.muted { color: var(--color-muted); }
+.small { font-size: .85rem; }
+sup { font-size: .7em; vertical-align: super; }
+
+/* ========== Responsive ========== */
+@media (max-width: 768px) {
+  .two-col { grid-template-columns: 1fr; }
+  .grid-4 { grid-template-columns: 1fr 1fr; }
+  .nav { display: none; flex-direction: column; position: absolute; top: var(--nav-h); left: 0; right: 0; background: #fff; border-bottom: 1px solid var(--color-border); padding: 1rem 1.5rem; gap: .75rem; z-index: 99; }
+  .nav.open { display: flex; }
+  .nav-toggle { display: flex; margin-left: auto; }
+  .footer-inner { flex-direction: column; text-align: center; gap: .75rem; }
+}
+
+@media (max-width: 480px) {
+  .grid-4 { grid-template-columns: 1fr; }
+  .grid-2 { grid-template-columns: 1fr; }
+}
diff --git a/docs/user_guide.md b/docs/user_guide.md
deleted file mode 100644
index fb42741..0000000
--- a/docs/user_guide.md
+++ /dev/null
@@ -1,158 +0,0 @@
-# User Guide
-
-## pyproject.toml file and dependency management
-
-If your project doesn't have a pyproject.toml file, simply copy the one from the
-template and update file according to your project.
-
-For managing dependencies, this template makes use of [uv](https://docs.astral.sh/uv/),
-which according to some [benchmarks](https://github.com/astral-sh/uv/blob/main/BENCHMARKS.md)
-is faster than alternative like Poetry (which our original AI Engineering Template
-makes use of).
-
-Hence, be sure to install uv in order to to setup the development virtual environment.
-Instructions for installing uv can be found [here](https://docs.astral.sh/uv/getting-started/installation/).
-Note that uv supports [optional dependency groups](https://docs.astral.sh/uv/concepts/projects/dependencies/#dependency-groups)
-which helps to manage dependencies for different parts of development such as `documentation`,
-`testing`, etc. The core dependencies are installed using the command:
-
-```bash
-uv sync
-```
-
-Additional dependency groups can be installed using the `--group` flag followed
-by the group name. For example:
-
-```bash
-uv sync --all-extras --group docs --group test
-```
-
-!!! important "mypy configuration options"
-    By default, the `mypy` configuration in the `pyproject.toml` disallows subclassing
-    the `Any` type - `allow_subclassing_any = false`. In cases where the type checker
-    is not able to determine the types of objects in some external library (e.g. `PyTorch`),
-    it will treat them as `Any` and raise errors. If your codebase has many of such
-    cases, you can set `allow_subclassing_any = true` in the `mypy` configuration or
-    remove it entirely to use the default value (which is `true`). For example, in
-    a `PyTorch` project, subclassing `nn.Module` will raise errors if `allow_subclassing_any`
-    is set to `false`.
-
-
-## pre-commit
-
-You can use [pre-commit](https://pre-commit.com/) to run pre-commit hooks (code checks,
-liniting, etc.) when you run `git commit` and commit your code. Simply copy the
-`.pre-commit-config.yaml` file to the root of the repository and install the test
-dependencies which installs pre-commit. Then run:
-
-```bash
-pre-commit install
-```
-
-If you prefer to not enforce using pre-commit every time you run `git commit`,
-you will have to run `pre-commit run --all-files` from the command line before you
-commit your code.
-
-!!! important "hook configuration"
-    Some of the pre-commit hooks use [supported hooks](https://pre-commit.com/hooks.html)
-    from the web.
-
-    For some others, they are locally installed and hence use the python virtual environment
-    locally. If `language` is set to `python`, each time the hook is installed, a separate
-    python virtual environment is created and you can specify dependencies needed using
-    `additional_dependencies`.
-
-    If `language` is set to `system`, the activated python virtual environment is used and
-    and hence you have to ensure that the required dependencies and their versions are
-    correctly installed.
-
-    ```yaml
-      - repo: local
-        hooks:
-        - id: pytest
-          name: pytest
-          entry: python3 -m pytest -m "not integration_test"
-          language: python/system # set according to your project needs
-    ```
-
-!!! warning "typos"
-    The [typos](https://github.com/crate-ci/typos) pre-commit hook is used to check for
-    common spelling mistakes in the codebase. While useful, it may require some
-    configuration to ignore certain words or phrases that are not typos. You can
-    [configure the typos hook](https://github.com/crate-ci/typos/blob/master/docs/reference.md)
-    in the `pyproject.toml` file. In a large codebase, it may be useful to disable
-    the typos hook and only run it occasionally on the entire codebase.
-
-
-### pre-commit ci
-
-Instead of fixing pre-commit errors manually, a CI to fix them as well as update
-pre-commit hooks periodically can be enabled for your repository. Please check
-[pre-commit.ci](https://pre-commit.ci/) and add your repository. The configuration for
-``pre-commit.ci`` can be added to the ``.pre-commit-config.yaml`` file.
-
-
-## documentation
-
-If your project doesn't have documentation, copy the directory named `docs` to the root
-directory of your repository. This template uses [MkDocs](https://www.mkdocs.org/) with
-the [Material for MkDocs](https://squidfunk.github.io/mkdocs-material/) theme.
-
-In order to build the documentation, install the documentation dependencies as mentioned
-in the previous section, then run the command:
-
-```bash
-mkdocs build
-```
-
-If you're making changes to the docs, and want to serve them locally on your machine,
-then you can use this command instead:
-
-```bash
-mkdocs serve
-```
-
-The above will launch the docs locally on `http://127.0.0.1:8000`, which you can
-enter into your browser of choice. Conveniently, this process also watches for any
-changes you make to the docs and will update them as they occur.
-
-You can configure the documentation by updating the `mkdocs.yml` file at the root of
-your repository. The markdown files in the `docs` directory can be updated to reflect
-the project's documentation.
-
-### GitHub Pages Setup
-
-To serve your documentation on GitHub Pages:
-
-1. Go to your repository's **Settings** tab
-2. Navigate to **Pages** in the left sidebar
-3. Under **Source**, select **Deploy from a branch**
-4. Choose the **gh-pages** branch and **/ (root)** folder
-5. Click **Save**
-
-The documentation will be automatically built and deployed to GitHub Pages whenever you push changes to the main branch (thanks to the `.github/workflows/docs.yml` workflow). Your site will be available at `https://[username].github.io/[repository-name]/`.
-
-
-## github actions
-
-The template consists of some github action continuous integration workflows that you
-can add to your repository.
-
-The available workflows are:
-
-- [code checks](https://github.com/VectorInstitute/aieng-template/blob/main/.github/workflows/code_checks.yml): Static code analysis, code formatting and unit tests
-- [documentation](https://github.com/VectorInstitute/aieng-template/blob/main/.github/workflows/docs.yml): Project documentation including example API reference
-- [integration tests](https://github.com/VectorInstitute/aieng-template/blob/main/.github/workflows/integration_tests.yml): Integration tests
-- [publish](https://github.com/VectorInstitute/aieng-template/blob/main/.github/workflows/publish.yml):
-Publishing python package to PyPI. Create a `PYPI_API_TOKEN` and add it to the
-repository's actions [secret variables](https://docs.github.com/en/actions/security-guides/using-secrets-in-github-actions)
-in order to publish PyPI packages when new software releases are created on Github.
-
-The test workflows also compute coverage and upload code coverage metrics to
-[codecov.io](https://app.codecov.io/gh/VectorInstitute/aieng-template). Create a
-`CODECOV_TOKEN` and add it to the repository's actions [secret variables](https://docs.github.com/en/actions/security-guides/using-secrets-in-github-actions).
-
-!!! warning "codecov"
-    The [codecov](https://app.codecov.io/github/VectorInstitute) tool is subscribed under the free tier
-    which makes it usable only for public open-source repos. Hence, if you would like to develop in a
-    private repo, it is recommended to remove the codecov actions from the github workflow files.
diff --git a/mkdocs.yml b/mkdocs.yml
index f9ba705..94c0c35 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -7,60 +7,19 @@ extra:
       link: 404.html
     - icon: fontawesome/brands/github
       link: https://github.com/VectorInstitute/AgentFinVQA
-markdown_extensions:
-  - attr_list
-  - admonition
-  - md_in_html
-  - pymdownx.highlight:
-      anchor_linenums: true
-      line_spans: __span
-      pygments_lang_class: true
-  - pymdownx.inlinehilite
-  - pymdownx.details
-  - pymdownx.snippets
-  - pymdownx.superfences
-  - pymdownx.emoji:
-      emoji_index: !!python/name:material.extensions.emoji.twemoji
-      emoji_generator: !!python/name:material.extensions.emoji.to_svg
-  - toc:
-      permalink: true
-nav:
-  - Home: index.md
-  - User Guide: user_guide.md
-  - API Reference: api.md
-plugins:
-  - search
-  - mkdocstrings:
-      handlers:
-        python:
-          options:
-            docstring_style: numpy
-            members_order: source
-            separate_signature: true
-            show_overloads: true
-            show_submodules: true
-            show_root_heading: false
-            show_root_full_path: true
-            show_root_toc_entry: false
-            show_symbol_type_heading: true
-            show_symbol_type_toc: true
 repo_url: https://github.com/VectorInstitute/AgentFinVQA
 repo_name: VectorInstitute/AgentFinVQA
 site_name: AgentFinVQA
+plugins:
+  - search
 theme:
   custom_dir: docs/overrides
   favicon: assets/favicon-48x48.svg
   features:
-    - content.code.annotate
-    - content.code.copy
     - navigation.footer
-    - navigation.indexes
-    - navigation.instant
-    - navigation.tabs
     - navigation.top
     - search.suggest
     - search.highlight
-    - toc.follow
   icon:
     repo: fontawesome/brands/github
   logo: assets/vector-logo.svg

System	Backbone	Mean Acc. ↑	Exact Acc. ↑	MCQ Mean ↑	Std Mean ↑
Zero-shot	Gemini-3 Flash Preview	63.56%	56.40%	64.4%	54.0%
AgentFinVQA	Gemini-3 Flash Preview	71.24%	65.12%	72.5%	57.0%
Δ		+7.68 pp	+8.72 pp	+8.1 pp	+3.0 pp
Zero-shot	Qwen3.6-27B-FP8	61.68%	53.52%	62.8%	49.0%
AgentFinVQA	Qwen3.6-27B-FP8	66.52%	60.24%	68.1%	48.0%
Δ		+4.84 pp	+6.72 pp	+5.3 pp	−1.0 pp†