diff --git a/CHANGELOG.md b/CHANGELOG.md
index 29a2b68..04a6868 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- **MarkItDown engine adapter** — wraps Microsoft's [`markitdown`](https://github.com/microsoft/markitdown) pure-Python library that converts Office files, PDFs, HTML, images, CSV/JSON/XML, ePub, audio, and ZIP archives into LLM-friendly Markdown. Added to the `benchmark.py` harness alongside the other local engines. Install: `pip install docfold[markitdown]`.
+- **Non-PDF benchmark fixtures** — `benchmark.py` now also generates synthetic DOCX (built with stdlib `zipfile` + minimal Office Open XML, no extra deps), HTML, and CSV documents, and filters engines per-doc by `supported_extensions` so PyMuPDF / OCR engines no longer log spurious errors on Office or web fixtures.
 - **OpenDataLoader PDF engine adapter** — wraps the Java-based [`opendataloader-pdf`](https://github.com/opendataloader-project/opendataloader-pdf) tool (via its bundled-JAR Python wheel). Local, deterministic extraction with typed structural elements (heading, paragraph, table, list, header, footer) and per-element bounding boxes. Install: `pip install docfold[opendataloader]` (also requires Java 11+).
 - **Multi-script benchmark coverage** — `benchmark.py` now generates Arabic (RTL + shaping), Hebrew (RTL, no shaping), and Simplified Chinese (CJK) synthetic PDFs alongside the existing English docs. Fonts are bundled under `tests/fixtures/fonts/` (OFL-1.1, subsetted where relevant) so the benchmark is reproducible without system font packages.
 
diff --git a/README.md b/README.md
index 7c2d8f9..c623586 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,7 @@ Docfold is the open-source extraction engine from [Datatera.ai](https://datatera
 | [**Azure Doc Intel**](https://azure.microsoft.com/en-us/products/ai-services/ai-document-intelligence) | ✅ | SaaS | Paid | ★★★ | ★★★ | ★★★ | ✅ | ✅ | Fast | $$ |
 | [**Nougat**](https://github.com/facebookresearch/nougat) | ✅ | Local | MIT | ★★★ | ★★☆ | ★★☆ | — | — | Slow | Free |
 | [**Surya**](https://github.com/VikParuchuri/surya) | ✅ | Local | GPL | ★★☆ | ★★★ | ★★☆ | ✅ | ✅ | Medium | Free |
+| [**MarkItDown**](https://github.com/microsoft/markitdown) | ✅ | Local | MIT | ★★☆ | ★☆☆ | ★★☆ | — | — | Fast | Free |
 
 **★★★** Excellent **★★☆** Good **★☆☆** Basic **☆☆☆** Not supported — **$$** ~$1-3/1K pages **$$$** ~$5-15/1K pages — **BBox** Bounding boxes — **Conf** Confidence scores
 
@@ -108,6 +109,7 @@ for name, res in results.items():
 | [**Azure Doc Intel**](https://azure.microsoft.com/en-us/products/ai-services/ai-document-intelligence) | SaaS | Paid | PDF, Office, HTML, images | N/A | `pip install docfold[azure-docint]` |
 | [**Nougat**](https://github.com/facebookresearch/nougat) | Local | MIT (code) | PDF | Recommended | `pip install docfold[nougat]` |
 | [**Surya**](https://github.com/VikParuchuri/surya) | Local | GPL-3.0 | PDF, images | Optional | `pip install docfold[surya]` |
+| [**MarkItDown**](https://github.com/microsoft/markitdown) | Local | MIT | PDF, Office, HTML, images, CSV/JSON/XML, ePub, audio, ZIP | No | `pip install docfold[markitdown]` |
 
 > **Adding your own engine?** Implement the `DocumentEngine` interface — see [Adding a Custom Engine](#adding-a-custom-engine) below.
 
diff --git a/benchmark.py b/benchmark.py
index 287d5a9..e70427f 100644
--- a/benchmark.py
+++ b/benchmark.py
@@ -286,9 +286,133 @@ def generate_benchmark_documents(tmpdir: str) -> list[dict]:
     # round-trip extraction for those scripts (null bytes, dropped matras).
     # They need real-world fixture PDFs — see docs/tasks/ for a follow-up.
 
+    # --- Doc 8: DOCX (Office) ---
+    # Minimal valid Office Open XML built with stdlib only — no python-docx
+    # dependency. Exercises engines that handle Office formats (markitdown,
+    # docling, unstructured, liteparse, ...).
+    doc8_path = os.path.join(tmpdir, "office_memo.docx")
+    doc8_paragraphs = [
+        "Internal Memo",
+        "To: All Staff",
+        "Date: April 25, 2026",
+        "Subject: Q1 2026 Results",
+        "Revenue grew 18 percent year-over-year, exceeding the plan.",
+        "Operating margin improved to 24.1 percent.",
+    ]
+    create_docx(doc8_path, doc8_paragraphs)
+    documents.append({
+        "name": "office_memo",
+        "path": doc8_path,
+        "ground_truth": "\n".join(doc8_paragraphs),
+        "pages": 1,
+        "category": "office",
+    })
+
+    # --- Doc 9: HTML page ---
+    doc9_path = os.path.join(tmpdir, "blog_post.html")
+    doc9_paragraphs = [
+        "How Document Processing Works",
+        "Document processing converts unstructured files into structured data.",
+        "Modern pipelines combine layout analysis, OCR, and language models.",
+        "Open-source toolkits make these capabilities widely accessible.",
+    ]
+    doc9_html = (
+        "<!DOCTYPE html><html><head><title>Doc Processing</title></head><body>"
+        f"<h1>{doc9_paragraphs[0]}</h1>"
+        + "".join(f"<p>{p}</p>" for p in doc9_paragraphs[1:])
+        + "</body></html>"
+    )
+    with open(doc9_path, "w", encoding="utf-8") as f:
+        f.write(doc9_html)
+    documents.append({
+        "name": "blog_post",
+        "path": doc9_path,
+        "ground_truth": "\n".join(doc9_paragraphs),
+        "pages": 1,
+        "category": "web",
+    })
+
+    # --- Doc 10: CSV (tabular) ---
+    # Engines that target Markdown output (markitdown, docling, ...) render
+    # CSV as a Markdown table.  The ground truth is the canonical Markdown
+    # table so CER/WER measure formatting fidelity, not how cells are joined.
+    doc10_path = os.path.join(tmpdir, "sales.csv")
+    doc10_rows = [
+        ["Region", "Q1", "Q2", "Q3", "Q4"],
+        ["North", "120", "135", "150", "180"],
+        ["South", "98", "110", "125", "140"],
+        ["East", "85", "92", "100", "118"],
+        ["West", "140", "155", "170", "200"],
+    ]
+    with open(doc10_path, "w", encoding="utf-8") as f:
+        for row in doc10_rows:
+            f.write(",".join(row) + "\n")
+    header = doc10_rows[0]
+    sep = ["---"] * len(header)
+    md_lines = (
+        ["| " + " | ".join(header) + " |", "| " + " | ".join(sep) + " |"]
+        + ["| " + " | ".join(row) + " |" for row in doc10_rows[1:]]
+    )
+    documents.append({
+        "name": "sales_csv",
+        "path": doc10_path,
+        "ground_truth": "\n".join(md_lines),
+        "pages": 1,
+        "category": "tabular",
+    })
+
     return documents
 
 
+def create_docx(path: str, paragraphs: list[str]) -> None:
+    """Build a minimal but valid .docx (Office Open XML) with no dependencies.
+
+    Only enough structure to round-trip plain paragraphs through engines like
+    python-docx, docling, markitdown, unstructured, liteparse, ...
+    """
+    import zipfile  # stdlib only; imported here rather than at module top
+
+    def _xml_escape(s: str) -> str:
+        return (
+            s.replace("&", "&amp;")  # "&" first so the entities added below are not re-escaped
+             .replace("<", "&lt;")
+             .replace(">", "&gt;")
+        )
+
+    body = "".join(  # one <w:p> with a single run/text node per input string
+        f'<w:p><w:r><w:t xml:space="preserve">{_xml_escape(p)}</w:t></w:r></w:p>'
+        for p in paragraphs
+    )
+    document_xml = (
+        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+        '<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
+        f'<w:body>{body}</w:body>'
+        '</w:document>'
+    )
+    content_types = (  # written below as [Content_Types].xml
+        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+        '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">'
+        '<Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>'
+        '<Default Extension="xml" ContentType="application/xml"/>'
+        '<Override PartName="/word/document.xml" '
+        'ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>'
+        '</Types>'
+    )
+    rels = (  # written below as _rels/.rels; its one relationship targets word/document.xml
+        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
+        '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">'
+        '<Relationship Id="rId1" '
+        'Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" '
+        'Target="word/document.xml"/>'
+        '</Relationships>'
+    )
+
+    with zipfile.ZipFile(path, "w", zipfile.ZIP_DEFLATED) as zf:  # these three parts are the whole package
+        zf.writestr("[Content_Types].xml", content_types)
+        zf.writestr("_rels/.rels", rels)
+        zf.writestr("word/document.xml", document_xml)
+
+
 def compute_cer(predicted: str, reference: str) -> float:
     """Character Error Rate — Levenshtein distance / reference length."""
     if not reference:
@@ -351,6 +475,7 @@ async def main():
     from docfold.engines.easyocr_engine import EasyOCREngine
     from docfold.engines.liteparse_engine import LiteParseEngine
     from docfold.engines.marker_local_engine import MarkerLocalEngine
+    from docfold.engines.markitdown_engine import MarkItDownEngine
     from docfold.engines.mineru_engine import MinerUEngine
     from docfold.engines.nougat_engine import NougatEngine
     from docfold.engines.opendataloader_engine import OpenDataLoaderEngine
@@ -377,6 +502,7 @@ async def main():
         (PaddleOCREngine(), "pip install paddleocr"),
         (TesseractEngine(), "pip install pytesseract"),
         (UnstructuredEngine(), "pip install unstructured"),
+        (MarkItDownEngine(), "pip install docfold[markitdown]"),
     ]
 
     # Skip engines that hang on CPU for multi-doc benchmarks
@@ -414,8 +540,15 @@ async def main():
             print(f"{'─' * 90}")
 
             gt = doc["ground_truth"]
+            doc_ext = os.path.splitext(doc["path"])[1].lstrip(".").lower()
 
             for engine in engines:
+                # Skip engines whose declared supported_extensions don't include
+                # this doc's format — keeps the report free of noise like
+                # "PyMuPDF can't open .docx".
+                if doc_ext and doc_ext not in engine.supported_extensions:
+                    continue
+
                 result, error = await run_engine(
                     engine, doc["path"], OutputFormat.MARKDOWN
                 )
diff --git a/docs/benchmark_results.json b/docs/benchmark_results.json
index 631f884..ba7961b 100644
--- a/docs/benchmark_results.json
+++ b/docs/benchmark_results.json
@@ -1,8 +1,8 @@
 {
-  "benchmark_date": "2026-04-17 10:17:34",
+  "benchmark_date": "2026-04-25 18:26:40",
   "engines": [
     "pymupdf",
-    "opendataloader"
+    "markitdown"
   ],
   "documents": [
     {
@@ -39,11 +39,26 @@
       "name": "hebrew_report",
       "pages": 1,
       "category": "rtl"
+    },
+    {
+      "name": "office_memo",
+      "pages": 1,
+      "category": "office"
+    },
+    {
+      "name": "blog_post",
+      "pages": 1,
+      "category": "web"
+    },
+    {
+      "name": "sales_csv",
+      "pages": 1,
+      "category": "tabular"
     }
   ],
   "summary": {
     "pymupdf": {
-      "avg_time_ms": 6.4,
+      "avg_time_ms": 4.4,
       "avg_cer": 0.0,
       "avg_wer": 0.0,
       "avg_bbox_count": 5.3,
@@ -52,7 +67,7 @@
       "results": [
         {
           "doc": "simple_text",
-          "time_ms": 11,
+          "time_ms": 5,
           "cer": 0.0,
           "wer": 0.0,
           "bbox_count": 5,
@@ -61,7 +76,7 @@
         },
         {
           "doc": "multi_page",
-          "time_ms": 4,
+          "time_ms": 3,
           "cer": 0.0,
           "wer": 0.0,
           "bbox_count": 4,
@@ -70,7 +85,7 @@
         },
         {
           "doc": "dense_financial",
-          "time_ms": 5,
+          "time_ms": 3,
           "cer": 0.0,
           "wer": 0.0,
           "bbox_count": 10,
@@ -79,7 +94,7 @@
         },
         {
           "doc": "mixed_formatting",
-          "time_ms": 4,
+          "time_ms": 2,
           "cer": 0.0,
           "wer": 0.0,
           "bbox_count": 6,
@@ -88,7 +103,7 @@
         },
         {
           "doc": "arabic_report",
-          "time_ms": 6,
+          "time_ms": 3,
           "cer": 0.0,
           "wer": 0.0,
           "bbox_count": 4,
@@ -97,7 +112,7 @@
         },
         {
           "doc": "chinese_report",
-          "time_ms": 4,
+          "time_ms": 3,
           "cer": 0.0,
           "wer": 0.0,
           "bbox_count": 4,
@@ -106,7 +121,7 @@
         },
         {
           "doc": "hebrew_report",
-          "time_ms": 11,
+          "time_ms": 12,
           "cer": 0.0,
           "wer": 0.0,
           "bbox_count": 4,
@@ -115,76 +130,103 @@
         }
       ]
     },
-    "opendataloader": {
-      "avg_time_ms": 796.6,
-      "avg_cer": 0.257,
-      "avg_wer": 0.3756,
-      "avg_bbox_count": 3.1,
+    "markitdown": {
+      "avg_time_ms": 47.0,
+      "avg_cer": 0.0343,
+      "avg_wer": 0.1726,
+      "avg_bbox_count": 0.0,
       "errors": 0,
-      "successes": 7,
+      "successes": 10,
       "results": [
         {
           "doc": "simple_text",
-          "time_ms": 1083,
-          "cer": 0.0165,
-          "wer": 0.0556,
-          "bbox_count": 2,
-          "content_length": 123,
-          "pages": 1
+          "time_ms": 144,
+          "cer": 0.0,
+          "wer": 0.0,
+          "bbox_count": 0,
+          "content_length": 121,
+          "pages": null
         },
         {
           "doc": "multi_page",
-          "time_ms": 756,
-          "cer": 0.0133,
-          "wer": 0.0513,
-          "bbox_count": 2,
-          "content_length": 304,
-          "pages": 2
+          "time_ms": 27,
+          "cer": 0.0,
+          "wer": 0.0,
+          "bbox_count": 0,
+          "content_length": 300,
+          "pages": null
         },
         {
           "doc": "dense_financial",
-          "time_ms": 725,
+          "time_ms": 21,
           "cer": 0.0,
           "wer": 0.0,
-          "bbox_count": 1,
+          "bbox_count": 0,
           "content_length": 298,
-          "pages": 1
+          "pages": null
         },
         {
           "doc": "mixed_formatting",
-          "time_ms": 737,
-          "cer": 0.0308,
-          "wer": 0.0811,
-          "bbox_count": 5,
-          "content_length": 268,
-          "pages": 1
+          "time_ms": 23,
+          "cer": 0.0,
+          "wer": 0.0,
+          "bbox_count": 0,
+          "content_length": 260,
+          "pages": null
         },
         {
           "doc": "arabic_report",
-          "time_ms": 766,
-          "cer": 0.8675,
-          "wer": 1.08,
-          "bbox_count": 4,
-          "content_length": 151,
-          "pages": 1
+          "time_ms": 69,
+          "cer": 0.2517,
+          "wer": 0.32,
+          "bbox_count": 0,
+          "content_length": 179,
+          "pages": null
         },
         {
           "doc": "chinese_report",
-          "time_ms": 721,
-          "cer": 0.0317,
-          "wer": 0.25,
-          "bbox_count": 4,
-          "content_length": 65,
-          "pages": 1
+          "time_ms": 20,
+          "cer": 0.0476,
+          "wer": 1.0,
+          "bbox_count": 0,
+          "content_length": 60,
+          "pages": null
         },
         {
           "doc": "hebrew_report",
-          "time_ms": 788,
-          "cer": 0.8392,
-          "wer": 1.1111,
-          "bbox_count": 4,
-          "content_length": 149,
-          "pages": 1
+          "time_ms": 148,
+          "cer": 0.035,
+          "wer": 0.3704,
+          "bbox_count": 0,
+          "content_length": 138,
+          "pages": null
+        },
+        {
+          "doc": "office_memo",
+          "time_ms": 10,
+          "cer": 0.0,
+          "wer": 0.0,
+          "bbox_count": 0,
+          "content_length": 176,
+          "pages": null
+        },
+        {
+          "doc": "blog_post",
+          "time_ms": 5,
+          "cer": 0.0087,
+          "wer": 0.0357,
+          "bbox_count": 0,
+          "content_length": 233,
+          "pages": null
+        },
+        {
+          "doc": "sales_csv",
+          "time_ms": 3,
+          "cer": 0.0,
+          "wer": 0.0,
+          "bbox_count": 0,
+          "content_length": 193,
+          "pages": null
         }
       ]
     }
diff --git a/docs/tasks/MARKITDOWN_ENGINE.md b/docs/tasks/MARKITDOWN_ENGINE.md
new file mode 100644
index 0000000..41e6942
--- /dev/null
+++ b/docs/tasks/MARKITDOWN_ENGINE.md
@@ -0,0 +1,127 @@
+---
+purpose: "Integrate Microsoft's markitdown as a new docfold engine and include it in the benchmark harness."
+status: "IN_PROGRESS"
+priority: "P1"
+created: "2026-04-24"
+---
+
+# Feature: markitdown engine adapter + benchmark coverage
+
+## Problem
+
+Microsoft ships [markitdown](https://github.com/microsoft/markitdown), a pure-Python
+library that converts a wide range of formats (PDF, DOCX, PPTX, XLSX, HTML, CSV,
+JSON, XML, images, audio, ePub, ZIP, YouTube URLs) into LLM-friendly Markdown.
+It is MIT-licensed, has no heavy runtime, and is a sensible "lowest common
+denominator" baseline that users expect docfold to support alongside Docling,
+Marker, Unstructured, etc. Today there is no adapter, no extras group, and it
+does not appear in `benchmark.py`.
+
+Task from the user: "connect it and run benches" — so we need both the adapter
+and synthetic-PDF benchmark coverage comparable to the other local engines.
+
+## Proposed Solution
+
+1. New `MarkItDownEngine` adapter under `src/docfold/engines/markitdown_engine.py`
+   that conforms to the `DocumentEngine` ABC:
+   - Lazy-imports `markitdown` (keeps the base package dep-free).
+   - Calls `MarkItDown().convert(file_path)` inside an executor (the library's
+     API is synchronous).
+   - Returns an `EngineResult` with `format=OutputFormat.MARKDOWN` (markitdown
+     always emits Markdown; for `HTML`/`JSON`/`TEXT` we serialize the Markdown
+     string into a minimal wrapper so the contract holds).
+   - `is_available()` returns True only when `markitdown` is importable.
+   - `capabilities` is empty — markitdown returns a Markdown string, no bboxes.
+2. New `markitdown` extras in `pyproject.toml`:
+   `markitdown = ["markitdown[all]>=0.0.1"]` and add to `all = [...]`.
+3. Register it in `engines/router.py` priority lists for the formats it
+   handles (PDF, Office, HTML, images, CSV/text, ePub, ZIP) — placed near the
+   Unstructured/Marker tier since it is a similar "convert to Markdown"
+   baseline rather than a layout analyzer.
+4. Wire it into `benchmark.py` as an additional candidate engine so it runs
+   on the same 7 synthetic PDFs as the existing engines and reports CER/WER/
+   time like the others.
+5. Add a row for markitdown to the two engine tables in `README.md`.
+
+## Affected Files
+
+- `src/docfold/engines/markitdown_engine.py` — new adapter.
+- `tests/engines/test_markitdown_engine.py` — new test file, mocks the
+  `markitdown` package (tests do not require it installed).
+- `src/docfold/engines/router.py` — add `"markitdown"` to extension priority
+  lists and the default fallback.
+- `pyproject.toml` — add `markitdown` extras, include in `all`.
+- `benchmark.py` — import and register `MarkItDownEngine` in the candidate
+  list.
+- `README.md` — add markitdown row to the two engine overview tables.
+
+## Test Plan
+
+### Unit / Functional Tests
+
+- [ ] `test_name` — engine name is `"markitdown"`.
+- [ ] `test_supported_extensions` — covers PDF, DOCX, PPTX, XLSX, HTML, images,
+      CSV, JSON, XML, ePub.
+- [ ] `test_capabilities_defaults_to_empty` — no bboxes/confidence etc.
+- [ ] `test_is_available_true_when_importable` — patched import succeeds.
+- [ ] `test_is_available_false_when_missing` — `ImportError` short-circuits.
+- [ ] `test_process_markdown_returns_engine_result` — mock the `MarkItDown`
+      class so `convert(...).text_content` is a known Markdown string; assert
+      the `EngineResult` fields (content, format, engine_name, time).
+- [ ] `test_process_runs_convert_in_executor` — the synchronous `convert` call
+      must be dispatched via `loop.run_in_executor` so we don't block the
+      event loop.
+- [ ] `test_process_missing_dependency_raises` — when markitdown isn't
+      installed, `.process()` should raise a clear `RuntimeError` (or similar)
+      so callers see *why* it failed.
+
+### Integration / E2E Tests
+
+- [ ] `benchmark.py` runs on a host where `markitdown` is installed and
+      produces a row for it in the summary table.
+
+### Test Commands
+```bash
+# Run just the new engine tests
+pytest tests/engines/test_markitdown_engine.py -v
+
+# Full suite (should stay green)
+pytest tests/
+
+# E2E benchmark (requires: pip install docfold[markitdown])
+python benchmark.py
+```
+
+## Edge Cases
+
+- `markitdown` not installed on CI — tests must mock the import path and not
+  require the real dependency (mirrors `test_liteparse_engine.py`).
+- `OutputFormat.HTML` / `JSON` / `TEXT` — markitdown only produces Markdown.
+  We honor the request by wrapping the Markdown string (HTML: wrap in
+  `<pre>`; JSON: `{"markdown": "...", "title": ...}`; TEXT: pass through).
+- Unicode / CJK / RTL documents — ensure the string is passed through without
+  encoding munging (the benchmark's Arabic/Hebrew/Chinese fixtures will
+  cover this in the E2E run).
+
+## Out of Scope
+
+- No plugin hooks for markitdown's extensibility system (custom converters).
+- No attempt to extract bounding boxes — markitdown doesn't produce them.
+- No YouTube URL routing. Audio (`mp3`, `wav`, `m4a`), `zip`, and `json` are
+  added to `_EXTENSION_PRIORITY` with markitdown as their only engine.
+
+## Follow-up: non-PDF benchmark coverage
+
+The first round of `benchmark.py` only generated PDFs, which is the format
+where markitdown is *least* differentiated (PyMuPDF dominates on digital text
+PDFs). To actually exercise where markitdown shines, the harness now also
+produces:
+
+- A synthetic **DOCX** (built via stdlib `zipfile` + minimal Word XML — no
+  new runtime deps).
+- A synthetic **HTML** page with a heading and a few paragraphs.
+- A synthetic **CSV** with a few rows.
+
+Engines are filtered per-doc by `supported_extensions`, so PyMuPDF / OCR
+engines simply don't run on Office / web / tabular fixtures (instead of
+spamming the report with errors).
diff --git a/pyproject.toml b/pyproject.toml
index 29af5f8..fb25214 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -108,6 +108,9 @@ surya = [
 firecrawl = [
     "firecrawl-py>=1.0",
 ]
+markitdown = [
+    "markitdown[all]>=0.0.1",
+]
 evaluation = [
     "jiwer>=3.0",          # WER/CER computation
     "numpy>=1.23",
@@ -116,7 +119,7 @@ evaluation = [
     "psutil>=5.9",         # Memory measurement
 ]
 all = [
-    "docfold[docling,mineru,marker,pymupdf,paddleocr,tesseract,easyocr,unstructured,llamaparse,liteparse,opendataloader,mistral-ocr,textract,google-docai,azure-docint,nougat,chandra,surya,firecrawl,evaluation]",
+    "docfold[docling,mineru,marker,pymupdf,paddleocr,tesseract,easyocr,unstructured,llamaparse,liteparse,opendataloader,mistral-ocr,textract,google-docai,azure-docint,nougat,chandra,surya,firecrawl,markitdown,evaluation]",
     # Note: zerox excluded from [all] — py-zerox requires Python 3.11+
     # Install separately: pip install docfold[zerox]
 ]
diff --git a/src/docfold/engines/markitdown_engine.py b/src/docfold/engines/markitdown_engine.py
new file mode 100644
index 0000000..cb47704
--- /dev/null
+++ b/src/docfold/engines/markitdown_engine.py
@@ -0,0 +1,135 @@
+"""MarkItDown engine adapter — Microsoft's open-source document-to-Markdown library.
+
+MarkItDown is a pure-Python tool that converts a wide range of document formats
+(Office files, PDFs, images, HTML, CSV/JSON/XML, ePub, audio, ZIP, ...) into
+LLM-friendly Markdown.  See https://github.com/microsoft/markitdown.
+
+Install: ``pip install docfold[markitdown]``
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import time
+from typing import Any
+
+from docfold.engines.base import (
+    DocumentEngine,
+    EngineCapabilities,
+    EngineResult,
+    OutputFormat,
+)
+
+logger = logging.getLogger(__name__)
+
+_SUPPORTED_EXTENSIONS = {
+    # Office
+    "docx", "pptx", "xlsx", "xls",
+    # PDFs
+    "pdf",
+    # Web / markup
+    "html", "htm", "xml",
+    # Tabular / structured data
+    "csv", "tsv", "json",
+    # Images (markitdown runs OCR/LLM captioning when configured)
+    "png", "jpg", "jpeg", "gif", "bmp", "tiff", "tif", "webp",
+    # Audio (transcription)
+    "mp3", "wav", "m4a",
+    # eBooks / archives / misc
+    "epub", "zip", "txt", "md",
+}
+
+
+class MarkItDownEngine(DocumentEngine):
+    """Adapter for Microsoft's ``markitdown`` library.
+
+    Markitdown converts documents to Markdown via its synchronous ``convert``
+    method.  We dispatch the call through ``run_in_executor`` so it does not
+    block the event loop.
+    """
+
+    def __init__(self, enable_plugins: bool = False) -> None:
+        self._enable_plugins = enable_plugins
+        self._converter: Any = None
+
+    @property
+    def name(self) -> str:
+        return "markitdown"
+
+    @property
+    def supported_extensions(self) -> set[str]:
+        return _SUPPORTED_EXTENSIONS
+
+    @property
+    def capabilities(self) -> EngineCapabilities:
+        # Markitdown returns a Markdown string — no layout, bboxes, or
+        # confidence scores.
+        return EngineCapabilities()
+
+    def is_available(self) -> bool:
+        # markitdown's import chain pulls in pdfminer/cryptography, which can
+        # raise non-ImportError exceptions (e.g. a broken PyO3 binding).
+        # Treat any import failure as "unavailable" so a broken env cannot
+        # knock out the whole router / benchmark harness.
+        try:
+            import markitdown  # noqa: F401
+            return True
+        except Exception:  # noqa: BLE001
+            return False
+
+    def _get_converter(self) -> Any:
+        if self._converter is None:
+            from markitdown import MarkItDown
+            self._converter = MarkItDown(enable_plugins=self._enable_plugins)
+        return self._converter
+
+    async def process(
+        self,
+        file_path: str,
+        output_format: OutputFormat = OutputFormat.MARKDOWN,
+        **kwargs: Any,
+    ) -> EngineResult:
+        start = time.perf_counter()
+
+        try:
+            converter = self._get_converter()
+        except ImportError as exc:
+            raise RuntimeError(
+                "markitdown is not installed. Install with: pip install docfold[markitdown]"
+            ) from exc
+        except TypeError:
+            # Older markitdown versions don't accept enable_plugins kwarg.
+            from markitdown import MarkItDown
+            self._converter = MarkItDown()
+            converter = self._converter
+
+        loop = asyncio.get_running_loop()
+        convert_result = await loop.run_in_executor(None, converter.convert, file_path)
+
+        markdown_text: str = getattr(convert_result, "text_content", "") or ""
+        title = getattr(convert_result, "title", None)
+
+        if output_format == OutputFormat.JSON:
+            content = json.dumps({"markdown": markdown_text, "title": title},
+                                 ensure_ascii=False)
+        elif output_format == OutputFormat.HTML:
+            # Minimal wrapper — markitdown doesn't render HTML itself.
+            content = f"<pre class=\"markdown\">{markdown_text}</pre>"
+        else:
+            # MARKDOWN and TEXT both return the markdown string as-is.
+            content = markdown_text
+
+        elapsed_ms = int((time.perf_counter() - start) * 1000)
+
+        return EngineResult(
+            content=content,
+            format=output_format,
+            engine_name=self.name,
+            processing_time_ms=elapsed_ms,
+            metadata={
+                "title": title,
+                "enable_plugins": self._enable_plugins,
+            },
+        )
diff --git a/src/docfold/engines/router.py b/src/docfold/engines/router.py
index d23fe57..8cafa77 100644
--- a/src/docfold/engines/router.py
+++ b/src/docfold/engines/router.py
@@ -25,6 +25,7 @@
 _IMAGE_PRIORITY = [
     "chandra", "surya", "paddleocr", "tesseract", "easyocr", "docling", "liteparse",
     "mistral_ocr", "google_docai", "textract", "azure_docint", "zerox", "marker",
+    "markitdown",
 ]
 
 _EXTENSION_PRIORITY: dict[str, list[str]] = {
@@ -33,31 +34,41 @@
         "docling", "mineru", "chandra", "unstructured", "marker",
         "llamaparse", "liteparse", "mistral_ocr", "firecrawl", "google_docai",
         "azure_docint", "textract", "zerox", "nougat", "surya", "pymupdf",
-        "paddleocr", "tesseract", "easyocr",
+        "paddleocr", "tesseract", "easyocr", "markitdown",
     ],
     # --- Office ---
     "docx": [
         "docling", "marker", "unstructured", "llamaparse",
-        "liteparse", "firecrawl", "azure_docint",
+        "liteparse", "firecrawl", "azure_docint", "markitdown",
     ],
     "doc":  ["docling", "marker", "unstructured", "llamaparse", "liteparse", "azure_docint"],
-    "pptx": ["docling", "marker", "unstructured", "llamaparse", "liteparse", "azure_docint"],
+    "pptx": [
+        "docling", "marker", "unstructured", "llamaparse",
+        "liteparse", "azure_docint", "markitdown",
+    ],
     "ppt":  ["docling", "marker", "unstructured", "llamaparse", "liteparse", "azure_docint"],
-    "xlsx": ["docling", "marker", "unstructured", "llamaparse", "liteparse", "azure_docint"],
-    "xls":  ["docling", "marker", "unstructured", "llamaparse", "liteparse", "azure_docint"],
+    "xlsx": [
+        "docling", "marker", "unstructured", "llamaparse",
+        "liteparse", "azure_docint", "markitdown",
+    ],
+    "xls":  [
+        "docling", "marker", "unstructured", "llamaparse",
+        "liteparse", "azure_docint", "markitdown",
+    ],
     "odt":  ["marker", "unstructured"],
     "odp":  ["marker", "unstructured"],
     "ods":  ["marker", "unstructured"],
     # --- Web / markup ---
-    "html": ["docling", "firecrawl", "unstructured", "marker", "azure_docint"],
-    "htm":  ["docling", "firecrawl", "unstructured", "marker", "azure_docint"],
-    "xml":  ["firecrawl", "unstructured"],
-    "md":   ["unstructured"],
+    "html": ["docling", "firecrawl", "unstructured", "marker", "azure_docint", "markitdown"],
+    "htm":  ["docling", "firecrawl", "unstructured", "marker", "azure_docint", "markitdown"],
+    "xml":  ["firecrawl", "unstructured", "markitdown"],
+    "md":   ["unstructured", "markitdown"],
     "rst":  ["unstructured"],
-    "csv":  ["unstructured"],
-    "tsv":  ["unstructured"],
-    "txt":  ["unstructured"],
+    "csv":  ["unstructured", "markitdown"],
+    "tsv":  ["unstructured", "markitdown"],
+    "txt":  ["unstructured", "markitdown"],
     "rtf":  ["unstructured"],
+    "json": ["markitdown"],
     # --- Images ---
     "png":  _IMAGE_PRIORITY,
     "jpg":  _IMAGE_PRIORITY,
@@ -71,7 +82,13 @@
     "eml":  ["unstructured"],
     "msg":  ["unstructured"],
     # --- eBooks ---
-    "epub": ["unstructured", "marker"],
+    "epub": ["unstructured", "marker", "markitdown"],
+    # --- Audio (transcription) ---
+    "mp3": ["markitdown"],
+    "wav": ["markitdown"],
+    "m4a": ["markitdown"],
+    # --- Archives ---
+    "zip": ["markitdown"],
 }
 
 # Ultimate fallback when extension is unknown or missing from the map.
@@ -79,7 +96,7 @@
     "docling", "mineru", "chandra", "unstructured", "marker",
     "llamaparse", "liteparse", "mistral_ocr", "google_docai", "azure_docint",
     "textract", "zerox", "nougat", "surya", "pymupdf", "paddleocr", "tesseract",
-    "easyocr",
+    "easyocr", "markitdown",
 ]
 
 
diff --git a/tests/engines/test_markitdown_engine.py b/tests/engines/test_markitdown_engine.py
new file mode 100644
index 0000000..9139e3c
--- /dev/null
+++ b/tests/engines/test_markitdown_engine.py
@@ -0,0 +1,209 @@
+"""Tests for the MarkItDown engine adapter.
+
+The ``markitdown`` package is not a test-time dependency; these tests mock the
+import path and the ``MarkItDown`` class so they run on any host.
+"""
+
+from __future__ import annotations
+
+import sys
+import types
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from docfold.engines.base import EngineResult, OutputFormat
+
+
+def _install_fake_markitdown(text_content: str = "# Hello\n\nWorld") -> MagicMock:
+    """Inject a fake ``markitdown`` module into ``sys.modules``.
+
+    Replaces any existing ``markitdown`` entry. Returns the mock ``MarkItDown``
+    class, whose instance's ``convert()`` result carries *text_content*.
+    """
+    fake_module = types.ModuleType("markitdown")
+    mock_class = MagicMock(name="MarkItDown")
+
+    # Default: MarkItDown().convert(path).text_content = text_content
+    instance = MagicMock()
+    convert_result = MagicMock()
+    convert_result.text_content = text_content
+    convert_result.title = None  # pinned; otherwise MagicMock would auto-create a child mock
+    instance.convert.return_value = convert_result
+    mock_class.return_value = instance  # every MarkItDown(...) call yields this one instance
+
+    fake_module.MarkItDown = mock_class
+    sys.modules["markitdown"] = fake_module  # unconditional overwrite; nothing is saved here
+    return mock_class
+
+
+def _remove_fake_markitdown() -> None:
+    sys.modules.pop("markitdown", None)  # default makes this a no-op when the key is absent
+
+
+@pytest.fixture
+def fake_markitdown():
+    mock_class = _install_fake_markitdown()  # the mock MarkItDown class handed to the test
+    try:
+        yield mock_class
+    finally:
+        _remove_fake_markitdown()  # runs even if the test fails; entry is popped, not restored
+
+
+class TestMarkItDownEngineMetadata:
+    def test_name(self):
+        from docfold.engines.markitdown_engine import MarkItDownEngine
+
+        assert MarkItDownEngine().name == "markitdown"
+
+    def test_supported_extensions_covers_markitdown_formats(self):
+        from docfold.engines.markitdown_engine import MarkItDownEngine
+
+        exts = MarkItDownEngine().supported_extensions
+        # Spot-check of Office, PDF, image, web/markup, tabular and ePub
+        # extensions; audio and archive extensions are not asserted here.
+        for fmt in ("pdf", "docx", "pptx", "xlsx", "html", "htm",
+                    "png", "jpg", "jpeg", "csv", "json", "xml", "epub"):
+            assert fmt in exts, f"expected '{fmt}' in supported_extensions"
+
+    def test_capabilities_are_empty_by_default(self):
+        from docfold.engines.markitdown_engine import MarkItDownEngine
+
+        caps = MarkItDownEngine().capabilities
+        # markitdown returns plain markdown with no layout info
+        assert caps.bounding_boxes is False
+        assert caps.confidence is False
+        assert caps.table_structure is False
+
+    def test_is_available_true_when_importable(self, fake_markitdown):
+        from docfold.engines.markitdown_engine import MarkItDownEngine
+
+        assert MarkItDownEngine().is_available() is True
+
+    def test_is_available_false_when_missing(self):
+        from docfold.engines.markitdown_engine import MarkItDownEngine
+
+        _remove_fake_markitdown()
+        with patch.dict(sys.modules, {"markitdown": None}):  # None entry => import raises
+            assert MarkItDownEngine().is_available() is False
+
+
+class TestMarkItDownEngineProcess:
+    @pytest.mark.asyncio
+    async def test_process_markdown_returns_engine_result(self, fake_markitdown):
+        from docfold.engines.markitdown_engine import MarkItDownEngine
+
+        # Override the fixture's default payload on the shared mock instance
+        instance = fake_markitdown.return_value
+        instance.convert.return_value.text_content = (
+            "# Invoice 2024\n\nAmount: **$1,250.00**"
+        )
+
+        engine = MarkItDownEngine()
+        result = await engine.process("invoice.pdf", output_format=OutputFormat.MARKDOWN)
+
+        assert isinstance(result, EngineResult)
+        assert result.engine_name == "markitdown"
+        assert result.format == OutputFormat.MARKDOWN
+        assert "Invoice 2024" in result.content
+        assert "$1,250.00" in result.content
+        assert result.processing_time_ms >= 0
+
+        # convert() must be called exactly once, with the path as first positional arg
+        instance.convert.assert_called_once()
+        call_args = instance.convert.call_args
+        assert call_args.args[0] == "invoice.pdf"
+
+    @pytest.mark.asyncio
+    async def test_process_runs_convert_in_executor(self, fake_markitdown):
+        """The sync convert() call must be dispatched through run_in_executor
+        so it does not block the event loop."""
+        import asyncio
+
+        from docfold.engines.markitdown_engine import MarkItDownEngine
+
+        engine = MarkItDownEngine()
+
+        loop = asyncio.get_running_loop()
+        original_run_in_executor = loop.run_in_executor  # bound method, captured before patching
+        call_count = {"n": 0}  # dict so the nested coroutine can mutate it without nonlocal
+
+        async def spy(*args, **kwargs):  # counts the call, then delegates to the real method
+            call_count["n"] += 1
+            return await original_run_in_executor(*args, **kwargs)
+
+        with patch.object(loop, "run_in_executor", side_effect=spy):
+            await engine.process("some.pdf", output_format=OutputFormat.MARKDOWN)
+
+        assert call_count["n"] >= 1, "convert() must be dispatched via run_in_executor"
+
+    @pytest.mark.asyncio
+    async def test_process_text_format_returns_plain_markdown(self, fake_markitdown):
+        from docfold.engines.markitdown_engine import MarkItDownEngine
+
+        instance = fake_markitdown.return_value
+        instance.convert.return_value.text_content = "# Title\n\nBody"
+
+        result = await MarkItDownEngine().process("x.pdf", output_format=OutputFormat.TEXT)
+
+        assert result.format == OutputFormat.TEXT
+        # Intent: TEXT passes the markdown through unchanged (only substrings are checked).
+        assert "Title" in result.content
+        assert "Body" in result.content
+
+    @pytest.mark.asyncio
+    async def test_process_json_format_wraps_markdown(self, fake_markitdown):
+        import json as _json
+
+        from docfold.engines.markitdown_engine import MarkItDownEngine
+
+        instance = fake_markitdown.return_value
+        instance.convert.return_value.text_content = "# Doc\n\nHello"
+
+        result = await MarkItDownEngine().process("x.pdf", output_format=OutputFormat.JSON)
+
+        assert result.format == OutputFormat.JSON
+        parsed = _json.loads(result.content)  # content must be a valid JSON document
+        assert isinstance(parsed, dict)
+        assert "markdown" in parsed
+        assert "Doc" in parsed["markdown"]
+
+    @pytest.mark.asyncio
+    async def test_process_html_format_wraps_markdown(self, fake_markitdown):
+        from docfold.engines.markitdown_engine import MarkItDownEngine
+
+        instance = fake_markitdown.return_value
+        instance.convert.return_value.text_content = "# Doc"
+
+        result = await MarkItDownEngine().process("x.pdf", output_format=OutputFormat.HTML)
+
+        assert result.format == OutputFormat.HTML
+        # The text must survive, and the output must begin with markup ("<").
+        assert "Doc" in result.content
+        assert result.content.strip().startswith("<")
+
+    @pytest.mark.asyncio
+    async def test_process_missing_dependency_raises(self):
+        """When markitdown isn't installed, process() must raise a clear error."""
+        from docfold.engines.markitdown_engine import MarkItDownEngine
+
+        _remove_fake_markitdown()
+        with patch.dict(sys.modules, {"markitdown": None}):  # None entry => import raises
+            engine = MarkItDownEngine()
+            with pytest.raises((RuntimeError, ImportError, ModuleNotFoundError)):
+                await engine.process("any.pdf", output_format=OutputFormat.MARKDOWN)
+
+    @pytest.mark.asyncio
+    async def test_process_preserves_unicode(self, fake_markitdown):
+        """Non-ASCII text (Arabic, CJK, Hebrew) must pass through unchanged."""
+        from docfold.engines.markitdown_engine import MarkItDownEngine
+
+        payload = "تقرير سنوي 2024\n\n2024年度报告\n\nדוח שנתי 2024"
+        instance = fake_markitdown.return_value
+        instance.convert.return_value.text_content = payload
+
+        result = await MarkItDownEngine().process("i18n.pdf", output_format=OutputFormat.MARKDOWN)
+
+        assert "تقرير" in result.content
+        assert "年度报告" in result.content
+        assert "דוח" in result.content