diff --git a/benchmark.py b/benchmark.py
new file mode 100644
index 0000000..85e0703
--- /dev/null
+++ b/benchmark.py
@@ -0,0 +1,326 @@
+"""Benchmark script — compares LiteParse vs PyMuPDF on synthetic PDF documents.
+
+Generates test PDFs with known content, runs both engines, and measures:
+- Processing speed (ms)
+- Text extraction quality (CER, WER)
+- Bounding box coverage
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+import sys
+import tempfile
+import time
+
+# Ensure docfold is importable
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src"))
+
+
+def create_text_pdf(path: str, pages: list[dict]) -> None:
+    """Write a PDF to *path*: one 612x792 page per entry, one text line per block."""
+    import fitz
+
+    # Context manager closes the document even if insert_text/save raises.
+    with fitz.open() as doc:
+        for page_data in pages:
+            page = doc.new_page(width=612, height=792)
+            y = 72
+            for block in page_data.get("blocks", []):
+                text = block["text"]
+                fontsize = block.get("fontsize", 11)
+                font = block.get("font", "helv")
+                page.insert_text((72, y), text, fontsize=fontsize, fontname=font)
+                y += fontsize * 1.5 + 8  # advance the baseline for the next block
+        doc.save(path)
+
+
+def generate_benchmark_documents(tmpdir: str) -> list[dict]:
+    """Write the synthetic PDFs into *tmpdir*; return one metadata dict (with ground truth) each."""
+    documents = []
+
+    # --- Doc 1: Simple single-page text ---
+    doc1_path = os.path.join(tmpdir, "simple_text.pdf")
+    doc1_blocks = [
+        {"text": "Invoice Number: INV-2024-001", "fontsize": 14},
+        {"text": "Date: January 15, 2024", "fontsize": 11},
+        {"text": "Bill To: Acme Corporation", "fontsize": 11},
+        {"text": "Amount Due: $1,250.00", "fontsize": 11},
+        {"text": "Payment Terms: Net 30", "fontsize": 11},
+    ]
+    # Derive the ground truth from the rendered blocks so the two cannot drift.
+    doc1_text = "\n".join(b["text"] for b in doc1_blocks)
+    create_text_pdf(doc1_path, [{"blocks": doc1_blocks}])
+    documents.append({
+        "name": "simple_text",
+        "path": doc1_path,
+        "ground_truth": doc1_text,
+        "pages": 1,
+        "category": "invoice",
+    })
+
+    # --- Doc 2: Multi-page document ---
+    doc2_path = os.path.join(tmpdir, "multi_page.pdf")
+    paragraphs = [
+        "Chapter 1: Introduction to Document Processing",
+        "Document processing is the task of converting unstructured documents into structured data formats.",
+        "This involves text extraction, layout analysis, and semantic understanding of content.",
+        "Modern approaches use deep learning models for accurate extraction.",
+    ]
+    doc2_text = "\n".join(paragraphs)
+    page1_blocks = [{"text": p, "fontsize": 12} for p in paragraphs[:2]]
+    page2_blocks = [{"text": p, "fontsize": 12} for p in paragraphs[2:]]
+    create_text_pdf(doc2_path, [
+        {"blocks": page1_blocks},
+        {"blocks": page2_blocks},
+    ])
+    documents.append({
+        "name": "multi_page",
+        "path": doc2_path,
+        "ground_truth": doc2_text,
+        "pages": 2,
+        "category": "report",
+    })
+
+    # --- Doc 3: Dense text ---
+    doc3_path = os.path.join(tmpdir, "dense_text.pdf")
+    dense_lines = [
+        "Financial Summary Report Q4 2024",
+        "Total Revenue: $4,523,891.00",
+        "Operating Expenses: $2,187,432.50",
+        "Net Income: $2,336,458.50",
+        "Gross Margin: 51.7%",
+        "Year-over-Year Growth: 23.4%",
+        "Accounts Receivable: $892,100.00",
+        "Accounts Payable: $445,200.00",
+        "Cash and Equivalents: $3,112,750.00",
+        "Total Assets: $12,445,890.00",
+    ]
+    doc3_text = "\n".join(dense_lines)
+    create_text_pdf(doc3_path, [
+        {"blocks": [{"text": line, "fontsize": 10} for line in dense_lines]}
+    ])
+    documents.append({
+        "name": "dense_financial",
+        "path": doc3_path,
+        "ground_truth": doc3_text,
+        "pages": 1,
+        "category": "financial",
+    })
+
+    # --- Doc 4: Mixed font sizes (headings + body) ---
+    doc4_path = os.path.join(tmpdir, "mixed_formatting.pdf")
+    doc4_blocks = [
+        {"text": "Annual Report 2024", "fontsize": 18},
+        {"text": "Executive Summary", "fontsize": 14},
+        {"text": "Our company achieved record growth this fiscal year with revenue exceeding expectations.", "fontsize": 10},
+        {"text": "Key Metrics", "fontsize": 14},
+        {"text": "Customer satisfaction score improved from 87% to 94%.", "fontsize": 10},
+        {"text": "Employee retention rate reached 96%, the highest in company history.", "fontsize": 10},
+    ]
+    doc4_text = "\n".join(b["text"] for b in doc4_blocks)
+    create_text_pdf(doc4_path, [{"blocks": doc4_blocks}])
+    documents.append({
+        "name": "mixed_formatting",
+        "path": doc4_path,
+        "ground_truth": doc4_text,
+        "pages": 1,
+        "category": "report",
+    })
+
+    return documents
+
+
+def _levenshtein(reference, predicted) -> int:
+    """Return the edit distance between two sequences (str or list of words).
+
+    Insertions, deletions and substitutions each cost 1.  Only the current and
+    previous DP rows (indexed by position in *reference*) are kept.
+    """
+    row = list(range(len(reference) + 1))
+    for j, pred_item in enumerate(predicted, 1):
+        prev = row[:]
+        row[0] = j
+        for i, ref_item in enumerate(reference, 1):
+            cost = 0 if ref_item == pred_item else 1
+            row[i] = min(prev[i] + 1, row[i - 1] + 1, prev[i - 1] + cost)
+    return row[-1]
+
+
+def compute_cer(predicted: str, reference: str) -> float:
+    """Character Error Rate — Levenshtein distance / reference length.
+
+    An empty reference yields 0.0 for an empty prediction and 1.0 otherwise.
+    """
+    if not reference:
+        return 0.0 if not predicted else 1.0
+    return _levenshtein(reference, predicted) / len(reference)
+
+
+def compute_wer(predicted: str, reference: str) -> float:
+    """Word Error Rate — word-level Levenshtein distance / reference word count."""
+    ref_words = reference.split()
+    pred_words = predicted.split()
+    if not ref_words:
+        return 0.0 if not pred_words else 1.0
+    return _levenshtein(ref_words, pred_words) / len(ref_words)
+
+
+def normalize_text(text: str) -> str:
+    """Trim both ends and collapse every whitespace run to a single space."""
+    import re
+    stripped = text.strip()
+    return re.sub(r'\s+', ' ', stripped)
+
+
+async def run_engine(engine, file_path: str, fmt):
+    """Await ``engine.process`` and return (result, None), or (None, message) if it raises."""
+    try:
+        outcome = await engine.process(file_path, output_format=fmt)
+    except Exception as err:
+        return None, str(err)
+    return outcome, None
+
+
+async def main():
+    """Benchmark every available engine on the synthetic PDFs and write the JSON report."""
+    from docfold.engines.base import OutputFormat
+    from docfold.engines.liteparse_engine import LiteParseEngine
+    from docfold.engines.pymupdf_engine import PyMuPDFEngine
+
+    # Use --no-ocr for digital PDFs (Tesseract.js may not work in all envs)
+    liteparse = LiteParseEngine(ocr_enabled=False)
+    pymupdf = PyMuPDFEngine()
+
+    engines = []
+    if pymupdf.is_available():
+        engines.append(pymupdf)
+    else:
+        print("WARNING: PyMuPDF not available, skipping")
+    if liteparse.is_available():
+        engines.append(liteparse)
+    else:
+        print("WARNING: LiteParse not available (install: npm i -g @llamaindex/liteparse)")
+
+    if not engines:
+        print("ERROR: No engines available for benchmarking")
+        return
+
+    print(f"Engines: {[e.name for e in engines]}")
+    print()
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        documents = generate_benchmark_documents(tmpdir)
+        print(f"Generated {len(documents)} benchmark documents")
+        print("=" * 90)
+
+        # Collect all results
+        all_results: dict[str, list[dict]] = {e.name: [] for e in engines}
+
+        for doc in documents:
+            print(f"\n{'─' * 90}")
+            print(f"Document: {doc['name']} | Pages: {doc['pages']} | Category: {doc['category']}")
+            print(f"{'─' * 90}")
+
+            gt_norm = normalize_text(doc["ground_truth"])  # identical for every engine
+
+            for engine in engines:
+                result, error = await run_engine(
+                    engine, doc["path"], OutputFormat.MARKDOWN
+                )
+
+                if error:
+                    print(f"  {engine.name:<14} ERROR: {error}")
+                    all_results[engine.name].append({
+                        "doc": doc["name"],
+                        "error": error,
+                    })
+                    continue
+
+                extracted = normalize_text(result.content)
+
+                cer = compute_cer(extracted, gt_norm)
+                wer = compute_wer(extracted, gt_norm)
+                bbox_count = len(result.bounding_boxes) if result.bounding_boxes else 0
+                time_ms = result.processing_time_ms
+
+                score = {
+                    "doc": doc["name"],
+                    "time_ms": time_ms,
+                    "cer": round(cer, 4),
+                    "wer": round(wer, 4),
+                    "bbox_count": bbox_count,
+                    "content_length": len(extracted),
+                    "pages": result.pages,
+                }
+                all_results[engine.name].append(score)
+
+                print(
+                    f"  {engine.name:<14} "
+                    f"time={time_ms:>6}ms  "
+                    f"CER={cer:.4f}  "
+                    f"WER={wer:.4f}  "
+                    f"BBoxes={bbox_count:>3}  "
+                    f"len={len(extracted):>5}"
+                )
+
+        # Summary
+        print(f"\n{'=' * 90}")
+        print("BENCHMARK SUMMARY")
+        print(f"{'=' * 90}")
+        print(
+            f"  {'Engine':<14} {'Avg Time':>10} {'Avg CER':>10} {'Avg WER':>10} "
+            f"{'Avg BBoxes':>11} {'Errors':>8}"
+        )
+        print(f"  {'─' * 68}")
+
+        summary = {}
+        for engine_name, results in all_results.items():
+            successes = [r for r in results if "error" not in r]
+            errors = [r for r in results if "error" in r]
+
+            if successes:
+                avg_time = sum(r["time_ms"] for r in successes) / len(successes)
+                avg_cer = sum(r["cer"] for r in successes) / len(successes)
+                avg_wer = sum(r["wer"] for r in successes) / len(successes)
+                avg_bbox = sum(r["bbox_count"] for r in successes) / len(successes)
+            else:
+                avg_time = avg_cer = avg_wer = avg_bbox = 0
+
+            summary[engine_name] = {
+                "avg_time_ms": round(avg_time, 1),
+                "avg_cer": round(avg_cer, 4),
+                "avg_wer": round(avg_wer, 4),
+                "avg_bbox_count": round(avg_bbox, 1),
+                "errors": len(errors),
+                "successes": len(successes),
+                "results": results,
+            }
+
+            print(
+                f"  {engine_name:<14} {avg_time:>9.1f}ms {avg_cer:>10.4f} {avg_wer:>10.4f} "
+                f"{avg_bbox:>11.1f} {len(errors):>8}"
+            )
+
+        # Write JSON report (create docs/ first so a fresh checkout does not fail)
+        docs_dir = os.path.join(os.path.dirname(__file__), "docs")
+        os.makedirs(docs_dir, exist_ok=True)
+        report_path = os.path.join(docs_dir, "benchmark_results.json")
+        report = {
+            "benchmark_date": time.strftime("%Y-%m-%d %H:%M:%S"),
+            "engines": list(all_results.keys()),
+            "documents": [
+                {"name": d["name"], "pages": d["pages"], "category": d["category"]}
+                for d in documents
+            ],
+            "summary": summary,
+        }
+        with open(report_path, "w", encoding="utf-8") as f:
+            json.dump(report, f, indent=2, ensure_ascii=False)
+        print(f"\nDetailed report saved to: {report_path}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/docs/benchmark_results.json b/docs/benchmark_results.json
new file mode 100644
index 0000000..5eb2e7f
--- /dev/null
+++ b/docs/benchmark_results.json
@@ -0,0 +1,123 @@
+{
+  "benchmark_date": "2026-03-22 14:07:03",
+  "engines": [
+    "pymupdf",
+    "liteparse"
+  ],
+  "documents": [
+    {
+      "name": "simple_text",
+      "pages": 1,
+      "category": "invoice"
+    },
+    {
+      "name": "multi_page",
+      "pages": 2,
+      "category": "report"
+    },
+    {
+      "name": "dense_financial",
+      "pages": 1,
+      "category": "financial"
+    },
+    {
+      "name": "mixed_formatting",
+      "pages": 1,
+      "category": "report"
+    }
+  ],
+  "summary": {
+    "pymupdf": {
+      "avg_time_ms": 4.5,
+      "avg_cer": 0.0,
+      "avg_wer": 0.0,
+      "avg_bbox_count": 6.2,
+      "errors": 0,
+      "successes": 4,
+      "results": [
+        {
+          "doc": "simple_text",
+          "time_ms": 10,
+          "cer": 0.0,
+          "wer": 0.0,
+          "bbox_count": 5,
+          "content_length": 121,
+          "pages": 1
+        },
+        {
+          "doc": "multi_page",
+          "time_ms": 3,
+          "cer": 0.0,
+          "wer": 0.0,
+          "bbox_count": 4,
+          "content_length": 300,
+          "pages": 2
+        },
+        {
+          "doc": "dense_financial",
+          "time_ms": 3,
+          "cer": 0.0,
+          "wer": 0.0,
+          "bbox_count": 10,
+          "content_length": 298,
+          "pages": 1
+        },
+        {
+          "doc": "mixed_formatting",
+          "time_ms": 2,
+          "cer": 0.0,
+          "wer": 0.0,
+          "bbox_count": 6,
+          "content_length": 260,
+          "pages": 1
+        }
+      ]
+    },
+    "liteparse": {
+      "avg_time_ms": 382.0,
+      "avg_cer": 0.0,
+      "avg_wer": 0.0,
+      "avg_bbox_count": 31.8,
+      "errors": 0,
+      "successes": 4,
+      "results": [
+        {
+          "doc": "simple_text",
+          "time_ms": 426,
+          "cer": 0.0,
+          "wer": 0.0,
+          "bbox_count": 18,
+          "content_length": 121,
+          "pages": 1
+        },
+        {
+          "doc": "multi_page",
+          "time_ms": 359,
+          "cer": 0.0,
+          "wer": 0.0,
+          "bbox_count": 39,
+          "content_length": 300,
+          "pages": 2
+        },
+        {
+          "doc": "dense_financial",
+          "time_ms": 364,
+          "cer": 0.0,
+          "wer": 0.0,
+          "bbox_count": 33,
+          "content_length": 298,
+          "pages": 1
+        },
+        {
+          "doc": "mixed_formatting",
+          "time_ms": 379,
+          "cer": 0.0,
+          "wer": 0.0,
+          "bbox_count": 37,
+          "content_length": 260,
+          "pages": 1
+        }
+      ]
+    }
+  }
+}
\ No newline at end of file
diff --git a/docs/benchmarks.md b/docs/benchmarks.md
index 6ae98fd..82abecb 100644
--- a/docs/benchmarks.md
+++ b/docs/benchmarks.md
@@ -17,6 +17,7 @@ This guide helps you choose the right document processing engine for your use ca
 | **EasyOCR** | Local | Apache-2.0 | ★☆☆ | ★★★ | ☆☆☆ | ☆☆☆ | ★★★ (80+) | Medium | Free |
 | **Unstructured** | Local/SaaS | Apache-2.0 | ★★☆ | ★★☆ | ★★☆ | ★☆☆ | ★★☆ | Medium | Free / Paid API |
 | **LlamaParse** | SaaS | Paid | ★★★ | ★★★ | ★★★ | ★★★ | ★★☆ | Fast | ~$3/1K pages |
+| **LiteParse** | Local | Apache-2.0 | ★★★ | ★★☆ | ★★☆ | ☆☆☆ | ★★☆ | Fast | Free |
 | **Mistral OCR** | SaaS | Paid | ★★★ | ★★★ | ★★★ | ★★★ | ★★★ | Fast | ~$1/1K pages |
 | **Zerox** | VLM | MIT | ★★★ | ★★★ | ★★☆ | ★★☆ | ★★★ | Slow | VLM API cost |
 | **Nougat** | Local | MIT | ★★★ | ★★☆ | ★★☆ | ★★★ | ★☆☆ | Slow | Free |
@@ -123,6 +124,16 @@ This guide helps you choose the right document processing engine for your use ca
 - **Install:** `pip install docfold[llamaparse]`
 - **Links:** [Docs](https://docs.llamaindex.ai/en/stable/llama_cloud/llama_parse/)
 
+### LiteParse (LlamaIndex)
+
+**Best for:** Fast local PDF parsing with bounding boxes, no cloud dependencies.
+
+- **Strengths:** Fast, lightweight local parser built on PDF.js. Bounding boxes with confidence scores. Wide format support (PDF, Office, images) via LibreOffice conversion. Flexible OCR integration (Tesseract.js built-in, or connect PaddleOCR/EasyOCR servers). Apache 2.0 license. No API key required.
+- **Weaknesses:** Requires Node.js 18+. No formula recognition. Table extraction is basic (no cell-level structure). Not a Python library — docfold runs the `lit` CLI as a subprocess. Needs LibreOffice for non-PDF formats.
+- **GPU:** Not needed.
+- **Install:** `npm i -g @llamaindex/liteparse` then `pip install docfold[liteparse]`
+- **Links:** [GitHub](https://github.com/run-llama/liteparse)
+
 ### Mistral OCR
 
 **Best for:** High-accuracy document understanding with strong multilingual support.
@@ -231,6 +242,7 @@ Capabilities each engine can populate in `EngineResult`:
 | Tesseract | — | — | — | — | — | — |
 | Unstructured | — | — | — | ✅ | ✅ | — |
 | LlamaParse | — | — | — | ✅ | ✅ | — |
+| LiteParse | ✅ | ✅ | — | — | — | — |
 | Mistral OCR | — | — | — | ✅ | ✅ | — |
 | Zerox | — | — | — | — | — | — |
 | **Textract** | ✅ | ✅ | — | ✅ | — | ✅ |
@@ -263,6 +275,7 @@ Capabilities each engine can populate in `EngineResult`:
 | Tesseract | ✅* | — | — | — | — | ✅ | — | — | — |
 | Unstructured | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | — | ✅ |
 | LlamaParse | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | — | — | ✅ |
+| LiteParse | ✅ | ✅ | ✅ | ✅ | — | ✅ | — | — | — |
 | Mistral OCR | ✅ | — | — | — | — | ✅ | — | — | — |
 | Zerox | ✅ | — | — | — | — | ✅ | — | — | — |
 | **Textract** | ✅ | — | — | — | — | ✅ | — | — | — |
@@ -290,6 +303,7 @@ Capabilities each engine can populate in `EngineResult`:
 | Unstructured (hi_res) | 8 GB | 16 GB | Optional | ~2 GB |
 | Nougat | 8 GB | 16 GB | CUDA 8+ GB | ~1.5 GB |
 | Surya | 4 GB | 8 GB | Optional | ~1 GB |
+| LiteParse | 512 MB | 1 GB | — | ~100 MB (Node.js) |
 
 
 *SaaS engines (LlamaParse, Mistral OCR, Zerox, Marker API, Textract, Google DocAI, Azure DocInt) have no local hardware requirements.*
@@ -311,6 +325,7 @@ Capabilities each engine can populate in `EngineResult`:
 | Marker API | SaaS | ~$1 |
 | Mistral OCR | SaaS | ~$1 (token-based) |
 | LlamaParse | SaaS | ~$3 (free: 1K/day) |
+| LiteParse | Free | $0 (Node.js runtime) |
 | AWS Textract | SaaS | ~$1.50 |
 | Google Doc AI | SaaS | ~$1.50 |
 | Azure Doc Intel | SaaS | ~$1.50 |
diff --git a/pyproject.toml b/pyproject.toml
index 10f5ea5..f2ea736 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,6 +71,9 @@ unstructured = [
 llamaparse = [
     "llama-parse>=0.5",
 ]
+liteparse = [
+    # No Python deps — requires Node.js 18+ and: npm i -g @llamaindex/liteparse
+]
 mistral-ocr = [
     "mistralai>=1.0",
 ]
@@ -106,7 +109,7 @@ evaluation = [
     "psutil>=5.9",         # Memory measurement
 ]
 all = [
-    "docfold[docling,mineru,marker,pymupdf,paddleocr,tesseract,easyocr,unstructured,llamaparse,mistral-ocr,textract,google-docai,azure-docint,nougat,surya,firecrawl,evaluation]",
+    "docfold[docling,mineru,marker,pymupdf,paddleocr,tesseract,easyocr,unstructured,llamaparse,liteparse,mistral-ocr,textract,google-docai,azure-docint,nougat,surya,firecrawl,evaluation]",
     # Note: zerox excluded from [all] — py-zerox requires Python 3.11+
     # Install separately: pip install docfold[zerox]
 ]
diff --git a/src/docfold/cli.py b/src/docfold/cli.py
index 39a819c..ac32dc3 100644
--- a/src/docfold/cli.py
+++ b/src/docfold/cli.py
@@ -136,6 +136,12 @@ def _build_router():
     except Exception:
         pass
 
+    try:
+        from docfold.engines.liteparse_engine import LiteParseEngine
+        router.register(LiteParseEngine())
+    except Exception:
+        pass
+
     try:
         from docfold.engines.mistral_ocr_engine import MistralOCREngine
         router.register(MistralOCREngine())
diff --git a/src/docfold/engines/liteparse_engine.py b/src/docfold/engines/liteparse_engine.py
new file mode 100644
index 0000000..9791eac
--- /dev/null
+++ b/src/docfold/engines/liteparse_engine.py
@@ -0,0 +1,219 @@
+"""LiteParse engine adapter — fast local document parsing via CLI.
+
+LiteParse is a standalone OSS tool by LlamaIndex for high-speed PDF parsing
+with bounding boxes.  It runs locally with no API key required.
+
+Requires Node.js 18+ and the ``lit`` CLI:
+``npm i -g @llamaindex/liteparse``
+
+See https://github.com/run-llama/liteparse
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import shutil
+import time
+from typing import Any
+
+from docfold.engines.base import (
+    BoundingBox,
+    DocumentEngine,
+    EngineCapabilities,
+    EngineResult,
+    OutputFormat,
+)
+
+logger = logging.getLogger(__name__)
+
+_SUPPORTED_EXTENSIONS = {
+    "pdf", "docx", "doc", "pptx", "ppt", "xlsx", "xls",
+    "odt", "rtf", "odp", "csv", "tsv",
+    "png", "jpg", "jpeg", "gif", "bmp", "tiff", "tif", "webp",
+}
+
+
+class LiteParseEngine(DocumentEngine):
+    """Adapter for LiteParse (run-llama/liteparse).
+
+    Calls the ``lit parse`` CLI as a subprocess and parses the structured
+    JSON output.  Supports bounding boxes and confidence scores out of the box.
+    """
+
+    def __init__(
+        self,
+        cli_path: str = "lit",
+        ocr_enabled: bool = True,
+        ocr_language: str = "en",
+        dpi: int = 150,
+        num_workers: int | None = None,
+        max_pages: int | None = None,
+    ) -> None:
+        self._cli_path = cli_path
+        self._ocr_enabled = ocr_enabled
+        self._ocr_language = ocr_language
+        self._dpi = dpi
+        self._num_workers = num_workers
+        self._max_pages = max_pages
+
+    @property
+    def name(self) -> str:
+        return "liteparse"
+
+    @property
+    def supported_extensions(self) -> set[str]:
+        return _SUPPORTED_EXTENSIONS
+
+    @property
+    def capabilities(self) -> EngineCapabilities:
+        return EngineCapabilities(bounding_boxes=True, confidence=True)
+
+    def is_available(self) -> bool:
+        return shutil.which(self._cli_path) is not None
+
+    async def process(
+        self,
+        file_path: str,
+        output_format: OutputFormat = OutputFormat.MARKDOWN,
+        **kwargs: Any,
+    ) -> EngineResult:
+        """Run ``lit parse`` on *file_path*; raise RuntimeError on a non-zero exit."""
+        start = time.perf_counter()
+
+        # For text output we use --format text; for everything else use json
+        # so we can extract bounding boxes.
+        use_json = output_format != OutputFormat.TEXT
+
+        cmd = self._build_command(file_path, use_json=use_json)
+
+        proc = await asyncio.create_subprocess_exec(
+            *cmd,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+        )
+        stdout, stderr = await proc.communicate()
+
+        if proc.returncode != 0:
+            err_msg = stderr.decode(errors="replace").strip()
+            raise RuntimeError(
+                f"liteparse failed (exit {proc.returncode}): {err_msg}"
+            )
+
+        raw = stdout.decode(errors="replace")
+        elapsed_ms = int((time.perf_counter() - start) * 1000)
+
+        if use_json:
+            return self._parse_json_output(raw, output_format, elapsed_ms)
+        return EngineResult(
+            content=raw,
+            format=output_format,
+            engine_name=self.name,
+            processing_time_ms=elapsed_ms,
+            metadata={"cli": self._cli_path},
+        )
+
+    # ------------------------------------------------------------------
+    # Internal helpers
+    # ------------------------------------------------------------------
+
+    def _build_command(self, file_path: str, *, use_json: bool) -> list[str]:
+        """Assemble the ``lit parse`` argv from *file_path* and the constructor options."""
+        fmt = "json" if use_json else "text"
+        cmd = [self._cli_path, "parse", file_path, "--format", fmt]
+
+        if not self._ocr_enabled:
+            cmd.append("--no-ocr")
+        else:
+            cmd += ["--ocr-language", self._ocr_language]
+
+        if self._dpi != 150:
+            cmd += ["--dpi", str(self._dpi)]
+
+        if self._num_workers is not None:
+            cmd += ["--num-workers", str(self._num_workers)]
+
+        if self._max_pages is not None:
+            cmd += ["--max-pages", str(self._max_pages)]
+
+        return cmd
+
+    @staticmethod
+    def _extract_json(raw: str) -> str:
+        """Extract JSON object from raw output that may contain log lines."""
+        # The CLI may print log/progress lines before the JSON.
+        # Find the first '{' that starts the JSON object.
+        idx = raw.find("{")
+        if idx == -1:
+            return raw
+        return raw[idx:]
+
+    def _parse_json_output(
+        self,
+        raw: str,
+        output_format: OutputFormat,
+        elapsed_ms: int,
+    ) -> EngineResult:
+        """Convert the CLI's JSON payload into an EngineResult with bounding boxes."""
+        data = json.loads(self._extract_json(raw))
+        pages = data.get("pages", [])
+
+        texts: list[str] = []
+        bboxes: list[dict[str, Any]] = []
+
+        for page_index, page_data in enumerate(pages, start=1):
+            # Fall back to the 1-based position so box ids stay unique per page.
+            page_num = page_data.get("page", page_index)
+            # LiteParse uses "text" at page level (not nested in "content")
+            page_text = page_data.get("text", "")
+            texts.append(page_text)
+
+            pw = page_data.get("width")
+            ph = page_data.get("height")
+
+            # LiteParse provides "textItems" with {text, x, y, width, height}
+            # and "boundingBoxes" with {x1, y1, x2, y2}
+            for idx, item in enumerate(page_data.get("textItems", [])):
+                x = item.get("x", 0)
+                y = item.get("y", 0)
+                w = item.get("width", 0)
+                h = item.get("height", 0)
+                bboxes.append(
+                    BoundingBox(
+                        type="Text",
+                        bbox=[x, y, x + w, y + h],
+                        page=page_num,
+                        text=item.get("text", "").strip(),
+                        id=f"p{page_num}-i{idx}",
+                        confidence=item.get("confidence"),
+                        page_width=pw,
+                        page_height=ph,
+                    ).to_dict()
+                )
+
+        full_text = "\n\n".join(texts)
+        page_count = len(pages)
+
+        if output_format == OutputFormat.JSON:
+            content = json.dumps(data, ensure_ascii=False)
+        elif output_format == OutputFormat.HTML:
+            import html  # local import; page text is document-controlled, so escape it
+            html_parts = [
+                f"<div class='page' data-page='{i + 1}'><p>{html.escape(t)}</p></div>"
+                for i, t in enumerate(texts)
+            ]
+            content = "<html><body>" + "\n".join(html_parts) + "</body></html>"
+        else:
+            # MARKDOWN or TEXT — return extracted text
+            content = full_text
+
+        return EngineResult(
+            content=content,
+            format=output_format,
+            engine_name=self.name,
+            pages=page_count,
+            processing_time_ms=elapsed_ms,
+            bounding_boxes=bboxes or None,
+            metadata={"cli": self._cli_path, "page_count": page_count},
+        )
diff --git a/src/docfold/engines/router.py b/src/docfold/engines/router.py
index 02e35b0..99d5cfe 100644
--- a/src/docfold/engines/router.py
+++ b/src/docfold/engines/router.py
@@ -23,25 +23,25 @@
 # the first *available* engine that supports the extension.
 
 _IMAGE_PRIORITY = [
-    "surya", "paddleocr", "tesseract", "easyocr", "docling", "mistral_ocr",
-    "google_docai", "textract", "azure_docint", "zerox", "marker",
+    "surya", "paddleocr", "tesseract", "easyocr", "docling", "liteparse",
+    "mistral_ocr", "google_docai", "textract", "azure_docint", "zerox", "marker",
 ]
 
 _EXTENSION_PRIORITY: dict[str, list[str]] = {
     # --- PDF ---
     "pdf": [
         "docling", "mineru", "unstructured", "marker",
-        "llamaparse", "mistral_ocr", "firecrawl", "google_docai", "azure_docint",
-        "textract", "zerox", "nougat", "surya", "pymupdf", "paddleocr", "tesseract",
-        "easyocr",
+        "llamaparse", "liteparse", "mistral_ocr", "firecrawl", "google_docai",
+        "azure_docint", "textract", "zerox", "nougat", "surya", "pymupdf",
+        "paddleocr", "tesseract", "easyocr",
     ],
     # --- Office ---
-    "docx": ["docling", "marker", "unstructured", "llamaparse", "firecrawl", "azure_docint"],
-    "doc":  ["docling", "marker", "unstructured", "llamaparse", "azure_docint"],
-    "pptx": ["docling", "marker", "unstructured", "llamaparse", "azure_docint"],
-    "ppt":  ["docling", "marker", "unstructured", "llamaparse", "azure_docint"],
-    "xlsx": ["docling", "marker", "unstructured", "llamaparse", "azure_docint"],
-    "xls":  ["docling", "marker", "unstructured", "llamaparse", "azure_docint"],
+    "docx": ["docling", "marker", "unstructured", "llamaparse", "liteparse", "firecrawl", "azure_docint"],
+    "doc":  ["docling", "marker", "unstructured", "llamaparse", "liteparse", "azure_docint"],
+    "pptx": ["docling", "marker", "unstructured", "llamaparse", "liteparse", "azure_docint"],
+    "ppt":  ["docling", "marker", "unstructured", "llamaparse", "liteparse", "azure_docint"],
+    "xlsx": ["docling", "marker", "unstructured", "llamaparse", "liteparse", "azure_docint"],
+    "xls":  ["docling", "marker", "unstructured", "llamaparse", "liteparse", "azure_docint"],
     "odt":  ["marker", "unstructured"],
     "odp":  ["marker", "unstructured"],
     "ods":  ["marker", "unstructured"],
@@ -74,8 +74,9 @@
 # Ultimate fallback when extension is unknown or missing from the map.
 _DEFAULT_FALLBACK = [
     "docling", "mineru", "unstructured", "marker",
-    "llamaparse", "mistral_ocr", "google_docai", "azure_docint", "textract",
-    "zerox", "nougat", "surya", "pymupdf", "paddleocr", "tesseract", "easyocr",
+    "llamaparse", "liteparse", "mistral_ocr", "google_docai", "azure_docint",
+    "textract", "zerox", "nougat", "surya", "pymupdf", "paddleocr", "tesseract",
+    "easyocr",
 ]
 
 
diff --git a/tests/engines/test_liteparse_engine.py b/tests/engines/test_liteparse_engine.py
new file mode 100644
index 0000000..7db8bc2
--- /dev/null
+++ b/tests/engines/test_liteparse_engine.py
@@ -0,0 +1,300 @@
+"""Tests for LiteParse engine adapter — CLI-based local document parser."""
+
+import json
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from docfold.engines.base import EngineResult, OutputFormat
+
+
+def _make_liteparse_json(pages: list[dict]) -> str:
+    """Return ``{"pages": pages}`` as JSON text, mimicking LiteParse CLI output."""
+    return json.dumps({"pages": pages})
+
+
+def _simple_page(
+    page: int = 1,
+    text: str = "Hello",
+    text_items: list[dict] | None = None,
+    width: int = 612,
+    height: int = 792,
+) -> dict:
+    """Return one page dict for the mocked ``pages`` list.
+
+    When ``text_items`` is ``None`` a single item holding ``text`` in a 50x14
+    box at (72, 100) is used; ``boundingBoxes`` is always an empty list.
+    """
+    fallback = {"text": text, "x": 72, "y": 100, "width": 50, "height": 14}
+    items = [fallback] if text_items is None else text_items
+
+    # Keys go in the same order as before: dicts keep insertion order.
+    page_dict = {"page": page, "width": width, "height": height, "text": text}
+    page_dict["textItems"] = items
+    page_dict["boundingBoxes"] = []
+    return page_dict
+
+
+class TestLiteParseEngineMetadata:
+    """Checks on the adapter's name, extensions, capabilities and stored options."""
+
+    def test_name(self):
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        assert LiteParseEngine().name == "liteparse"
+
+    def test_supported_extensions(self):
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        advertised = LiteParseEngine().supported_extensions
+        # One membership check per format, in the original order.
+        for ext in ("pdf", "docx", "pptx", "xlsx", "png", "jpg"):
+            assert ext in advertised
+
+    def test_capabilities(self):
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        declared = LiteParseEngine().capabilities
+        # Both flags must be the literal True, not merely truthy.
+        assert declared.bounding_boxes is True
+        assert declared.confidence is True
+
+    def test_is_available_when_lit_exists(self):
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        # Built before patching, so only is_available() runs against the stub.
+        engine = LiteParseEngine()
+        with patch("shutil.which", return_value="/usr/local/bin/lit"):
+            assert engine.is_available() is True
+
+    def test_is_available_when_lit_missing(self):
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        # Built before patching, so only is_available() runs against the stub.
+        engine = LiteParseEngine()
+        with patch("shutil.which", return_value=None):
+            assert engine.is_available() is False
+
+    def test_custom_cli_path(self):
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        custom = "/opt/bin/lit"
+        assert LiteParseEngine(cli_path=custom)._cli_path == custom
+
+    def test_config_stored(self):
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        engine = LiteParseEngine(ocr_enabled=False, ocr_language="fra", dpi=300)
+        # Each constructor option is expected on the matching private attribute.
+        assert engine._ocr_enabled is False
+        assert engine._ocr_language == "fra"
+        assert engine._dpi == 300
+
+
+class TestLiteParseEngineProcess:
+    @pytest.mark.asyncio
+    async def test_process_text_format(self):
+        """TEXT output: the stubbed CLI stdout shows up in ``result.content``."""
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        engine = LiteParseEngine()
+
+        stdout = b"Hello world\nThis is a test document."
+        # Stand-in for the subprocess: exit code 0, (stdout, stderr) from communicate().
+        proc = MagicMock(returncode=0, communicate=AsyncMock(return_value=(stdout, b"")))
+
+        with patch(
+            "asyncio.create_subprocess_exec", new_callable=AsyncMock, return_value=proc
+        ):
+            result = await engine.process("test.pdf", output_format=OutputFormat.TEXT)
+
+            assert isinstance(result, EngineResult)
+            assert result.engine_name == "liteparse"
+            # Only a substring check: the exact text layout is not pinned here.
+            assert "Hello world" in result.content
+            assert result.format == OutputFormat.TEXT
+            assert result.processing_time_ms >= 0
+
+    @pytest.mark.asyncio
+    async def test_process_markdown_format(self):
+        """MARKDOWN output: one bounding box per text item and a page count of 1."""
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        engine = LiteParseEngine()
+
+        # One page carrying two text items.
+        heading = {"text": "Title", "x": 10, "y": 20, "width": 190, "height": 30}
+        paragraph = {
+            "text": "Some paragraph text.", "x": 10, "y": 60, "width": 390, "height": 30
+        }
+        page = {
+            "page": 1,
+            "width": 612,
+            "height": 792,
+            "text": "# Title\n\nSome paragraph text.",
+            "textItems": [heading, paragraph],
+            "boundingBoxes": [],
+        }
+        stdout = _make_liteparse_json([page]).encode()
+
+        # Stand-in for the subprocess: exit code 0, (stdout, stderr) from communicate().
+        proc = MagicMock(returncode=0, communicate=AsyncMock(return_value=(stdout, b"")))
+
+        with patch(
+            "asyncio.create_subprocess_exec", new_callable=AsyncMock, return_value=proc
+        ):
+            result = await engine.process("test.pdf", output_format=OutputFormat.MARKDOWN)
+
+            assert isinstance(result, EngineResult)
+            assert result.engine_name == "liteparse"
+            assert result.format == OutputFormat.MARKDOWN
+            assert result.bounding_boxes is not None
+            # Two text items were fed in, so two boxes are expected back.
+            assert len(result.bounding_boxes) == 2
+            assert result.pages == 1
+
+    @pytest.mark.asyncio
+    async def test_process_json_format(self):
+        """JSON output: ``result.content`` must parse back into a dict."""
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        engine = LiteParseEngine()
+
+        page = _simple_page(text="Test content")
+        stdout = _make_liteparse_json([page]).encode()
+
+        # Stand-in for the subprocess: exit code 0, (stdout, stderr) from communicate().
+        proc = MagicMock(returncode=0, communicate=AsyncMock(return_value=(stdout, b"")))
+
+        with patch(
+            "asyncio.create_subprocess_exec", new_callable=AsyncMock, return_value=proc
+        ):
+            result = await engine.process("test.pdf", output_format=OutputFormat.JSON)
+
+            assert result.format == OutputFormat.JSON
+            # json.loads raises on malformed content, which also fails the test.
+            decoded = json.loads(result.content)
+            assert isinstance(decoded, dict)
+
+    @pytest.mark.asyncio
+    async def test_process_failure_raises(self):
+        """A non-zero exit code must surface as a ``RuntimeError`` naming liteparse."""
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        engine = LiteParseEngine()
+
+        # Stand-in for a failed run: exit code 1, empty stdout, message on stderr.
+        streams = (b"", b"Error: file not found")
+        proc = MagicMock(returncode=1, communicate=AsyncMock(return_value=streams))
+
+        spawn = patch(
+            "asyncio.create_subprocess_exec", new_callable=AsyncMock, return_value=proc
+        )
+        # Both managers share one ``with``; pytest.raises only wraps the call itself.
+        with spawn, pytest.raises(RuntimeError, match="liteparse"):
+            await engine.process("missing.pdf")
+
+    @pytest.mark.asyncio
+    async def test_ocr_disabled_flag(self):
+        """``ocr_enabled=False`` must put ``--no-ocr`` on the spawned command line."""
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        engine = LiteParseEngine(ocr_enabled=False)
+        proc = MagicMock(returncode=0, communicate=AsyncMock(return_value=(b"text", b"")))
+
+        with patch(
+            "asyncio.create_subprocess_exec", new_callable=AsyncMock, return_value=proc
+        ) as spawn:
+            await engine.process("test.pdf", output_format=OutputFormat.TEXT)
+
+            # call_args.args holds the positional arguments of the spawn call.
+            argv = spawn.call_args.args
+            assert "--no-ocr" in argv
+
+    @pytest.mark.asyncio
+    async def test_custom_dpi_passed(self):
+        """``dpi=300`` must put ``--dpi`` and ``300`` on the spawned command line."""
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        engine = LiteParseEngine(dpi=300)
+        proc = MagicMock(returncode=0, communicate=AsyncMock(return_value=(b"text", b"")))
+
+        with patch(
+            "asyncio.create_subprocess_exec", new_callable=AsyncMock, return_value=proc
+        ) as spawn:
+            await engine.process("test.pdf", output_format=OutputFormat.TEXT)
+
+            # call_args.args holds the positional arguments of the spawn call.
+            argv = spawn.call_args.args
+            assert "--dpi" in argv
+            assert "300" in argv
+
+    @pytest.mark.asyncio
+    async def test_bounding_boxes_from_text_items(self):
+        """A text item's x/y/width/height must come back as an [x0, y0, x1, y1] box."""
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        engine = LiteParseEngine()
+
+        # One page with a single text item.
+        item = {
+            "text": "Hello",
+            "x": 10.0,
+            "y": 20.0,
+            "width": 90.0,
+            "height": 20.0,
+        }
+
+        page = {
+            "page": 1,
+            "width": 612,
+            "height": 792,
+            "text": "Hello",
+            "textItems": [item],
+            "boundingBoxes": [],
+        }
+        stdout = _make_liteparse_json([page]).encode()
+
+        # Stand-in for the subprocess: exit code 0, (stdout, stderr) from communicate().
+        proc = MagicMock(returncode=0, communicate=AsyncMock(return_value=(stdout, b"")))
+
+        with patch(
+            "asyncio.create_subprocess_exec", new_callable=AsyncMock, return_value=proc
+        ):
+            result = await engine.process("test.pdf", output_format=OutputFormat.MARKDOWN)
+
+            assert result.bounding_boxes is not None
+            first = result.bounding_boxes[0]
+            # Corners are [x, y, x + width, y + height] of the text item.
+            assert first["bbox"] == [10.0, 20.0, 100.0, 40.0]
+            assert first["text"] == "Hello"
+
+            # The box is also expected to carry its page number and page size.
+            assert first["page"] == 1
+            assert first["page_width"] == 612
+            assert first["page_height"] == 792
+
+    @pytest.mark.asyncio
+    async def test_extract_json_with_log_prefix(self):
+        """Log lines printed ahead of the JSON payload must not break parsing."""
+        from docfold.engines.liteparse_engine import LiteParseEngine
+
+        engine = LiteParseEngine()
+
+        raw_json = _make_liteparse_json([_simple_page(text="Test")])
+        # Two plain-text log lines, then the JSON document, all on stdout.
+        prefixed = f"Processing file: test.pdf\nLoaded PDF with 1 pages\n{raw_json}"
+        stdout = prefixed.encode()
+
+        # Stand-in for the subprocess: exit code 0, (stdout, stderr) from communicate().
+        communicate = AsyncMock(return_value=(stdout, b""))
+        proc = MagicMock(returncode=0, communicate=communicate)
+
+        with patch(
+            "asyncio.create_subprocess_exec", new_callable=AsyncMock, return_value=proc
+        ):
+            result = await engine.process("test.pdf", output_format=OutputFormat.MARKDOWN)
+
+            # The page text and page count are expected to survive the prefix.
+            assert result.content == "Test"
+            assert result.pages == 1