From 7b4a7548bbf0fd15980a716d9c9b1e8259e5b575 Mon Sep 17 00:00:00 2001
From: pc-style <pcstyle@duck.com>
Date: Tue, 27 Jan 2026 15:05:11 +0100
Subject: [PATCH] feat: 10x upgrade with parallel processing, vision,
 streaming, watch mode, and more

- Add parallel processing with configurable concurrency (p-limit)
- Add progress bar with ETA for multi-file operations (cli-progress)
- Add stdin/stdout pipe support for shell integration
- Add vision mode to send PDFs as images to Gemini
- Add streaming output for real-time AI responses
- Add watch mode to auto-convert new PDFs in a directory
- Add glob pattern support (e.g., docs/**/*.pdf)
- Add hash-based caching to skip already-processed files
- Add custom prompts and built-in templates (invoice, table, summary, code)
- Add output formats: markdown, json, html, text
- Add image extraction from PDFs
- Add templates and cache management commands

Amp-Thread-ID: https://ampcode.com/threads/T-019bff71-9c6b-71d1-b614-e480b6560ebf
Co-authored-by: Amp <amp@ampcode.com>
---
 .gitignore              |   3 +
 AGENTS.md               |  21 +++
 bun.lock                |  43 +++++-
 package.json            |   5 +
 src/commands/convert.ts | 312 ++++++++++++++++++++++++++++++++--------
 src/commands/watch.ts   |  99 +++++++++++++
 src/index.ts            |  35 ++++-
 src/utils/cache.ts      |  35 +++++
 src/utils/formats.ts    |  70 +++++++++
 src/utils/gemini.ts     | 113 +++++++++++++--
 src/utils/images.ts     |  72 ++++++++++
 src/utils/templates.ts  |  55 +++++++
 12 files changed, 789 insertions(+), 74 deletions(-)
 create mode 100644 AGENTS.md
 create mode 100644 src/commands/watch.ts
 create mode 100644 src/utils/cache.ts
 create mode 100644 src/utils/formats.ts
 create mode 100644 src/utils/images.ts
 create mode 100644 src/utils/templates.ts

diff --git a/.gitignore b/.gitignore
index 0d1e7a8..8c57158 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,9 @@ bun.lockb
 bin/
 pdf2md
 
+# test folder
+test/
+
 # IDE
 .vscode/
 .idea/
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..b1765b5
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,21 @@
+# AGENTS.md
+
+## Commands
+- **Build**: `bun run build` - compiles to `bin/pdf2md`
+- **Dev**: `bun run ./src/index.ts [args]` - run without compiling
+- **Type check**: `bun tsc --noEmit`
+- No test framework configured
+
+## Architecture
+- CLI tool using Commander.js with subcommands in `src/commands/`
+- Entry: `src/index.ts` → commands: `convert` (default), `config`, `watch`, plus inline `templates` and `cache` subcommands
+- Utils: `src/utils/` - pdf extraction (unpdf), Gemini AI, config (conf), cache, formats, images, templates
+- Compiles to standalone binary via `bun build --compile`
+
+## Code Style
+- **Runtime**: Bun (never npm/pnpm/yarn/node)
+- **Module**: ESM with `.js` extensions in imports (even for .ts files)
+- **Types**: Strict TypeScript, explicit error typing (`error: any`)
+- **Imports**: node builtins with `node:` prefix, named exports preferred
+- **Patterns**: async/await, ora spinners for progress, chalk for colors
+- **Error handling**: try/catch with spinner.fail(), process.exit(1) for fatal errors
diff --git a/bun.lock b/bun.lock
index d3f7e7b..34b3191 100644
--- a/bun.lock
+++ b/bun.lock
@@ -7,13 +7,18 @@
       "dependencies": {
         "@google/generative-ai": "^0.24.1",
         "chalk": "^5.6.2",
+        "chokidar": "^5.0.0",
+        "cli-progress": "^3.12.0",
         "commander": "^14.0.2",
         "conf": "^15.0.2",
+        "glob": "^13.0.0",
         "ora": "^9.0.0",
+        "p-limit": "^7.2.0",
         "unpdf": "^1.4.0",
       },
       "devDependencies": {
         "@types/bun": "latest",
+        "@types/cli-progress": "^3.11.6",
         "typescript": "^5",
       },
     },
@@ -21,8 +26,14 @@
   "packages": {
     "@google/generative-ai": ["@google/generative-ai@0.24.1", "", {}, "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q=="],
 
+    "@isaacs/balanced-match": ["@isaacs/balanced-match@4.0.1", "", {}, "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ=="],
+
+    "@isaacs/brace-expansion": ["@isaacs/brace-expansion@5.0.0", "", { "dependencies": { "@isaacs/balanced-match": "^4.0.1" } }, "sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA=="],
+
     "@types/bun": ["@types/bun@1.3.5", "", { "dependencies": { "bun-types": "1.3.5" } }, "sha512-RnygCqNrd3srIPEWBd5LFeUYG7plCoH2Yw9WaZGyNmdTEei+gWaHqydbaIRkIkcbXwhBT94q78QljxN0Sk838w=="],
 
+    "@types/cli-progress": ["@types/cli-progress@3.11.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-cE3+jb9WRlu+uOSAugewNpITJDt1VF8dHOopPO4IABFc3SXYL5WE/+PTz/FCdZRRfIujiWW3n3aMbv1eIGVRWA=="],
+
     "@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="],
 
     "ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="],
@@ -37,8 +48,12 @@
 
     "chalk": ["chalk@5.6.2", "", {}, "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA=="],
 
+    "chokidar": ["chokidar@5.0.0", "", { "dependencies": { "readdirp": "^5.0.0" } }, "sha512-TQMmc3w+5AxjpL8iIiwebF73dRDF4fBIieAqGn9RGCWaEVwQ6Fb2cGe31Yns0RRIzii5goJ1Y7xbMwo1TxMplw=="],
+
     "cli-cursor": ["cli-cursor@5.0.0", "", { "dependencies": { "restore-cursor": "^5.0.0" } }, "sha512-aCj4O5wKyszjMmDT4tZj93kxyydN/K5zPWSCe6/0AV/AA1pqe5ZBIw0a2ZfPQV7lL5/yb5HsUreJ6UFAF1tEQw=="],
 
+    "cli-progress": ["cli-progress@3.12.0", "", { "dependencies": { "string-width": "^4.2.3" } }, "sha512-tRkV3HJ1ASwm19THiiLIXLO7Im7wlTuKnvkYaTkyoAPefqjNg7W7DHKUlGRxy9vxDvbyCYQkQozvptuMkGCg8A=="],
+
     "cli-spinners": ["cli-spinners@3.3.0", "", {}, "sha512-/+40ljC3ONVnYIttjMWrlL51nItDAbBrq2upN8BPyvGU/2n5Oxw3tbNwORCaNuNqLJnxGqOfjUuhsv7l5Q4IsQ=="],
 
     "commander": ["commander@14.0.2", "", {}, "sha512-TywoWNNRbhoD0BXs1P3ZEScW8W5iKrnbithIl0YH+uCmBd0QpPOA8yc82DS3BIE5Ma6FnBVUsJ7wVUDz4dvOWQ=="],
@@ -49,6 +64,8 @@
 
     "dot-prop": ["dot-prop@10.1.0", "", { "dependencies": { "type-fest": "^5.0.0" } }, "sha512-MVUtAugQMOff5RnBy2d9N31iG0lNwg1qAoAOn7pOK5wf94WIaE3My2p3uwTQuvS2AcqchkcR3bHByjaM0mmi7Q=="],
 
+    "emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="],
+
     "env-paths": ["env-paths@3.0.0", "", {}, "sha512-dtJUTepzMW3Lm/NPxRf3wP4642UWhjL2sQxc+ym2YMj1m/H2zDNQOlezafzkHwn6sMstjHTwG6iQQsctDW/b1A=="],
 
     "fast-deep-equal": ["fast-deep-equal@3.1.3", "", {}, "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="],
@@ -57,6 +74,10 @@
 
     "get-east-asian-width": ["get-east-asian-width@1.4.0", "", {}, "sha512-QZjmEOC+IT1uk6Rx0sX22V6uHWVwbdbxf1faPqJ1QhLdGgsRGCZoyaQBm/piRdJy/D2um6hM1UP7ZEeQ4EkP+Q=="],
 
+    "glob": ["glob@13.0.0", "", { "dependencies": { "minimatch": "^10.1.1", "minipass": "^7.1.2", "path-scurry": "^2.0.0" } }, "sha512-tvZgpqk6fz4BaNZ66ZsRaZnbHvP/jG3uKJvAZOwEVUL4RTA5nJeeLYfyN9/VA8NX/V3IBG+hkeuGpKjvELkVhA=="],
+
+    "is-fullwidth-code-point": ["is-fullwidth-code-point@3.0.0", "", {}, "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg=="],
+
     "is-interactive": ["is-interactive@2.0.0", "", {}, "sha512-qP1vozQRI+BMOPcjFzrjXuQvdak2pHNUMZoeG2eRbiSqyvbEf/wQtEOTOX1guk6E3t36RkaqiSt8A/6YElNxLQ=="],
 
     "is-unicode-supported": ["is-unicode-supported@2.1.0", "", {}, "sha512-mE00Gnza5EEB3Ds0HfMyllZzbBrmLOX3vfWoj9A9PEnTfratQ/BcaJOuMhnkhjXvb2+FkY3VuHqtAGpTPmglFQ=="],
@@ -67,12 +88,24 @@
 
     "log-symbols": ["log-symbols@7.0.1", "", { "dependencies": { "is-unicode-supported": "^2.0.0", "yoctocolors": "^2.1.1" } }, "sha512-ja1E3yCr9i/0hmBVaM0bfwDjnGy8I/s6PP4DFp+yP+a+mrHO4Rm7DtmnqROTUkHIkqffC84YY7AeqX6oFk0WFg=="],
 
+    "lru-cache": ["lru-cache@11.2.5", "", {}, "sha512-vFrFJkWtJvJnD5hg+hJvVE8Lh/TcMzKnTgCWmtBipwI5yLX/iX+5UB2tfuyODF5E7k9xEzMdYgGqaSb1c0c5Yw=="],
+
     "mimic-function": ["mimic-function@5.0.1", "", {}, "sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA=="],
 
+    "minimatch": ["minimatch@10.1.1", "", { "dependencies": { "@isaacs/brace-expansion": "^5.0.0" } }, "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ=="],
+
+    "minipass": ["minipass@7.1.2", "", {}, "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw=="],
+
     "onetime": ["onetime@7.0.0", "", { "dependencies": { "mimic-function": "^5.0.0" } }, "sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ=="],
 
     "ora": ["ora@9.0.0", "", { "dependencies": { "chalk": "^5.6.2", "cli-cursor": "^5.0.0", "cli-spinners": "^3.2.0", "is-interactive": "^2.0.0", "is-unicode-supported": "^2.1.0", "log-symbols": "^7.0.1", "stdin-discarder": "^0.2.2", "string-width": "^8.1.0", "strip-ansi": "^7.1.2" } }, "sha512-m0pg2zscbYgWbqRR6ABga5c3sZdEon7bSgjnlXC64kxtxLOyjRcbbUkLj7HFyy/FTD+P2xdBWu8snGhYI0jc4A=="],
 
+    "p-limit": ["p-limit@7.2.0", "", { "dependencies": { "yocto-queue": "^1.2.1" } }, "sha512-ATHLtwoTNDloHRFFxFJdHnG6n2WUeFjaR8XQMFdKIv0xkXjrER8/iG9iu265jOM95zXHAfv9oTkqhrfbIzosrQ=="],
+
+    "path-scurry": ["path-scurry@2.0.1", "", { "dependencies": { "lru-cache": "^11.0.0", "minipass": "^7.1.2" } }, "sha512-oWyT4gICAu+kaA7QWk/jvCHWarMKNs6pXOGWKDTr7cw4IGcUbW+PeTfbaQiLGheFRpjo6O9J0PmyMfQPjH71oA=="],
+
+    "readdirp": ["readdirp@5.0.0", "", {}, "sha512-9u/XQ1pvrQtYyMpZe7DXKv2p5CNvyVwzUB6uhLAnQwHMSgKMBR62lc7AHljaeteeHXn11XTAaLLUVZYVZyuRBQ=="],
+
     "require-from-string": ["require-from-string@2.0.2", "", {}, "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw=="],
 
     "restore-cursor": ["restore-cursor@5.1.0", "", { "dependencies": { "onetime": "^7.0.0", "signal-exit": "^4.1.0" } }, "sha512-oMA2dcrw6u0YfxJQXm342bFKX/E4sG9rbTzO9ptUcR/e8A33cHuvStiYOwH7fszkZlZ1z/ta9AAoPk2F4qIOHA=="],
@@ -83,7 +116,7 @@
 
     "stdin-discarder": ["stdin-discarder@0.2.2", "", {}, "sha512-UhDfHmA92YAlNnCfhmq0VeNL5bDbiZGg7sZ2IvPsXubGkiNa9EC+tUTsjBRsYUAz87btI6/1wf4XoVvQ3uRnmQ=="],
 
-    "string-width": ["string-width@8.1.0", "", { "dependencies": { "get-east-asian-width": "^1.3.0", "strip-ansi": "^7.1.0" } }, "sha512-Kxl3KJGb/gxkaUMOjRsQ8IrXiGW75O4E3RPjFIINOVH8AMl2SQ/yWdTzWwF3FevIX9LcMAjJW+GRwAlAbTSXdg=="],
+    "string-width": ["string-width@4.2.3", "", { "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", "strip-ansi": "^6.0.1" } }, "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g=="],
 
     "strip-ansi": ["strip-ansi@7.1.2", "", { "dependencies": { "ansi-regex": "^6.0.1" } }, "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA=="],
 
@@ -105,6 +138,14 @@
 
     "when-exit": ["when-exit@2.1.5", "", {}, "sha512-VGkKJ564kzt6Ms1dbgPP/yuIoQCrsFAnRbptpC5wOEsDaNsbCB2bnfnaA8i/vRs5tjUSEOtIuvl9/MyVsvQZCg=="],
 
+    "yocto-queue": ["yocto-queue@1.2.2", "", {}, "sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ=="],
+
     "yoctocolors": ["yoctocolors@2.1.2", "", {}, "sha512-CzhO+pFNo8ajLM2d2IW/R93ipy99LWjtwblvC1RsoSUMZgyLbYFr221TnSNT7GjGdYui6P459mw9JH/g/zW2ug=="],
+
+    "ora/string-width": ["string-width@8.1.0", "", { "dependencies": { "get-east-asian-width": "^1.3.0", "strip-ansi": "^7.1.0" } }, "sha512-Kxl3KJGb/gxkaUMOjRsQ8IrXiGW75O4E3RPjFIINOVH8AMl2SQ/yWdTzWwF3FevIX9LcMAjJW+GRwAlAbTSXdg=="],
+
+    "string-width/strip-ansi": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="],
+
+    "string-width/strip-ansi/ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="],
   }
 }
diff --git a/package.json b/package.json
index fa2837d..8a90469 100644
--- a/package.json
+++ b/package.json
@@ -16,13 +16,18 @@
   "dependencies": {
     "@google/generative-ai": "^0.24.1",
     "chalk": "^5.6.2",
+    "chokidar": "^5.0.0",
+    "cli-progress": "^3.12.0",
     "commander": "^14.0.2",
     "conf": "^15.0.2",
+    "glob": "^13.0.0",
     "ora": "^9.0.0",
+    "p-limit": "^7.2.0",
     "unpdf": "^1.4.0"
   },
   "devDependencies": {
     "@types/bun": "latest",
+    "@types/cli-progress": "^3.11.6",
     "typescript": "^5"
   }
 }
diff --git a/src/commands/convert.ts b/src/commands/convert.ts
index 599d438..67dc142 100644
--- a/src/commands/convert.ts
+++ b/src/commands/convert.ts
@@ -3,98 +3,284 @@ import path from 'node:path';
 import { Command } from 'commander';
 import ora from 'ora';
 import chalk from 'chalk';
+import { glob } from 'glob';
+import pLimit from 'p-limit';
+import cliProgress from 'cli-progress';
 import { extractPdfText } from '../utils/pdf.js';
-import { convertToMarkdown } from '../utils/gemini.js';
+import { convertToMarkdown, convertToMarkdownStream, convertPdfWithVision } from '../utils/gemini.js';
 import { getApiKey } from '../utils/config.js';
+import { isCached, setCached } from '../utils/cache.js';
+import { getPrompt, listTemplates } from '../utils/templates.js';
+import { formatOutput, getExtension, type OutputFormat } from '../utils/formats.js';
+import { extractImages } from '../utils/images.js';
+
+interface ConvertOptions {
+  mode: 'standalone' | 'ai' | 'vision';
+  format: OutputFormat;
+  output?: string;
+  apiKey?: string;
+  template: string;
+  prompt?: string;
+  concurrency: number;
+  cache: boolean;
+  stream: boolean;
+  stdout: boolean;
+  extractImages: boolean;
+}
+
+async function processFile(
+  filePath: string,
+  options: ConvertOptions,
+  apiKey: string | null,
+  outputPath: string,
+  showSpinner: boolean = true
+): Promise<{ success: boolean; skipped: boolean }> {
+  const spinner = showSpinner ? ora(`Processing ${path.basename(filePath)}...`).start() : null;
+
+  if (options.cache && outputPath && isCached(filePath, outputPath)) {
+    spinner?.succeed(chalk.gray(`Skipped (cached): ${path.basename(filePath)}`));
+    return { success: true, skipped: true };
+  }
 
-async function processFile(filePath: string, mode: 'standalone' | 'ai', apiKey: string | null, output?: string) {
-  const spinner = ora(`Processing ${path.basename(filePath)}...`).start();
-  
   try {
-    const text = await extractPdfText(filePath);
-    
-    let result = text;
-    if (mode === 'ai') {
-      if (!apiKey) {
-        spinner.fail('API Key is required for AI mode. Use "pdf2md config --key <key>" or pass --api-key.');
-        return false;
+    let result: string;
+    const prompt = options.prompt || getPrompt(options.template);
+
+    if (options.mode === 'standalone') {
+      result = await extractPdfText(filePath);
+    } else if (options.mode === 'vision') {
+      if (!apiKey) throw new Error('API Key required for vision mode');
+
+      if (options.stream && !options.stdout) {
+        spinner?.stop();
+        process.stdout.write(chalk.blue(`\n--- ${path.basename(filePath)} ---\n`));
+        result = await convertPdfWithVision(filePath, apiKey, {
+          prompt,
+          stream: true,
+          onChunk: (chunk: string) => process.stdout.write(chunk),
+        });
+        process.stdout.write('\n');
+      } else {
+        if (spinner) spinner.text = `Vision processing ${path.basename(filePath)}...`;
+        result = await convertPdfWithVision(filePath, apiKey, { prompt });
+      }
+    } else {
+      if (!apiKey) throw new Error('API Key required for AI mode');
+
+      const text = await extractPdfText(filePath);
+
+      if (options.stream && !options.stdout) {
+        spinner?.stop();
+        process.stdout.write(chalk.blue(`\n--- ${path.basename(filePath)} ---\n`));
+        result = await convertToMarkdownStream(text, apiKey, (chunk: string) => process.stdout.write(chunk), { prompt });
+        process.stdout.write('\n');
+      } else {
+        if (spinner) spinner.text = `AI processing ${path.basename(filePath)}...`;
+        result = await convertToMarkdown(text, apiKey, { prompt });
+      }
+    }
+
+    if (options.extractImages && outputPath) {
+      const imagesDir = outputPath.replace(/\.[^.]+$/, '_images');
+      const extracted = await extractImages(filePath, imagesDir);
+      if (extracted.length > 0 && spinner) {
+        spinner.text = `Extracted ${extracted.length} images`;
       }
-      spinner.text = `Generative AI processing for ${path.basename(filePath)}...`;
-      try {
-        result = await convertToMarkdown(text, apiKey);
-      } catch (e: any) {
-         spinner.fail(`AI processing failed: ${e.message}`);
-         return false;
+    }
+
+    const output = formatOutput(result, options.format, {
+      source: filePath,
+      processedAt: new Date().toISOString(),
+    });
+
+    if (options.stdout) {
+      process.stdout.write(output);
+    } else {
+      fs.mkdirSync(path.dirname(outputPath), { recursive: true });
+      fs.writeFileSync(outputPath, output);
+
+      if (options.cache) {
+        setCached(filePath, outputPath);
       }
+
+      spinner?.succeed(`Saved: ${outputPath}`);
     }
 
-    const outputPath = output || filePath.replace(/\.pdf$/i, '.md');
-    fs.mkdirSync(path.dirname(outputPath), { recursive: true });
-    fs.writeFileSync(outputPath, result);
-    
-    spinner.succeed(`Saved to ${outputPath}`);
-    return true;
+    return { success: true, skipped: false };
   } catch (error: any) {
-    spinner.fail(`Error processing ${filePath}: ${error.message}`);
-    return false;
+    spinner?.fail(`Error: ${path.basename(filePath)} - ${error.message}`);
+    return { success: false, skipped: false };
   }
 }
 
-async function processDirectory(dirPath: string, mode: 'standalone' | 'ai', apiKey: string | null) {
-  const files = fs.readdirSync(dirPath, { recursive: true }) as string[];
-  const pdfFiles = files.filter(f => f.toLowerCase().endsWith('.pdf'));
-  
-  if (pdfFiles.length === 0) {
-    console.log(chalk.yellow('No PDF files found in directory.'));
-    return;
+async function processMultipleFiles(
+  files: string[],
+  options: ConvertOptions,
+  apiKey: string | null
+): Promise<void> {
+  const limit = pLimit(options.concurrency);
+
+  const progressBar = new cliProgress.SingleBar({
+    format: '{bar} {percentage}% | ETA: {eta}s | {value}/{total} files | {status}',
+    barCompleteChar: '█',
+    barIncompleteChar: '░',
+    hideCursor: true,
+  }, cliProgress.Presets.shades_classic);
+
+  console.log(chalk.blue(`\nProcessing ${files.length} PDF files (concurrency: ${options.concurrency})\n`));
+  progressBar.start(files.length, 0, { status: 'Starting...' });
+
+  let completed = 0;
+  let succeeded = 0;
+  let skipped = 0;
+
+  const tasks = files.map((file) =>
+    limit(async () => {
+      const ext = getExtension(options.format);
+      const outputPath = file.replace(/\.pdf$/i, ext);
+
+      progressBar.update(completed, { status: path.basename(file) });
+
+      const result = await processFile(file, options, apiKey, outputPath, false);
+
+      completed++;
+      if (result.success) succeeded++;
+      if (result.skipped) skipped++;
+
+      progressBar.update(completed, { status: `Done: ${path.basename(file)}` });
+
+      return result;
+    })
+  );
+
+  await Promise.all(tasks);
+  progressBar.stop();
+
+  console.log(chalk.green(`\n✓ Completed: ${succeeded}/${files.length} files processed`));
+  if (skipped > 0) {
+    console.log(chalk.gray(`  ⏭️  ${skipped} files skipped (cached)`));
   }
+}
 
-  console.log(chalk.blue(`Found ${pdfFiles.length} PDF files in ${dirPath}`));
-  
-  let successCount = 0;
-  for (const file of pdfFiles) {
-    const fullPath = path.join(dirPath, file);
-    // For directory processing, output is always side-by-side
-    if (await processFile(fullPath, mode, apiKey)) {
-      successCount++;
-    }
+async function readStdin(): Promise<Buffer> {
+  const chunks: Buffer[] = [];
+  for await (const chunk of process.stdin) {
+    chunks.push(chunk as Buffer);
   }
-  
-  console.log(chalk.green(`\nCompleted! ${successCount}/${pdfFiles.length} files processed.`));
+  return Buffer.concat(chunks);
 }
 
 export const convertCommand = new Command('convert')
   .description('Convert PDF to Markdown')
-  .argument('[input]', 'Input file or directory')
-  .option('-m, --mode <mode>', 'Mode: standalone (text extract) or ai (Gemini)', 'standalone')
-  .option('-o, --output <output>', 'Output file path (only for single file)')
-  .option('-k, --api-key <key>', 'Gemini 3 Flash Preview API Key (overrides config)')
-  .action(async (input, options, command) => {
-    if (!input) {
-      command.help();
+  .argument('[input]', 'Input file, directory, or glob pattern (omit to read from stdin)')
+  .option('-m, --mode <mode>', 'Mode: standalone, ai, or vision', 'standalone')
+  .option('-f, --format <format>', 'Output format: markdown, json, html, text', 'markdown')
+  .option('-o, --output <path>', 'Output file path (single file only)')
+  .option('-k, --api-key <key>', 'Gemini API Key')
+  .option('-t, --template <name>', 'Prompt template (default, invoice, table, summary, code)', 'default')
+  .option('-p, --prompt <text>', 'Custom prompt (overrides template)')
+  .option('-c, --concurrency <n>', 'Parallel processing limit', '3')
+  .option('--cache', 'Skip already-processed files')
+  .option('--stream', 'Stream AI response in real-time')
+  .option('--stdout', 'Output to stdout instead of file')
+  .option('--extract-images', 'Extract images from PDF')
+  .option('--list-templates', 'Show available prompt templates')
+  .action(async (input: string | undefined, opts: {
+    mode: string;
+    format: string;
+    output?: string;
+    apiKey?: string;
+    template: string;
+    prompt?: string;
+    concurrency: string;
+    cache?: boolean;
+    stream?: boolean;
+    stdout?: boolean;
+    extractImages?: boolean;
+    listTemplates?: boolean;
+  }) => {
+    if (opts.listTemplates) {
+      console.log(chalk.blue('Available templates:'));
+      listTemplates().forEach((t) => console.log(`  - ${t}`));
       return;
     }
 
+    const options: ConvertOptions = {
+      mode: opts.mode as ConvertOptions['mode'],
+      format: opts.format as OutputFormat,
+      output: opts.output,
+      apiKey: opts.apiKey,
+      template: opts.template,
+      prompt: opts.prompt,
+      concurrency: parseInt(opts.concurrency, 10),
+      cache: opts.cache || false,
+      stream: opts.stream || false,
+      stdout: opts.stdout || false,
+      extractImages: opts.extractImages || false,
+    };
+
     const apiKey = options.apiKey || getApiKey();
-    const mode = options.mode;
 
-    // Check if input exists
+    if (!input) {
+      if (!process.stdin.isTTY) {
+        const spinner = ora('Reading from stdin...').start();
+        try {
+          const buffer = await readStdin();
+          const tmpPath = `/tmp/pdf2md-stdin-${Date.now()}.pdf`;
+          fs.writeFileSync(tmpPath, buffer);
+
+          spinner.text = 'Processing...';
+          const ext = getExtension(options.format);
+          const outputPath = options.output || (options.stdout ? '' : `output${ext}`);
+
+          await processFile(tmpPath, { ...options, stdout: options.stdout || !options.output }, apiKey, outputPath);
+          fs.unlinkSync(tmpPath);
+        } catch (error: any) {
+          spinner.fail(`Error: ${error.message}`);
+          process.exit(1);
+        }
+        return;
+      }
+
+      console.log(chalk.yellow('No input provided. Use --help for usage.'));
+      return;
+    }
+
+    if (input.includes('*')) {
+      const files = await glob(input, { nodir: true });
+      const pdfFiles = files.filter((f: string) => f.toLowerCase().endsWith('.pdf'));
+
+      if (pdfFiles.length === 0) {
+        console.log(chalk.yellow('No PDF files matched the pattern.'));
+        return;
+      }
+
+      await processMultipleFiles(pdfFiles, options, apiKey);
+      return;
+    }
+
     if (!fs.existsSync(input)) {
-        console.error(chalk.red(`Error: Input "${input}" not found.`));
-        process.exit(1);
+      console.error(chalk.red(`Error: "${input}" not found.`));
+      process.exit(1);
     }
 
     const stats = fs.statSync(input);
-    
+
     if (stats.isFile()) {
-        await processFile(input, mode, apiKey, options.output);
+      const ext = getExtension(options.format);
+      const outputPath = options.output || input.replace(/\.pdf$/i, ext);
+      await processFile(input, options, apiKey, outputPath);
     } else if (stats.isDirectory()) {
-        if (options.output) {
-            console.warn(chalk.yellow('Warning: --output is ignored when processing a directory.'));
-        }
-        await processDirectory(input, mode, apiKey);
-    } else {
-        console.error(chalk.red('Error: Input is not a file or directory.'));
-        process.exit(1);
+      const files = fs.readdirSync(input, { recursive: true }) as string[];
+      const pdfFiles = files
+        .filter((f) => f.toLowerCase().endsWith('.pdf'))
+        .map((f) => path.join(input, f));
+
+      if (pdfFiles.length === 0) {
+        console.log(chalk.yellow('No PDF files found in directory.'));
+        return;
+      }
+
+      await processMultipleFiles(pdfFiles, options, apiKey);
     }
   });
diff --git a/src/commands/watch.ts b/src/commands/watch.ts
new file mode 100644
index 0000000..9cebe1a
--- /dev/null
+++ b/src/commands/watch.ts
@@ -0,0 +1,112 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import { Command } from 'commander';
+import chalk from 'chalk';
+import chokidar from 'chokidar';
+import ora from 'ora';
+import { extractPdfText } from '../utils/pdf.js';
+import { convertToMarkdown, convertPdfWithVision } from '../utils/gemini.js';
+import { getApiKey } from '../utils/config.js';
+import { isCached, setCached } from '../utils/cache.js';
+import { getPrompt } from '../utils/templates.js';
+import { formatOutput, getExtension, type OutputFormat } from '../utils/formats.js';
+
+/**
+ * `watch` subcommand: monitors a directory tree and converts every PDF that
+ * is added or changed, writing the result next to the source file.
+ */
+export const watchCommand = new Command('watch')
+  .description('Watch a directory for new PDFs and auto-convert them')
+  .argument('<directory>', 'Directory to watch')
+  .option('-m, --mode <mode>', 'Mode: standalone, ai, or vision', 'ai')
+  .option('-f, --format <format>', 'Output format: markdown, json, html, text', 'markdown')
+  .option('-k, --api-key <key>', 'Gemini API Key')
+  .option('-t, --template <name>', 'Prompt template or custom prompt', 'default')
+  .option('--no-cache', 'Disable caching (reprocess all files)')
+  .action(async (directory, options) => {
+    const apiKey = options.apiKey || getApiKey();
+    const format = options.format as OutputFormat;
+    const prompt = getPrompt(options.template);
+
+    if (!fs.existsSync(directory)) {
+      console.error(chalk.red(`Directory "${directory}" not found.`));
+      process.exit(1);
+    }
+
+    // Fail fast: the AI and vision modes cannot run without a key.
+    if (options.mode !== 'standalone' && !apiKey) {
+      console.error(chalk.red('API Key required for AI/vision mode. Use "pdf2md config --key <key>".'));
+      process.exit(1);
+    }
+
+    console.log(chalk.blue(`👀 Watching ${directory} for PDF files...`));
+    console.log(chalk.gray(`   Mode: ${options.mode} | Format: ${format} | Template: ${options.template}`));
+    console.log(chalk.gray('   Press Ctrl+C to stop.\n'));
+
+    // chokidar 4+ no longer expands glob patterns, so watch the directory
+    // itself and filter out every regular file that is not a PDF.
+    const watcher = chokidar.watch(directory, {
+      persistent: true,
+      ignoreInitial: false,
+      // chokidar may call this without `stats`; never ignore in that case,
+      // so directories are still traversed.
+      ignored: (candidate, stats) =>
+        stats?.isFile() === true && !candidate.toLowerCase().endsWith('.pdf'),
+      awaitWriteFinish: {
+        stabilityThreshold: 1000,
+        pollInterval: 100,
+      },
+    });
+
+    // Converts one PDF and writes the output beside it; conversion errors are
+    // reported on the spinner so the watcher keeps running.
+    const processFile = async (filePath: string) => {
+      const ext = getExtension(format);
+      const outputPath = filePath.replace(/\.pdf$/i, ext);
+
+      if (options.cache && isCached(filePath, outputPath)) {
+        console.log(chalk.gray(`⏭️  Skipped (cached): ${path.basename(filePath)}`));
+        return;
+      }
+
+      const spinner = ora(`Processing ${path.basename(filePath)}...`).start();
+
+      try {
+        let result: string;
+
+        if (options.mode === 'standalone') {
+          result = await extractPdfText(filePath);
+        } else if (options.mode === 'vision') {
+          result = await convertPdfWithVision(filePath, apiKey!, { prompt });
+        } else {
+          const text = await extractPdfText(filePath);
+          result = await convertToMarkdown(text, apiKey!, { prompt });
+        }
+
+        const output = formatOutput(result, format, {
+          source: filePath,
+          processedAt: new Date().toISOString(),
+        });
+
+        fs.mkdirSync(path.dirname(outputPath), { recursive: true });
+        fs.writeFileSync(outputPath, output);
+
+        if (options.cache) {
+          setCached(filePath, outputPath);
+        }
+
+        spinner.succeed(`Saved: ${path.basename(outputPath)}`);
+      } catch (error: any) {
+        spinner.fail(`Error: ${error.message}`);
+      }
+    };
+
+    watcher.on('add', processFile);
+    watcher.on('change', processFile);
+
+    process.on('SIGINT', () => {
+      console.log(chalk.yellow('\n\nStopping watcher...'));
+      watcher.close();
+      process.exit(0);
+    });
+  });
diff --git a/src/index.ts b/src/index.ts
index acc1687..bdbb5c0 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -2,14 +2,45 @@
 import { Command } from 'commander';
 import { convertCommand } from './commands/convert.js';
 import { configCommand } from './commands/config.js';
+import { watchCommand } from './commands/watch.js';
+import { clearCache } from './utils/cache.js';
+import { listTemplates } from './utils/templates.js';
+import chalk from 'chalk';
 import packageJson from '../package.json' with { type: "json" };
 
 const program = new Command();
 
 program
-  .description('Convert PDFs to Markdown using Bun and Gemini 3 Flash Preview AI')
+  .description('Convert PDFs to Markdown using Bun and Gemini AI')
   .version(packageJson.version)
   .addCommand(convertCommand, { isDefault: true })
-  .addCommand(configCommand);
+  .addCommand(configCommand)
+  .addCommand(watchCommand);
+
+// Templates subcommand
+program
+  .command('templates')
+  .description('List available prompt templates')
+  .action(() => {
+    console.log(chalk.blue('Available templates:\n'));
+    listTemplates().forEach((t) => {
+      console.log(`  ${chalk.green(t)}`);
+    });
+    console.log(chalk.gray('\nUse with: pdf2md --template <name>'));
+  });
+
+// Cache subcommand
+program
+  .command('cache')
+  .description('Manage the file cache')
+  .option('--clear', 'Clear all cached entries')
+  .action((opts) => {
+    if (opts.clear) {
+      clearCache();
+      console.log(chalk.green('Cache cleared.'));
+    } else {
+      console.log('Use --clear to clear the cache.');
+    }
+  });
 
 program.parse();
diff --git a/src/utils/cache.ts b/src/utils/cache.ts
new file mode 100644
index 0000000..45a97e2
--- /dev/null
+++ b/src/utils/cache.ts
@@ -0,0 +1,35 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import crypto from 'node:crypto';
+import Conf from 'conf';
+
+// Keys are file paths, so turn off conf's dot-notation: otherwise every "." is read as nesting.
+const cacheStore = new Conf<Record<string, string>>({
+  projectName: 'pdf2md-cli', configName: 'cache', accessPropertiesByDotNotation: false,
+});
+
+/** Returns the hex-encoded SHA-256 digest of the file's bytes; the file is read synchronously. */
+export function getFileHash(filePath: string): string {
+  return crypto.createHash('sha256').update(fs.readFileSync(filePath)).digest('hex');
+}
+
+/**
+ * Reports whether `filePath` was already converted to `outputPath`: true only when the hash
+ * stored for this input/output pair equals the file's current hash and the output file exists.
+ */
+export function isCached(filePath: string, outputPath: string): boolean {
+  const currentHash = getFileHash(filePath);
+  const storedHash = cacheStore.get(`${path.resolve(filePath)}:${path.resolve(outputPath)}`);
+  // Hashes are compared first; the output is only looked up on disk when they match.
+  return storedHash === currentHash && fs.existsSync(outputPath);
+}
+
+/** Stores the current content hash of `filePath` under its resolved input/output path pair. */
+export function setCached(filePath: string, outputPath: string): void {
+  const digest = getFileHash(filePath);
+  cacheStore.set(`${path.resolve(filePath)}:${path.resolve(outputPath)}`, digest);
+}
+
+/** Deletes every entry held in the cache store.
+ *  Backs the CLI's `cache --clear` option. */
+export function clearCache(): void { cacheStore.clear(); }
diff --git a/src/utils/formats.ts b/src/utils/formats.ts
new file mode 100644
index 0000000..0392a3b
--- /dev/null
+++ b/src/utils/formats.ts
@@ -0,0 +1,70 @@
+export type OutputFormat = 'markdown' | 'json' | 'html' | 'text'; // Formats handled by getExtension and formatOutput.
+
+/** Maps an output format to the file extension (leading dot included) used for its files. */
+export function getExtension(format: OutputFormat): string {
+  const extensionByFormat: Record<OutputFormat, string> = {
+    markdown: '.md', json: '.json',
+    html: '.html', text: '.txt',
+  };
+  return extensionByFormat[format];
+}
+
+/**
+ * Renders converted markdown as `format`: markdown is returned as-is, json wraps it as
+ * `{ content, metadata }` (metadata defaults to `{}`), html embeds it in a page, text strips markers.
+ */
+export function formatOutput(content: string, format: OutputFormat, metadata?: { source: string; processedAt: string }): string {
+  switch (format) {
+    case 'markdown':
+      return content;
+    case 'json':
+      return JSON.stringify({ content, metadata: metadata ?? {} }, null, 2);
+    case 'html':
+      return `<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>PDF Conversion</title>
+  <style>
+    body { font-family: system-ui, sans-serif; max-width: 800px; margin: 2rem auto; padding: 0 1rem; line-height: 1.6; }
+    pre { background: #f4f4f4; padding: 1rem; overflow-x: auto; }
+    code { background: #f4f4f4; padding: 0.2rem 0.4rem; }
+    table { border-collapse: collapse; width: 100%; }
+    th, td { border: 1px solid #ddd; padding: 0.5rem; text-align: left; }
+  </style>
+</head>
+<body>
+${markdownToHtml(content)}
+</body>
+</html>`;
+    case 'text':
+      return content
+        // Unwrap fenced blocks first: the inline-code rule below would otherwise eat their backticks.
+        .replace(/```[\s\S]*?```/g, (match) => match.replace(/```\w*\n?/g, '').trim())
+        // Strip heading markers only at the start of a line, so "C#" or "#42" in prose survive.
+        .replace(/^#{1,6}[ \t]+/gm, '')
+        .replace(/\*\*([^*]+)\*\*/g, '$1')
+        .replace(/\*([^*]+)\*/g, '$1')
+        .replace(/`([^`]+)`/g, '$1')
+        .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
+  }
+}
+
+/** Minimal markdown-to-HTML pass: fenced code, # headings, bold, italic, inline code, "- " lists, paragraphs. */
+function markdownToHtml(md: string): string {
+  // Escape first so text from the PDF cannot inject markup and code such as `a < b` survives.
+  const escaped = md.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
+  // split() with a capture group alternates prose (even index) and fenced-code bodies (odd index),
+  // so the inline rules below never rewrite the contents of a code block.
+  return escaped.split(/```[^\n`]*\n([\s\S]*?)```/).map((part, i) => (i % 2 === 1 ? `<pre><code>${part}</code></pre>` : part
+    .replace(/^(#{1,6}) (.*)$/gm, (_m, hashes: string, title: string) => `<h${hashes.length}>${title}</h${hashes.length}>`)
+    .replace(/\*\*([^*\n]+)\*\*/g, '<strong>$1</strong>')
+    .replace(/\*([^*\n]+)\*/g, '<em>$1</em>')
+    .replace(/`([^`\n]+)`/g, '<code>$1</code>')
+    .replace(/^\- (.*$)/gm, '<li>$1</li>')
+    .replace(/(<li>.*<\/li>\n?)+/g, '<ul>$&</ul>')
+    // Wrap the remaining non-empty lines in <p>, leaving generated block-level tags alone.
+    .replace(/^(.+)$/gm, (line) => (/^<\/?(?:h[1-6]|ul|li)\b/.test(line) ? line : `<p>${line}</p>`))
+  )).join('');
+}
diff --git a/src/utils/gemini.ts b/src/utils/gemini.ts
index bfba349..7985e8a 100644
--- a/src/utils/gemini.ts
+++ b/src/utils/gemini.ts
@@ -1,22 +1,119 @@
-import { GoogleGenerativeAI } from '@google/generative-ai';
+import fs from 'node:fs';
+import { GoogleGenerativeAI, type GenerateContentStreamResult } from '@google/generative-ai';
+import { getDocumentProxy, renderPageAsImage } from 'unpdf';
 
-export async function convertToMarkdown(text: string, apiKey: string, modelName: string = 'gemini-3-flash-preview'): Promise<string> {
-  const genAI = new GoogleGenerativeAI(apiKey);
-  const model = genAI.getGenerativeModel({ model: modelName });
-
-  const prompt = `Convert the following raw PDF text into well-formatted markdown.
+const DEFAULT_PROMPT = `Convert the following into well-formatted markdown.
 Rules:
 - Use appropriate heading levels
 - Format lists properly
 - Preserve code blocks if present
 - Add proper spacing
 - Make it readable and well-structured
-- Do not output any preamble or explanation, just the markdown.
+- Do not output any preamble or explanation, just the markdown.`;
+
+/**
+ * Asks Gemini to convert extracted PDF text and resolves with the complete reply text.
+ * The request is `options.prompt` (default: DEFAULT_PROMPT) followed by a "Raw text:" section;
+ * `options.modelName` defaults to 'gemini-2.0-flash'.
+ */
+export async function convertToMarkdown(
+  text: string,
+  apiKey: string,
+  options: { modelName?: string; prompt?: string } = {}
+): Promise<string> {
+  const { modelName = 'gemini-2.0-flash', prompt = DEFAULT_PROMPT } = options;
+  const model = new GoogleGenerativeAI(apiKey).getGenerativeModel({ model: modelName });
+
+  const fullPrompt = `${prompt}
 
 Raw text:
 ${text}`;
 
-  const result = await model.generateContent(prompt);
+  const result = await model.generateContent(fullPrompt);
   const response = await result.response;
   return response.text();
 }
+
+/**
+ * Streaming variant of the text conversion: sends `options.prompt` plus the raw text to Gemini,
+ * forwards each chunk's text to `onChunk` as it arrives, and resolves with all chunks concatenated.
+ *
+ * @param text - Extracted PDF text, appended after a "Raw text:" marker.
+ * @param apiKey - API key handed to the GoogleGenerativeAI client.
+ * @param onChunk - Called once per streamed chunk, in arrival order.
+ * @param options - `modelName` (default 'gemini-2.0-flash') and `prompt` (default DEFAULT_PROMPT).
+ */
+export async function convertToMarkdownStream(
+  text: string,
+  apiKey: string,
+  onChunk: (chunk: string) => void,
+  options: { modelName?: string; prompt?: string } = {}
+): Promise<string> {
+  const { modelName = 'gemini-2.0-flash', prompt = DEFAULT_PROMPT } = options;
+  const model = new GoogleGenerativeAI(apiKey).getGenerativeModel({ model: modelName });
+  const request = `${prompt}
+
+Raw text:
+${text}`;
+  const pieces: string[] = [];
+  const { stream } = await model.generateContentStream(request);
+  for await (const chunk of stream) {
+    const piece = chunk.text();
+    pieces.push(piece);
+    onChunk(piece);
+  }
+  return pieces.join('');
+}
+
+/**
+ * Vision mode: renders every page of the PDF at `filePath` to an image, sends the images to
+ * Gemini together with the prompt, and resolves with the model's reply text.
+ *
+ * @param filePath - PDF to read (synchronously) from disk.
+ * @param apiKey - API key handed to the GoogleGenerativeAI client.
+ * @param options - `modelName` (default 'gemini-2.0-flash'), `prompt` (default DEFAULT_PROMPT),
+ *   and `stream`/`onChunk`: streaming is used only when `stream` is true AND `onChunk` is given.
+ * @returns The full response text; in streaming mode, the concatenation of all chunks.
+ */
+export async function convertPdfWithVision(
+  filePath: string,
+  apiKey: string,
+  options: {
+    modelName?: string;
+    prompt?: string;
+    stream?: boolean;
+    onChunk?: (chunk: string) => void;
+  } = {}
+): Promise<string> {
+  const { modelName = 'gemini-2.0-flash', prompt = DEFAULT_PROMPT, stream = false, onChunk } = options;
+  const model = new GoogleGenerativeAI(apiKey).getGenerativeModel({ model: modelName });
+  const pdf = await getDocumentProxy(new Uint8Array(fs.readFileSync(filePath)));
+  // One inline part per page, tagged image/png, in page order (page numbers start at 1).
+  const pageImages: { inlineData: { data: string; mimeType: string } }[] = [];
+  for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
+    const rendered = await renderPageAsImage(pdf, pageNumber, { scale: 2 });
+    const data = Buffer.from(rendered).toString('base64');
+    pageImages.push({ inlineData: { data, mimeType: 'image/png' } });
+  }
+
+  const request = [
+    `${prompt}
+
+Convert this PDF document (${pdf.numPages} pages shown as images) to markdown.`,
+    ...pageImages,
+  ];
+
+  // Without both `stream` and a chunk handler, fall back to a single non-streaming request.
+  if (!(stream && onChunk)) {
+    const { response } = await model.generateContent(request);
+    return response.text();
+  }
+  const pieces: string[] = [];
+  const streamed = await model.generateContentStream(request);
+  for await (const chunk of streamed.stream) {
+    const piece = chunk.text();
+    pieces.push(piece);
+    onChunk(piece);
+  }
+  return pieces.join('');
+}
diff --git a/src/utils/images.ts b/src/utils/images.ts
new file mode 100644
index 0000000..2f01ff0
--- /dev/null
+++ b/src/utils/images.ts
@@ -0,0 +1,72 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import { getDocumentProxy } from 'unpdf';
+
+export interface ExtractedImage { // NOTE(review): not referenced by any function in this file.
+  name: string;
+  data: Uint8Array;
+  width: number;
+  height: number;
+}
+
+/**
+ * Dumps the data of every image painted in the PDF into `outputDir` (created if missing).
+ *
+ * Files are named `image-<n>.raw`, numbered across the whole document in encounter order, and
+ * hold the `data` buffer of the object returned by `page.objs.get`; no image encoding is applied.
+ * Extraction is best-effort: an image that cannot be fetched or written is skipped silently.
+ *
+ * @param filePath - PDF to read (synchronously) from disk.
+ * @param outputDir - Directory that receives the `.raw` files.
+ * @returns Paths of the files written, in the order they were written.
+ */
+export async function extractImages(filePath: string, outputDir: string): Promise<string[]> {
+  const pdf = await getDocumentProxy(new Uint8Array(fs.readFileSync(filePath)));
+  fs.mkdirSync(outputDir, { recursive: true });
+
+  const savedPaths: string[] = [];
+  let imageCount = 0;
+  for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
+    const page = await pdf.getPage(pageNumber);
+    const ops = await page.getOperatorList();
+
+    for (let opIndex = 0; opIndex < ops.fnArray.length; opIndex++) {
+      // 85 is OPS.paintImageXObject; the operator's first argument names the image object.
+      if (ops.fnArray[opIndex] !== 85) continue;
+      const imgName = ops.argsArray[opIndex][0];
+      try {
+        const img = await page.objs.get(imgName);
+        if (!img?.data) continue;
+        imageCount++;
+        const rawPath = path.join(outputDir, `image-${imageCount}.raw`);
+        fs.writeFileSync(rawPath, Buffer.from(img.data));
+        savedPaths.push(rawPath);
+      } catch {
+        // Best effort: skip images that can't be extracted.
+      }
+    }
+  }
+
+  return savedPaths;
+}
+
+/**
+ * Reports whether any page of the PDF issues an image-paint operator (85, the code this
+ * module treats as pdf.js's OPS.paintImageXObject). Scanning stops at the first match.
+ *
+ * Any error raised while reading or inspecting the file is swallowed and reported as `false`.
+ */
+export async function hasImages(filePath: string): Promise<boolean> {
+  try {
+    const pdf = await getDocumentProxy(new Uint8Array(fs.readFileSync(filePath)));
+
+    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
+      const page = await pdf.getPage(pageNumber);
+      const { fnArray } = await page.getOperatorList();
+      if (fnArray.includes(85)) return true;
+    }
+    return false;
+  } catch {
+    return false;
+  }
+}
diff --git a/src/utils/templates.ts b/src/utils/templates.ts
new file mode 100644
index 0000000..5048a43
--- /dev/null
+++ b/src/utils/templates.ts
@@ -0,0 +1,55 @@
+export const TEMPLATES: Record<string, string> = { // Built-in prompts, keyed by template name.
+  default: `Convert the following raw PDF text into well-formatted markdown.
+Rules:
+- Use appropriate heading levels
+- Format lists properly
+- Preserve code blocks if present
+- Add proper spacing
+- Make it readable and well-structured
+- Do not output any preamble or explanation, just the markdown.`,
+  // invoice: asks for invoice fields, party info, a line-item table and totals as markdown.
+  invoice: `Extract invoice data from the following PDF text.
+Format as markdown with:
+- Invoice number, date, due date as headers
+- Vendor and customer info in sections
+- Line items as a markdown table
+- Totals clearly formatted
+- Do not output any preamble, just the structured markdown.`,
+  // table: asks for the document's tables only, each as a markdown table.
+  table: `Extract all tables from the following PDF text.
+Rules:
+- Convert each table to proper markdown table format
+- Preserve column headers
+- Maintain data alignment
+- If no tables found, state "No tables found"
+- Do not output any preamble or explanation.`,
+  // summary: asks for a short executive summary plus key points (prompt caps it at 500 words).
+  summary: `Summarize the following PDF text into a concise markdown document.
+Rules:
+- Create a brief executive summary
+- List key points as bullet points
+- Keep it under 500 words
+- Use appropriate headings
+- Do not output any preamble.`,
+  // code: asks for code to be pulled out into fenced blocks with a detected language.
+  code: `Extract and format code from the following PDF text.
+Rules:
+- Identify code blocks and wrap in appropriate markdown code fences
+- Try to detect the programming language
+- Preserve indentation
+- Add brief comments for context if helpful
+- Do not output any preamble.`,
+};
+
+/** Resolves a built-in template name to its prompt; any other string is returned as a custom prompt. */
+export function getPrompt(templateOrCustom: string): string {
+  // Own-property check: a bare lookup would also match inherited keys such as "constructor".
+  if (Object.prototype.hasOwnProperty.call(TEMPLATES, templateOrCustom)) {
+    return TEMPLATES[templateOrCustom];
+  }
+  return templateOrCustom;
+}
+
+/** Returns the names of the built-in templates: the own enumerable keys of TEMPLATES,
+ *  in declaration order. */
+export function listTemplates(): string[] { return Object.keys(TEMPLATES); }