diff --git a/.gitignore b/.gitignore
index 0d1e7a8..8c57158 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,9 @@ bun.lockb
bin/
pdf2md
+# test folder
+test/
+
# IDE
.vscode/
.idea/
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..b1765b5
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,21 @@
+# AGENTS.md
+
+## Commands
+- **Build**: `bun run build` - compiles to `bin/pdf2md`
+- **Dev**: `bun run ./src/index.ts [args]` - run without compiling
+- **Type check**: `bun tsc --noEmit`
+- No test framework configured
+
+## Architecture
+- CLI tool using Commander.js with subcommands in `src/commands/`
+- Entry: `src/index.ts` → commands: `convert` (default), `config`
+- Utils: `src/utils/` - pdf extraction (unpdf), Gemini AI, config (conf), conversion cache, prompt templates, output formats, image extraction
+- Compiles to standalone binary via `bun build --compile`
+
+## Code Style
+- **Runtime**: Bun (never npm/pnpm/yarn/node)
+- **Module**: ESM with `.js` extensions in imports (even for .ts files)
+- **Types**: Strict TypeScript, explicit error typing (`error: any`)
+- **Imports**: node builtins with `node:` prefix, named exports preferred
+- **Patterns**: async/await, ora spinners for progress, chalk for colors
+- **Error handling**: try/catch with spinner.fail(), process.exit(1) for fatal errors
diff --git a/bun.lock b/bun.lock
index d3f7e7b..34b3191 100644
--- a/bun.lock
+++ b/bun.lock
@@ -7,13 +7,18 @@
"dependencies": {
"@google/generative-ai": "^0.24.1",
"chalk": "^5.6.2",
+ "chokidar": "^5.0.0",
+ "cli-progress": "^3.12.0",
"commander": "^14.0.2",
"conf": "^15.0.2",
+ "glob": "^13.0.0",
"ora": "^9.0.0",
+ "p-limit": "^7.2.0",
"unpdf": "^1.4.0",
},
"devDependencies": {
"@types/bun": "latest",
+ "@types/cli-progress": "^3.11.6",
"typescript": "^5",
},
},
@@ -21,8 +26,14 @@
"packages": {
"@google/generative-ai": ["@google/generative-ai@0.24.1", "", {}, "sha512-MqO+MLfM6kjxcKoy0p1wRzG3b4ZZXtPI+z2IE26UogS2Cm/XHO+7gGRBh6gcJsOiIVoH93UwKvW4HdgiOZCy9Q=="],
+ "@isaacs/balanced-match": ["@isaacs/balanced-match@4.0.1", "", {}, "sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ=="],
+
+ "@isaacs/brace-expansion": ["@isaacs/brace-expansion@5.0.0", "", { "dependencies": { "@isaacs/balanced-match": "^4.0.1" } }, "sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA=="],
+
"@types/bun": ["@types/bun@1.3.5", "", { "dependencies": { "bun-types": "1.3.5" } }, "sha512-RnygCqNrd3srIPEWBd5LFeUYG7plCoH2Yw9WaZGyNmdTEei+gWaHqydbaIRkIkcbXwhBT94q78QljxN0Sk838w=="],
+ "@types/cli-progress": ["@types/cli-progress@3.11.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-cE3+jb9WRlu+uOSAugewNpITJDt1VF8dHOopPO4IABFc3SXYL5WE/+PTz/FCdZRRfIujiWW3n3aMbv1eIGVRWA=="],
+
"@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="],
"ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="],
@@ -37,8 +48,12 @@
"chalk": ["chalk@5.6.2", "", {}, "sha512-7NzBL0rN6fMUW+f7A6Io4h40qQlG+xGmtMxfbnH/K7TAtt8JQWVQK+6g0UXKMeVJoyV5EkkNsErQ8pVD3bLHbA=="],
+ "chokidar": ["chokidar@5.0.0", "", { "dependencies": { "readdirp": "^5.0.0" } }, "sha512-TQMmc3w+5AxjpL8iIiwebF73dRDF4fBIieAqGn9RGCWaEVwQ6Fb2cGe31Yns0RRIzii5goJ1Y7xbMwo1TxMplw=="],
+
"cli-cursor": ["cli-cursor@5.0.0", "", { "dependencies": { "restore-cursor": "^5.0.0" } }, "sha512-aCj4O5wKyszjMmDT4tZj93kxyydN/K5zPWSCe6/0AV/AA1pqe5ZBIw0a2ZfPQV7lL5/yb5HsUreJ6UFAF1tEQw=="],
+ "cli-progress": ["cli-progress@3.12.0", "", { "dependencies": { "string-width": "^4.2.3" } }, "sha512-tRkV3HJ1ASwm19THiiLIXLO7Im7wlTuKnvkYaTkyoAPefqjNg7W7DHKUlGRxy9vxDvbyCYQkQozvptuMkGCg8A=="],
+
"cli-spinners": ["cli-spinners@3.3.0", "", {}, "sha512-/+40ljC3ONVnYIttjMWrlL51nItDAbBrq2upN8BPyvGU/2n5Oxw3tbNwORCaNuNqLJnxGqOfjUuhsv7l5Q4IsQ=="],
"commander": ["commander@14.0.2", "", {}, "sha512-TywoWNNRbhoD0BXs1P3ZEScW8W5iKrnbithIl0YH+uCmBd0QpPOA8yc82DS3BIE5Ma6FnBVUsJ7wVUDz4dvOWQ=="],
@@ -49,6 +64,8 @@
"dot-prop": ["dot-prop@10.1.0", "", { "dependencies": { "type-fest": "^5.0.0" } }, "sha512-MVUtAugQMOff5RnBy2d9N31iG0lNwg1qAoAOn7pOK5wf94WIaE3My2p3uwTQuvS2AcqchkcR3bHByjaM0mmi7Q=="],
+ "emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="],
+
"env-paths": ["env-paths@3.0.0", "", {}, "sha512-dtJUTepzMW3Lm/NPxRf3wP4642UWhjL2sQxc+ym2YMj1m/H2zDNQOlezafzkHwn6sMstjHTwG6iQQsctDW/b1A=="],
"fast-deep-equal": ["fast-deep-equal@3.1.3", "", {}, "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q=="],
@@ -57,6 +74,10 @@
"get-east-asian-width": ["get-east-asian-width@1.4.0", "", {}, "sha512-QZjmEOC+IT1uk6Rx0sX22V6uHWVwbdbxf1faPqJ1QhLdGgsRGCZoyaQBm/piRdJy/D2um6hM1UP7ZEeQ4EkP+Q=="],
+ "glob": ["glob@13.0.0", "", { "dependencies": { "minimatch": "^10.1.1", "minipass": "^7.1.2", "path-scurry": "^2.0.0" } }, "sha512-tvZgpqk6fz4BaNZ66ZsRaZnbHvP/jG3uKJvAZOwEVUL4RTA5nJeeLYfyN9/VA8NX/V3IBG+hkeuGpKjvELkVhA=="],
+
+ "is-fullwidth-code-point": ["is-fullwidth-code-point@3.0.0", "", {}, "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg=="],
+
"is-interactive": ["is-interactive@2.0.0", "", {}, "sha512-qP1vozQRI+BMOPcjFzrjXuQvdak2pHNUMZoeG2eRbiSqyvbEf/wQtEOTOX1guk6E3t36RkaqiSt8A/6YElNxLQ=="],
"is-unicode-supported": ["is-unicode-supported@2.1.0", "", {}, "sha512-mE00Gnza5EEB3Ds0HfMyllZzbBrmLOX3vfWoj9A9PEnTfratQ/BcaJOuMhnkhjXvb2+FkY3VuHqtAGpTPmglFQ=="],
@@ -67,12 +88,24 @@
"log-symbols": ["log-symbols@7.0.1", "", { "dependencies": { "is-unicode-supported": "^2.0.0", "yoctocolors": "^2.1.1" } }, "sha512-ja1E3yCr9i/0hmBVaM0bfwDjnGy8I/s6PP4DFp+yP+a+mrHO4Rm7DtmnqROTUkHIkqffC84YY7AeqX6oFk0WFg=="],
+ "lru-cache": ["lru-cache@11.2.5", "", {}, "sha512-vFrFJkWtJvJnD5hg+hJvVE8Lh/TcMzKnTgCWmtBipwI5yLX/iX+5UB2tfuyODF5E7k9xEzMdYgGqaSb1c0c5Yw=="],
+
"mimic-function": ["mimic-function@5.0.1", "", {}, "sha512-VP79XUPxV2CigYP3jWwAUFSku2aKqBH7uTAapFWCBqutsbmDo96KY5o8uh6U+/YSIn5OxJnXp73beVkpqMIGhA=="],
+ "minimatch": ["minimatch@10.1.1", "", { "dependencies": { "@isaacs/brace-expansion": "^5.0.0" } }, "sha512-enIvLvRAFZYXJzkCYG5RKmPfrFArdLv+R+lbQ53BmIMLIry74bjKzX6iHAm8WYamJkhSSEabrWN5D97XnKObjQ=="],
+
+ "minipass": ["minipass@7.1.2", "", {}, "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw=="],
+
"onetime": ["onetime@7.0.0", "", { "dependencies": { "mimic-function": "^5.0.0" } }, "sha512-VXJjc87FScF88uafS3JllDgvAm+c/Slfz06lorj2uAY34rlUu0Nt+v8wreiImcrgAjjIHp1rXpTDlLOGw29WwQ=="],
"ora": ["ora@9.0.0", "", { "dependencies": { "chalk": "^5.6.2", "cli-cursor": "^5.0.0", "cli-spinners": "^3.2.0", "is-interactive": "^2.0.0", "is-unicode-supported": "^2.1.0", "log-symbols": "^7.0.1", "stdin-discarder": "^0.2.2", "string-width": "^8.1.0", "strip-ansi": "^7.1.2" } }, "sha512-m0pg2zscbYgWbqRR6ABga5c3sZdEon7bSgjnlXC64kxtxLOyjRcbbUkLj7HFyy/FTD+P2xdBWu8snGhYI0jc4A=="],
+ "p-limit": ["p-limit@7.2.0", "", { "dependencies": { "yocto-queue": "^1.2.1" } }, "sha512-ATHLtwoTNDloHRFFxFJdHnG6n2WUeFjaR8XQMFdKIv0xkXjrER8/iG9iu265jOM95zXHAfv9oTkqhrfbIzosrQ=="],
+
+ "path-scurry": ["path-scurry@2.0.1", "", { "dependencies": { "lru-cache": "^11.0.0", "minipass": "^7.1.2" } }, "sha512-oWyT4gICAu+kaA7QWk/jvCHWarMKNs6pXOGWKDTr7cw4IGcUbW+PeTfbaQiLGheFRpjo6O9J0PmyMfQPjH71oA=="],
+
+ "readdirp": ["readdirp@5.0.0", "", {}, "sha512-9u/XQ1pvrQtYyMpZe7DXKv2p5CNvyVwzUB6uhLAnQwHMSgKMBR62lc7AHljaeteeHXn11XTAaLLUVZYVZyuRBQ=="],
+
"require-from-string": ["require-from-string@2.0.2", "", {}, "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw=="],
"restore-cursor": ["restore-cursor@5.1.0", "", { "dependencies": { "onetime": "^7.0.0", "signal-exit": "^4.1.0" } }, "sha512-oMA2dcrw6u0YfxJQXm342bFKX/E4sG9rbTzO9ptUcR/e8A33cHuvStiYOwH7fszkZlZ1z/ta9AAoPk2F4qIOHA=="],
@@ -83,7 +116,7 @@
"stdin-discarder": ["stdin-discarder@0.2.2", "", {}, "sha512-UhDfHmA92YAlNnCfhmq0VeNL5bDbiZGg7sZ2IvPsXubGkiNa9EC+tUTsjBRsYUAz87btI6/1wf4XoVvQ3uRnmQ=="],
- "string-width": ["string-width@8.1.0", "", { "dependencies": { "get-east-asian-width": "^1.3.0", "strip-ansi": "^7.1.0" } }, "sha512-Kxl3KJGb/gxkaUMOjRsQ8IrXiGW75O4E3RPjFIINOVH8AMl2SQ/yWdTzWwF3FevIX9LcMAjJW+GRwAlAbTSXdg=="],
+ "string-width": ["string-width@4.2.3", "", { "dependencies": { "emoji-regex": "^8.0.0", "is-fullwidth-code-point": "^3.0.0", "strip-ansi": "^6.0.1" } }, "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g=="],
"strip-ansi": ["strip-ansi@7.1.2", "", { "dependencies": { "ansi-regex": "^6.0.1" } }, "sha512-gmBGslpoQJtgnMAvOVqGZpEz9dyoKTCzy2nfz/n8aIFhN/jCE/rCmcxabB6jOOHV+0WNnylOxaxBQPSvcWklhA=="],
@@ -105,6 +138,14 @@
"when-exit": ["when-exit@2.1.5", "", {}, "sha512-VGkKJ564kzt6Ms1dbgPP/yuIoQCrsFAnRbptpC5wOEsDaNsbCB2bnfnaA8i/vRs5tjUSEOtIuvl9/MyVsvQZCg=="],
+ "yocto-queue": ["yocto-queue@1.2.2", "", {}, "sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ=="],
+
"yoctocolors": ["yoctocolors@2.1.2", "", {}, "sha512-CzhO+pFNo8ajLM2d2IW/R93ipy99LWjtwblvC1RsoSUMZgyLbYFr221TnSNT7GjGdYui6P459mw9JH/g/zW2ug=="],
+
+ "ora/string-width": ["string-width@8.1.0", "", { "dependencies": { "get-east-asian-width": "^1.3.0", "strip-ansi": "^7.1.0" } }, "sha512-Kxl3KJGb/gxkaUMOjRsQ8IrXiGW75O4E3RPjFIINOVH8AMl2SQ/yWdTzWwF3FevIX9LcMAjJW+GRwAlAbTSXdg=="],
+
+ "string-width/strip-ansi": ["strip-ansi@6.0.1", "", { "dependencies": { "ansi-regex": "^5.0.1" } }, "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A=="],
+
+ "string-width/strip-ansi/ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="],
}
}
diff --git a/package.json b/package.json
index fa2837d..8a90469 100644
--- a/package.json
+++ b/package.json
@@ -16,13 +16,18 @@
"dependencies": {
"@google/generative-ai": "^0.24.1",
"chalk": "^5.6.2",
+ "chokidar": "^5.0.0",
+ "cli-progress": "^3.12.0",
"commander": "^14.0.2",
"conf": "^15.0.2",
+ "glob": "^13.0.0",
"ora": "^9.0.0",
+ "p-limit": "^7.2.0",
"unpdf": "^1.4.0"
},
"devDependencies": {
"@types/bun": "latest",
+ "@types/cli-progress": "^3.11.6",
"typescript": "^5"
}
}
diff --git a/src/commands/convert.ts b/src/commands/convert.ts
index 599d438..67dc142 100644
--- a/src/commands/convert.ts
+++ b/src/commands/convert.ts
@@ -3,98 +3,284 @@ import path from 'node:path';
import { Command } from 'commander';
import ora from 'ora';
import chalk from 'chalk';
+import { glob } from 'glob';
+import pLimit from 'p-limit';
+import cliProgress from 'cli-progress';
import { extractPdfText } from '../utils/pdf.js';
-import { convertToMarkdown } from '../utils/gemini.js';
+import { convertToMarkdown, convertToMarkdownStream, convertPdfWithVision } from '../utils/gemini.js';
import { getApiKey } from '../utils/config.js';
+import { isCached, setCached } from '../utils/cache.js';
+import { getPrompt, listTemplates } from '../utils/templates.js';
+import { formatOutput, getExtension, type OutputFormat } from '../utils/formats.js';
+import { extractImages } from '../utils/images.js';
+
+interface ConvertOptions { // options read by processFile and processMultipleFiles
+ mode: 'standalone' | 'ai' | 'vision'; // standalone = text extraction only; vision = convertPdfWithVision; ai = extracted text sent to convertToMarkdown / convertToMarkdownStream
+ format: OutputFormat; // handed to formatOutput and getExtension
+ output?: string; // NOTE(review): not read by the helpers visible here; presumably an explicit output path — confirm
+ apiKey?: string; // NOTE(review): the helpers visible here receive the key as a separate argument — confirm where this is consumed
+ template: string; // passed to getPrompt when no prompt is given
+ prompt?: string; // when truthy, used instead of the template
+ concurrency: number; // passed to pLimit for batch runs
+ cache: boolean; // enables the isCached skip and the setCached call after a file is written
+ stream: boolean; // with stdout=false, model output chunks are echoed to process.stdout as they arrive
+ stdout: boolean; // write the formatted result to process.stdout instead of a file
+ extractImages: boolean; // also run extractImages into a sibling "_images" directory
+}
+
+async function processFile( // Converts one PDF according to options.mode and writes or prints the formatted result
+ filePath: string,
+ options: ConvertOptions,
+ apiKey: string | null,
+ outputPath: string,
+ showSpinner: boolean = true // when false no ora spinner is created, so the succeed/fail messages below are not printed
+): Promise<{ success: boolean; skipped: boolean }> {
+ const spinner = showSpinner ? ora(`Processing ${path.basename(filePath)}...`).start() : null;
+
+ if (options.cache && outputPath && isCached(filePath, outputPath)) { // runs before the try block, so an exception from isCached propagates to the caller
+ spinner?.succeed(chalk.gray(`Skipped (cached): ${path.basename(filePath)}`));
+ return { success: true, skipped: true };
+ }
-async function processFile(filePath: string, mode: 'standalone' | 'ai', apiKey: string | null, output?: string) {
- const spinner = ora(`Processing ${path.basename(filePath)}...`).start();
-
try {
- const text = await extractPdfText(filePath);
-
- let result = text;
- if (mode === 'ai') {
- if (!apiKey) {
- spinner.fail('API Key is required for AI mode. Use "pdf2md config --key " or pass --api-key.');
- return false;
+ let result: string;
+ const prompt = options.prompt || getPrompt(options.template); // a truthy options.prompt wins; otherwise the template is resolved through getPrompt
+
+ if (options.mode === 'standalone') { // raw extracted text, no API key needed
+ result = await extractPdfText(filePath);
+ } else if (options.mode === 'vision') {
+ if (!apiKey) throw new Error('API Key required for vision mode');
+
+ if (options.stream && !options.stdout) { // live chunk echo is disabled when the final output itself goes to stdout
+ spinner?.stop(); // stop the spinner before chunks are written
+ process.stdout.write(chalk.blue(`\n--- ${path.basename(filePath)} ---\n`));
+ result = await convertPdfWithVision(filePath, apiKey, {
+ prompt,
+ stream: true,
+ onChunk: (chunk: string) => process.stdout.write(chunk),
+ });
+ process.stdout.write('\n');
+ } else {
+ if (spinner) spinner.text = `Vision processing ${path.basename(filePath)}...`;
+ result = await convertPdfWithVision(filePath, apiKey, { prompt });
+ }
+ } else { // remaining mode value ('ai'): extract text first, then send it to Gemini
+ if (!apiKey) throw new Error('API Key required for AI mode');
+
+ const text = await extractPdfText(filePath);
+
+ if (options.stream && !options.stdout) {
+ spinner?.stop();
+ process.stdout.write(chalk.blue(`\n--- ${path.basename(filePath)} ---\n`));
+ result = await convertToMarkdownStream(text, apiKey, (chunk: string) => process.stdout.write(chunk), { prompt });
+ process.stdout.write('\n');
+ } else {
+ if (spinner) spinner.text = `AI processing ${path.basename(filePath)}...`;
+ result = await convertToMarkdown(text, apiKey, { prompt });
+ }
+ }
+
+ if (options.extractImages && outputPath) {
+ const imagesDir = outputPath.replace(/\.[^.]+$/, '_images'); // swaps the output extension for "_images", e.g. report.md -> report_images
+ const extracted = await extractImages(filePath, imagesDir);
+ if (extracted.length > 0 && spinner) {
+ spinner.text = `Extracted ${extracted.length} images`;
}
- spinner.text = `Generative AI processing for ${path.basename(filePath)}...`;
- try {
- result = await convertToMarkdown(text, apiKey);
- } catch (e: any) {
- spinner.fail(`AI processing failed: ${e.message}`);
- return false;
+ }
+
+ const output = formatOutput(result, options.format, {
+ source: filePath,
+ processedAt: new Date().toISOString(),
+ });
+
+ if (options.stdout) { // NOTE(review): the spinner is not stopped on this path — confirm callers pass showSpinner=false together with stdout
+ process.stdout.write(output);
+ } else {
+ fs.mkdirSync(path.dirname(outputPath), { recursive: true });
+ fs.writeFileSync(outputPath, output);
+
+ if (options.cache) {
+ setCached(filePath, outputPath); // presumably lets the isCached check above skip this file on later runs — verify in utils/cache
}
+
+ spinner?.succeed(`Saved: ${outputPath}`);
}
- const outputPath = output || filePath.replace(/\.pdf$/i, '.md');
- fs.mkdirSync(path.dirname(outputPath), { recursive: true });
- fs.writeFileSync(outputPath, result);
-
- spinner.succeed(`Saved to ${outputPath}`);
- return true;
+ return { success: true, skipped: false };
} catch (error: any) {
- spinner.fail(`Error processing ${filePath}: ${error.message}`);
- return false;
+ spinner?.fail(`Error: ${path.basename(filePath)} - ${error.message}`); // with showSpinner=false the message is dropped; only the result object signals failure
+ return { success: false, skipped: false };
}
}
-async function processDirectory(dirPath: string, mode: 'standalone' | 'ai', apiKey: string | null) {
- const files = fs.readdirSync(dirPath, { recursive: true }) as string[];
- const pdfFiles = files.filter(f => f.toLowerCase().endsWith('.pdf'));
-
- if (pdfFiles.length === 0) {
- console.log(chalk.yellow('No PDF files found in directory.'));
- return;
+/**
+ * Converts a batch of PDFs concurrently behind one shared progress bar.
+ *
+ * Each result is written next to its source file, with the `.pdf` suffix
+ * swapped for the extension of `options.format`. Per-file spinners are turned
+ * off (they would fight with the progress bar), which also silences per-file
+ * error messages, so the files that failed are listed after the run.
+ *
+ * @param files - Paths of the PDF files to convert.
+ * @param options - Conversion options; `concurrency` caps the files in flight.
+ * @param apiKey - Gemini API key, or `null` when none is available.
+ */
+async function processMultipleFiles(
+  files: string[],
+  options: ConvertOptions,
+  apiKey: string | null
+): Promise<void> {
+  const limit = pLimit(options.concurrency);
+  // The extension depends only on the format, so resolve it once per batch.
+  const ext = getExtension(options.format);
+
+  const progressBar = new cliProgress.SingleBar({
+    format: '{bar} {percentage}% | ETA: {eta}s | {value}/{total} files | {status}',
+    barCompleteChar: '█',
+    barIncompleteChar: '░',
+    hideCursor: true,
+  }, cliProgress.Presets.shades_classic);
+
+  console.log(chalk.blue(`\nProcessing ${files.length} PDF files (concurrency: ${options.concurrency})\n`));
+  progressBar.start(files.length, 0, { status: 'Starting...' });
+
+  let completed = 0;
+  let succeeded = 0;
+  let skipped = 0;
+  const failed: string[] = [];
+
+  try {
+    await Promise.all(
+      files.map((file) =>
+        limit(async () => {
+          const outputPath = file.replace(/\.pdf$/i, ext);
+
+          progressBar.update(completed, { status: path.basename(file) });
+
+          const result = await processFile(file, options, apiKey, outputPath, false);
+
+          completed++;
+          if (result.success) {
+            succeeded++;
+          } else {
+            failed.push(file);
+          }
+          if (result.skipped) skipped++;
+
+          progressBar.update(completed, { status: `Done: ${path.basename(file)}` });
+
+          return result;
+        })
+      )
+    );
+  } finally {
+    // The bar hides the cursor while active; always stop it, even if a task rejects.
+    progressBar.stop();
+  }
+
+  console.log(chalk.green(`\n✓ Completed: ${succeeded}/${files.length} files processed`));
+  if (skipped > 0) {
+    console.log(chalk.gray(` ⏭️ ${skipped} files skipped (cached)`));
+  }
+  if (failed.length > 0) {
+    console.log(chalk.red(` ✗ ${failed.length} files failed:`));
+    for (const file of failed) {
+      console.log(chalk.red(`   - ${file}`));
+    }
+  }
+}
- console.log(chalk.blue(`Found ${pdfFiles.length} PDF files in ${dirPath}`));
-
- let successCount = 0;
- for (const file of pdfFiles) {
- const fullPath = path.join(dirPath, file);
- // For directory processing, output is always side-by-side
- if (await processFile(fullPath, mode, apiKey)) {
- successCount++;
- }
+async function readStdin(): Promise { // Reads process.stdin until it ends and resolves to all chunks concatenated; NOTE(review): the type argument looks stripped, presumably Promise<Buffer> — confirm
+ const chunks: Buffer[] = [];
+ for await (const chunk of process.stdin) {
+ chunks.push(chunk as Buffer); // assumes no encoding was set on stdin, so chunks arrive as Buffers — TODO confirm
}
-
- console.log(chalk.green(`\nCompleted! ${successCount}/${pdfFiles.length} files processed.`));
+ return Buffer.concat(chunks);
}
export const convertCommand = new Command('convert')
.description('Convert PDF to Markdown')
- .argument('[input]', 'Input file or directory')
- .option('-m, --mode ', 'Mode: standalone (text extract) or ai (Gemini)', 'standalone')
- .option('-o, --output
')
+ .replace(/^(.+)$/gm, (match) => {
+ if (match.startsWith('<')) return match;
+ return `
${match}
`;
+ });
+}
diff --git a/src/utils/gemini.ts b/src/utils/gemini.ts
index bfba349..7985e8a 100644
--- a/src/utils/gemini.ts
+++ b/src/utils/gemini.ts
@@ -1,22 +1,119 @@
-import { GoogleGenerativeAI } from '@google/generative-ai';
+import fs from 'node:fs';
+import { GoogleGenerativeAI, type GenerateContentStreamResult } from '@google/generative-ai';
+import { getDocumentProxy, renderPageAsImage } from 'unpdf';
-export async function convertToMarkdown(text: string, apiKey: string, modelName: string = 'gemini-3-flash-preview'): Promise {
- const genAI = new GoogleGenerativeAI(apiKey);
- const model = genAI.getGenerativeModel({ model: modelName });
-
- const prompt = `Convert the following raw PDF text into well-formatted markdown.
+const DEFAULT_PROMPT = `Convert the following into well-formatted markdown.
Rules:
- Use appropriate heading levels
- Format lists properly
- Preserve code blocks if present
- Add proper spacing
- Make it readable and well-structured
-- Do not output any preamble or explanation, just the markdown.
+- Do not output any preamble or explanation, just the markdown.`; // instruction text used by the convert functions in this file when the caller passes no options.prompt
+
+export async function convertToMarkdown( // Sends the prompt followed by the raw text to a Gemini model in one non-streaming call and resolves to the response text
+ text: string,
+ apiKey: string,
+ options: {
+ modelName?: string; // defaults to 'gemini-2.0-flash'
+ prompt?: string; // defaults to DEFAULT_PROMPT
+ } = {}
+): Promise { // NOTE(review): the type argument looks stripped; presumably Promise<string>, since response.text() is returned — confirm
+ const { modelName = 'gemini-2.0-flash', prompt = DEFAULT_PROMPT } = options;
+
+ const genAI = new GoogleGenerativeAI(apiKey);
+ const model = genAI.getGenerativeModel({ model: modelName });
+
+ const fullPrompt = `${prompt}
Raw text:
${text}`;
- const result = await model.generateContent(prompt);
+ const result = await model.generateContent(fullPrompt);
const response = await result.response;
return response.text();
}
+
+/**
+ * Streaming counterpart of `convertToMarkdown`.
+ *
+ * Sends the prompt followed by the raw text to a Gemini model and forwards each
+ * piece of generated text to `onChunk` as soon as it arrives.
+ *
+ * @param text - Raw text extracted from the PDF.
+ * @param apiKey - Gemini API key.
+ * @param onChunk - Called once per streamed chunk, in arrival order.
+ * @param options - `modelName` defaults to `gemini-2.0-flash`; `prompt`
+ *   defaults to `DEFAULT_PROMPT`.
+ * @returns The concatenation of every chunk passed to `onChunk`.
+ */
+export async function convertToMarkdownStream(
+  text: string,
+  apiKey: string,
+  onChunk: (chunk: string) => void,
+  options: {
+    modelName?: string;
+    prompt?: string;
+  } = {}
+): Promise<string> {
+  const { modelName = 'gemini-2.0-flash', prompt = DEFAULT_PROMPT } = options;
+
+  const genAI = new GoogleGenerativeAI(apiKey);
+  const model = genAI.getGenerativeModel({ model: modelName });
+
+  const fullPrompt = `${prompt}
+
+Raw text:
+${text}`;
+
+  const result = await model.generateContentStream(fullPrompt);
+
+  // Accumulate while forwarding so the caller also gets the complete document.
+  let fullText = '';
+  for await (const chunk of result.stream) {
+    const chunkText = chunk.text();
+    fullText += chunkText;
+    onChunk(chunkText);
+  }
+
+  return fullText;
+}
+
+/**
+ * Converts a PDF to markdown by sending every page to Gemini as an image.
+ *
+ * Each page is rendered at scale 2, base64-encoded and attached inline with the
+ * `image/png` MIME type. All pages travel in a single request, so all rendered
+ * pages are held in memory at the same time.
+ *
+ * @param filePath - Path of the PDF; it is read synchronously.
+ * @param apiKey - Gemini API key.
+ * @param options - `modelName` defaults to `gemini-2.0-flash` and `prompt` to
+ *   `DEFAULT_PROMPT`. Streaming is used only when `stream` is true AND
+ *   `onChunk` is supplied; otherwise one non-streaming call is made.
+ * @returns The complete generated text, in both streaming and non-streaming mode.
+ */
+export async function convertPdfWithVision(
+  filePath: string,
+  apiKey: string,
+  options: {
+    modelName?: string;
+    prompt?: string;
+    stream?: boolean;
+    onChunk?: (chunk: string) => void;
+  } = {}
+): Promise<string> {
+  const { modelName = 'gemini-2.0-flash', prompt = DEFAULT_PROMPT, stream = false, onChunk } = options;
+
+  const genAI = new GoogleGenerativeAI(apiKey);
+  const model = genAI.getGenerativeModel({ model: modelName });
+
+  // Load PDF and render pages as images
+  const dataBuffer = fs.readFileSync(filePath);
+  const pdf = await getDocumentProxy(new Uint8Array(dataBuffer));
+
+  const imageParts: { inlineData: { data: string; mimeType: string } }[] = [];
+
+  // Page numbers are 1-based.
+  for (let i = 1; i <= pdf.numPages; i++) {
+    const imageResult = await renderPageAsImage(pdf, i, { scale: 2 });
+    // renderPageAsImage returns an ArrayBuffer
+    const base64 = Buffer.from(imageResult).toString('base64');
+    imageParts.push({
+      inlineData: {
+        data: base64,
+        mimeType: 'image/png',
+      },
+    });
+  }
+
+  const fullPrompt = `${prompt}
+
+Convert this PDF document (${pdf.numPages} pages shown as images) to markdown.`;
+
+  // Same payload for both call styles: the instruction text first, then the pages in order.
+  const requestParts = [fullPrompt, ...imageParts];
+
+  if (stream && onChunk) {
+    const result = await model.generateContentStream(requestParts);
+    let fullText = '';
+    for await (const chunk of result.stream) {
+      const chunkText = chunk.text();
+      fullText += chunkText;
+      onChunk(chunkText);
+    }
+    return fullText;
+  }
+
+  const result = await model.generateContent(requestParts);
+  const response = await result.response;
+  return response.text();
+}
diff --git a/src/utils/images.ts b/src/utils/images.ts
new file mode 100644
index 0000000..2f01ff0
--- /dev/null
+++ b/src/utils/images.ts
@@ -0,0 +1,72 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import { getDocumentProxy } from 'unpdf';
+
+export interface ExtractedImage { // NOTE(review): not referenced by extractImages or hasImages below — confirm it is still needed
+ name: string; // presumably the image's identifier or file name — verify against callers
+ data: Uint8Array; // presumably the image bytes — verify
+ width: number; // presumably pixel width — verify
+ height: number; // presumably pixel height — verify
+}
+
+/**
+ * Dumps the raw `data` of every image object painted in a PDF into `outputDir`.
+ *
+ * Files are named `image-<n>.raw`, numbered from 1 across the whole document.
+ * The bytes are written exactly as found on the image object; no PNG (or any
+ * other) encoding is performed, so callers must convert them themselves.
+ * `outputDir` is created recursively up front, even when the PDF turns out to
+ * contain no images. Images that cannot be fetched or written are skipped.
+ *
+ * @param filePath - Path of the PDF to scan; it is read synchronously.
+ * @param outputDir - Directory that receives the `.raw` files.
+ * @returns Paths of the files written, in discovery order.
+ */
+export async function extractImages(filePath: string, outputDir: string): Promise<string[]> {
+  // OPS.paintImageXObject = 85
+  const PAINT_IMAGE_XOBJECT = 85;
+
+  const dataBuffer = fs.readFileSync(filePath);
+  const pdf = await getDocumentProxy(new Uint8Array(dataBuffer));
+
+  const savedPaths: string[] = [];
+  fs.mkdirSync(outputDir, { recursive: true });
+
+  let imageCount = 0;
+
+  // Page numbers are 1-based.
+  for (let i = 1; i <= pdf.numPages; i++) {
+    const page = await pdf.getPage(i);
+    const ops = await page.getOperatorList();
+
+    // fnArray and argsArray are parallel: argsArray[j] holds the arguments of operator fnArray[j].
+    for (let j = 0; j < ops.fnArray.length; j++) {
+      if (ops.fnArray[j] !== PAINT_IMAGE_XOBJECT) continue;
+
+      const imgName = ops.argsArray[j][0];
+      try {
+        const img = await page.objs.get(imgName);
+        if (img && img.data) {
+          imageCount++;
+          // For simplicity, save as raw data - users can convert
+          const rawPath = path.join(outputDir, `image-${imageCount}.raw`);
+          fs.writeFileSync(rawPath, Buffer.from(img.data));
+          savedPaths.push(rawPath);
+        }
+      } catch {
+        // Skip images that can't be extracted
+      }
+    }
+  }
+
+  return savedPaths;
+}
+
+/**
+ * Reports whether any page of a PDF paints an image object.
+ *
+ * Scans pages in order and stops at the first page whose operator list
+ * contains the paint-image operator.
+ *
+ * @param filePath - Path of the PDF to scan; it is read synchronously.
+ * @returns `true` if an image operator is found; `false` if none is found or
+ *   if the file cannot be read or parsed (errors are deliberately swallowed).
+ */
+export async function hasImages(filePath: string): Promise<boolean> {
+  // Same operator code that extractImages labels OPS.paintImageXObject.
+  const PAINT_IMAGE_XOBJECT = 85;
+
+  try {
+    const dataBuffer = fs.readFileSync(filePath);
+    const pdf = await getDocumentProxy(new Uint8Array(dataBuffer));
+
+    // Page numbers are 1-based.
+    for (let i = 1; i <= pdf.numPages; i++) {
+      const page = await pdf.getPage(i);
+      const ops = await page.getOperatorList();
+
+      if (ops.fnArray.includes(PAINT_IMAGE_XOBJECT)) {
+        return true;
+      }
+    }
+    return false;
+  } catch {
+    // Best-effort probe: an unreadable or unparsable file counts as "no images".
+    return false;
+  }
+}
diff --git a/src/utils/templates.ts b/src/utils/templates.ts
new file mode 100644
index 0000000..5048a43
--- /dev/null
+++ b/src/utils/templates.ts
@@ -0,0 +1,55 @@
+/**
+ * Built-in prompt templates, keyed by template name.
+ *
+ * Each value is the instruction text for one kind of conversion; `getPrompt`
+ * looks names up here and `listTemplates` exposes the keys.
+ */
+export const TEMPLATES: Record<string, string> = {
+  default: `Convert the following raw PDF text into well-formatted markdown.
+Rules:
+- Use appropriate heading levels
+- Format lists properly
+- Preserve code blocks if present
+- Add proper spacing
+- Make it readable and well-structured
+- Do not output any preamble or explanation, just the markdown.`,
+
+  invoice: `Extract invoice data from the following PDF text.
+Format as markdown with:
+- Invoice number, date, due date as headers
+- Vendor and customer info in sections
+- Line items as a markdown table
+- Totals clearly formatted
+- Do not output any preamble, just the structured markdown.`,
+
+  table: `Extract all tables from the following PDF text.
+Rules:
+- Convert each table to proper markdown table format
+- Preserve column headers
+- Maintain data alignment
+- If no tables found, state "No tables found"
+- Do not output any preamble or explanation.`,
+
+  summary: `Summarize the following PDF text into a concise markdown document.
+Rules:
+- Create a brief executive summary
+- List key points as bullet points
+- Keep it under 500 words
+- Use appropriate headings
+- Do not output any preamble.`,
+
+  code: `Extract and format code from the following PDF text.
+Rules:
+- Identify code blocks and wrap in appropriate markdown code fences
+- Try to detect the programming language
+- Preserve indentation
+- Add brief comments for context if helpful
+- Do not output any preamble.`,
+};
+
+/**
+ * Resolves a template name to its prompt text.
+ *
+ * @param templateOrCustom - Either the name of a built-in entry in `TEMPLATES`
+ *   or free-form prompt text.
+ * @returns The built-in prompt when the name matches one; otherwise the input
+ *   itself, treated as a custom prompt.
+ */
+export function getPrompt(templateOrCustom: string): string {
+  // Own-property check: a bare `TEMPLATES[name]` lookup also sees members
+  // inherited from Object.prototype, so "constructor" or "toString" would be
+  // treated as a template and a function returned instead of a string.
+  if (Object.prototype.hasOwnProperty.call(TEMPLATES, templateOrCustom)) {
+    return TEMPLATES[templateOrCustom];
+  }
+  // Otherwise treat as custom prompt
+  return templateOrCustom;
+}
+
+/** Returns the names of the built-in templates, i.e. the keys of `TEMPLATES`. */
+export function listTemplates(): string[] {
+  const names = Object.keys(TEMPLATES);
+  return names;
+}