Skip to content

Commit 0efa8f1

Browse files
committed
add mistral_ocr, init can expand @filename in paths
1 parent df12a5b commit 0efa8f1

7 files changed

Lines changed: 128 additions & 8 deletions

File tree

README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Basic toolset for [Tune](https://github.com/iovdin/tune).
1717
- [search_tools](#search_tools) find appropriate tools for a task
1818
- [osa](#osa) manage reminders/notes/calendar (AppleScript/macOS)
1919
- [jina_r](#jina_r) fetch webpage content
20+
- [mistral_ocr](#mistral_ocr) extract text from documents and images via OCR
2021
- [websearch](#websearch) search the web with web-enabled llms
2122
- [list](#list) keep list of tasks todo (loops for LLM)
2223
- [sqlite](#sqlite) execute sqlite queries
@@ -291,6 +292,40 @@ Tune is a versatile toolkit designed for developers and users to effectively int
291292
<cut for brevity>
292293
```
293294

295+
### `mistral_ocr`
296+
Extract text from documents and images using the [Mistral OCR API](https://mistral.ai/). Requires a `MISTRAL_KEY` set in `.env`.
297+
298+
Supports documents: `.pdf`, `.docx`, `.pptx`, `.txt`, `.epub`, `.xml`, `.rtf`, `.odt`, `.bib`, `.fb2`, `.ipynb`, `.tex`, `.opml`, `.1`, `.man`
299+
300+
Supports images: `.jpg`, `.jpeg`, `.png`, `.avif`, `.tiff`, `.gif`, `.heic`, `.heif`, `.bmp`, `.webp`
301+
302+
```chat
303+
user: @mistral_ocr
304+
extract text from invoice.pdf
305+
306+
tool_call: mistral_ocr {"filename":"invoice.pdf"}
307+
tool_result:
308+
# Invoice #1042
309+
310+
**Date:** 2024-04-01
311+
**Bill To:** Acme Corp
312+
313+
| Description | Amount |
314+
|--------------------|---------|
315+
| Consulting (10h) | $1500 |
316+
| Hosting (1 month) | $50 |
317+
| **Total** | $1550 |
318+
319+
user:
320+
what does this screenshot say?
321+
322+
tool_call: mistral_ocr {"filename":"screenshot.png"}
323+
tool_result:
324+
## System Alert
325+
326+
Your disk usage has exceeded 90%. Please free up space to avoid performance issues.
327+
```
328+
294329
### `websearch`
295330
Search the web with web-enabled LLMs
296331
Supports search with `perplexity/sonar`, `perplexity/sonar-pro`, `gpt-4o-search-preview`, `gpt-4o-mini-search-preview` models via the `model` parameter (defaults to `perplexity/sonar`).

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "tune-basic-toolset",
3-
"version": "0.1.21",
3+
"version": "0.1.22",
44
"description": "Basic toolset for tune",
55
"main": "src/index.js",
66
"files": [

src/init.proc.js

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,7 @@ module.exports = async function init(node, args, ctx) {
66

77
// include file
88
if (content.indexOf("@") === 0) {
9-
return {
10-
type: "text",
11-
read: async () => this.read(content.replace(/^@{1,2}/, ""))
12-
}
9+
return ctx.resolve(content.replace(/^@{1,2}/, ""))
1310
}
1411

1512
return {

src/jina_r.schema.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,5 @@
1818
},
1919
"required": ["url"]
2020
},
21-
"$escape_output": false
21+
"$escape_output": true
2222
}

src/mistral_ocr.schema.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"description": "Perform OCR on a local file using the Mistral OCR API, returns extracted text as markdown",
3+
"parameters": {
4+
"type": "object",
5+
"properties": {
6+
"filename": {
7+
"type": "string",
8+
"description": "Path to the file to perform OCR on"
9+
}
10+
},
11+
"required": ["filename"]
12+
},
13+
"$escape_output": true
14+
}

src/mistral_ocr.tool.js

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
const path = require('path');
2+
3+
const MIME_TYPES = {
4+
// Documents → document_url
5+
'.pdf': { kind: 'document', mime: 'application/pdf' },
6+
'.docx': { kind: 'document', mime: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' },
7+
'.pptx': { kind: 'document', mime: 'application/vnd.openxmlformats-officedocument.presentationml.presentation' },
8+
'.txt': { kind: 'document', mime: 'text/plain' },
9+
'.epub': { kind: 'document', mime: 'application/epub+zip' },
10+
'.xml': { kind: 'document', mime: 'application/xml' },
11+
'.rtf': { kind: 'document', mime: 'application/rtf' },
12+
'.odt': { kind: 'document', mime: 'application/vnd.oasis.opendocument.text' },
13+
'.bib': { kind: 'document', mime: 'application/x-bibtex' },
14+
'.fb2': { kind: 'document', mime: 'application/x-fictionbook+xml' },
15+
'.ipynb': { kind: 'document', mime: 'application/x-ipynb+json' },
16+
'.tex': { kind: 'document', mime: 'application/x-tex' },
17+
'.opml': { kind: 'document', mime: 'text/x-opml' },
18+
'.1': { kind: 'document', mime: 'application/x-troff-man' },
19+
'.man': { kind: 'document', mime: 'application/x-troff-man' },
20+
// Images → image_url
21+
'.jpg': { kind: 'image', mime: 'image/jpeg' },
22+
'.jpeg': { kind: 'image', mime: 'image/jpeg' },
23+
'.png': { kind: 'image', mime: 'image/png' },
24+
'.avif': { kind: 'image', mime: 'image/avif' },
25+
'.tiff': { kind: 'image', mime: 'image/tiff' },
26+
'.gif': { kind: 'image', mime: 'image/gif' },
27+
'.heic': { kind: 'image', mime: 'image/heic' },
28+
'.heif': { kind: 'image', mime: 'image/heif' },
29+
'.bmp': { kind: 'image', mime: 'image/bmp' },
30+
'.webp': { kind: 'image', mime: 'image/webp' },
31+
};
32+
33+
module.exports = async function mistralOcr({ filename }, ctx) {
34+
const apiKey = await ctx.read("MISTRAL_KEY");
35+
if (!apiKey) {
36+
throw new Error("MISTRAL_KEY is not set");
37+
}
38+
39+
const ext = path.extname(filename).toLowerCase();
40+
const typeInfo = MIME_TYPES[ext];
41+
if (!typeInfo) {
42+
throw new Error(`Unsupported file extension: ${ext}`);
43+
}
44+
45+
const buf = await ctx.read(filename);
46+
const base64 = buf.toString('base64');
47+
const dataUrl = `data:${typeInfo.mime};base64,${base64}`;
48+
49+
const document = typeInfo.kind === 'image'
50+
? { type: 'image_url', image_url: dataUrl }
51+
: { type: 'document_url', document_url: dataUrl };
52+
53+
const response = await fetch("https://api.mistral.ai/v1/ocr", {
54+
method: "POST",
55+
headers: {
56+
"Content-Type": "application/json",
57+
"Authorization": `Bearer ${apiKey}`,
58+
},
59+
body: JSON.stringify({
60+
model: "mistral-ocr-latest",
61+
document,
62+
include_image_base64: true,
63+
}),
64+
});
65+
66+
if (!response.ok) {
67+
const errorText = await response.text();
68+
throw new Error(`Mistral OCR error: ${response.status} ${response.statusText} - ${errorText}`);
69+
}
70+
71+
const result = await response.json();
72+
73+
return result.pages.map(page => page.markdown).join("\n\n");
74+
};

src/sh.schema.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@
55
"properties": {
66
"text": {
77
"type": "string",
8-
"description": "The shell command to execute"
8+
"description": "The shell command to execute. Always prepend the input with a comment explaining the command, so that a user who is not familiar with the shell can understand it and estimate how dangerous it is"
99
},
1010
"host": {
1111
"type": "string",
12-
"description": "remote host like user@host.com to execute the shell comand on (uses ssh)"
12+
"description": "remote host like user@host.com to execute the shell command on (uses ssh), default is empty (local shell)"
1313
}
1414
},
1515
"required": ["text"]

0 commit comments

Comments
 (0)