Skip to content

Commit a1e2a9e

Browse files
committed
Updated
1 parent 603c217 commit a1e2a9e

1 file changed

Lines changed: 63 additions & 0 deletions

File tree

mcp/index.js

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,44 @@ async function extractTablesHandler({
471471
return result;
472472
}
473473

474+
/**
 * Convert a .docx file to scribe JSON and write the result to disk.
 *
 * Validation problems are reported by returning an `{ error }` object rather
 * than by throwing. Failures raised by `ensureInit`, `scribe.importFiles`,
 * `scribe.exportData` or the file write are not caught here and propagate to
 * the caller.
 *
 * Side effects: updates the module-level `currentFile` / `currentDataFile`
 * after a successful import, and temporarily overrides
 * `scribe.opt.docxLineSplitMode` (restored before returning).
 *
 * @param {object} [args]
 * @param {string} args.file - Path to the .docx file to convert.
 * @param {string} [args.outputPath] - Destination for the JSON output.
 *   Defaults to the input path with `.docx` replaced by `.scribe.json`.
 * @param {string} [args.lineSplitMode] - When truthy, used as
 *   `scribe.opt.docxLineSplitMode` for the duration of this conversion.
 * @returns {Promise<object>} `{ outputPath, pageCount, lineSplitMode }` on
 *   success, or `{ error }` when the arguments fail validation.
 */
async function convertDocxToJson({ file, outputPath, lineSplitMode } = {}) {
  // `resolve()` throws a TypeError on non-string input; report that in the
  // same `{ error }` shape as the other validation failures below.
  if (typeof file !== 'string') {
    return { error: 'Missing or invalid "file" parameter: expected a path to a .docx file' };
  }

  const filePath = resolve(file);
  if (!fs.existsSync(filePath)) {
    return { error: `File not found: ${filePath}` };
  }
  if (extname(filePath).toLowerCase() !== '.docx') {
    return { error: `File is not a .docx file: ${filePath}` };
  }

  const outPath = outputPath
    ? resolve(outputPath)
    : filePath.replace(/\.docx$/i, '.scribe.json');

  // Writing the JSON export over the source document would destroy the input.
  if (outPath === filePath) {
    return { error: `Output path must differ from the input file: ${filePath}` };
  }

  await ensureInit();

  // Remember the current option so the override below stays scoped to this call.
  const prevLineSplitMode = scribe.opt.docxLineSplitMode;
  if (lineSplitMode) {
    scribe.opt.docxLineSplitMode = lineSplitMode;
  }

  try {
    await scribe.importFiles([filePath]);
    currentFile = filePath;
    currentDataFile = null;

    const scribeJson = await scribe.exportData('scribe');
    fs.writeFileSync(outPath, scribeJson);

    return {
      outputPath: outPath,
      pageCount: scribe.inputData.pageCount,
      // Read before the `finally` block restores the previous value, so this
      // reports the mode that was in effect for the import.
      lineSplitMode: scribe.opt.docxLineSplitMode,
    };
  } finally {
    scribe.opt.docxLineSplitMode = prevLineSplitMode;
  }
}
511+
474512
// --- MCP Protocol (JSON-RPC over stdio) ---
475513

476514
const TOOLS = [
@@ -776,6 +814,30 @@ const TOOLS = [
776814
required: [],
777815
},
778816
},
817+
{
818+
name: 'convert_docx_to_json',
819+
description: 'Convert a .docx file to .scribe.json format. '
820+
+ 'Parses the docx document structure and exports it as a scribe.json file containing page/line/word data with styling and font information.',
821+
inputSchema: {
822+
type: 'object',
823+
properties: {
824+
file: {
825+
type: 'string',
826+
description: 'Path to the .docx file to convert.',
827+
},
828+
outputPath: {
829+
type: 'string',
830+
description: 'Path for the output .scribe.json file. Default: same directory and basename as input with .scribe.json extension.',
831+
},
832+
lineSplitMode: {
833+
type: 'string',
834+
enum: ['width', 'sentence'],
835+
description: 'How to split text into lines. "width" wraps at page width (default). "sentence" splits at sentence boundaries.',
836+
},
837+
},
838+
required: ['file'],
839+
},
840+
},
779841
];
780842

781843
const toolHandlers = {
@@ -789,6 +851,7 @@ const toolHandlers = {
789851
merge_pdfs: (args) => enqueue(() => mergePdfs(args)),
790852
define_tables: (args) => enqueue(() => defineTablesHandler(args)),
791853
extract_tables: (args) => enqueue(() => extractTablesHandler(args)),
854+
convert_docx_to_json: (args) => enqueue(() => convertDocxToJson(args)),
792855
};
793856

794857
// JSON-RPC message handling

0 commit comments

Comments
 (0)