@@ -471,6 +471,44 @@ async function extractTablesHandler({
471471 return result ;
472472}
473473
/**
 * Convert a .docx file to scribe's JSON export and write it to disk.
 *
 * Argument problems (missing `file`, unsupported `lineSplitMode`, file not
 * found, wrong extension) are reported as `{ error }` results rather than
 * thrown. Failures raised while importing, exporting, or writing the output
 * propagate to the caller.
 *
 * Side effects: reassigns the module-level `currentFile` / `currentDataFile`
 * after a successful import, and temporarily overrides
 * `scribe.opt.docxLineSplitMode` (restored in `finally`).
 *
 * @param {object} [args]
 * @param {string} args.file - Path to the .docx file to convert.
 * @param {string} [args.outputPath] - Destination path. Defaults to the input
 *   path with its `.docx` extension replaced by `.scribe.json`.
 * @param {string} [args.lineSplitMode] - `'width'` or `'sentence'`. When
 *   omitted (or falsy) the currently configured mode is left untouched.
 * @returns {Promise<{error: string} | {outputPath: string, pageCount: *, lineSplitMode: *}>}
 */
async function convertDocxToJson({ file, outputPath, lineSplitMode } = {}) {
  // Guard before `resolve()`, which throws a TypeError on non-string input.
  if (typeof file !== 'string' || file === '') {
    return { error: 'Missing required argument: file' };
  }

  // Mirrors the enum advertised in the tool's input schema; the handler must
  // not rely on the client having validated it.
  const allowedLineSplitModes = ['width', 'sentence'];
  if (lineSplitMode && !allowedLineSplitModes.includes(lineSplitMode)) {
    return {
      error: `Invalid lineSplitMode: ${String(lineSplitMode)}. Expected one of: ${allowedLineSplitModes.join(', ')}`,
    };
  }

  const filePath = resolve(file);
  if (!fs.existsSync(filePath)) {
    return { error: `File not found: ${filePath}` };
  }
  if (extname(filePath).toLowerCase() !== '.docx') {
    return { error: `File is not a .docx file: ${filePath}` };
  }

  const outPath = outputPath
    ? resolve(outputPath)
    : filePath.replace(/\.docx$/i, '.scribe.json');

  await ensureInit();

  // The option lives on shared state, so remember the previous value and
  // restore it no matter how the conversion ends.
  const prevLineSplitMode = scribe.opt.docxLineSplitMode;
  if (lineSplitMode) {
    scribe.opt.docxLineSplitMode = lineSplitMode;
  }

  try {
    await scribe.importFiles([filePath]);
    currentFile = filePath;
    currentDataFile = null;

    const scribeJson = await scribe.exportData('scribe');
    fs.writeFileSync(outPath, scribeJson);

    // Evaluated before `finally` runs, so this reports the mode actually used.
    return {
      outputPath: outPath,
      pageCount: scribe.inputData.pageCount,
      lineSplitMode: scribe.opt.docxLineSplitMode,
    };
  } finally {
    scribe.opt.docxLineSplitMode = prevLineSplitMode;
  }
}
511+
474512// --- MCP Protocol (JSON-RPC over stdio) ---
475513
476514const TOOLS = [
@@ -776,6 +814,30 @@ const TOOLS = [
776814 required : [ ] ,
777815 } ,
778816 } ,
817+ {
818+ name : 'convert_docx_to_json' ,
819+ description : 'Convert a .docx file to .scribe.json format. '
820+ + 'Parses the docx document structure and exports it as a scribe.json file containing page/line/word data with styling and font information.' ,
821+ inputSchema : {
822+ type : 'object' ,
823+ properties : {
824+ file : {
825+ type : 'string' ,
826+ description : 'Path to the .docx file to convert.' ,
827+ } ,
828+ outputPath : {
829+ type : 'string' ,
830+ description : 'Path for the output .scribe.json file. Default: same directory and basename as input with .scribe.json extension.' ,
831+ } ,
832+ lineSplitMode : {
833+ type : 'string' ,
834+ enum : [ 'width' , 'sentence' ] ,
835+ description : 'How to split text into lines. "width" wraps at page width (default). "sentence" splits at sentence boundaries.' ,
836+ } ,
837+ } ,
838+ required : [ 'file' ] ,
839+ } ,
840+ } ,
779841] ;
780842
781843const toolHandlers = {
@@ -789,6 +851,7 @@ const toolHandlers = {
789851 merge_pdfs : ( args ) => enqueue ( ( ) => mergePdfs ( args ) ) ,
790852 define_tables : ( args ) => enqueue ( ( ) => defineTablesHandler ( args ) ) ,
791853 extract_tables : ( args ) => enqueue ( ( ) => extractTablesHandler ( args ) ) ,
854+ convert_docx_to_json : ( args ) => enqueue ( ( ) => convertDocxToJson ( args ) ) ,
792855} ;
793856
794857// JSON-RPC message handling
0 commit comments