@@ -56,30 +56,53 @@ def __init__(self):
5656 )
5757 from docling .datamodel .pipeline_options import (
5858 PdfPipelineOptions ,
59+ ConvertPipelineOptions ,
60+ PictureDescriptionApiOptions ,
5961 )
6062 from docling .document_converter import (
6163 DocumentConverter ,
6264 PdfFormatOption ,
65+ WordFormatOption ,
66+ ExcelFormatOption ,
67+ PowerpointFormatOption ,
6368 )
6469
65- pipeline_options = PdfPipelineOptions ()
66- pipeline_options .do_ocr = True
67- pipeline_options .do_table_structure = True
68- pipeline_options .table_structure_options .do_cell_matching = True
70+ pdf_pipeline_options = PdfPipelineOptions ()
71+ pdf_pipeline_options .do_ocr = True
72+ pdf_pipeline_options .do_table_structure = True
73+ pdf_pipeline_options .table_structure_options .do_cell_matching = True
6974
70- pipeline_options .accelerator_options = AcceleratorOptions (
75+ pdf_pipeline_options .accelerator_options = AcceleratorOptions (
7176 num_threads = 2 , device = AcceleratorDevice .MPS
7277 )
7378
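79+ # Explicitly disable picture classification, picture description and remote services for the Office formats (DOCX, XLSX, PPTX), and pass an explicit picture_description_options instance.
80+ # See https://github.com/docling-project/docling/issues/2515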
81+ word_pipeline_options = ConvertPipelineOptions (
82+ do_picture_classification = False ,
83+ do_picture_description = False ,
84+ enable_remote_services = False ,
85+ picture_description_options = PictureDescriptionApiOptions (),
86+ )
87+
7488 if sys .platform != "darwin" :
75- pipeline_options .accelerator_options = AcceleratorOptions (
89+ pdf_pipeline_options .accelerator_options = AcceleratorOptions (
7690 num_threads = 4 , device = AcceleratorDevice .AUTO
7791 )
7892 self .converter = DocumentConverter (
7993 format_options = {
8094 InputFormat .PDF : PdfFormatOption (
81- pipeline_options = pipeline_options
82- )
95+ pipeline_options = pdf_pipeline_options
96+ ),
97+ InputFormat .DOCX : WordFormatOption (
98+ pipeline_options = word_pipeline_options
99+ ),
100+ InputFormat .XLSX : ExcelFormatOption (
101+ pipeline_options = word_pipeline_options
102+ ),
103+ InputFormat .PPTX : PowerpointFormatOption (
104+ pipeline_options = word_pipeline_options
105+ ),
83106 }
84107 )
85108 logger .info ("Docling converter initialized successfully" )
0 commit comments