diff --git a/docx2everything/converters/markdown_converter.py b/docx2everything/converters/markdown_converter.py index 0ff408f..5b2f36d 100644 --- a/docx2everything/converters/markdown_converter.py +++ b/docx2everything/converters/markdown_converter.py @@ -347,6 +347,22 @@ def parse_paragraph_to_markdown(p_elem, numbering_info=None, hyperlinks=None, im para_text += '\n![' + img_filename + '](' + img_md_path + ')\n' elif img_path: para_text += '\n![' + os.path.basename(img_path) + '](' + img_path + ')\n' + + # Handle legacy VML images. + for pict in p_elem.findall('.//' + qn('w:pict')): + image_data = pict.find('.//{urn:schemas-microsoft-com:vml}imagedata') + if image_data is None: + continue + + rel_id = image_data.get('{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id') + if rel_id and images: + img_path = images.get(rel_id, '') + if img_dir and img_path: + img_filename = os.path.basename(img_path) + img_md_path = os.path.join(img_dir, img_filename) + para_text += '\n![' + img_filename + '](' + img_md_path + ')\n' + elif img_path: + para_text += '\n![' + os.path.basename(img_path) + '](' + img_path + ')\n' para_text = para_text.strip() diff --git a/tests/test_markdown_vml_pict_images.py b/tests/test_markdown_vml_pict_images.py new file mode 100644 index 0000000..8f4737f --- /dev/null +++ b/tests/test_markdown_vml_pict_images.py @@ -0,0 +1,29 @@ +import xml.etree.ElementTree as ET + +from docx2everything.converters.markdown_converter import parse_paragraph_to_markdown + + +W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main" +R_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships" +V_NS = "urn:schemas-microsoft-com:vml" + + +def test_vml_pict_image_is_converted_to_markdown_image(): + paragraph = ET.fromstring(f""" + + + + + + + + + + """) + + markdown = parse_paragraph_to_markdown( + paragraph, + images={"rIdImage1": "media/image1.png"}, + ) + + assert markdown == "![image1.png](media/image1.png)"