Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions docx2everything/converters/markdown_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,22 @@ def parse_paragraph_to_markdown(p_elem, numbering_info=None, hyperlinks=None, im
para_text += '\n![' + img_filename + '](' + img_md_path + ')\n'
elif img_path:
para_text += '\n![' + os.path.basename(img_path) + '](' + img_path + ')\n'

# Handle legacy VML images.
for pict in p_elem.findall('.//' + qn('w:pict')):
image_data = pict.find('.//{urn:schemas-microsoft-com:vml}imagedata')
if image_data is None:
continue

rel_id = image_data.get('{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id')
if rel_id and images:
img_path = images.get(rel_id, '')
if img_dir and img_path:
img_filename = os.path.basename(img_path)
img_md_path = os.path.join(img_dir, img_filename)
para_text += '\n![' + img_filename + '](' + img_md_path + ')\n'
elif img_path:
para_text += '\n![' + os.path.basename(img_path) + '](' + img_path + ')\n'

para_text = para_text.strip()

Expand Down
29 changes: 29 additions & 0 deletions tests/test_markdown_vml_pict_images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import xml.etree.ElementTree as ET

from docx2everything.converters.markdown_converter import parse_paragraph_to_markdown


# XML namespace URIs used to build the test paragraph below; each one is bound
# to the prefix noted beside it in the fixture's root <w:p> element.
# WordprocessingML main namespace (prefix "w": w:p, w:r, w:pict).
W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
# Office document relationships namespace (prefix "r": the r:id attribute).
R_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships"
# VML namespace (prefix "v": v:shape, v:imagedata).
V_NS = "urn:schemas-microsoft-com:vml"


def test_vml_pict_image_is_converted_to_markdown_image():
    """Check that a VML ``w:pict`` image is rendered as a Markdown image.

    The fixture is a paragraph with a single run whose ``w:pict`` wraps a
    ``v:shape`` containing ``v:imagedata`` with ``r:id="rIdImage1"``. Only the
    ``images`` mapping (relationship id -> image path) is passed to the
    converter; every other argument is left at its default. The result must
    be exactly the Markdown image reference, with no surrounding whitespace.
    """
    # The XML lines are deliberately kept flush-left inside the literal so the
    # parsed document (including whitespace text nodes) is unchanged.
    vml_paragraph_xml = f"""
<w:p xmlns:w="{W_NS}" xmlns:r="{R_NS}" xmlns:v="{V_NS}">
<w:r>
<w:pict>
<v:shape>
<v:imagedata r:id="rIdImage1"/>
</v:shape>
</w:pict>
</w:r>
</w:p>
"""
    relationship_targets = {"rIdImage1": "media/image1.png"}
    expected_markdown = "![image1.png](media/image1.png)"

    p_elem = ET.fromstring(vml_paragraph_xml)
    rendered = parse_paragraph_to_markdown(p_elem, images=relationship_targets)

    assert rendered == expected_markdown