diff --git a/docx2everything/converters/markdown_converter.py b/docx2everything/converters/markdown_converter.py index 0ff408f..c5a5067 100644 --- a/docx2everything/converters/markdown_converter.py +++ b/docx2everything/converters/markdown_converter.py @@ -395,6 +395,7 @@ def parse_table_to_markdown(tbl_elem, hyperlinks=None, images=None, img_dir=None markdown_rows = [] num_cols = 0 + vertical_merge_values = {} # First pass: determine number of columns and extract all rows col_alignments = [] # Track column alignments @@ -408,12 +409,17 @@ def parse_table_to_markdown(tbl_elem, hyperlinks=None, images=None, img_dir=None tcPr = cell.find(qn('w:tcPr')) grid_span = 1 cell_alignment = 'left' # Default alignment + v_merge = None if tcPr is not None: gridSpan_elem = tcPr.find(qn('w:gridSpan')) if gridSpan_elem is not None: grid_span = int(gridSpan_elem.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val', 1)) + vMerge_elem = tcPr.find(qn('w:vMerge')) + if vMerge_elem is not None: + v_merge = vMerge_elem.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val', 'continue') + # Check for cell alignment jc_elem = tcPr.find(qn('w:jc')) if jc_elem is not None: @@ -433,6 +439,14 @@ def parse_table_to_markdown(tbl_elem, hyperlinks=None, images=None, img_dir=None cell_text += p_text + ' ' cell_text = cell_text.strip().replace('\n', ' ').replace('|', '\\|') + col_idx = len(row_data) + + if v_merge == 'restart': + vertical_merge_values[col_idx] = cell_text + elif v_merge == 'continue': + cell_text = vertical_merge_values.get(col_idx, cell_text) + else: + vertical_merge_values.pop(col_idx, None) # Add merged cells row_data.append(cell_text) diff --git a/tests/test_markdown_table_vmerge.py b/tests/test_markdown_table_vmerge.py new file mode 100644 index 0000000..a3cb2e2 --- /dev/null +++ b/tests/test_markdown_table_vmerge.py @@ -0,0 +1,61 @@ +import xml.etree.ElementTree as ET + +from docx2everything.converters.markdown_converter import 
parse_table_to_markdown
+
+
+W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"  # bound to the w: prefix in the fixtures
+
+
+def test_vertical_merge_continuation_repeats_restart_cell_text():
+    table = ET.fromstring(f"""
+    <w:tbl xmlns:w="{W_NS}">
+      <w:tr>
+        <w:tc>
+          <w:tcPr><w:vMerge w:val="restart"/></w:tcPr>
+          <w:p><w:r><w:t>Merged</w:t></w:r></w:p>
+        </w:tc>
+        <w:tc><w:p><w:r><w:t>Header</w:t></w:r></w:p></w:tc>
+      </w:tr>
+      <w:tr>
+        <w:tc>
+          <w:tcPr><w:vMerge/></w:tcPr>
+          <w:p/>
+        </w:tc>
+        <w:tc><w:p><w:r><w:t>Value</w:t></w:r></w:p></w:tc>
+      </w:tr>
+    </w:tbl>
+    """)
+    # A bare <w:vMerge/> (no w:val) is a continuation, so row 2 must repeat "Merged".
+    markdown = parse_table_to_markdown(table)
+
+    assert markdown == "\n".join([
+        "| Merged | Header |",
+        "| --- | --- |",
+        "| Merged | Value |",
+    ])
+
+
+def test_vertical_merge_does_not_affect_later_normal_cells():
+    table = ET.fromstring(f"""
+    <w:tbl xmlns:w="{W_NS}">
+      <w:tr>
+        <w:tc>
+          <w:tcPr><w:vMerge w:val="restart"/></w:tcPr>
+          <w:p><w:r><w:t>Merged</w:t></w:r></w:p>
+        </w:tc>
+        <w:tc><w:p><w:r><w:t>Header</w:t></w:r></w:p></w:tc>
+      </w:tr>
+      <w:tr>
+        <w:tc><w:p><w:r><w:t>Normal</w:t></w:r></w:p></w:tc>
+        <w:tc><w:p><w:r><w:t>Value</w:t></w:r></w:p></w:tc>
+      </w:tr>
+    </w:tbl>
+    """)
+    # "Normal" carries no <w:vMerge>, so the remembered "Merged" must not leak into row 2.
+    markdown = parse_table_to_markdown(table)
+
+    assert markdown == "\n".join([
+        "| Merged | Header |",
+        "| --- | --- |",
+        "| Normal | Value |",
+    ])