From 480e362ce5eb1c7ccc1318fd371c6597371984f7 Mon Sep 17 00:00:00 2001
From: Bruce-anle <840596168@qq.com>
Date: Sat, 9 May 2026 01:13:15 +0800
Subject: [PATCH] fix: preserve vertical merge table text

Background: DOCX tables can use w:vMerge for vertically merged cells.
Markdown has no rowspan support, but leaving continuation cells empty
loses the visible merged cell text.

Changes: track vertical-merge restart values by column and repeat them
in continuation cells, clearing the tracked value when a normal
(non-merged) cell appears in that column.

Verification: `/home/brucean/doc4agent/.venv/bin/python -m pytest tests -q -p no:cacheprovider` passed.
---
.../converters/markdown_converter.py | 14 +++++
tests/test_markdown_table_vmerge.py | 61 +++++++++++++++++++
2 files changed, 75 insertions(+)
create mode 100644 tests/test_markdown_table_vmerge.py
diff --git a/docx2everything/converters/markdown_converter.py b/docx2everything/converters/markdown_converter.py
index 0ff408f..c5a5067 100644
--- a/docx2everything/converters/markdown_converter.py
+++ b/docx2everything/converters/markdown_converter.py
@@ -395,6 +395,7 @@ def parse_table_to_markdown(tbl_elem, hyperlinks=None, images=None, img_dir=None
markdown_rows = []
num_cols = 0
+ vertical_merge_values = {}
# First pass: determine number of columns and extract all rows
col_alignments = [] # Track column alignments
@@ -408,12 +409,17 @@ def parse_table_to_markdown(tbl_elem, hyperlinks=None, images=None, img_dir=None
tcPr = cell.find(qn('w:tcPr'))
grid_span = 1
cell_alignment = 'left' # Default alignment
+ v_merge = None
if tcPr is not None:
gridSpan_elem = tcPr.find(qn('w:gridSpan'))
if gridSpan_elem is not None:
grid_span = int(gridSpan_elem.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val', 1))
+ vMerge_elem = tcPr.find(qn('w:vMerge'))
+ if vMerge_elem is not None:
+ v_merge = vMerge_elem.get('{http://schemas.openxmlformats.org/wordprocessingml/2006/main}val', 'continue')
+
# Check for cell alignment
jc_elem = tcPr.find(qn('w:jc'))
if jc_elem is not None:
@@ -433,6 +439,14 @@ def parse_table_to_markdown(tbl_elem, hyperlinks=None, images=None, img_dir=None
cell_text += p_text + ' '
cell_text = cell_text.strip().replace('\n', ' ').replace('|', '\\|')
+ col_idx = len(row_data)
+
+ if v_merge == 'restart':
+ vertical_merge_values[col_idx] = cell_text
+ elif v_merge == 'continue':
+ cell_text = vertical_merge_values.get(col_idx, cell_text)
+ else:
+ vertical_merge_values.pop(col_idx, None)
# Add merged cells
row_data.append(cell_text)
diff --git a/tests/test_markdown_table_vmerge.py b/tests/test_markdown_table_vmerge.py
new file mode 100644
index 0000000..a3cb2e2
--- /dev/null
+++ b/tests/test_markdown_table_vmerge.py
@@ -0,0 +1,61 @@
+import xml.etree.ElementTree as ET
+
+from docx2everything.converters.markdown_converter import parse_table_to_markdown
+
+
+W_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
+
+
+def test_vertical_merge_continuation_repeats_restart_cell_text():
+    """A w:vMerge continuation cell repeats the text of its restart cell."""
+    table = ET.fromstring(f"""
+    <w:tbl xmlns:w="{W_NS}">
+      <w:tr>
+        <w:tc><w:tcPr><w:vMerge w:val="restart"/></w:tcPr>
+          <w:p><w:r><w:t>Merged</w:t></w:r></w:p>
+        </w:tc>
+        <w:tc><w:p><w:r><w:t>Header</w:t></w:r></w:p></w:tc>
+      </w:tr>
+      <w:tr>
+        <w:tc><w:tcPr><w:vMerge/></w:tcPr>
+          <w:p/>
+        </w:tc>
+        <w:tc><w:p><w:r><w:t>Value</w:t></w:r></w:p></w:tc>
+      </w:tr>
+    </w:tbl>
+    """)
+
+    markdown = parse_table_to_markdown(table)
+
+    # Markdown has no rowspan, so row 2 repeats the merged cell's text.
+    assert markdown == "\n".join([
+        "| Merged | Header |",
+        "| --- | --- |",
+        "| Merged | Value |",
+    ])
+
+
+def test_vertical_merge_does_not_affect_later_normal_cells():
+    """A normal cell below a vMerge restart cell keeps its own text."""
+    table = ET.fromstring(f"""
+    <w:tbl xmlns:w="{W_NS}">
+      <w:tr>
+        <w:tc><w:tcPr><w:vMerge w:val="restart"/></w:tcPr>
+          <w:p><w:r><w:t>Merged</w:t></w:r></w:p>
+        </w:tc>
+        <w:tc><w:p><w:r><w:t>Header</w:t></w:r></w:p></w:tc>
+      </w:tr>
+      <w:tr>
+        <w:tc><w:p><w:r><w:t>Normal</w:t></w:r></w:p></w:tc>
+        <w:tc><w:p><w:r><w:t>Value</w:t></w:r></w:p></w:tc>
+      </w:tr>
+    </w:tbl>
+    """)
+
+    markdown = parse_table_to_markdown(table)
+
+    assert markdown == "\n".join([
+        "| Merged | Header |",
+        "| --- | --- |",
+        "| Normal | Value |",
+    ])