Skip to content

Commit 7d0080d

Browse files
committed
Fix: render blank cells in .xlsx/.xls as empty strings instead of NaN in markdown output
1 parent ee14fec commit 7d0080d

3 files changed

Lines changed: 13 additions & 2 deletions

File tree

packages/markitdown/src/markitdown/converters/_xlsx_converter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def convert(
8484
md_content = ""
8585
for s in sheets:
8686
md_content += f"## {s}\n"
87-
html_content = sheets[s].to_html(index=False)
87+
html_content = sheets[s].to_html(index=False, na_rep="")
8888
md_content += (
8989
self._html_converter.convert_string(
9090
html_content, **kwargs
@@ -146,7 +146,7 @@ def convert(
146146
md_content = ""
147147
for s in sheets:
148148
md_content += f"## {s}\n"
149-
html_content = sheets[s].to_html(index=False)
149+
html_content = sheets[s].to_html(index=False, na_rep="")
150150
md_content += (
151151
self._html_converter.convert_string(
152152
html_content, **kwargs
4.8 KB
Binary file not shown.

packages/markitdown/tests/test_module_misc.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -382,6 +382,17 @@ def test_exceptions() -> None:
382382
assert type(exc_info.value.attempts[0].converter).__name__ == "PptxConverter"
383383

384384

385+
def test_xlsx_blank_cells() -> None:
386+
# Blank cells in .xlsx should render as empty strings, not "NaN"
387+
markitdown = MarkItDown()
388+
result = markitdown.convert(
389+
os.path.join(TEST_FILES_DIR, "test_xlsx_blank_cells.xlsx")
390+
)
391+
assert "NaN" not in result.markdown
392+
assert "Alice" in result.markdown
393+
assert "Bob" in result.markdown
394+
395+
385396
@pytest.mark.skipif(
386397
skip_exiftool,
387398
reason="do not run if exiftool is not installed",

0 commit comments

Comments
 (0)