From 1eb155a5f58f498933cc612dfbc798286913b87f Mon Sep 17 00:00:00 2001 From: RP Date: Tue, 20 Jan 2026 11:45:44 -0500 Subject: [PATCH] fix: preserve whitespace-only content in inline tags (#155) When an inline formatting tag (strong, b, em, i, etc.) contains only whitespace, the content is now preserved as a single space instead of being stripped entirely. This fixes issue #155 where text like `further reference`, with the space wrapped in an inline tag, was incorrectly converted to `furtherreference` instead of `further reference`. Changes: - Modified chomp() to return ('', '', ' ') (empty prefix, empty suffix, single-space text) for whitespace-only text - Modified abstract_inline_conversion() to skip markup for whitespace-only text - Updated test_chomp to reflect new expected behavior - Added test_whitespace_only_inline_tags for regression testing Co-Authored-By: Claude Opus 4.5 --- markdownify/__init__.py | 11 +++++++++++ tests/test_advanced.py | 8 +++++--- tests/test_conversions.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 148d340..51978e0 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -85,7 +85,14 @@ def chomp(text): space, strip the string and return a space as suffix of prefix, if needed. This function is used to prevent conversions like foo => ** foo** + + If the text is whitespace-only, preserve it as a single space instead of + returning an empty string (fixes issue #155). 
""" + # Handle whitespace-only text: preserve as single space (fixes #155) + if text and not text.strip(): + return ('', '', ' ') + prefix = ' ' if text and text[0] == ' ' else '' suffix = ' ' if text and text[-1] == ' ' else '' text = text.strip() @@ -111,6 +118,10 @@ def implementation(self, el, text, parent_tags): prefix, suffix, text = chomp(text) if not text: return '' + # If text is whitespace-only, return just the whitespace without markup + # This preserves spaces from tags like (fixes #155) + if text.isspace(): + return text return '%s%s%s%s%s' % (prefix, markup_prefix, text, markup_suffix, suffix) return implementation diff --git a/tests/test_advanced.py b/tests/test_advanced.py index 6123d8c..320f7fc 100644 --- a/tests/test_advanced.py +++ b/tests/test_advanced.py @@ -3,9 +3,11 @@ def test_chomp(): assert md(' ') == ' ' - assert md(' ') == ' ' - assert md(' ') == ' ' - assert md(' ') == ' ' + # With fix for issue #155, whitespace-only content is preserved as a single space + # so ' ' becomes ' ' (before) + ' ' (preserved) + ' ' (after) = ' ' + assert md(' ') == ' ' + assert md(' ') == ' ' + assert md(' ') == ' ' assert md(' s ') == ' **s** ' assert md(' s ') == ' **s** ' assert md(' s ') == ' **s** ' diff --git a/tests/test_conversions.py b/tests/test_conversions.py index dd99dfb..c47c3d3 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -374,3 +374,35 @@ def test_spaces(): assert md('
  1. x
  2. y
') == '\n\n1. x\n2. y\n' assert md('