matthewwithanm · sp2935 · Jan 20, 2026
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
@@ -85,7 +85,14 @@ def chomp(text):
     space, strip the string and return a space as suffix of prefix, if needed.
     This function is used to prevent conversions like
         <b> foo</b> => ** foo**
+
+    Whitespace-only text is not stripped to nothing: it is returned as a single
+    space in the text slot, with empty prefix and suffix (fixes issue #155).
     """
+    # Handle whitespace-only text: preserve as single space (fixes #155)
+    if text and not text.strip():
+        return ('', '', ' ')
+
     prefix = ' ' if text and text[0] == ' ' else ''
     suffix = ' ' if text and text[-1] == ' ' else ''
     text = text.strip()
@@ -111,6 +118,10 @@ def implementation(self, el, text, parent_tags):
         prefix, suffix, text = chomp(text)
         if not text:
             return ''
+        # chomp() yields a single space for whitespace-only input; emit it bare,
+        # since wrapping it in markup would give e.g. '** **' (fixes #155).
+        if text.isspace():
+            return text
         return '%s%s%s%s%s' % (prefix, markup_prefix, text, markup_suffix, suffix)
     return implementation
 

diff --git a/tests/test_advanced.py b/tests/test_advanced.py
@@ -3,9 +3,11 @@
 
 def test_chomp():
     assert md(' <b></b> ') == '  '
-    assert md(' <b> </b> ') == '  '
-    assert md(' <b>  </b> ') == '  '
-    assert md(' <b>   </b> ') == '  '
+    # With fix for issue #155, whitespace-only content is preserved as a single space
+    # so ' <b> </b> ' becomes ' ' (before) + ' ' (preserved) + ' ' (after) = '   '
+    assert md(' <b> </b> ') == '   '
+    assert md(' <b>  </b> ') == '   '
+    assert md(' <b>   </b> ') == '   '
     assert md(' <b>s </b> ') == ' **s**  '
     assert md(' <b> s</b> ') == '  **s** '
     assert md(' <b> s </b> ') == '  **s**  '

diff --git a/tests/test_conversions.py b/tests/test_conversions.py
@@ -374,3 +374,35 @@ def test_spaces():
     assert md(' <ol> <li> x </li> <li> y </li> </ol> ') == '\n\n1. x\n2. y\n'
     assert md(' <ul> <li> x </li> <li> y </li> </ol> ') == '\n\n* x\n* y\n'
     assert md('test <pre> foo </pre> bar') == 'test\n\n```\n foo\n```\n\nbar'
+
+
+def test_whitespace_only_inline_tags():
+    """
+    Test that whitespace-only inline tags are converted to a single bare space.
+    Fixes issue #155: https://github.com/matthewwithanm/python-markdownify/issues/155
+
+    When DOCX files have formatting where a space is in its own formatting run
+    (e.g., "further" [normal] + " " [bold] + "reference" [normal]), the HTML
+    produced is: further<strong> </strong>reference
+
+    Previously, this would be converted to "furtherreference" (losing the space).
+    After the fix, it should be "further reference" (space kept, no markup).
+    """
+    # Whitespace-only strong/b tags should yield a bare space (no ** markers)
+    assert md('further<strong> </strong>reference') == 'further reference'
+    assert md('word1<b> </b>word2') == 'word1 word2'
+
+    # Whitespace-only em/i tags should likewise yield a bare space
+    assert md('hello<em> </em>world') == 'hello world'
+    assert md('foo<i> </i>bar') == 'foo bar'
+
+    # A run of several spaces inside the tag should collapse to a single space
+    assert md('a<strong>  </strong>b') == 'a b'
+    assert md('a<em>   </em>b') == 'a b'
+
+    # Mixed formatting with whitespace boundary (real-world DOCX pattern)
+    assert md('The <strong>TRUST,</strong> but without further<strong> </strong>reference') == 'The **TRUST,** but without further reference'
+
+    # A tab or newline inside the tag should also come out as a single space
+    assert md('a<b>\t</b>b') == 'a b'
+    assert md('a<i>\n</i>b') == 'a b'