From bf0a1e29c991bacb40438138a4d86338b7d9acd3 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 30 Nov 2025 09:49:59 +0000 Subject: [PATCH 1/9] Re-write the IAB processor to implement GFM rules --- lib/markdown2.py | 160 +++++++++++++++++- .../tm-cases/middle_word_em_escaped_char.html | 1 + .../tm-cases/middle_word_em_escaped_char.opts | 1 + .../tm-cases/middle_word_em_escaped_char.text | 1 + 4 files changed, 157 insertions(+), 6 deletions(-) create mode 100644 test/tm-cases/middle_word_em_escaped_char.html create mode 100644 test/tm-cases/middle_word_em_escaped_char.opts create mode 100644 test/tm-cases/middle_word_em_escaped_char.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 71b19f67..3dd19541 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -1123,8 +1123,14 @@ def _strict_tag_block_sub( return result def _tag_is_closed(self, tag_name: str, text: str) -> bool: - # super basic check if number of open tags == number of closing tags - return len(re.findall('<%s(?:.*?)>' % tag_name, text)) == len(re.findall('' % tag_name, text)) + # check if number of open tags == number of close tags + if len(re.findall('<%s(?:.*?)>' % tag_name, text)) != len(re.findall('' % tag_name, text)): + return False + + # check that close tag position is AFTER open tag + close_index = text.find(f' str: @@ -2066,8 +2072,11 @@ def sub(match: re.Match): return f'{prefix}<{syntax}>{contents}' # must go first: - text = self._strong_re.sub(sub, text) - text = self._em_re.sub(sub, text) + # text = self._strong_re.sub(sub, text) + # text = self._em_re.sub(sub, text) + iab = ItalicAndBoldProcessor2(self, None) + if iab.test(text): + text = iab.run(text) return text _block_quote_base = r''' @@ -2581,6 +2590,138 @@ def test(self, text): return self.hash_table and re.search(r'md5-[0-9a-z]{32}', text) +class ItalicAndBoldProcessor2(Extra): + name = 'iabp-2' + order = (Stage.ITALIC_AND_BOLD,), tuple() + + def run(self, text): + for em_type in '*_': + opens = [] + unused_opens = {} + tokens = [] + index = 0 + + delim_runs = tuple(re.finditer(r'([%s]+)' % em_type, text)) + for delim_run in delim_runs: + # first check if it is opening (left flanking) + # or closing (right flanking) run + run = delim_run.string[max(0, delim_run.start() - 1): delim_run.end() + 1] + syntax = delim_run.group(1) + syntax_re = syntax.replace('*', r'\*') + + left = ( + # not followed by whitespace + re.match(r'.*%s\S' % syntax_re, run, re.S) + and ( + # either not followed by punctuation + re.match(r'.*%s[\s\w]' % syntax_re, run, re.S) + # or followed by punct and preceded by punct/whitespace + or re.match(r'(^|[\s\W])%s([^\s\w]|$)' % syntax_re, run, re.S | re.M) + ) + ) + + right = ( + # not preceded by whitespace + re.match(r'\S%s.*' % syntax_re, run, re.S) + and ( + # either not preceded by punct + re.match(r'[\s\w]%s.*' % syntax_re, run, re.S) + # or preceded by punct and followed by whitespace or punct + or re.match(r'[^\s\w]%s(\s|[^\s\w]|$)' % syntax_re, run, re.S | re.M) + ) + ) + + if not (left or right): + continue + + if left and right: + if opens: + # if we have open tags prioritize closing them + left = False + else: + # if we don't, let's open a new one + right = False + + if left: + opens.append(delim_run) + continue + + # close. 
figure out how + if not opens: + tokens.append(delim_run.string[index: delim_run.end()]) + index = delim_run.end() + continue + + # get the opening run + open = opens.pop(-1) + # if the opening run was joined to a previous closing run (eg: **strong***em*) + # then re-use that previous closing run, but ignore the part that was used to + # close the previous emphasis + open_offset = unused_opens.pop(open, 0) + open_syntax = open.group(1)[open_offset:] + open_start = open.start() + open_offset + + # add everything between last emphasis and this one + tokens.append(delim_run.string[index: open_start]) + body = delim_run.string[open.end(): delim_run.start()] + if not all( + self.md._tag_is_closed(tag, body) + for tag in re.findall(rf' len(syntax): + opens.append(open) + unused_opens[open] = open_offset + opens.append(delim_run) + unused_opens[delim_run] = 0 + continue + + # calc what type of emphasis based on the lowest common + # length of the delimiter run + length = min(3, min(len(open_syntax), len(syntax))) + if length == 3: + tokens.append('') + tokens.append(body) + tokens.append('') + else: + tag = 'strong' if length == 2 else 'em' + # add any part of the open that we don't consume + # eg: **one* + tokens.append(open_syntax[:-length]) + tokens.append(f'<{tag}>') + tokens.append(body) + tokens.append(f'') + + # if our closing syntax is longer than our opening that + # means it's joined onto a previous emphasis + # eg: **strong***em* + # This means the current delim_run is not completely "spent". + # Mark this closing run as an opening run for the next em but + # record in `unused_opens` how mmany chars from the run we've + # already used + if len(syntax) > len(open_syntax): + opens.append(delim_run) + unused_opens[delim_run] = length + index = delim_run.start() + length + else: + tokens.append(delim_run.group(1)[length:]) + index = delim_run.end() + + if index < len(text): + tokens.append(text[index:]) + + text = ''.join(tokens) + + return text + + + def test(self, text): + return text.count('*') > 1 or text.count('_') > 1 + + class _LinkProcessorExtraOpts(TypedDict, total=False): '''Options for the `LinkProcessor` extra''' tags: List[str] @@ -3420,14 +3561,21 @@ def __init__(self, md: Markdown, options: Union[dict, bool, None]): options.setdefault('allowed', True) super().__init__(md, options) + escaped_hashes = '|'.join(md._escape_table.values()) + self.middle_word_em_re = re.compile( r''' (?x/y and x\y

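[Editorial note on the hunk above] The markdown2.py change in this patch classifies each delimiter run as left- and/or right-flanking using regexes over a small window around the run. For reference, a minimal character-based sketch of the same GFM flanking rules, assuming the patch's `[\s\w]` / `[^\s\w]` classes stand in for the spec's whitespace/punctuation categories (`classify_delimiter_run` is a hypothetical helper, not part of the patch):

```python
def classify_delimiter_run(text: str, start: int, end: int) -> tuple:
    '''Classify text[start:end] (a run of "*" or "_") per the GFM flanking rules.

    Left-flanking: not followed by whitespace, and either not followed by
    punctuation or preceded by whitespace/punctuation. Right-flanking is
    the mirror image.
    '''
    # the spec treats the ends of the text as whitespace
    before = text[start - 1] if start > 0 else ' '
    after = text[end] if end < len(text) else ' '

    def punct(ch: str) -> bool:
        # mirrors the patch's [^\s\w] class: non-space, non-word
        return not ch.isspace() and not (ch.isalnum() or ch == '_')

    left = not after.isspace() and (
        not punct(after) or before.isspace() or punct(before)
    )
    right = not before.isspace() and (
        not punct(before) or after.isspace() or punct(after)
    )
    return left, right

# '*foo*': the first run can only open, the second can only close;
# 'foo*bar': the intraword run is both left- and right-flanking
```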
diff --git a/test/tm-cases/middle_word_em_escaped_char.opts b/test/tm-cases/middle_word_em_escaped_char.opts new file mode 100644 index 00000000..f540dcd6 --- /dev/null +++ b/test/tm-cases/middle_word_em_escaped_char.opts @@ -0,0 +1 @@ +{'extras': {'middle-word-em': {'allowed': False}}} diff --git a/test/tm-cases/middle_word_em_escaped_char.text b/test/tm-cases/middle_word_em_escaped_char.text new file mode 100644 index 00000000..3548642d --- /dev/null +++ b/test/tm-cases/middle_word_em_escaped_char.text @@ -0,0 +1 @@ +*x*/*y* and *x*\\*y* From 6ade9ab62114e433b4d04fe771553b05e7825044 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 30 Nov 2025 22:04:22 +0000 Subject: [PATCH 2/9] Get closer to GFM compliance --- lib/markdown2.py | 257 ++++++++++++++++++++++++++++------------------- 1 file changed, 154 insertions(+), 103 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 3dd19541..6cf5132c 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2596,124 +2596,175 @@ class ItalicAndBoldProcessor2(Extra): def run(self, text): for em_type in '*_': - opens = [] - unused_opens = {} - tokens = [] - index = 0 - - delim_runs = tuple(re.finditer(r'([%s]+)' % em_type, text)) - for delim_run in delim_runs: - # first check if it is opening (left flanking) - # or closing (right flanking) run - run = delim_run.string[max(0, delim_run.start() - 1): delim_run.end() + 1] - syntax = delim_run.group(1) - syntax_re = syntax.replace('*', r'\*') - - left = ( - # not followed by whitespace - re.match(r'.*%s\S' % syntax_re, run, re.S) - and ( - # either not followed by punctuation - re.match(r'.*%s[\s\w]' % syntax_re, run, re.S) - # or followed by punct and preceded by punct/whitespace - or re.match(r'(^|[\s\W])%s([^\s\w]|$)' % syntax_re, run, re.S | re.M) + nesting = True + while nesting: + nesting = False + + opens = [] + buffer = [] + unused_opens = {} + tokens = [] + index = 0 + + for delim_run in re.finditer(r'([%s]+)' % em_type, text): + # first check if it is opening (left flanking) + # or closing (right flanking) run + run = delim_run.string[max(0, delim_run.start() - 1): delim_run.end() + 1] + syntax = delim_run.group(1) + syntax_re = syntax.replace('*', r'\*') + + left = ( + # not followed by whitespace + re.match(r'.*%s\S' % syntax_re, run, re.S) + and ( + # either not followed by punctuation + re.match(r'.*%s[\s\w]' % syntax_re, run, re.S) + # or followed by punct and preceded by punct/whitespace + or re.match(r'(^|[\s\W])%s([^\s\w]|$)' % syntax_re, run, re.S | re.M) + ) ) - ) - right = ( - # not preceded by whitespace - re.match(r'\S%s.*' % syntax_re, run, re.S) - and ( - # either not preceded by punct - re.match(r'[\s\w]%s.*' % syntax_re, run, re.S) - # or preceded by punct and followed by whitespace or punct - or re.match(r'[^\s\w]%s(\s|[^\s\w]|$)' % syntax_re, run, re.S | re.M) + right = ( + # not preceded by whitespace + re.match(r'\S%s.*' % syntax_re, run, re.S) + and ( + # either not preceded by punct + re.match(r'[\s\w]%s.*' % syntax_re, run, re.S) + # or preceded by punct and followed by whitespace or punct + or re.match(r'[^\s\w]%s(\s|[^\s\w]|$)' % syntax_re, run, re.S | re.M) + ) ) - ) - if not (left or right): - continue + if not (left or right): + continue - if left and right: - if opens: - # if we have open tags prioritize closing them - left = False - else: - # if we don't, let's open a new one - right = False + if not right or not opens: + if left: + opens.append(delim_run) + continue - if left: - opens.append(delim_run) - continue + syntax = delim_run.group(1) + + 
open = opens.pop(-1) + # if the opening run was joined to a previous closing run (eg: **strong***em*) + # then re-use that previous closing run, but ignore the part that was used to + # close the previous emphasis + open_offset = unused_opens.pop(open, 0) + open_start = open.start() + open_offset + open_syntax = open.group(1)[open_offset:] + + if open.start() < index: + # this happens with things like `*(**foo**)*`. We process LTR so the strong gets + # processed first (since that has the first closing delimiter). We now have + # `*(foo)*` and now we get round to processing the em. + # It's hard compare the match (against the original text var) to the processed text + # so it's easier to just note down that nesting is detected and re-run the loop + nesting = True + continue - # close. figure out how - if not opens: - tokens.append(delim_run.string[index: delim_run.end()]) - index = delim_run.end() - continue + prev_open = None + + if len(open_syntax) < len(syntax): + # if closing syntax is longer then maybe we can close multiple openers that are queued up + if opens: + prev_open = opens.pop(-1) + prev_open_offset = unused_opens.pop(open, 0) + prev_open_start = prev_open.start() + prev_open_offset + prev_open_syntax = prev_open.group(1)[prev_open_offset:] + + # check the new expanded body doesn't cross span borders + if not all( + self.md._tag_is_closed(tag, delim_run.string[prev_open.end(): open.start()]) + for tag in re.findall( + rf' len(syntax): + # if the opening syntax is bigger than this close won't close all of it. + # Queue both up for later processing + opens.append(open) + unused_opens[open] = open_offset + if left: + opens.append(delim_run) + unused_opens[delim_run] = 0 + continue - # get the opening run - open = opens.pop(-1) - # if the opening run was joined to a previous closing run (eg: **strong***em*) - # then re-use that previous closing run, but ignore the part that was used to - # close the previous emphasis - open_offset = unused_opens.pop(open, 0) - open_syntax = open.group(1)[open_offset:] - open_start = open.start() + open_offset - - # add everything between last emphasis and this one - tokens.append(delim_run.string[index: open_start]) - body = delim_run.string[open.end(): delim_run.start()] - if not all( - self.md._tag_is_closed(tag, body) - for tag in re.findall(rf' len(syntax): - opens.append(open) - unused_opens[open] = open_offset - opens.append(delim_run) - unused_opens[delim_run] = 0 - continue + # ensure the body does not cross span borders + if not all( + self.md._tag_is_closed(tag, body) + for tag in re.findall(rf'') - tokens.append(body) - tokens.append('') - else: - tag = 'strong' if length == 2 else 'em' + # put all the new processing in a buffer array that gets added to `tokens` anyway. 
+ # Not the most efficient but it's convenient having a separate list of everything + # processed and added in the previous iteration + buffer = [] + + # add all the text leading up to the opening delimiter + buffer.append(delim_run.string[index: prev_open_start if prev_open else open_start]) + + # calc what type of emphasis based on the lowest common + # length of the delimiter run + length = min(3, min(len(open_syntax), len(syntax))) # add any part of the open that we don't consume # eg: **one* - tokens.append(open_syntax[:-length]) - tokens.append(f'<{tag}>') - tokens.append(body) - tokens.append(f'') - - # if our closing syntax is longer than our opening that - # means it's joined onto a previous emphasis - # eg: **strong***em* - # This means the current delim_run is not completely "spent". - # Mark this closing run as an opening run for the next em but - # record in `unused_opens` how mmany chars from the run we've - # already used - if len(syntax) > len(open_syntax): - opens.append(delim_run) - unused_opens[delim_run] = length - index = delim_run.start() + length - else: - tokens.append(delim_run.group(1)[length:]) + buffer.append(open_syntax[:-length]) + if length == 3: + buffer.append('') + buffer.append(body) + buffer.append('') + else: + tag = 'strong' if length == 2 else 'em' + # prev_open is defined if this closing syntax is closing multiple openers at once + if prev_open: + if len(prev_open_syntax) == 3: + prev_tag = 'strong' if tag == 'em' else 'em' + else: + prev_tag = 'strong' if len(prev_open_syntax) == 2 else 'em' + buffer.append(f'<{prev_tag}>') + + if len(prev_open_syntax) == 3: + buffer.append(f'<{tag}>') + + buffer.append(delim_run.string[prev_open.end(): open.start()]) + + if len(prev_open_syntax) == 3: + buffer.append(f'') + else: + buffer.append(f'<{tag}>') + + buffer.append(body) + + if len(prev_open_syntax) != 3: + buffer.append(f'') + buffer.append(f'') + else: + buffer.append(f'<{tag}>') + buffer.append(body) + buffer.append(f'') + + # If both syntaxes are equal length then that's easy. 
Remove the open run as it's fully + # processed and consumed, and move on index = delim_run.end() - if index < len(text): - tokens.append(text[index:]) + tokens.extend(buffer) + + if index < len(text): + tokens.append(text[index:]) - text = ''.join(tokens) + text = ''.join(tokens) return text From b3e512de1925ab5f53597072d82b289a0e050e87 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Fri, 5 Dec 2025 21:58:12 +0000 Subject: [PATCH 3/9] Iron out some GFM edge cases --- lib/markdown2.py | 291 ++++++++++++++++++++++++++++------------------- 1 file changed, 174 insertions(+), 117 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 6cf5132c..22e51961 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2601,39 +2601,18 @@ def run(self, text): nesting = False opens = [] - buffer = [] unused_opens = {} + unused_closes = [] tokens = [] index = 0 - for delim_run in re.finditer(r'([%s]+)' % em_type, text): - # first check if it is opening (left flanking) - # or closing (right flanking) run - run = delim_run.string[max(0, delim_run.start() - 1): delim_run.end() + 1] - syntax = delim_run.group(1) - syntax_re = syntax.replace('*', r'\*') - - left = ( - # not followed by whitespace - re.match(r'.*%s\S' % syntax_re, run, re.S) - and ( - # either not followed by punctuation - re.match(r'.*%s[\s\w]' % syntax_re, run, re.S) - # or followed by punct and preceded by punct/whitespace - or re.match(r'(^|[\s\W])%s([^\s\w]|$)' % syntax_re, run, re.S | re.M) - ) - ) + delim_runs = { + delim_run: self.delimiter_left_or_right(delim_run) + for delim_run in re.finditer(r'([%s]+)' % em_type, text) + } - right = ( - # not preceded by whitespace - re.match(r'\S%s.*' % syntax_re, run, re.S) - and ( - # either not preceded by punct - re.match(r'[\s\w]%s.*' % syntax_re, run, re.S) - # or preceded by punct and followed by whitespace or punct - or re.match(r'[^\s\w]%s(\s|[^\s\w]|$)' % syntax_re, run, re.S | re.M) - ) - ) + for delim_run, (left, right) in delim_runs.items(): + syntax = delim_run.group(1) if not (left or right): continue @@ -2662,112 +2641,190 @@ def run(self, text): nesting = True continue - prev_open = None - - if len(open_syntax) < len(syntax): - # if closing syntax is longer then maybe we can close multiple openers that are queued up - if opens: - prev_open = opens.pop(-1) - prev_open_offset = unused_opens.pop(open, 0) - prev_open_start = prev_open.start() + prev_open_offset - prev_open_syntax = prev_open.group(1)[prev_open_offset:] - - # check the new expanded body doesn't cross span borders - if not all( - self.md._tag_is_closed(tag, delim_run.string[prev_open.end(): open.start()]) - for tag in re.findall( - rf' len(syntax) and unused_closes: + # check if there is a previous closing delim run in the current body + # since this is already within the body we don't need to do a cross-span border check + # as we're not expanding into new ground and that is covered later + middle = next((i for i in unused_closes if open.end() < i.start() < delim_run.start()), None) else: - unused_opens[open] = open_offset - opens.append(open) - unused_opens[delim_run] = 0 - opens.append(delim_run) - continue - elif len(open_syntax) > len(syntax): - # if the opening syntax is bigger than this close won't close all of it. 
- # Queue both up for later processing - opens.append(open) - unused_opens[open] = open_offset - if left: - opens.append(delim_run) - unused_opens[delim_run] = 0 - continue - - body = delim_run.string[open.end(): delim_run.start()] + try: + next_delim_run = tuple(delim_runs.keys())[tuple(delim_runs.keys()).index(delim_run) + 1] + except IndexError: + next_delim_run = None + + if next_delim_run is None: + # if there is no follow up delimiter run then no point leaving this unused. Process now + pass + elif len(open_syntax) < len(syntax) and ( + # if this run can be an opener, but the next run won't close both of them + (left and not delim_runs[next_delim_run][1]) + # if the next run is not an opener and won't consume this run + and not delim_runs[next_delim_run][0] + ): + pass + elif len(open_syntax) > len(syntax) and ( + # if this run can be an closer, but the next run is not a fresh opener + (right and not delim_runs[next_delim_run][0]) + # if the next run is not a closer + and not delim_runs[next_delim_run][1] + ): + pass + elif delim_runs[next_delim_run][1] and len(open_syntax) == len(next_delim_run.group(1)): + # of the next run is a closer and matches the length of the opener then that is probably + # a better closer than this run - eg: **foo*bar** or *foo**bar* + opens.append(open) + continue + else: + # if there are no unused opens or closes to use up then this is just imbalanced + # mark as unused and leave for later processing + unused_opens[open] = open_offset + opens.append(open) + if left: + unused_opens[delim_run] = 0 + opens.append(delim_run) + else: + unused_closes.append(delim_run) + continue # ensure the body does not cross span borders - if not all( - self.md._tag_is_closed(tag, body) - for tag in re.findall(rf'') - buffer.append(body) - buffer.append('') - else: - tag = 'strong' if length == 2 else 'em' - # prev_open is defined if this closing syntax is closing multiple openers at once - if prev_open: - if len(prev_open_syntax) == 3: - prev_tag = 'strong' if tag == 'em' else 'em' - else: - prev_tag = 'strong' if len(prev_open_syntax) == 2 else 'em' - buffer.append(f'<{prev_tag}>') + tokens.append(delim_run.string[index: open_start]) - if len(prev_open_syntax) == 3: - buffer.append(f'<{tag}>') + span, close_syntax_used_chars = self.process_span(open, delim_run, open_offset, middle) + tokens.extend(span) + if close_syntax_used_chars < len(syntax): + # if we didn't use up the entire closing delimiter mark it as unused + unused_opens[delim_run] = close_syntax_used_chars + opens.append(delim_run) - buffer.append(delim_run.string[prev_open.end(): open.start()]) + # Move index to end of the used delim run + index = delim_run.start() + close_syntax_used_chars - if len(prev_open_syntax) == 3: - buffer.append(f'') - else: - buffer.append(f'<{tag}>') + if index < len(text): + tokens.append(text[index:]) + + text = ''.join(tokens) - buffer.append(body) + return text - if len(prev_open_syntax) != 3: - buffer.append(f'') - buffer.append(f'') - else: - buffer.append(f'<{tag}>') - buffer.append(body) - buffer.append(f'') + def process_span( + self, open: re.Match, close: re.Match, + offset: int, middle: Optional[re.Match] = None + ): + ''' + Args: + open: the match against the opening delimiter run + close: the match against the closing delimiter run + offset: the number of chars from the opening delimiter that should be skipped when processing + middle: an optional delimiter run in the middle of the span + ''' + tokens = [] - # If both syntaxes are equal length then that's easy. 
Remove the open run as it's fully - # processed and consumed, and move on - index = delim_run.end() + open_syntax = open.group(1)[offset:] + middle_syntax = middle.group(1) if middle else '' + close_syntax = close.group(1) - tokens.extend(buffer) + # calculate what em type the inner and outer emphasis is + outer_syntax_length = min(3, min(len(open_syntax), len(close_syntax))) + inner_syntax_length = min(max(len(open_syntax), len(close_syntax)), len(middle_syntax)) if middle else 0 + # add anything from the opening syntax that will not be consumed + # eg: **one* + tokens.append(open_syntax[:-(outer_syntax_length + inner_syntax_length)]) - if index < len(text): - tokens.append(text[index:]) + if outer_syntax_length == 3: + tokens.append('') + else: + tokens.append(f'<{"strong" if outer_syntax_length == 2 else "em"}>') - text = ''.join(tokens) + if middle: + # outer_tag = 'strong' if outer_syntax_length == 2 else 'em' - return text + # if there is a middle em (eg: ***abc*def**) then do some wrangling to figure + # out where to put the opening/closing inner tags depending on the size of the + # opening delim run + inner_tag = 'strong' if len(middle_syntax) == 2 else 'em' + if len(open_syntax) > len(close_syntax): + tokens.append(f'<{inner_tag}>') + + tokens.append(close.string[open.end(): middle.start()]) + + if len(open_syntax) > len(close_syntax): + tokens.append(f'') + else: + tokens.append(f'<{inner_tag}>') + + tokens.append(close.string[middle.end(): close.start()]) + if len(open_syntax) < len(close_syntax): + tokens.append(f'') + else: + # if no middle em then it's easy. Just add the whole text body + tokens.append(close.string[open.end(): close.start()]) + + if outer_syntax_length == 3: + tokens.append('') + else: + tokens.append(f'') + + # figure out how many chars from the closing delimiter we've actually used + close_delim_chars_used = outer_syntax_length + if middle and len(open_syntax) < len(close_syntax): + # if there's a middle part and it's right-aligned then add that on + close_delim_chars_used += inner_syntax_length + + return tokens, close_delim_chars_used + + def delimiter_left_or_right(self, delim_run: re.Match): + run = delim_run.string[max(0, delim_run.start() - 1): delim_run.end() + 1] + syntax = delim_run.group(1) + syntax_re = syntax.replace('*', r'\*') + + left = ( + # not followed by whitespace + re.match(r'.*%s\S' % syntax_re, run, re.S) + and ( + # either not followed by punctuation + re.match(r'.*%s[\s\w]' % syntax_re, run, re.S) + # or followed by punct and preceded by punct/whitespace + or re.match(r'(^|[\s\W])%s([^\s\w]|$)' % syntax_re, run, re.S | re.M) + ) + ) + + right = ( + # not preceded by whitespace + re.match(r'\S%s.*' % syntax_re, run, re.S) + and ( + # either not preceded by punct + re.match(r'[\s\w]%s.*' % syntax_re, run, re.S) + # or preceded by punct and followed by whitespace or punct + or re.match(r'[^\s\w]%s(\s|[^\s\w]|$)' % syntax_re, run, re.S | re.M) + ) + ) + + return left, right + + def body_crosses_span_borders(self, open: re.Match, close: re.Match): + for tag in re.findall(rf' 1 or text.count('_') > 1 From 366ad8cb32687c95d87aaac19364e27b6f92c243 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Fri, 5 Dec 2025 22:29:18 +0000 Subject: [PATCH 4/9] Acheive near full GFM compliance on iab --- lib/markdown2.py | 228 ++++++++++++++++++++++++----------------------- 1 file changed, 115 insertions(+), 113 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 22e51961..9099a693 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ 
-2595,131 +2595,133 @@ class ItalicAndBoldProcessor2(Extra): order = (Stage.ITALIC_AND_BOLD,), tuple() def run(self, text): - for em_type in '*_': - nesting = True - while nesting: - nesting = False - - opens = [] - unused_opens = {} - unused_closes = [] - tokens = [] - index = 0 - - delim_runs = { - delim_run: self.delimiter_left_or_right(delim_run) - for delim_run in re.finditer(r'([%s]+)' % em_type, text) - } + nesting = True + while nesting: + nesting = False + + opens = {'*': [], '_': []} + unused_opens = {'*': {}, '_': {}} + unused_closes = {'*': [], '_': []} + tokens = [] + index = 0 + + delim_runs = { + delim_run: self.delimiter_left_or_right(delim_run) + for delim_run in re.finditer(r'(\*+|_+)', text) + } + + for delim_run, (left, right) in delim_runs.items(): + syntax = delim_run.group(1) + em_type = syntax[0] + + if not (left or right): + continue - for delim_run, (left, right) in delim_runs.items(): - syntax = delim_run.group(1) + if not right or not opens[em_type]: + if left: + opens[em_type].append(delim_run) + continue - if not (left or right): - continue + syntax = delim_run.group(1) - if not right or not opens: - if left: - opens.append(delim_run) - continue + # grab the open run. If it crosses a span, keep looking backwards + while opens[em_type] and self.body_crosses_span_borders(opens[em_type][-1], delim_run): + opens[em_type].pop(-1) + if not opens[em_type]: + continue + open = opens[em_type].pop(-1) + + # if the opening run was joined to a previous closing run (eg: **strong***em*) + # then re-use that previous closing run, but ignore the part that was used to + # close the previous emphasis + open_offset = unused_opens[em_type].pop(open, 0) + open_start = open.start() + open_offset + open_syntax = open.group(1)[open_offset:] + + if open.start() < index: + # this happens with things like `*(**foo**)*`. We process LTR so the strong gets + # processed first (since that has the first closing delimiter). We now have + # `*(foo)*` and now we get round to processing the em. + # It's hard compare the match (against the original text var) to the processed text + # so it's easier to just note down that nesting is detected and re-run the loop + nesting = True + continue - syntax = delim_run.group(1) - - open = opens.pop(-1) - # if the opening run was joined to a previous closing run (eg: **strong***em*) - # then re-use that previous closing run, but ignore the part that was used to - # close the previous emphasis - open_offset = unused_opens.pop(open, 0) - open_start = open.start() + open_offset - open_syntax = open.group(1)[open_offset:] - - if open.start() < index: - # this happens with things like `*(**foo**)*`. We process LTR so the strong gets - # processed first (since that has the first closing delimiter). We now have - # `*(foo)*` and now we get round to processing the em. 
- # It's hard compare the match (against the original text var) to the processed text - # so it's easier to just note down that nesting is detected and re-run the loop - nesting = True - continue + middle = None + + if len(open_syntax) != len(syntax): + if len(open_syntax) < len(syntax) and opens[em_type]: + # since we are detecting a previous open, we are expanding the em span to the left + # so we should check if we're covering additional chars that we don't cross an + # existing span border + if not self.body_crosses_span_borders(opens[em_type][-1], open): + middle = open + + open = opens[em_type].pop(-1) + open_offset = unused_opens[em_type].pop(open, 0) + open_start = open.start() + open_offset + elif len(open_syntax) > len(syntax) and unused_closes[em_type]: + # check if there is a previous closing delim run in the current body + # since this is already within the body we don't need to do a cross-span border check + # as we're not expanding into new ground and that is covered later + middle = next((i for i in unused_closes[em_type] if open.end() < i.start() < delim_run.start()), None) + else: + try: + next_delim_run = tuple(delim_runs.keys())[tuple(delim_runs.keys()).index(delim_run) + 1] + except IndexError: + next_delim_run = None - middle = None - - if len(open_syntax) != len(syntax): - if len(open_syntax) < len(syntax) and opens: - # since we are detecting a previous open, we are expanding the em span to the left - # so we should check if we're covering additional chars that we don't cross an - # existing span border - if not self.body_crosses_span_borders(opens[-1], open): - middle = open - - open = opens.pop(-1) - open_offset = unused_opens.pop(open, 0) - open_start = open.start() + open_offset - elif len(open_syntax) > len(syntax) and unused_closes: - # check if there is a previous closing delim run in the current body - # since this is already within the body we don't need to do a cross-span border check - # as we're not expanding into new ground and that is covered later - middle = next((i for i in unused_closes if open.end() < i.start() < delim_run.start()), None) + if next_delim_run is None: + # if there is no follow up delimiter run then no point leaving this unused. Process now + pass + elif len(open_syntax) < len(syntax) and ( + # if this run can be an opener, but the next run won't close both of them + (left and not delim_runs[next_delim_run][1]) + # if the next run is not an opener and won't consume this run + and not delim_runs[next_delim_run][0] + ): + pass + elif len(open_syntax) > len(syntax) and ( + # if this run can be an closer, but the next run is not a fresh opener + (right and not delim_runs[next_delim_run][0]) + # if the next run is not a closer + and not delim_runs[next_delim_run][1] + ): + pass + elif delim_runs[next_delim_run][1] and len(open_syntax) == len(next_delim_run.group(1)): + # of the next run is a closer and matches the length of the opener then that is probably + # a better closer than this run - eg: **foo*bar** or *foo**bar* + opens[em_type].append(open) + continue else: - try: - next_delim_run = tuple(delim_runs.keys())[tuple(delim_runs.keys()).index(delim_run) + 1] - except IndexError: - next_delim_run = None - - if next_delim_run is None: - # if there is no follow up delimiter run then no point leaving this unused. 
Process now - pass - elif len(open_syntax) < len(syntax) and ( - # if this run can be an opener, but the next run won't close both of them - (left and not delim_runs[next_delim_run][1]) - # if the next run is not an opener and won't consume this run - and not delim_runs[next_delim_run][0] - ): - pass - elif len(open_syntax) > len(syntax) and ( - # if this run can be an closer, but the next run is not a fresh opener - (right and not delim_runs[next_delim_run][0]) - # if the next run is not a closer - and not delim_runs[next_delim_run][1] - ): - pass - elif delim_runs[next_delim_run][1] and len(open_syntax) == len(next_delim_run.group(1)): - # of the next run is a closer and matches the length of the opener then that is probably - # a better closer than this run - eg: **foo*bar** or *foo**bar* - opens.append(open) - continue + # if there are no unused opens or closes to use up then this is just imbalanced + # mark as unused and leave for later processing + unused_opens[em_type][open] = open_offset + opens[em_type].append(open) + if left: + unused_opens[em_type][delim_run] = 0 + opens[em_type].append(delim_run) else: - # if there are no unused opens or closes to use up then this is just imbalanced - # mark as unused and leave for later processing - unused_opens[open] = open_offset - opens.append(open) - if left: - unused_opens[delim_run] = 0 - opens.append(delim_run) - else: - unused_closes.append(delim_run) - continue - - # ensure the body does not cross span borders - if self.body_crosses_span_borders(open, delim_run): - continue + unused_closes[em_type].append(delim_run) + continue - # add all the text leading up to the opening delimiter - tokens.append(delim_run.string[index: open_start]) + # add all the text leading up to the opening delimiter + tokens.append(delim_run.string[index: open_start]) - span, close_syntax_used_chars = self.process_span(open, delim_run, open_offset, middle) - tokens.extend(span) - if close_syntax_used_chars < len(syntax): - # if we didn't use up the entire closing delimiter mark it as unused - unused_opens[delim_run] = close_syntax_used_chars - opens.append(delim_run) + span, close_syntax_used_chars = self.process_span(open, delim_run, open_offset, middle) + tokens.extend(span) + if close_syntax_used_chars < len(syntax): + # if we didn't use up the entire closing delimiter mark it as unused + unused_opens[em_type][delim_run] = close_syntax_used_chars + opens[em_type].append(delim_run) - # Move index to end of the used delim run - index = delim_run.start() + close_syntax_used_chars + # Move index to end of the used delim run + index = delim_run.start() + close_syntax_used_chars - if index < len(text): - tokens.append(text[index:]) + if index < len(text): + tokens.append(text[index:]) - text = ''.join(tokens) + text = ''.join(tokens) return text From e8e7ced7feea5d2cea53f4ffc19aa7d0dda6c6b6 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 7 Dec 2025 16:01:27 +0000 Subject: [PATCH 5/9] Acheive near complete GFM compliance --- lib/markdown2.py | 69 +++----- test/tm-cases/gfm_emphasis.html | 261 ++++++++++++++++++++++++++++ test/tm-cases/gfm_emphasis.text | 260 +++++++++++++++++++++++++++ test/tm-cases/hash_html_blocks.html | 3 - 4 files changed, 544 insertions(+), 49 deletions(-) create mode 100644 test/tm-cases/gfm_emphasis.html create mode 100644 test/tm-cases/gfm_emphasis.text diff --git a/lib/markdown2.py b/lib/markdown2.py index 9099a693..95d5a405 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2046,35 +2046,7 @@ def _encode_code(self, text: str) -> 
str: @mark_stage(Stage.ITALIC_AND_BOLD) def _do_italics_and_bold(self, text: str) -> str: - def sub(match: re.Match): - ''' - regex sub function that checks that the match isn't matching across spans. - The span shouldn't be across a closing or opening HTML tag, although spans within - the span is acceptable. - ''' - contents: str = match.group(2) - # the strong re also checks for leading em chars, so the match may cover some additional text - prefix = match.string[match.start(): match.regs[1][0]] - # look for all possible span HTML tags - for tag in re.findall(rf'abcdef_`, which is across 2 spans - close_index = contents.find(f'{contents}' - - # must go first: - # text = self._strong_re.sub(sub, text) - # text = self._em_re.sub(sub, text) - iab = ItalicAndBoldProcessor2(self, None) + iab = GFMItalicAndBoldProcessor(self, None) if iab.test(text): text = iab.run(text) return text @@ -2590,8 +2562,8 @@ def test(self, text): return self.hash_table and re.search(r'md5-[0-9a-z]{32}', text) -class ItalicAndBoldProcessor2(Extra): - name = 'iabp-2' +class GFMItalicAndBoldProcessor(Extra): + name = 'gfm-italic-and-bold-processor' order = (Stage.ITALIC_AND_BOLD,), tuple() def run(self, text): @@ -2649,6 +2621,7 @@ def run(self, text): middle = None + # if the delimiter runs don't match then we need to figure out how to resolve this if len(open_syntax) != len(syntax): if len(open_syntax) < len(syntax) and opens[em_type]: # since we are detecting a previous open, we are expanding the em span to the left @@ -2659,7 +2632,12 @@ def run(self, text): open = opens[em_type].pop(-1) open_offset = unused_opens[em_type].pop(open, 0) + open_syntax = open.group(1)[open_offset:] open_start = open.start() + open_offset + + if len(open_syntax) == len(syntax): + # if it turns out the previous open is a perfect match then ignore the middle part + middle = None elif len(open_syntax) > len(syntax) and unused_closes[em_type]: # check if there is a previous closing delim run in the current body # since this is already within the body we don't need to do a cross-span border check @@ -2676,7 +2654,10 @@ def run(self, text): pass elif len(open_syntax) < len(syntax) and ( # if this run can be an opener, but the next run won't close both of them - (left and not delim_runs[next_delim_run][1]) + (left and ( + not delim_runs[next_delim_run][1] + or len(next_delim_run.group(1)) < len(open_syntax) + len(syntax) + )) # if the next run is not an opener and won't consume this run and not delim_runs[next_delim_run][0] ): @@ -2688,11 +2669,10 @@ def run(self, text): and not delim_runs[next_delim_run][1] ): pass - elif delim_runs[next_delim_run][1] and len(open_syntax) == len(next_delim_run.group(1)): - # of the next run is a closer and matches the length of the opener then that is probably - # a better closer than this run - eg: **foo*bar** or *foo**bar* - opens[em_type].append(open) - continue + elif len(open_syntax) < len(syntax) and len(syntax) >= 3: + # if closing syntax is bigger and its >= three long then focus on closing any + # open em spans + pass else: # if there are no unused opens or closes to use up then this is just imbalanced # mark as unused and leave for later processing @@ -2743,16 +2723,16 @@ def process_span( close_syntax = close.group(1) # calculate what em type the inner and outer emphasis is - outer_syntax_length = min(3, min(len(open_syntax), len(close_syntax))) + outer_syntax_length = min(len(open_syntax), len(close_syntax)) inner_syntax_length = min(max(len(open_syntax), len(close_syntax)), len(middle_syntax)) 
if middle else 0 # add anything from the opening syntax that will not be consumed # eg: **one* tokens.append(open_syntax[:-(outer_syntax_length + inner_syntax_length)]) - if outer_syntax_length == 3: - tokens.append('') - else: - tokens.append(f'<{"strong" if outer_syntax_length == 2 else "em"}>') + tags = [] + tags += [''] * (outer_syntax_length % 2) + tags += [''] * (outer_syntax_length // 2) + tokens.append(''.join(tags)) if middle: # outer_tag = 'strong' if outer_syntax_length == 2 else 'em' @@ -2779,10 +2759,7 @@ def process_span( # if no middle em then it's easy. Just add the whole text body tokens.append(close.string[open.end(): close.start()]) - if outer_syntax_length == 3: - tokens.append('') - else: - tokens.append(f'') + tokens.append(''.join(reversed(tags)).replace('<', 'foo bar

+ +

a * foo bar*

+ +

a*"foo"*

+ +
    +
  • a *
  • +
+ +

foobar

+ +

5678

+ +

пристанямстремятся

+ +

aa_"bb"_cc

+ +

foo-(bar)

+ +

_foo*

+ +

*foo bar *

+ +

*foo bar +*

+ +

*(*foo)

+ +

(foo)

+ +

foobar

+ +

_foo bar _

+ +

_(_foo)

+ +

(foo)

+ +

foobar

+ +

пристанямстремятся

+ +

foobarbaz

+ +

(bar).

+ +

foo bar

+ +

** foo bar**

+ +

a**"foo"**

+ +

foobar

+ +

foo bar

+ +

__ foo bar__

+ +

__ +foo bar__

+ +

a__"foo"__

+ +

foobar

+ +

5678

+ +

пристанямстремятся

+ +

foo, bar, baz

+ +

foo-(bar)

+ +

**foo bar **

+ +

**(**foo)

+ +

(foo)

+ +

Gomphocarpus (Gomphocarpus physocarpus, syn. +Asclepias physocarpa)

+ +

foo "bar" foo

+ +

foobar

+ +

__foo bar __

+ +

__(__foo)

+ +

(foo)

+ +

foobar

+ +

пристанямстремятся

+ +

foobarbaz

+ +

(bar).

+ +

foo bar

+ +

foo +bar

+ +

foo bar baz

+ +

foo bar baz

+ +

foo bar

+ +

foo bar

+ +

foo bar baz

+ +

foobarbaz

+ +

foobar

+ +

foo bar

+ +

foo bar

+ +

foobar

+ +

foobarbaz

+ +

foobar***baz

+ +

foo bar baz bim bop

+ +

foo bar

+ +

** is not an empty emphasis

+ +

**** is not an empty strong emphasis

+ +

foo bar

+ +

foo +bar

+ +

foo bar baz

+ +

foo bar baz

+ +

foo bar

+ +

foo bar

+ +

foo bar baz

+ +

foobarbaz

+ +

foo bar

+ +

foo bar

+ +

foo bar baz +bim bop

+ +

foo bar

+ +

__ is not an empty emphasis

+ +

____ is not an empty strong emphasis

+ +

foo ***

+ +

foo *

+ +

foo _

+ +

foo *****

+ +

foo *

+ +

foo _

+ +

*foo

+ +

foo*

+ +

*foo

+ +

***foo

+ +

foo*

+ +

foo***

+ +

foo ___

+ +

foo _

+ +

foo *

+ +

foo _____

+ +

foo _

+ +

foo *

+ +

_foo

+ +

foo_

+ +

_foo

+ +

___foo

+ +

foo_

+ +

foo___

+ +

foo

+ +

foo

+ +

foo

+ +

foo

+ +

foo

+ +

foo

+ +

foo

+ +

foo

+ +

foo

+ +

foo _bar baz_

+ +

foo bar *baz bim bam

+ +

**foo bar baz

+ +

*foo bar baz

+ +

*bar*

+ +

_foo bar_

+ +

*

+ +

**

+ +

__

+ +

a *

+ +

a _

+ +

**ahttp://foo.bar/?q=**

+ +

__ahttp://foo.bar/?q=__

+ +

foo*bar

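[Editorial note] The gfm_emphasis.html fixtures above track the GFM spec's emphasis examples one-to-one. A quick spot-check through the public API — a sketch assuming this branch is installed; the expected strings are the GFM-spec outputs the fixtures encode:

```python
import markdown2

md = markdown2.Markdown()
cases = {
    '*foo bar*': '<p><em>foo bar</em></p>',
    '*foo **bar** baz*': '<p><em>foo <strong>bar</strong> baz</em></p>',
    '***foo** bar*': '<p><em><strong>foo</strong> bar</em></p>',
}
for source, expected in cases.items():
    # markdown2 appends trailing newlines; strip before comparing
    assert md.convert(source).strip() == expected, source
```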
diff --git a/test/tm-cases/gfm_emphasis.text b/test/tm-cases/gfm_emphasis.text new file mode 100644 index 00000000..7b88c80c --- /dev/null +++ b/test/tm-cases/gfm_emphasis.text @@ -0,0 +1,260 @@ +*foo bar* + +a * foo bar* + +a*"foo"* + +* a * + +foo*bar* + +5*6*78 + +пристаням_стремятся_ + +aa_"bb"_cc + +foo-_(bar)_ + +_foo* + +*foo bar * + +*foo bar +* + +*(*foo) + +*(*foo*)* + +*foo*bar + +_foo bar _ + +_(_foo) + +_(_foo_)_ + +_foo_bar + +_пристаням_стремятся + +_foo_bar_baz_ + +_(bar)_. + +**foo bar** + +** foo bar** + +a**"foo"** + +foo**bar** + +__foo bar__ + +__ foo bar__ + +__ +foo bar__ + +a__"foo"__ + +foo__bar__ + +5__6__78 + +пристаням__стремятся__ + +__foo, __bar__, baz__ + +foo-__(bar)__ + +**foo bar ** + +**(**foo) + +*(**foo**)* + +**Gomphocarpus (*Gomphocarpus physocarpus*, syn. +*Asclepias physocarpa*)** + +**foo "*bar*" foo** + +**foo**bar + +__foo bar __ + +__(__foo) + +_(__foo__)_ + +__foo__bar + +__пристаням__стремятся + +__foo__bar__baz__ + +__(bar)__. + +*foo [bar](/url)* + +*foo +bar* + +_foo __bar__ baz_ + +_foo _bar_ baz_ + +__foo_ bar_ + +*foo *bar** + +*foo **bar** baz* + +*foo**bar**baz* + +*foo**bar* + +***foo** bar* + +*foo **bar*** + +*foo**bar*** + +foo***bar***baz + +foo******bar*********baz + +*foo **bar *baz* bim** bop* + +*foo [*bar*](/url)* + + +** is not an empty emphasis + +**** is not an empty strong emphasis + +**foo [bar](/url)** + +**foo +bar** + +__foo _bar_ baz__ + +__foo __bar__ baz__ + +____foo__ bar__ + +**foo **bar**** + +**foo *bar* baz** + +**foo*bar*baz** + +***foo* bar** + +**foo *bar*** + +**foo *bar **baz** +bim* bop** + +**foo [*bar*](/url)** + +__ is not an empty emphasis + +____ is not an empty strong emphasis + +foo *** + +foo *\** + +foo *_* + +foo ***** + +foo **\*** + +foo **_** + +**foo* + +*foo** + +***foo** + +****foo* + +**foo*** + +*foo**** + +foo ___ + +foo _\__ + +foo _*_ + +foo _____ + +foo __\___ + +foo __*__ + +__foo_ + +_foo__ + +___foo__ + +____foo_ + +__foo___ + +_foo____ + +**foo** + +*_foo_* + +__foo__ + +_*foo*_ + +****foo**** + +____foo____ + +******foo****** + +***foo*** + +_____foo_____ + +*foo _bar* baz_ + +*foo __bar *baz bim__ bam* + +**foo **bar baz** + +*foo *bar baz* + +*[bar*](/url) + +_foo [bar_](/url) + +* + +** + +__ + +*a `*`* + +_a `_`_ + +**a + +__a + +**foo*bar** \ No newline at end of file diff --git a/test/tm-cases/hash_html_blocks.html b/test/tm-cases/hash_html_blocks.html index 310fe3da..f4a20b0f 100644 --- a/test/tm-cases/hash_html_blocks.html +++ b/test/tm-cases/hash_html_blocks.html @@ -1,9 +1,6 @@

Archons of the Colophon

- -

by Paco Xander Nathan

-
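[Editorial note] The next patch rebases CodeFriendly and MiddleWordEm onto the new GFM processor. Both lean on markdown2's placeholder-hashing trick: literal delimiters are swapped for a unique `md5-`-prefixed token before the emphasis stage runs, then swapped back afterwards. A rough sketch of the round trip — the library's real `_hash_text` is salted, so this stand-in only reproduces the `md5-` + 32-hex token shape that the `test()` methods grep for:

```python
from hashlib import sha256

def _hash_text(s: str) -> str:
    # stand-in for markdown2's salted helper; only the token shape matters here
    return 'md5-' + sha256(s.encode('utf-8')).hexdigest()[:32]

HASH_TABLE = {
    _hash_text('code-friendly__'): '__',
    _hash_text('code-friendly_'): '_',
}

def protect(text: str) -> str:
    # hide literal underscores so the italics/bold stage cannot consume them
    text = text.replace('__', _hash_text('code-friendly__'))
    return text.replace('_', _hash_text('code-friendly_'))

def restore(text: str) -> str:
    # undo the substitution once the emphasis stage has run
    for token, literal in HASH_TABLE.items():
        text = text.replace(token, literal)
    return text

assert restore(protect('a_b__c')) == 'a_b__c'
```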
From 5988b0970137715d6f66cab766b74577ab742c43 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 7 Dec 2025 17:14:43 +0000 Subject: [PATCH 6/9] Refactor inheritants of original IABP to use new GFM variant. Also refactor the GFM class to be more readable --- lib/markdown2.py | 290 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 206 insertions(+), 84 deletions(-) diff --git a/lib/markdown2.py b/lib/markdown2.py index 95d5a405..c4c2813f 100755 --- a/lib/markdown2.py +++ b/lib/markdown2.py @@ -2563,6 +2563,10 @@ def test(self, text): class GFMItalicAndBoldProcessor(Extra): + ''' + An upgraded version of the `ItalicAndBoldProcessor` that covers far more edge cases and gets close + to Github Flavoured Markdown compliance. + ''' name = 'gfm-italic-and-bold-processor' order = (Stage.ITALIC_AND_BOLD,), tuple() @@ -2572,24 +2576,39 @@ def run(self, text): nesting = False opens = {'*': [], '_': []} + '''Mapping of em type to a list of opening runs of that em type''' unused_opens = {'*': {}, '_': {}} + ''' + Mapping of em type to another mapping of unused opening runs of that em type. + An unused run is one that has been skipped, or only partially consumed (eg: **foo*) and + could be consumed by another closing run. The inner mapping is a mapping of the + delimiter run to an offset number, which is the number of characters from that run that + have been consumed so far + ''' unused_closes = {'*': [], '_': []} + ''' + Mapping of em type to a list of closing delimiter runs that have not been fully consumed. + EG: *foo*bar* + ''' tokens = [] + '''List of processed spans of text that will be joined to form the new `text`''' index = 0 + '''Number of chars of `text` that has been processed so far''' - delim_runs = { - delim_run: self.delimiter_left_or_right(delim_run) - for delim_run in re.finditer(r'(\*+|_+)', text) - } + # do a quick scan for all delimiter runs, filtering for those that can open/close emphasis + delim_runs = OrderedDict() + for delim_run in re.finditer(r'(\*+|_+)', text): + left, right = self.delimiter_left_or_right(delim_run) + if left or right: + delim_runs[delim_run] = (left, right) for delim_run, (left, right) in delim_runs.items(): syntax = delim_run.group(1) em_type = syntax[0] - if not (left or right): - continue - + # if not a closing run, or there are no opens to consume if not right or not opens[em_type]: + # if it can also be an opening run if left: opens[em_type].append(delim_run) continue @@ -2623,75 +2642,42 @@ def run(self, text): # if the delimiter runs don't match then we need to figure out how to resolve this if len(open_syntax) != len(syntax): - if len(open_syntax) < len(syntax) and opens[em_type]: - # since we are detecting a previous open, we are expanding the em span to the left - # so we should check if we're covering additional chars that we don't cross an - # existing span border - if not self.body_crosses_span_borders(opens[em_type][-1], open): - middle = open - - open = opens[em_type].pop(-1) + has_middle = self.has_middle( + open, delim_run, opens[em_type], + unused_opens[em_type], unused_closes[em_type] + ) + + if has_middle is not False: + middle = has_middle[1] + if has_middle[0] != open: + # only re-assign and re-calc opening offsets if that run HAS changed + open = has_middle[0] open_offset = unused_opens[em_type].pop(open, 0) open_syntax = open.group(1)[open_offset:] open_start = open.start() + open_offset - - if len(open_syntax) == len(syntax): - # if it turns out the previous open is a perfect match then ignore the middle part - middle = 
None - elif len(open_syntax) > len(syntax) and unused_closes[em_type]: - # check if there is a previous closing delim run in the current body - # since this is already within the body we don't need to do a cross-span border check - # as we're not expanding into new ground and that is covered later - middle = next((i for i in unused_closes[em_type] if open.end() < i.start() < delim_run.start()), None) - else: - try: - next_delim_run = tuple(delim_runs.keys())[tuple(delim_runs.keys()).index(delim_run) + 1] - except IndexError: - next_delim_run = None - - if next_delim_run is None: - # if there is no follow up delimiter run then no point leaving this unused. Process now - pass - elif len(open_syntax) < len(syntax) and ( - # if this run can be an opener, but the next run won't close both of them - (left and ( - not delim_runs[next_delim_run][1] - or len(next_delim_run.group(1)) < len(open_syntax) + len(syntax) - )) - # if the next run is not an opener and won't consume this run - and not delim_runs[next_delim_run][0] - ): - pass - elif len(open_syntax) > len(syntax) and ( - # if this run can be an closer, but the next run is not a fresh opener - (right and not delim_runs[next_delim_run][0]) - # if the next run is not a closer - and not delim_runs[next_delim_run][1] - ): - pass - elif len(open_syntax) < len(syntax) and len(syntax) >= 3: - # if closing syntax is bigger and its >= three long then focus on closing any - # open em spans - pass + elif not self.should_process_imbalanced_delimiter_runs( + open, delim_run, delim_runs, unused_opens[em_type] + ): + # if we shouldn't process them now, save these opens for a future pass + unused_opens[em_type][open] = open_offset + opens[em_type].append(open) + if left: + unused_opens[em_type][delim_run] = 0 + opens[em_type].append(delim_run) else: - # if there are no unused opens or closes to use up then this is just imbalanced - # mark as unused and leave for later processing - unused_opens[em_type][open] = open_offset - opens[em_type].append(open) - if left: - unused_opens[em_type][delim_run] = 0 - opens[em_type].append(delim_run) - else: - unused_closes[em_type].append(delim_run) - continue + unused_closes[em_type].append(delim_run) + continue # add all the text leading up to the opening delimiter tokens.append(delim_run.string[index: open_start]) span, close_syntax_used_chars = self.process_span(open, delim_run, open_offset, middle) tokens.extend(span) - if close_syntax_used_chars < len(syntax): - # if we didn't use up the entire closing delimiter mark it as unused + + if close_syntax_used_chars is None: + close_syntax_used_chars = len(syntax) + elif close_syntax_used_chars < len(syntax): + # if we didn't use up the entire closing delimiter, mark it as unused unused_opens[em_type][delim_run] = close_syntax_used_chars opens[em_type].append(delim_run) @@ -2708,13 +2694,17 @@ def run(self, text): def process_span( self, open: re.Match, close: re.Match, offset: int, middle: Optional[re.Match] = None - ): + ) -> Tuple[List[str], Optional[int]]: ''' Args: open: the match against the opening delimiter run close: the match against the closing delimiter run offset: the number of chars from the opening delimiter that should be skipped when processing middle: an optional delimiter run in the middle of the span + + Returns: + A list of processed tokens, and then the number of chars from the closing syntax that were + consumed. 
If the latter item is None, then assume all chars were consumed ''' tokens = [] @@ -2769,6 +2759,108 @@ def process_span( return tokens, close_delim_chars_used + def has_middle( + self, open: re.Match, close: re.Match, opens: List[re.Match], + unused_opens: Dict[re.Match, int], unused_closes: List[re.Match] + ) -> Union[Tuple[re.Match, Optional[re.Match]], Literal[False]]: + ''' + Check if an emphasis span has a middle delimiter run, which may change the outer tags + + Args: + open: the current opening delimiter run + close: the closing delimiter run + opens: a list of all opening delimiter runs in the text + unused_opens: a mapping of unused opens within the text to their offset values + unused_closes: a list of unused closes within the text + + Returns: + False if there is no middle run. Otherwise, a tuple of the new opening run and the optional + middle span. The middle span may be None if it is invalid + ''' + open_offset = unused_opens.get(open, 0) + open_syntax = open.group(1)[open_offset:] + + syntax = close.group(1) + + if len(open_syntax) < len(syntax) and opens: + # expand the em span to the left, meaning we're covering additional chars. + # check we don't cross an existing span border + if not self.body_crosses_span_borders(opens[-1], open): + middle = open + + open = opens.pop(-1) + open_offset = unused_opens.pop(open, 0) + open_syntax = open.group(1)[open_offset:] + + if len(open_syntax) == len(syntax): + # if it turns out the previous open is a perfect match then ignore the middle part + # eg: **foo*bar** + middle = None + elif len(open_syntax) > len(syntax) and unused_closes: + # check if there is a previous closing delim run in the current body + # since this is already within the body we don't need to do a cross-span border check + # as we're not expanding into new ground and that is covered later + middle = next((i for i in unused_closes if open.end() < i.start() < close.start()), None) + else: + return False + + return open, middle + + def should_process_imbalanced_delimiter_runs( + self, open: re.Match, close: re.Match, + delim_runs: Dict[re.Match, Tuple[bool, bool]], + unused_opens: Dict[re.Match, int] + ): + ''' + Check if an imbalanced delimiter run should be consumed now, or left for a later pass + + Args: + open: the opening delimiter run + close: the closing delimiter run + delim_runs: a mapping of all of the delimiter runs in the text to a tuple of whether + they are opening or closing runs + unused_opens: a mapping of unused opens within the text to their offset values + ''' + open_offset = unused_opens.get(open, 0) + open_syntax = open.group(1)[open_offset:] + + syntax = close.group(1) + left, right = delim_runs[close] + + if len(open_syntax) < len(syntax) and len(syntax) >= 3: + # if closing syntax is bigger and its >= three long then focus on closing any + # open em spans + return True + + try: + next_delim_run = tuple(delim_runs.keys())[tuple(delim_runs.keys()).index(close) + 1] + except IndexError: + # if there is no follow up delimiter run then no point leaving this unused. 
Process now + return True + + if len(open_syntax) < len(syntax) and ( + # if this run can be an opener, but the next run won't close both of them + (left and ( + not delim_runs[next_delim_run][1] + or len(next_delim_run.group(1)) < len(open_syntax) + len(syntax) + )) + # if the next run is not an opener and won't consume this run + and not delim_runs[next_delim_run][0] + ): + return True + + if len(open_syntax) > len(syntax) and ( + # if this run can be a closer, but the next run is not a fresh opener + (right and not delim_runs[next_delim_run][0]) + # if the next run is not a closer + and not delim_runs[next_delim_run][1] + ): + return True + + # if there are no unused opens or closes to use up then this is just imbalanced. + # mark as unused and leave for later processing + return False + def delimiter_left_or_right(self, delim_run: re.Match): run = delim_run.string[max(0, delim_run.start() - 1): delim_run.end() + 1] syntax = delim_run.group(1) @@ -3269,37 +3361,60 @@ def run(self, text): return text -class CodeFriendly(ItalicAndBoldProcessor): +class CodeFriendly(GFMItalicAndBoldProcessor): ''' Disable _ and __ for em and strong. ''' name = 'code-friendly' + order = (Stage.ITALIC_AND_BOLD,), (Stage.ITALIC_AND_BOLD,) def __init__(self, md, options): super().__init__(md, options) # add a prefix to it so we don't interfere with escaped/hashed chars from other stages - self.hash_table[_hash_text(self.name + '_')] = '_' - self.hash_table[_hash_text(self.name + '__')] = '__' + self.hash_table = { + _hash_text(self.name + '_'): '_', + _hash_text(self.name + '__'): '__' + } - def sub(self, match: re.Match) -> str: - syntax = match.group(1) - # use match.regs because strong/em regex may include preceding text in the match as well - text: str = match.string[match.regs[1][0]: match.end()] - if '_' in syntax: + def run(self, text): + if self.md.order < Stage.ITALIC_AND_BOLD: + text = super().run(text) + else: + orig_text = '' + while orig_text != text: + orig_text = text + for key, substr in self.hash_table.items(): + text = text.replace(key, substr) + return text + + def process_span(self, open: re.Match, close: re.Match, offset: int, middle: re.Match | None = None): + text = open.string[open.start(): close.end()] + open_syntax = open.group(1)[offset:] + close_syntax = close.group(1) + + if len(open_syntax) > 2 or open_syntax != close_syntax: + return [text], None + + if '_' in open_syntax: # if using _this_ syntax, hash it to avoid processing, but don't hash the contents incase of nested syntax - text = text.replace(syntax, _hash_text(self.name + syntax)) - return text + text = text.replace(open_syntax, _hash_text(self.name + open_syntax)) + return [text], None elif '_' in text: # if the text within the bold/em markers contains '_' then hash those chars to protect them from em_re text = ( - text[len(syntax): -len(syntax)] + text[len(open_syntax): -len(close_syntax)] .replace('__', _hash_text(self.name + '__')) .replace('_', _hash_text(self.name + '_')) ) - return syntax + text + syntax - # if no underscores are present, the text is fine and we can just leave it alone - return super().sub(match) + return [open_syntax, text, close_syntax], None + + return super().process_span(open, close, offset, middle) + + def test(self, text: str): + return super().test(text) or ( + self.hash_table and re.search(r'md5-[0-9a-z]{32}', text) + ) class FencedCodeBlocks(Extra): @@ -3623,7 +3738,7 @@ def tags(self, lexer_name): return super().tags(lexer_name) -class MiddleWordEm(ItalicAndBoldProcessor): +class 
MiddleWordEm(GFMItalicAndBoldProcessor): ''' Allows or disallows emphasis syntax in the middle of words, defaulting to allow. Disabling this means that `this_text_here` will not be @@ -3666,8 +3781,10 @@ def __init__(self, md: Markdown, options: Union[dict, bool, None]): ) # add a prefix to it so we don't interfere with escaped/hashed chars from other stages - self.hash_table['_'] = _hash_text(self.name + '_') - self.hash_table['*'] = _hash_text(self.name + '*') + self.hash_table = { + '_': _hash_text(self.name + '_'), + '*': _hash_text(self.name + '*') + } def run(self, text): if self.options['allowed']: @@ -3692,6 +3809,11 @@ def sub(self, match: re.Match): syntax = match.group(1) return self.hash_table[syntax] + def test(self, text: str): + return super().test(text) or ( + self.hash_table and re.search(r'md5-[0-9a-z]{32}', text) + ) + class Numbering(Extra): ''' From 060d48da4c5cf7188968b9954fddac9d3cc3b748 Mon Sep 17 00:00:00 2001 From: Crozzers Date: Sun, 7 Dec 2025 17:22:43 +0000 Subject: [PATCH 7/9] Add issues 645, 652, 653 and 654 to gfm test case --- test/tm-cases/gfm_emphasis.html | 8 ++++++++ test/tm-cases/gfm_emphasis.text | 10 +++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/test/tm-cases/gfm_emphasis.html b/test/tm-cases/gfm_emphasis.html index 9078f0f9..0ed679d6 100644 --- a/test/tm-cases/gfm_emphasis.html +++ b/test/tm-cases/gfm_emphasis.html @@ -259,3 +259,11 @@
 <p>__a<a href="http://foo.bar/?q=__">http://foo.bar/?q=__</a></p>
 
 <p><strong>foo*bar</strong></p>
+
+<p>_foo <strong>bar</strong> baz._bim</p>
+
+<p><strong>__foo</strong> bar <strong>__baz</strong> bim <em>bam</em></p>
+
+<p><strong>foo<em>bar</em></strong></p>
+
+<p><strong><em>foo</em>bar</strong></p>
diff --git a/test/tm-cases/gfm_emphasis.text b/test/tm-cases/gfm_emphasis.text
index 7b88c80c..43e01891 100644
--- a/test/tm-cases/gfm_emphasis.text
+++ b/test/tm-cases/gfm_emphasis.text
@@ -257,4 +257,12 @@ _a `_`_
 
 __a<http://foo.bar/?q=__>
 
-**foo*bar**
\ No newline at end of file
+**foo*bar**
+
+_foo **bar** baz._bim
+
+**__foo** bar **__baz** bim *bam*
+
+**foo*bar***
+
+***foo*bar**
\ No newline at end of file
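
Reviewer note: the speed-up in the next commit is plain memoisation. The
flanking checks are pure functions of a small window of text around the
delimiter run, so they can be keyed on strings with functools.lru_cache
(re.Match objects would never produce cache hits, since every match object is
distinct). A standalone sketch of the pattern, using simplified stand-in
regexes rather than the patch's exact code:

    import functools
    import re

    @functools.lru_cache(maxsize=512)
    def classify_run(window: str, syntax: str):
        # pure function of (window, syntax): identical windows share one result
        syntax_re = re.escape(syntax)
        left = bool(re.match(r'.*%s\S' % syntax_re, window, re.S))
        right = bool(re.match(r'\S%s.*' % syntax_re, window, re.S))
        return left, right

    # a ReDoS-style input repeats the same tiny window thousands of times,
    # so after the first couple of calls every lookup is a cache hit
    text = '*a' * 10000
    for m in re.finditer(r'\*+', text):
        window = text[max(0, m.start() - 1): m.end() + 1]
        classify_run(window, m.group())

    print(classify_run.cache_info())  # hits vastly outnumber misses
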
From 3b19616b3a1bc9e83e3414b629d8258d70ebc233 Mon Sep 17 00:00:00 2001
From: Crozzers
Date: Sun, 7 Dec 2025 19:13:56 +0000
Subject: [PATCH 8/9] Improve performance in repetitive (ReDoS) scenarios by
 caching some IAB internal functions

---
 lib/markdown2.py | 102 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 73 insertions(+), 29 deletions(-)

diff --git a/lib/markdown2.py b/lib/markdown2.py
index c4c2813f..4197ae03 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -121,7 +121,7 @@
 from collections import defaultdict, OrderedDict
 from abc import ABC, abstractmethod
 import functools
-from collections.abc import Iterable
+from collections.abc import Iterable, Iterator
 from hashlib import sha256
 from random import random
 from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Type, TypedDict, Union
@@ -2044,11 +2044,13 @@ def _encode_code(self, text: str) -> str:
     )
 
     _em_re = re.compile(r"(\*|_)(?=\S)(.*?\S)\1", re.S)
+    _iab_processor = None
 
     @mark_stage(Stage.ITALIC_AND_BOLD)
     def _do_italics_and_bold(self, text: str) -> str:
-        iab = GFMItalicAndBoldProcessor(self, None)
-        if iab.test(text):
-            text = iab.run(text)
+        if not self._iab_processor:
+            self._iab_processor = GFMItalicAndBoldProcessor(self, None)
+        if self._iab_processor.test(text):
+            text = self._iab_processor.run(text)
         return text
 
     _block_quote_base = r'''
@@ -2595,14 +2597,13 @@ def run(self, text):
             index = 0
             '''Number of chars of `text` that has been processed so far'''
 
-            # do a quick scan for all delimiter runs, filtering for those that can open/close emphasis
-            delim_runs = OrderedDict()
-            for delim_run in re.finditer(r'(\*+|_+)', text):
-                left, right = self.delimiter_left_or_right(delim_run)
-                if left or right:
-                    delim_runs[delim_run] = (left, right)
+            delim_runs_iter = re.finditer(r'(\*+|_+)', text)
+            next_delim_run = self._next_run(delim_runs_iter)
+
+            while next_delim_run:
+                delim_run, left, right = next_delim_run
+                next_delim_run = self._next_run(delim_runs_iter)
 
-            for delim_run, (left, right) in delim_runs.items():
                 syntax = delim_run.group(1)
                 em_type = syntax[0]
 
@@ -2656,7 +2657,7 @@ def run(self, text):
                     open_syntax = open.group(1)[open_offset:]
                     open_start = open.start() + open_offset
                 elif not self.should_process_imbalanced_delimiter_runs(
-                    open, delim_run, delim_runs, unused_opens[em_type]
+                    open, delim_run, unused_opens[em_type], next_delim_run
                 ):
                     # if we shouldn't process them now, save these opens for a future pass
                     unused_opens[em_type][open] = open_offset
@@ -2808,8 +2809,8 @@ def has_middle(
     def should_process_imbalanced_delimiter_runs(
         self, open: re.Match, close: re.Match,
-        delim_runs: Dict[re.Match, Tuple[bool, bool]],
-        unused_opens: Dict[re.Match, int]
+        unused_opens: Dict[re.Match, int],
+        next_delim_run: Optional[Tuple[re.Match, Optional[re.Match], Optional[re.Match]]] = None
     ):
         '''
         Check if an imbalanced delimiter run should be consumed now, or left for a later pass
@@ -2817,43 +2818,39 @@ def should_process_imbalanced_delimiter_runs(
         Args:
             open: the opening delimiter run
             close: the closing delimiter run
-            delim_runs: a mapping of all of the delimiter runs in the text to a tuple of whether
-                they are opening or closing runs
             unused_opens: a mapping of unused opens within the text to their offset values
+            next_delim_run: the next delimiter run after the closing run
         '''
         open_offset = unused_opens.get(open, 0)
         open_syntax = open.group(1)[open_offset:]
         syntax = close.group(1)
-        left, right = delim_runs[close]
+        left, right = self.delimiter_left_or_right(close)
 
         if len(open_syntax) < len(syntax) and len(syntax) >= 3:
             # if the closing syntax is bigger and it's at least three chars long then focus on
             # closing any open em spans
             return True
 
-        try:
-            next_delim_run = tuple(delim_runs.keys())[tuple(delim_runs.keys()).index(close) + 1]
-        except IndexError:
-            # if there is no follow up delimiter run then no point leaving this unused. Process now
+        if next_delim_run is None:
             return True
 
         if len(open_syntax) < len(syntax) and (
             # if this run can be an opener, but the next run won't close both of them
             (left and (
-                not delim_runs[next_delim_run][1]
-                or len(next_delim_run.group(1)) < len(open_syntax) + len(syntax)
+                not next_delim_run[2]
+                or len(next_delim_run[0].group(1)) < len(open_syntax) + len(syntax)
             ))
             # if the next run is not an opener and won't consume this run
-            and not delim_runs[next_delim_run][0]
+            and not next_delim_run[1]
         ):
             return True
 
         if len(open_syntax) > len(syntax) and (
             # if this run can be a closer, but the next run is not a fresh opener
-            (right and not delim_runs[next_delim_run][0])
+            (right and not next_delim_run[1])
             # if the next run is not a closer
-            and not delim_runs[next_delim_run][1]
+            and not next_delim_run[2]
        ):
             return True
 
@@ -2862,8 +2859,22 @@ def should_process_imbalanced_delimiter_runs(
         return False
 
     def delimiter_left_or_right(self, delim_run: re.Match):
+        '''
+        Determine whether a delimiter run is left and/or right flanking
+
+        Returns:
+            Tuple whose two items are truthy when the run is left and right flanking respectively
+        '''
         run = delim_run.string[max(0, delim_run.start() - 1): delim_run.end() + 1]
-        syntax = delim_run.group(1)
+
+        return self._delimiter_left_or_right(run, delim_run.group(1))
+
+    @functools.lru_cache(maxsize=512)
+    def _delimiter_left_or_right(self, run: str, syntax: str):
+        '''
+        Cached version of `delimiter_left_or_right` that massively speeds things up when dealing
+        with many repetitive delimiter runs - eg: in a ReDoS scenario
+        '''
         syntax_re = syntax.replace('*', r'\*')
 
         left = (
@@ -2891,12 +2902,45 @@ def delimiter_left_or_right(self, delim_run: re.Match):
         return left, right
 
     def body_crosses_span_borders(self, open: re.Match, close: re.Match):
-        for tag in re.findall(rf' 1 or text.count('_') > 1

From 749c9cb19800a3bef267d99a6025d948948bac11 Mon Sep 17 00:00:00 2001
From: Crozzers
Date: Sun, 7 Dec 2025 19:16:04 +0000
Subject: [PATCH 9/9] Fix Python typing syntax error

---
 lib/markdown2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/markdown2.py b/lib/markdown2.py
index 4197ae03..6af7c929 100755
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
@@ -3432,7 +3432,7 @@ def run(self, text):
                     text = text.replace(key, substr)
         return text
 
-    def process_span(self, open: re.Match, close: re.Match, offset: int, middle: re.Match | None = None):
+    def process_span(self, open: re.Match, close: re.Match, offset: int, middle: Optional[re.Match] = None):
         text = open.string[open.start(): close.end()]
         open_syntax = open.group(1)[offset:]
         close_syntax = close.group(1)
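
Note on [PATCH 9/9]: the `X | None` union syntax (PEP 604) is only valid at
runtime on Python 3.10+, and function annotations are evaluated eagerly at
def-time unless `from __future__ import annotations` is active, so the original
spelling breaks imports on older interpreters. A minimal illustration:

    import re
    from typing import Optional

    # On Python 3.9 and older, defining this raises at import time:
    #   TypeError: unsupported operand type(s) for |: 'type' and 'NoneType'
    # def process_span(middle: re.Match | None = None): ...

    # The typing.Optional spelling evaluates fine on every supported version:
    def process_span(middle: Optional[re.Match] = None) -> None:
        ...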