From 2d52c17cabed4811e4eac46af2cfe775d46d540b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Sat, 6 Jun 2026 00:41:37 +0200 Subject: [PATCH 1/3] enhanced codeblock regex and made stricter requirements for valid codeblocks --- bot/exts/info/codeblock/_parsing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bot/exts/info/codeblock/_parsing.py b/bot/exts/info/codeblock/_parsing.py index abad09eef1..0408692295 100644 --- a/bot/exts/info/codeblock/_parsing.py +++ b/bot/exts/info/codeblock/_parsing.py @@ -35,11 +35,11 @@ fr""" (?P (?P[{''.join(_TICKS)}]) # Put all ticks into a character class within a group. - \2{{2}} # Match previous group 2 more times to ensure the same char. + \2* # Match previous group up to N more times to ensure the same char. ) - (?P[A-Za-z0-9\+\-\.]+\n)? # Optionally match a language specifier followed by a newline. + (?P[A-Za-z0-9+\-.]+\s)? # Optionally match a language specifier followed by a whitespace. (?P.+?) # Match the actual code within the block. - \1 # Match the same 3 ticks used at the start of the block. + \1 # Match the same N ticks used at the start of the block. """, re.DOTALL | re.VERBOSE ) @@ -86,9 +86,9 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None: for match in _RE_CODE_BLOCK.finditer(message): # Used to ensure non-matched groups have an empty string as the default value. groups = match.groupdict("") - language = groups["lang"].strip() # Strip the newline cause it's included in the group. + language = groups["lang"].strip() # Strip the whitespace cause it's included in the group. - if groups["tick"] == BACKTICK and language: + if groups["tick"] == BACKTICK and len(groups["ticks"]) == 3 and language and ("\n" in groups["lang"]): log.trace("Message has a valid code block with a language; returning None.") return None if has_lines(groups["code"], constants.CodeBlock.minimum_lines): From e23f230c85899ae40e7db2311c8447112cd136f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Sat, 6 Jun 2026 00:52:40 +0200 Subject: [PATCH 2/3] added empty newline for readability --- bot/exts/info/codeblock/_parsing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bot/exts/info/codeblock/_parsing.py b/bot/exts/info/codeblock/_parsing.py index 0408692295..e19934ba7a 100644 --- a/bot/exts/info/codeblock/_parsing.py +++ b/bot/exts/info/codeblock/_parsing.py @@ -91,6 +91,7 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None: if groups["tick"] == BACKTICK and len(groups["ticks"]) == 3 and language and ("\n" in groups["lang"]): log.trace("Message has a valid code block with a language; returning None.") return None + if has_lines(groups["code"], constants.CodeBlock.minimum_lines): code_block = CodeBlock(groups["code"], language, groups["tick"]) code_blocks.append(code_block) From 72a988b976eb3fa3f1a3fb2a8e1ddb25e6f5a7aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Sat, 6 Jun 2026 00:53:41 +0200 Subject: [PATCH 3/3] minor code cleanup --- bot/exts/info/codeblock/_parsing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bot/exts/info/codeblock/_parsing.py b/bot/exts/info/codeblock/_parsing.py index e19934ba7a..0569b1433e 100644 --- a/bot/exts/info/codeblock/_parsing.py +++ b/bot/exts/info/codeblock/_parsing.py @@ -182,7 +182,7 @@ def parse_bad_language(content: str) -> BadLanguage | None: ) -def _get_leading_spaces(content: str) -> int: +def _get_leading_spaces(content: str) -> int | None: """Return the number of spaces at the start of the first line in `content`.""" leading_spaces = 0 for char in content: @@ -190,6 +190,7 @@ def _get_leading_spaces(content: str) -> int: leading_spaces += 1 else: return leading_spaces + return None