Skip to content

Commit d686517

Browse files
committed
[BD-21] Cleaner comment parsing
1 parent 12dc1a7 commit d686517

File tree

1 file changed

+22
-19
lines changed

1 file changed

+22
-19
lines changed

code_annotations/extensions/base.py

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,22 +42,24 @@ class SimpleRegexAnnotationExtension(AnnotationExtension, metaclass=ABCMeta):
4242
# Javascript and Python extensions for examples.
4343
lang_comment_definition = None
4444

45-
r"""
45+
"""
4646
This format string/regex finds all comments in the file. The format tokens will be replaced with the
4747
language-specific comment definitions defined in the sub-classes.
4848
49-
{multi_start} - start of the language-specific multi-line comment (ex. /*)
50-
([\d\D]*?) - capture all of the characters...
51-
{multi_end} - until you find the end of the language-specific multi-line comment (ex. */)
52-
| - If you don't find any of those...
53-
{single} - start by finding the single-line comment token (ex. //)
54-
(.*) - and capture all characters until the end of the line
55-
56-
Returns a 2-tuple of:
57-
- ("Comment text", None) in the case of a multi-line comment OR
58-
- (None, "Comment text") in the case of a single-line comment
49+
Each match exposes two named groups, multiline_comment and singleline_comment; only the group for the alternative that matched is set, and the other is None.
50+
"""
51+
comment_regex_fmt = r"""
52+
{multi_start} # start of the language-specific multi-line comment (ex. /*)
53+
(?P<multiline_comment>
54+
[\d\D]*? # capture all of the characters...
55+
)
56+
{multi_end} # until you find the end of the language-specific multi-line comment (ex. */)
57+
| # If you don't find any of those...
58+
{single} # start by finding the single-line comment token (ex. //)
59+
(?P<singleline_comment>
60+
.* # and capture all characters until the end of the line
61+
)
5962
"""
60-
comment_regex_fmt = r'{multi_start}([\d\D]*?){multi_end}|{single}(.*)'
6163

6264
def __init__(self, config, echo):
6365
"""
@@ -74,7 +76,8 @@ def __init__(self, config, echo):
7476

7577
# pylint: disable=not-a-mapping
7678
self.comment_regex = re.compile(
77-
self.comment_regex_fmt.format(**self.lang_comment_definition)
79+
self.comment_regex_fmt.format(**self.lang_comment_definition),
80+
flags=re.VERBOSE
7881
)
7982

8083
# Parent class will allow this class to populate self.strings_to_search via
@@ -102,15 +105,15 @@ def search(self, file_handle):
102105
if any(anno in txt for anno in self.config.annotation_tokens):
103106
fname = clean_abs_path(file_handle.name, self.config.source_path)
104107

108+
# Iterate on all comments: both multi- and single-line.
105109
for match in self.comment_regex.finditer(txt):
110+
# Get the line number by counting newlines + 1 (for the first line).
111+
# Note that this is the line number of the beginning of the comment, not the
112+
# annotation token itself.
113+
line = txt.count('\n', 0, match.start()) + 1
106114
# Only one of the two named groups is set for any given match
107-
comment_content = [item for item in match.groups() if item is not None][0]
115+
comment_content = match.groupdict()["multiline_comment"] or match.groupdict()["singleline_comment"]
108116
for inner_match in self.query.finditer(comment_content):
109-
# Get the line number by counting newlines + 1 (for the first line).
110-
# Note that this is the line number of the beginning of the comment, not the
111-
# annotation token itself.
112-
line = txt.count('\n', 0, match.start()) + 1
113-
114117
try:
115118
annotation_token = inner_match.group('token')
116119
annotation_data = inner_match.group('data')

0 commit comments

Comments
 (0)