@@ -42,23 +42,26 @@ class SimpleRegexAnnotationExtension(AnnotationExtension, metaclass=ABCMeta):
4242 # Javascript and Python extensions for examples.
4343 lang_comment_definition = None
4444
45- """
46- This format string/regex finds all comments in the file. The format tokens will be replaced with the
47- language-specific comment definitions defined in the sub-classes.
48-
49- Returns two named values: multiline_comment and singleline_comment.
50- """
# This format string/regex finds all comments in the file. The format tokens will be replaced with the
# language-specific comment definitions defined in the sub-classes.
#
# Match groupdict will contain two named subgroups: 'comment' and 'prefixed_comment', of which exactly
# one will be non-None for any given match.
#
# The group of single-line comments is quantified with "+" rather than "*": with "*" the second
# alternative also matches the empty string, so finditer() would yield a useless zero-length match at
# every position of the file that is not part of a comment.
comment_regex_fmt = r"""
    {multi_start}             # start of the language-specific multi-line comment (ex. /*)
    (?P<comment>              # Look for a multiline comment
        [\d\D]*?              # capture all of the characters...
    )
    {multi_end}               # until you find the end of the language-specific multi-line comment (ex. */)
    |                         # If you don't find any of those...
    (?P<prefixed_comment>     # Look for a group of single-line comments
        (?:                   # Non-capture mode
            {single}          # start by finding the single-line comment token (ex. //)
            .*                # and capture all characters until the end of the line
            \n?               # followed by an optional newline
            \ *               # and some empty space
        )+                    # one or more times
    )
"""
6366
6467 def __init__ (self , config , echo ):
@@ -79,6 +82,10 @@ def __init__(self, config, echo):
7982 self .comment_regex_fmt .format (** self .lang_comment_definition ),
8083 flags = re .VERBOSE
8184 )
85+ self .prefixed_comment_regex = re .compile (
86+ r"^ *{single}" .format (** self .lang_comment_definition ),
87+ flags = re .MULTILINE
88+ )
8289
8390 # Parent class will allow this class to populate self.strings_to_search via
8491 # calls to _add_annotation_token or _add_annotation_group for each configured
@@ -105,14 +112,14 @@ def search(self, file_handle):
105112 if any (anno in txt for anno in self .config .annotation_tokens ):
106113 fname = clean_abs_path (file_handle .name , self .config .source_path )
107114
108- # Iterate on all comments: both multi - and single-line .
115+ # Iterate on all comments: both prefixed - and non-prefixed .
109116 for match in self .comment_regex .finditer (txt ):
110117 # Get the line number by counting newlines + 1 (for the first line).
111118 # Note that this is the line number of the beginning of the comment, not the
112119 # annotation token itself.
113120 line = txt .count ('\n ' , 0 , match .start ()) + 1
114- # Should only be one match
115- comment_content = match . groupdict ()[ "multiline_comment" ] or match . groupdict ()[ "singleline_comment" ]
121+
122+ comment_content = self . _find_comment_content ( match )
116123 for inner_match in self .query .finditer (comment_content ):
117124 try :
118125 annotation_token = inner_match .group ('token' )
@@ -134,3 +141,27 @@ def search(self, file_handle):
134141 })
135142
136143 return found_annotations
144+
145+ def _find_comment_content (self , match ):
146+ """
147+ Return the comment content as text.
148+
149+ Args:
150+ match (sre.SRE_MATCH): one of the matches of the self.comment_regex regular expression.
151+ """
152+ comment_content = match .groupdict ()["comment" ]
153+ if comment_content :
154+ return comment_content
155+
156+ # Find single-line comments and strip comment tokens
157+ comment_content = match .groupdict ()["prefixed_comment" ]
158+ return self ._strip_single_line_comment_tokens (comment_content )
159+
160+ def _strip_single_line_comment_tokens (self , content ):
161+ """
162+ Strip the leading single-line comment tokens from a comment text.
163+
164+ Args:
165+ content (str): token-prefixed multi-line comment string.
166+ """
167+ return self .prefixed_comment_regex .sub ("" , content )
0 commit comments