Skip to content

Commit 097db99

Browse files
committed
Add support for multiline annotations with single-line prefix ("#")
Multiline annotations were previously supported only inside multi-line comments (in Python: """..."""). This commit introduces multiline annotations for comments prefixed with single-line comment signs (in Python: "#"), for example "# .. pii: a multiline annotation" followed on the next line by "# that spans multiple lines." This makes it possible to wrap long comment lines more naturally, in particular in Python.
1 parent d686517 commit 097db99

File tree

6 files changed

+88
-20
lines changed

6 files changed

+88
-20
lines changed

CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,11 @@ Change Log
1111

1212
.. There should always be an "Unreleased" section for changes pending release.
1313
14+
[0.6.0] - 2020-08-27
15+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16+
17+
* Add support for multiline annotations for lines prefixed with single-line comment signs ("#")
18+
1419
[0.5.1] - 2020-08-25
1520
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1621

code_annotations/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
Extensible tools for parsing annotations in codebases.
33
"""
44

5-
__version__ = '0.5.1'
5+
__version__ = '0.6.0'

code_annotations/extensions/base.py

Lines changed: 50 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,23 +42,26 @@ class SimpleRegexAnnotationExtension(AnnotationExtension, metaclass=ABCMeta):
4242
# Javascript and Python extensions for examples.
4343
lang_comment_definition = None
4444

45-
"""
46-
This format string/regex finds all comments in the file. The format tokens will be replaced with the
47-
language-specific comment definitions defined in the sub-classes.
48-
49-
Returns two named values: multiline_comment and singleline_comment.
50-
"""
45+
# This format string/regex finds all comments in the file. The format tokens will be replaced with the
46+
# language-specific comment definitions defined in the sub-classes.
47+
#
48+
# Match groupdict will contain two named subgroups: 'comment' and 'prefixed_comment', of which at most
49+
# one will be non-None.
5150
comment_regex_fmt = r"""
52-
{multi_start} # start of the language-specific multi-line comment (ex. /*)
53-
(?P<multiline_comment>
54-
[\d\D]*? # capture all of the characters...
55-
)
56-
{multi_end} # until you find the end of the language-specific multi-line comment (ex. */)
57-
| # If you don't find any of those...
58-
{single} # start by finding the single-line comment token (ex. //)
59-
(?P<singleline_comment>
60-
.* # and capture all characters until the end of the line
61-
)
51+
{multi_start} # start of the language-specific multi-line comment (ex. /*)
52+
(?P<comment> # Look for a multiline comment
53+
[\d\D]*? # capture all of the characters...
54+
)
55+
{multi_end} # until you find the end of the language-specific multi-line comment (ex. */)
56+
| # If you don't find any of those...
57+
(?P<prefixed_comment> # Look for a group of single-line comments
58+
(?: # Non-capture mode
59+
{single} # start by finding the single-line comment token (ex. //)
60+
.* # and capture all characters until the end of the line
61+
\n? # followed by an optional newline
62+
\ * # and some empty space
63+
)* # multiple times
64+
)
6265
"""
6366

6467
def __init__(self, config, echo):
@@ -79,6 +82,10 @@ def __init__(self, config, echo):
7982
self.comment_regex_fmt.format(**self.lang_comment_definition),
8083
flags=re.VERBOSE
8184
)
85+
self.prefixed_comment_regex = re.compile(
86+
r"^ *{single}".format(**self.lang_comment_definition),
87+
flags=re.MULTILINE
88+
)
8289

8390
# Parent class will allow this class to populate self.strings_to_search via
8491
# calls to _add_annotation_token or _add_annotation_group for each configured
@@ -105,14 +112,14 @@ def search(self, file_handle):
105112
if any(anno in txt for anno in self.config.annotation_tokens):
106113
fname = clean_abs_path(file_handle.name, self.config.source_path)
107114

108-
# Iterate on all comments: both multi- and single-line.
115+
# Iterate over all comments, both prefixed and non-prefixed.
109116
for match in self.comment_regex.finditer(txt):
110117
# Get the line number by counting newlines + 1 (for the first line).
111118
# Note that this is the line number of the beginning of the comment, not the
112119
# annotation token itself.
113120
line = txt.count('\n', 0, match.start()) + 1
114-
# Should only be one match
115-
comment_content = match.groupdict()["multiline_comment"] or match.groupdict()["singleline_comment"]
121+
122+
comment_content = self._find_comment_content(match)
116123
for inner_match in self.query.finditer(comment_content):
117124
try:
118125
annotation_token = inner_match.group('token')
@@ -134,3 +141,27 @@ def search(self, file_handle):
134141
})
135142

136143
return found_annotations
144+
145+
def _find_comment_content(self, match):
146+
"""
147+
Return the comment content as text.
148+
149+
Args:
150+
match (re.Match): one of the matches of the self.comment_regex regular expression.
151+
"""
152+
comment_content = match.groupdict()["comment"]
153+
if comment_content:
154+
return comment_content
155+
156+
# Find single-line comments and strip comment tokens
157+
comment_content = match.groupdict()["prefixed_comment"]
158+
return self._strip_single_line_comment_tokens(comment_content)
159+
160+
def _strip_single_line_comment_tokens(self, content):
161+
"""
162+
Strip the leading single-line comment tokens from a comment text.
163+
164+
Args:
165+
content (str): token-prefixed multi-line comment string.
166+
"""
167+
return self.prefixed_comment_regex.sub("", content)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Docstring
2+
#.. pii: A long description that
3+
# spans multiple
4+
# lines
5+
# A comment that is not indented and not part of the above multi-line annotation
6+
#.. pii_types: id, name
7+
# Some comment that comes after the multiple-line annotation

tests/extensions/test_base_extensions.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,19 @@ def test_nothing_found():
2828
r = FakeExtension(config, VerboseEcho())
2929
with open('tests/extensions/base_test_files/empty.foo') as f:
3030
r.search(f)
31+
32+
33+
def test_strip_single_line_comment_tokens():
34+
config = FakeConfig()
35+
36+
extension = FakeExtension(config, VerboseEcho())
37+
text = """baz line1
38+
baz line2
39+
bazline3
40+
baz line4"""
41+
expected_result = """ line1
42+
line2
43+
line3
44+
line4"""
45+
# pylint: disable=protected-access
46+
assert expected_result == extension._strip_single_line_comment_tokens(text)

tests/extensions/test_extension_python.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,15 @@ def test_grouping_and_choice_failures(test_file, expected_exit_code, expected_me
7676
Multi-line and multi-paragraph.""")
7777
]
7878
),
79+
(
80+
'multiline_singlelinecomment.pyt',
81+
[
82+
('.. pii:', """A long description that
83+
spans multiple
84+
lines"""),
85+
('.. pii_types:', 'id, name'),
86+
]
87+
),
7988
])
8089
def test_multi_line_annotations(test_file, annotations):
8190
config = AnnotationConfig('tests/test_configurations/.annotations_test')

0 commit comments

Comments
 (0)