Skip to content

Commit 5e401ca

Browse files
authored
Merge pull request #20 from edx/bmedx/grouping_in_reports
Add a unique report_group_id to found groups in reports
2 parents 81995d2 + ac2c79d commit 5e401ca

File tree

4 files changed

+204
-28
lines changed

4 files changed

+204
-28
lines changed

code_annotations/base.py

Lines changed: 91 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,20 @@ def _check_results_choices(self, annotation):
362362
'No choices found for "{}". Expected one of {}.'.format(token, self.config.choices[token])
363363
)
364364

365+
def _get_group_children(self):
366+
"""
367+
Create a list of all annotation tokens that are part of a group.
368+
369+
Returns:
370+
List of annotation tokens that are configured to be in groups
371+
"""
372+
group_children = []
373+
374+
for group in self.config.groups:
375+
group_children.extend(self.config.groups[group])
376+
377+
return group_children
378+
365379
def _get_group_for_token(self, token):
366380
"""
367381
Find out which group, if any, an annotation token belongs to.
@@ -391,12 +405,7 @@ def check_results(self, all_results):
391405
if self.config.verbosity >= 2:
392406
pprint.pprint(all_results, indent=3)
393407

394-
# This is used to quickly find out if a token is a member of a group
395-
group_children = []
396-
397-
# Build a big list of all tokens that are part of a group
398-
for group in self.config.groups:
399-
group_children.extend(self.config.groups[group])
408+
group_children = self._get_group_children()
400409

401410
# Spin through the search results
402411
for filename in all_results:
@@ -434,12 +443,6 @@ def check_results(self, all_results):
434443
current_group
435444
))
436445
found_group_members.append(token)
437-
438-
# If we have all members, this group is done
439-
if len(found_group_members) == len(self.config.groups[current_group]):
440-
self.echo.echo_vv("Group complete!")
441-
current_group = None
442-
found_group_members = []
443446
else:
444447
if token in group_children:
445448
current_group = self._get_group_for_token(token)
@@ -456,6 +459,12 @@ def check_results(self, all_results):
456459
current_group, token, annotation['line_number'])
457460
)
458461

462+
# If we have all members, this group is done
463+
if current_group and len(found_group_members) == len(self.config.groups[current_group]):
464+
self.echo.echo_vv("Group complete!")
465+
current_group = None
466+
found_group_members = []
467+
459468
if current_group:
460469
self.errors.append('File finished with an incomplete group {}!'.format(current_group))
461470

@@ -494,6 +503,73 @@ def search(self):
494503
"""
495504
pass # pragma: no cover
496505

506+
def _format_results_for_report(self, all_results):
507+
"""
508+
Format the given results dict for reporting purposes.
509+
510+
Args:
511+
all_results: Dict of all results found in a search
512+
513+
Returns:
514+
Dict of results arranged for reporting
515+
"""
516+
group_children = self._get_group_children()
517+
formatted_results = {}
518+
current_group_id = 0
519+
520+
for filename in all_results:
521+
self.echo.echo_vv("report_format: formatting {}".format(filename))
522+
formatted_results[filename] = []
523+
current_group = None
524+
525+
found_group_members = []
526+
527+
for annotation in all_results[filename]:
528+
token = annotation['annotation_token']
529+
self.echo.echo_vvv("report_format: formatting annotation token {}".format(token))
530+
531+
if current_group:
532+
if token not in self.config.groups[current_group]:
533+
self.echo.echo_vv(
534+
"report_format: {} is not a group member, finishing group id {}".format(
535+
token,
536+
current_group_id
537+
)
538+
)
539+
current_group = None
540+
found_group_members = []
541+
formatted_results[filename].append(annotation)
542+
else:
543+
self.echo.echo_vv("report_format: Adding {} to group id {}".format(
544+
token,
545+
current_group_id
546+
))
547+
annotation['report_group_id'] = current_group_id
548+
formatted_results[filename].append(annotation)
549+
found_group_members.append(token)
550+
else:
551+
if token in group_children:
552+
current_group = self._get_group_for_token(token)
553+
current_group_id += 1
554+
found_group_members = [token]
555+
annotation['report_group_id'] = current_group_id
556+
formatted_results[filename].append(annotation)
557+
558+
self.echo.echo_vv('Starting group id {} for "{}" token "{}", line {}'.format(
559+
current_group_id, current_group, token, annotation['line_number'])
560+
)
561+
else:
562+
self.echo.echo_vv('Adding single token {}.'.format(token))
563+
formatted_results[filename].append(annotation)
564+
565+
# If we have all members, this group is done
566+
if current_group and len(found_group_members) == len(self.config.groups[current_group]):
567+
self.echo.echo_vv("report_format: Group complete!")
568+
current_group = None
569+
found_group_members = []
570+
571+
return formatted_results
572+
497573
def report(self, all_results):
498574
"""
499575
Genrates the YAML report of all search results.
@@ -509,6 +585,8 @@ def report(self, all_results):
509585
now = datetime.datetime.now()
510586
report_filename = os.path.join(self.config.report_path, '{}.yaml'.format(now.strftime('%Y-%d-%m-%H-%M-%S')))
511587

588+
formatted_results = self._format_results_for_report(all_results)
589+
512590
self.echo("Generating report to {}".format(report_filename))
513591

514592
try:
@@ -518,6 +596,6 @@ def report(self, all_results):
518596
raise
519597

520598
with open(report_filename, 'w+') as report_file:
521-
yaml.dump(all_results, report_file, default_flow_style=False)
599+
yaml.dump(formatted_results, report_file, default_flow_style=False)
522600

523601
return report_filename

code_annotations/cli.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -130,14 +130,6 @@ def django_find_annotations(
130130
def static_find_annotations(config_file, source_path, report_path, verbosity, lint, report):
131131
"""
132132
Subcommand to find annotations via static file analysis.
133-
134-
Args:
135-
config_file: Path to the configuration file
136-
source_path: Location of the source code to search
137-
report_path: Location to write the report
138-
verbosity: Verbosity level for output
139-
lint: Boolean indicating whether or not to perform linting checks
140-
report Boolean indicating whether or not to write the report file
141133
"""
142134
try:
143135
start_time = datetime.datetime.now()

docs/static_search.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ annotations, grouped by file. Each annotation entry has the following keys:
4646
'annotation_data': 'This model contains no PII.', # The comment, or choices, found with the annotation token
4747
}
4848
49+
If an annotation is in a group, there will also be a `report_group_id`. This key is unique for each found group,
50+
allowing tools further down the toolchain to keep them together for presentation.
51+
4952
Extensions can also send back some additional data in an ``extra`` key, if desired. The Django Model Search Tool does
5053
this to return the Django app and model name.
5154

tests/test_base.py

Lines changed: 110 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""
22
Tests for code_annotations/base.py
33
"""
4+
from collections import OrderedDict
5+
46
import pytest
57

68
from code_annotations.base import AnnotationConfig, ConfigurationException
@@ -16,15 +18,11 @@ def test_get_group_for_token_missing_token():
1618
def test_get_group_for_token_multiple_groups():
1719
config = FakeConfig()
1820
config.groups = {
19-
'group1': [
20-
{'token1': None}
21-
],
22-
'group2': [
23-
{'token2': None, 'foo': None}
24-
]
21+
'group1': ['token1'],
22+
'group2': ['token2', 'foo']
2523
}
2624
search = FakeSearch(config)
27-
assert search._get_group_for_token('foo') is None # pylint: disable=protected-access
25+
assert search._get_group_for_token('foo') == 'group2' # pylint: disable=protected-access
2826

2927

3028
@pytest.mark.parametrize("test_config,expected_message", [
@@ -72,3 +70,108 @@ def test_annotation_configuration_errors(test_config, expected_message):
7270

7371
exc_msg = str(exception.value)
7472
assert expected_message in exc_msg
73+
74+
75+
def test_format_results_for_report():
76+
"""
77+
Test that report formatting puts annotations into groups correctly
78+
"""
79+
config = FakeConfig()
80+
config.echo.set_verbosity(3)
81+
config.groups = {
82+
'group1': ['token1'],
83+
'group2': ['token2', 'foo']
84+
}
85+
86+
search = FakeSearch(config)
87+
88+
# Create a fake result set for _format_results_for_report to work on
89+
fake_results = OrderedDict()
90+
91+
# First file has 6 annotations. expected_group_id is a special key for this test, allowing us to loop through
92+
# these below and know what group each result should be in.
93+
fake_results['foo/bar.py'] = [
94+
{
95+
'found_by': 'test',
96+
'filename': 'foo/bar.py',
97+
'line_number': 1,
98+
'annotation_token': 'token2',
99+
'annotation_data': 'file 1 annotation 1',
100+
'expected_group_id': 1
101+
},
102+
{
103+
'found_by': 'test',
104+
'filename': 'foo/bar.py',
105+
'line_number': 2,
106+
'annotation_token': 'foo',
107+
'annotation_data': 'file 1 annotation 2',
108+
'expected_group_id': 1
109+
},
110+
{
111+
'found_by': 'test',
112+
'filename': 'foo/bar.py',
113+
'line_number': 4,
114+
'annotation_token': 'not_in_a_group',
115+
'annotation_data': 'file 1 annotation 3',
116+
'expected_group_id': None
117+
},
118+
{
119+
'found_by': 'test',
120+
'filename': 'foo/bar.py',
121+
'line_number': 10,
122+
'annotation_token': 'token1',
123+
'annotation_data': 'file 1 annotation 4',
124+
'expected_group_id': 2
125+
},
126+
{
127+
'found_by': 'test',
128+
'filename': 'foo/bar.py',
129+
'line_number': 12,
130+
'annotation_token': 'token2',
131+
'annotation_data': 'file 1 annotation 5',
132+
'expected_group_id': 3
133+
},
134+
{
135+
'found_by': 'test',
136+
'filename': 'foo/bar.py',
137+
'line_number': 13,
138+
'annotation_token': 'foo',
139+
'annotation_data': 'file 1 annotation 6',
140+
'expected_group_id': 3
141+
},
142+
]
143+
144+
fake_results['foo/baz.py'] = [
145+
{
146+
'found_by': 'test',
147+
'filename': 'foo/bar.py',
148+
'line_number': 1,
149+
'annotation_token': 'token2',
150+
'annotation_data': 'file 2 annotation 1',
151+
'expected_group_id': 4
152+
},
153+
{
154+
'found_by': 'test',
155+
'filename': 'foo/bar.py',
156+
'line_number': 2,
157+
'annotation_token': 'foo',
158+
'annotation_data': 'file 1 annotation 2',
159+
'expected_group_id': 4
160+
}
161+
]
162+
163+
# Run the format function
164+
results = search._format_results_for_report(fake_results) # pylint: disable=protected-access
165+
166+
for filename in fake_results:
167+
for fake in fake_results[filename]:
168+
for formatted in results[filename]:
169+
# When we find the same annotation, make sure that grouping is correct
170+
if fake['annotation_data'] == formatted['annotation_data']:
171+
# Ungrouped annotations should not have the 'report_group_id' key
172+
if fake['expected_group_id'] is None:
173+
assert 'report_group_id' not in formatted
174+
# Otherwise it should match our expected value
175+
else:
176+
assert fake['expected_group_id'] == formatted['report_group_id']
177+
break

0 commit comments

Comments
 (0)