Merge pull request #20 from edx/bmedx/grouping_in_reports

bmedx · web-flow · commit 5e401ca2c1d8 · 2019-02-04T16:38:35.000-05:00
Add a unique report_group_id to found groups in reports
diff --git a/code_annotations/base.py b/code_annotations/base.py
@@ -362,6 +362,20 @@ def _check_results_choices(self, annotation):
                 'No choices found for "{}". Expected one of {}.'.format(token, self.config.choices[token])
             )
 
+    def _get_group_children(self):
+        """
+        Create a list of all annotation tokens that are part of a group.
+
+        Returns:
+            List of annotation tokens that are configured to be in groups
+        """
+        group_children = []
+
+        for group in self.config.groups:
+            group_children.extend(self.config.groups[group])
+
+        return group_children
+
     def _get_group_for_token(self, token):
         """
         Find out which group, if any, an annotation token belongs to.
@@ -391,12 +405,7 @@ def check_results(self, all_results):
         if self.config.verbosity >= 2:
             pprint.pprint(all_results, indent=3)
 
-        # This is used to quickly find out if a token is a member of a group
-        group_children = []
-
-        # Build a big list of all tokens that are part of a group
-        for group in self.config.groups:
-            group_children.extend(self.config.groups[group])
+        group_children = self._get_group_children()
 
         # Spin through the search results
         for filename in all_results:
@@ -434,12 +443,6 @@ def check_results(self, all_results):
                             current_group
                         ))
                         found_group_members.append(token)
-
-                        # If we have all members, this group is done
-                        if len(found_group_members) == len(self.config.groups[current_group]):
-                            self.echo.echo_vv("Group complete!")
-                            current_group = None
-                            found_group_members = []
                 else:
                     if token in group_children:
                         current_group = self._get_group_for_token(token)
@@ -456,6 +459,12 @@ def check_results(self, all_results):
                             current_group, token, annotation['line_number'])
                         )
 
+                # If we have all members, this group is done
+                if current_group and len(found_group_members) == len(self.config.groups[current_group]):
+                    self.echo.echo_vv("Group complete!")
+                    current_group = None
+                    found_group_members = []
+
             if current_group:
                 self.errors.append('File finished with an incomplete group {}!'.format(current_group))
 
@@ -494,6 +503,73 @@ def search(self):
         """
         pass  # pragma: no cover
 
+    def _format_results_for_report(self, all_results):
+        """
+        Format the given results dict for reporting purposes.
+
+        Args:
+            all_results: Dict of all results found in a search
+
+        Returns:
+            Dict of results arranged for reporting
+        """
+        group_children = self._get_group_children()
+        formatted_results = {}
+        current_group_id = 0
+
+        for filename in all_results:
+            self.echo.echo_vv("report_format: formatting {}".format(filename))
+            formatted_results[filename] = []
+            current_group = None
+
+            found_group_members = []
+
+            for annotation in all_results[filename]:
+                token = annotation['annotation_token']
+                self.echo.echo_vvv("report_format: formatting annotation token {}".format(token))
+
+                if current_group:
+                    if token not in self.config.groups[current_group]:
+                        self.echo.echo_vv(
+                            "report_format: {} is not a group member, finishing group id {}".format(
+                                token,
+                                current_group_id
+                            )
+                        )
+                        current_group = None
+                        found_group_members = []
+                        formatted_results[filename].append(annotation)
+                    else:
+                        self.echo.echo_vv("report_format: Adding {} to group id {}".format(
+                            token,
+                            current_group_id
+                        ))
+                        annotation['report_group_id'] = current_group_id
+                        formatted_results[filename].append(annotation)
+                        found_group_members.append(token)
+                else:
+                    if token in group_children:
+                        current_group = self._get_group_for_token(token)
+                        current_group_id += 1
+                        found_group_members = [token]
+                        annotation['report_group_id'] = current_group_id
+                        formatted_results[filename].append(annotation)
+
+                        self.echo.echo_vv('Starting group id {} for "{}" token "{}", line {}'.format(
+                            current_group_id, current_group, token, annotation['line_number'])
+                        )
+                    else:
+                        self.echo.echo_vv('Adding single token {}.'.format(token))
+                        formatted_results[filename].append(annotation)
+
+                # If we have all members, this group is done
+                if current_group and len(found_group_members) == len(self.config.groups[current_group]):
+                    self.echo.echo_vv("report_format: Group complete!")
+                    current_group = None
+                    found_group_members = []
+
+        return formatted_results
+
     def report(self, all_results):
         """
         Genrates the YAML report of all search results.
@@ -509,6 +585,8 @@ def report(self, all_results):
         now = datetime.datetime.now()
         report_filename = os.path.join(self.config.report_path, '{}.yaml'.format(now.strftime('%Y-%d-%m-%H-%M-%S')))
 
+        formatted_results = self._format_results_for_report(all_results)
+
         self.echo("Generating report to {}".format(report_filename))
 
         try:
@@ -518,6 +596,6 @@ def report(self, all_results):
                 raise
 
         with open(report_filename, 'w+') as report_file:
-            yaml.dump(all_results, report_file, default_flow_style=False)
+            yaml.dump(formatted_results, report_file, default_flow_style=False)
 
         return report_filename
diff --git a/code_annotations/cli.py b/code_annotations/cli.py
@@ -130,14 +130,6 @@ def django_find_annotations(
 def static_find_annotations(config_file, source_path, report_path, verbosity, lint, report):
     """
     Subcommand to find annotations via static file analysis.
-
-    Args:
-        config_file: Path to the configuration file
-        source_path: Location of the source code to search
-        report_path: Location to write the report
-        verbosity: Verbosity level for output
-        lint: Boolean indicating whether or not to perform linting checks
-        report Boolean indicating whether or not to write the report file
     """
     try:
         start_time = datetime.datetime.now()
diff --git a/docs/static_search.rst b/docs/static_search.rst
@@ -46,6 +46,9 @@ annotations, grouped by file. Each annotation entry has the following keys:
         'annotation_data': 'This model contains no PII.',  # The comment, or choices, found with the annotation token
     }
 
+If an annotation is part of a group, its entry will also have a ``report_group_id`` key. The value is unique to each
+found group, so tools further down the toolchain can keep a group's annotations together for presentation.
+
 Extensions can also send back some additional data in an ``extra`` key, if desired. The Django Model Search Tool does
 this to return the Django app and model name.
 
diff --git a/tests/test_base.py b/tests/test_base.py
@@ -1,6 +1,8 @@
 """
 Tests for code_annotations/base.py
 """
+from collections import OrderedDict
+
 import pytest
 
 from code_annotations.base import AnnotationConfig, ConfigurationException
@@ -16,15 +18,11 @@ def test_get_group_for_token_missing_token():
 def test_get_group_for_token_multiple_groups():
     config = FakeConfig()
     config.groups = {
-        'group1': [
-            {'token1': None}
-        ],
-        'group2': [
-            {'token2': None, 'foo': None}
-        ]
+        'group1': ['token1'],
+        'group2': ['token2', 'foo']
     }
     search = FakeSearch(config)
-    assert search._get_group_for_token('foo') is None  # pylint: disable=protected-access
+    assert search._get_group_for_token('foo') == 'group2'  # pylint: disable=protected-access
 
 
 @pytest.mark.parametrize("test_config,expected_message", [
@@ -72,3 +70,108 @@ def test_annotation_configuration_errors(test_config, expected_message):
 
     exc_msg = str(exception.value)
     assert expected_message in exc_msg
+
+
+def test_format_results_for_report():
+    """
+    Test that report formatting puts annotations into groups correctly
+    """
+    config = FakeConfig()
+    config.echo.set_verbosity(3)
+    config.groups = {
+        'group1': ['token1'],
+        'group2': ['token2', 'foo']
+    }
+
+    search = FakeSearch(config)
+
+    # Create a fake result set for _format_results_for_report to work on
+    fake_results = OrderedDict()
+
+    # First file has 6 annotations. expected_group_id is a special key for this test, allowing us to loop through
+    # these below and know what group each result should be in.
+    fake_results['foo/bar.py'] = [
+            {
+                'found_by': 'test',
+                'filename': 'foo/bar.py',
+                'line_number': 1,
+                'annotation_token': 'token2',
+                'annotation_data': 'file 1 annotation 1',
+                'expected_group_id': 1
+            },
+            {
+                'found_by': 'test',
+                'filename': 'foo/bar.py',
+                'line_number': 2,
+                'annotation_token': 'foo',
+                'annotation_data': 'file 1 annotation 2',
+                'expected_group_id': 1
+            },
+            {
+                'found_by': 'test',
+                'filename': 'foo/bar.py',
+                'line_number': 4,
+                'annotation_token': 'not_in_a_group',
+                'annotation_data': 'file 1 annotation 3',
+                'expected_group_id': None
+            },
+            {
+                'found_by': 'test',
+                'filename': 'foo/bar.py',
+                'line_number': 10,
+                'annotation_token': 'token1',
+                'annotation_data': 'file 1 annotation 4',
+                'expected_group_id': 2
+            },
+            {
+                'found_by': 'test',
+                'filename': 'foo/bar.py',
+                'line_number': 12,
+                'annotation_token': 'token2',
+                'annotation_data': 'file 1 annotation 5',
+                'expected_group_id': 3
+            },
+            {
+                'found_by': 'test',
+                'filename': 'foo/bar.py',
+                'line_number': 13,
+                'annotation_token': 'foo',
+                'annotation_data': 'file 1 annotation 6',
+                'expected_group_id': 3
+            },
+        ]
+
+    fake_results['foo/baz.py'] = [
+            {
+                'found_by': 'test',
+                'filename': 'foo/bar.py',
+                'line_number': 1,
+                'annotation_token': 'token2',
+                'annotation_data': 'file 2 annotation 1',
+                'expected_group_id': 4
+            },
+            {
+                'found_by': 'test',
+                'filename': 'foo/bar.py',
+                'line_number': 2,
+                'annotation_token': 'foo',
+                'annotation_data': 'file 1 annotation 2',
+                'expected_group_id': 4
+            }
+        ]
+
+    # Run the format function
+    results = search._format_results_for_report(fake_results)  # pylint: disable=protected-access
+
+    for filename in fake_results:
+        for fake in fake_results[filename]:
+            for formatted in results[filename]:
+                # When we find the same annotation, make sure that grouping is correct
+                if fake['annotation_data'] == formatted['annotation_data']:
+                    # Ungrouped annotations should not have the 'report_group_id' key
+                    if fake['expected_group_id'] is None:
+                        assert 'report_group_id' not in formatted
+                    # Otherwise it should match our expected value
+                    else:
+                        assert fake['expected_group_id'] == formatted['report_group_id']
+                    break

Original file line number	Diff line number	Diff line change
`@@ -46,6 +46,9 @@ annotations, grouped by file. Each annotation entry has the following keys:`
`46`	`46`	`'annotation_data': 'This model contains no PII.', # The comment, or choices, found with the annotation token`
`47`	`47`	`}`
`48`	`48`
	`49`	+If an annotation is in a group, there will also be a `report_group_id`. This key is unique for each found group,
	`50`	`+allowing tools further down the toolchain to keep them together for presentation.`
	`51`	`+`
`49`	`52`	Extensions can also send back some additional data in an ``extra`` key, if desired. The Django Model Search Tool does
`50`	`53`	`this to return the Django app and model name.`
`51`	`54`