From db52c5e287dc0fb83a245ae0d51a4ced022dec76 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 11 Feb 2025 19:39:41 +0530 Subject: [PATCH 1/7] Address review comments Signed-off-by: Tushar Goel --- vulnerabilities/improvers/__init__.py | 2 + .../fill_vulnerability_summary_pipeline.py | 69 ++++++++++ ...est_fill_vulnerability_summary_pipeline.py | 120 ++++++++++++++++++ 3 files changed, 191 insertions(+) create mode 100644 vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py create mode 100644 vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 37143d125..af1818b7a 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -17,6 +17,7 @@ from vulnerabilities.pipelines import enhance_with_exploitdb from vulnerabilities.pipelines import enhance_with_kev from vulnerabilities.pipelines import enhance_with_metasploit +from vulnerabilities.pipelines import fill_vulnerability_summary_pipeline from vulnerabilities.pipelines import flag_ghost_packages from vulnerabilities.pipelines import remove_duplicate_advisories @@ -47,6 +48,7 @@ collect_commits.CollectFixCommitsPipeline, add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline, remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline, + fill_vulnerability_summary_pipeline.FillVulnerabilitySummariesPipeline, ] IMPROVERS_REGISTRY = { diff --git a/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py b/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py new file mode 100644 index 000000000..a9ccdcc23 --- /dev/null +++ b/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py @@ -0,0 +1,69 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. 
+# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging + +from django.db.models import Q + +from vulnerabilities.models import Advisory +from vulnerabilities.models import Vulnerability +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class FillVulnerabilitySummariesPipeline(VulnerableCodePipeline): + """Pipeline to fill missing vulnerability summaries from advisories.""" + + pipeline_id = "fill_vulnerability_summaries" + + @classmethod + def steps(cls): + return (cls.fill_missing_summaries,) + + def fill_missing_summaries(self): + """Find vulnerabilities without summaries and fill them using advisories with the same aliases.""" + vulnerabilities_qs = Vulnerability.objects.filter(summary="").prefetch_related("aliases") + self.log( + f"Processing {vulnerabilities_qs.count()} vulnerabilities without summaries", + level=logging.INFO, + ) + nvd_importer_advisories = Advisory.objects.filter( + created_by="nvd_importer", summary__isnull=False + ).exclude(summary="") + self.log( + f"Found {nvd_importer_advisories.count()} advisories from NVD importer", + level=logging.INFO, + ) + + for vulnerability in vulnerabilities_qs.paginated(): + aliases = vulnerability.aliases.values_list("alias", flat=True) + # get alias that start with CVE- with filter + alias = aliases.filter(alias__startswith="CVE-").first() + + # check if the vulnerability has an alias + if not alias: + self.log( + f"Vulnerability {vulnerability.vulnerability_id} has no alias", + level=logging.INFO, + ) + continue + + # check if the vulnerability has an alias that matches an advisory + matching_advisories = nvd_importer_advisories.filter(Q(aliases__contains=alias)) + + if matching_advisories.exists(): + # Take the first matching advisory with a summary + best_advisory = matching_advisories.first() + vulnerability.summary = best_advisory.summary + vulnerability.save() + 
self.log( + f"Updated summary for vulnerability {vulnerability.vulnerability_id}", + level=logging.INFO, + ) + else: + self.log(f"No advisory found for alias {alias}", level=logging.INFO) diff --git a/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py b/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py new file mode 100644 index 000000000..aa6628ad8 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py @@ -0,0 +1,120 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import datetime +from pathlib import Path + +import pytz +from django.test import TestCase + +from vulnerabilities.models import Advisory +from vulnerabilities.models import Alias +from vulnerabilities.models import Vulnerability +from vulnerabilities.pipelines.fill_vulnerability_summary_pipeline import ( + FillVulnerabilitySummariesPipeline, +) + + +class FillVulnerabilitySummariesPipelineTest(TestCase): + def setUp(self): + self.data = Path(__file__).parent.parent / "test_data" + + def test_fill_missing_summaries_from_nvd(self): + """ + Test that vulnerabilities without summaries get them from NVD advisories. 
+ """ + + # Create a vulnerability without a summary + vulnerability = Vulnerability.objects.create( + vulnerability_id="VCID-1234", + summary="", + ) + alias = Alias.objects.create(alias="CVE-2024-1234", vulnerability=vulnerability) + + # Create an NVD advisory with a summary + Advisory.objects.create( + summary="Test vulnerability summary", + created_by="nvd_importer", + date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC), + aliases=["CVE-2024-1234"], + ) + + # Run the pipeline + pipeline = FillVulnerabilitySummariesPipeline() + pipeline.fill_missing_summaries() + + # Check that the vulnerability now has a summary + vulnerability.refresh_from_db() + self.assertEqual(vulnerability.summary, "Test vulnerability summary") + + def test_no_matching_advisory(self): + """ + Test handling of vulnerabilities that have no matching NVD advisory. + """ + # Create a vulnerability without a summary + vulnerability = Vulnerability.objects.create( + vulnerability_id="VCID-1234", + summary="", + ) + Alias.objects.create(alias="CVE-2024-1234", vulnerability=vulnerability) + + # Run the pipeline + pipeline = FillVulnerabilitySummariesPipeline() + pipeline.fill_missing_summaries() + + # Check that the vulnerability still has no summary + vulnerability.refresh_from_db() + self.assertEqual(vulnerability.summary, "") + + def test_vulnerability_without_alias(self): + """ + Test handling of vulnerabilities that have no aliases. + """ + + # Create a vulnerability without a summary or alias + vulnerability = Vulnerability.objects.create( + vulnerability_id="VCID-1234", + summary="", + ) + + # Run the pipeline + pipeline = FillVulnerabilitySummariesPipeline() + pipeline.fill_missing_summaries() + + # Check that the vulnerability still has no summary + vulnerability.refresh_from_db() + self.assertEqual(vulnerability.summary, "") + + def test_non_nvd_advisory_ignored(self): + """ + Test that advisories from sources other than NVD are ignored. 
+ """ + + # Create a vulnerability without a summary + vulnerability = Vulnerability.objects.create( + vulnerability_id="VCID-1234", + summary="", + ) + alias = Alias.objects.create(alias="CVE-2024-1234", vulnerability=vulnerability) + + # Create a non-NVD advisory with a summary + Advisory.objects.create( + summary="Test vulnerability summary", + created_by="other_importer", + date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC), + aliases=["CVE-2024-1234"], + ) + + # Run the pipeline + pipeline = FillVulnerabilitySummariesPipeline() + pipeline.fill_missing_summaries() + + # Check that the vulnerability still has no summary + vulnerability.refresh_from_db() + self.assertEqual(vulnerability.summary, "") From 01abf21206d6a7adca984ea0ff689505379d0b43 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 11 Feb 2025 19:52:47 +0530 Subject: [PATCH 2/7] Add tests Signed-off-by: Tushar Goel --- .../fill_vulnerability_summary_pipeline.py | 3 +- ...est_fill_vulnerability_summary_pipeline.py | 33 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py b/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py index a9ccdcc23..678a9f516 100644 --- a/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py +++ b/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py @@ -58,7 +58,8 @@ def fill_missing_summaries(self): if matching_advisories.exists(): # Take the first matching advisory with a summary - best_advisory = matching_advisories.first() + # get the advisory that was collected the most recently + best_advisory = matching_advisories.order_by("-date_collected").first() vulnerability.summary = best_advisory.summary vulnerability.save() self.log( diff --git a/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py b/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py index aa6628ad8..5ec8c58cd 100644 --- 
a/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py @@ -118,3 +118,36 @@ def test_non_nvd_advisory_ignored(self): # Check that the vulnerability still has no summary vulnerability.refresh_from_db() self.assertEqual(vulnerability.summary, "") + + def test_multiple_matching_advisories(self): + """ + Test that the most recent matching advisory is used when there are multiple. + """ + vulnerability = Vulnerability.objects.create( + vulnerability_id="VCID-1234", + summary="", + ) + alias = Alias.objects.create(alias="CVE-2024-1234", vulnerability=vulnerability) + + # Create two NVD advisories with the same alias + Advisory.objects.create( + summary="First matching advisory", + created_by="nvd_importer", + date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC), + aliases=["CVE-2024-1234"], + ) + + Advisory.objects.create( + summary="Second matching advisory", + created_by="nvd_importer", + date_collected=datetime.datetime(2024, 1, 2, tzinfo=pytz.UTC), + aliases=["CVE-2024-1234"], + ) + + # Run the pipeline + pipeline = FillVulnerabilitySummariesPipeline() + pipeline.fill_missing_summaries() + + # Check that the vulnerability now has the most recent summary + vulnerability.refresh_from_db() + self.assertEqual(vulnerability.summary, "Second matching advisory") From 1cafd40db41a7dcc41e02192905f317ce9edcca4 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 12 Feb 2025 19:38:57 +0530 Subject: [PATCH 3/7] Address review comments Signed-off-by: Tushar Goel --- .../pipelines/fill_vulnerability_summary_pipeline.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py b/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py index 678a9f516..bb14dbcc9 100644 --- a/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py +++ 
b/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py @@ -9,6 +9,7 @@ import logging +from aboutcode.pipeline import LoopProgress from django.db.models import Q from vulnerabilities.models import Advisory @@ -27,7 +28,7 @@ def steps(cls): def fill_missing_summaries(self): """Find vulnerabilities without summaries and fill them using advisories with the same aliases.""" - vulnerabilities_qs = Vulnerability.objects.filter(summary="").prefetch_related("aliases") + vulnerabilities_qs = Vulnerability.objects.filter(summary="") self.log( f"Processing {vulnerabilities_qs.count()} vulnerabilities without summaries", level=logging.INFO, @@ -36,11 +37,13 @@ def fill_missing_summaries(self): created_by="nvd_importer", summary__isnull=False ).exclude(summary="") self.log( - f"Found {nvd_importer_advisories.count()} advisories from NVD importer", + f"Found {nvd_importer_advisories.count()} advisories with summaries from NVD importer", level=logging.INFO, ) - for vulnerability in vulnerabilities_qs.paginated(): + progress = LoopProgress(total_iterations=vulnerabilities_qs.count(), logger=self.log) + + for vulnerability in progress.iter(vulnerabilities_qs.paginated()): aliases = vulnerability.aliases.values_list("alias", flat=True) # get alias that start with CVE- with filter alias = aliases.filter(alias__startswith="CVE-").first() @@ -60,6 +63,7 @@ def fill_missing_summaries(self): # Take the first matching advisory with a summary # get the advisory that was collected the most recently best_advisory = matching_advisories.order_by("-date_collected").first() + # Note: we filtered above to only get non-empty summaries vulnerability.summary = best_advisory.summary vulnerability.save() self.log( From 6328d93a102c2596e97f7c911f949cde894f0ec4 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 28 Mar 2025 11:10:42 +0530 Subject: [PATCH 4/7] Change models Signed-off-by: Tushar Goel --- .../fill_vulnerability_summary_pipeline.py | 40 ++++++++----------- 
...est_fill_vulnerability_summary_pipeline.py | 12 +++--- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py b/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py index bb14dbcc9..7c7e3c8ea 100644 --- a/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py +++ b/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py @@ -17,51 +17,43 @@ from vulnerabilities.pipelines import VulnerableCodePipeline -class FillVulnerabilitySummariesPipeline(VulnerableCodePipeline): - """Pipeline to fill missing vulnerability summaries from advisories.""" +class PopulateVulnerabilitySummariesPipeline(VulnerableCodePipeline): + """Pipeline to populate missing vulnerability summaries from advisories.""" - pipeline_id = "fill_vulnerability_summaries" + pipeline_id = "populate_vulnerability_summaries" @classmethod def steps(cls): - return (cls.fill_missing_summaries,) + return (cls.populate_missing_summaries,) - def fill_missing_summaries(self): - """Find vulnerabilities without summaries and fill them using advisories with the same aliases.""" + def populate_missing_summaries(self): + """Find vulnerabilities with missing summaries and populate them using advisories with the same aliases.""" vulnerabilities_qs = Vulnerability.objects.filter(summary="") self.log( f"Processing {vulnerabilities_qs.count()} vulnerabilities without summaries", level=logging.INFO, ) - nvd_importer_advisories = Advisory.objects.filter( - created_by="nvd_importer", summary__isnull=False - ).exclude(summary="") - self.log( - f"Found {nvd_importer_advisories.count()} advisories with summaries from NVD importer", - level=logging.INFO, - ) + # nvd_importer_advisories = Advisory.objects.filter(created_by="nvd_importer").exclude(summary="") + # self.log( + # f"Found {nvd_importer_advisories.count()} advisories with summaries from NVD importer", + # level=logging.INFO, + # ) progress = 
LoopProgress(total_iterations=vulnerabilities_qs.count(), logger=self.log) - for vulnerability in progress.iter(vulnerabilities_qs.paginated()): - aliases = vulnerability.aliases.values_list("alias", flat=True) - # get alias that start with CVE- with filter - alias = aliases.filter(alias__startswith="CVE-").first() + for vulnerability in progress.iter(vulnerabilities_qs.iterator()): + cve_alias = vulnerability.aliases.filter(alias__startswith="CVE-").first() - # check if the vulnerability has an alias - if not alias: + if not cve_alias: self.log( - f"Vulnerability {vulnerability.vulnerability_id} has no alias", + f"Vulnerability {vulnerability.vulnerability_id} has no CVE alias", level=logging.INFO, ) continue - # check if the vulnerability has an alias that matches an advisory - matching_advisories = nvd_importer_advisories.filter(Q(aliases__contains=alias)) + matching_advisories = Advisory.objects.filter(aliases=cve_alias) if matching_advisories.exists(): - # Take the first matching advisory with a summary - # get the advisory that was collected the most recently best_advisory = matching_advisories.order_by("-date_collected").first() # Note: we filtered above to only get non-empty summaries vulnerability.summary = best_advisory.summary diff --git a/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py b/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py index 5ec8c58cd..d5fdadf28 100644 --- a/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py @@ -17,7 +17,7 @@ from vulnerabilities.models import Alias from vulnerabilities.models import Vulnerability from vulnerabilities.pipelines.fill_vulnerability_summary_pipeline import ( - FillVulnerabilitySummariesPipeline, + PopulateVulnerabilitySummariesPipeline, ) @@ -46,7 +46,7 @@ def test_fill_missing_summaries_from_nvd(self): ) # Run the pipeline - pipeline = 
FillVulnerabilitySummariesPipeline() + pipeline = PopulateVulnerabilitySummariesPipeline() pipeline.fill_missing_summaries() # Check that the vulnerability now has a summary @@ -65,7 +65,7 @@ def test_no_matching_advisory(self): Alias.objects.create(alias="CVE-2024-1234", vulnerability=vulnerability) # Run the pipeline - pipeline = FillVulnerabilitySummariesPipeline() + pipeline = PopulateVulnerabilitySummariesPipeline() pipeline.fill_missing_summaries() # Check that the vulnerability still has no summary @@ -84,7 +84,7 @@ def test_vulnerability_without_alias(self): ) # Run the pipeline - pipeline = FillVulnerabilitySummariesPipeline() + pipeline = PopulateVulnerabilitySummariesPipeline() pipeline.fill_missing_summaries() # Check that the vulnerability still has no summary @@ -112,7 +112,7 @@ def test_non_nvd_advisory_ignored(self): ) # Run the pipeline - pipeline = FillVulnerabilitySummariesPipeline() + pipeline = PopulateVulnerabilitySummariesPipeline() pipeline.fill_missing_summaries() # Check that the vulnerability still has no summary @@ -145,7 +145,7 @@ def test_multiple_matching_advisories(self): ) # Run the pipeline - pipeline = FillVulnerabilitySummariesPipeline() + pipeline = PopulateVulnerabilitySummariesPipeline() pipeline.fill_missing_summaries() # Check that the vulnerability now has the most recent summary From 97301bc776a8b5d4dfc9f0da444f730c92f42201 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 28 Mar 2025 11:10:58 +0530 Subject: [PATCH 5/7] Change models Signed-off-by: Tushar Goel --- vulnerabilities/improvers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index af1818b7a..fd343850f 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -48,7 +48,7 @@ collect_commits.CollectFixCommitsPipeline, add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline, 
remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline, - fill_vulnerability_summary_pipeline.FillVulnerabilitySummariesPipeline, + fill_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, ] IMPROVERS_REGISTRY = { From 23620565478ef69c34d39627fc2d7afd7145c335 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Mon, 31 Mar 2025 12:46:12 +0530 Subject: [PATCH 6/7] Adhere to new models for summary pipeline Signed-off-by: Tushar Goel --- .../fill_vulnerability_summary_pipeline.py | 11 +++---- ...est_fill_vulnerability_summary_pipeline.py | 33 +++++++++++-------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py b/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py index 7c7e3c8ea..c16ea4103 100644 --- a/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py +++ b/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py @@ -33,11 +33,6 @@ def populate_missing_summaries(self): f"Processing {vulnerabilities_qs.count()} vulnerabilities without summaries", level=logging.INFO, ) - # nvd_importer_advisories = Advisory.objects.filter(created_by="nvd_importer").exclude(summary="") - # self.log( - # f"Found {nvd_importer_advisories.count()} advisories with summaries from NVD importer", - # level=logging.INFO, - # ) progress = LoopProgress(total_iterations=vulnerabilities_qs.count(), logger=self.log) @@ -51,7 +46,9 @@ def populate_missing_summaries(self): ) continue - matching_advisories = Advisory.objects.filter(aliases=cve_alias) + matching_advisories = Advisory.objects.filter( + aliases=cve_alias, created_by="nvd_importer" + ).exclude(summary="") if matching_advisories.exists(): best_advisory = matching_advisories.order_by("-date_collected").first() @@ -63,4 +60,4 @@ def populate_missing_summaries(self): level=logging.INFO, ) else: - self.log(f"No advisory found for alias {alias}", level=logging.INFO) + self.log(f"No advisory found for alias 
{cve_alias}", level=logging.INFO) diff --git a/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py b/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py index d5fdadf28..6bc590f84 100644 --- a/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py @@ -38,16 +38,17 @@ def test_fill_missing_summaries_from_nvd(self): alias = Alias.objects.create(alias="CVE-2024-1234", vulnerability=vulnerability) # Create an NVD advisory with a summary - Advisory.objects.create( + adv = Advisory.objects.create( summary="Test vulnerability summary", created_by="nvd_importer", date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC), - aliases=["CVE-2024-1234"], + unique_content_id="Test", ) + adv.aliases.add(alias) # Run the pipeline pipeline = PopulateVulnerabilitySummariesPipeline() - pipeline.fill_missing_summaries() + pipeline.populate_missing_summaries() # Check that the vulnerability now has a summary vulnerability.refresh_from_db() @@ -66,7 +67,7 @@ def test_no_matching_advisory(self): # Run the pipeline pipeline = PopulateVulnerabilitySummariesPipeline() - pipeline.fill_missing_summaries() + pipeline.populate_missing_summaries() # Check that the vulnerability still has no summary vulnerability.refresh_from_db() @@ -85,7 +86,7 @@ def test_vulnerability_without_alias(self): # Run the pipeline pipeline = PopulateVulnerabilitySummariesPipeline() - pipeline.fill_missing_summaries() + pipeline.populate_missing_summaries() # Check that the vulnerability still has no summary vulnerability.refresh_from_db() @@ -104,16 +105,18 @@ def test_non_nvd_advisory_ignored(self): alias = Alias.objects.create(alias="CVE-2024-1234", vulnerability=vulnerability) # Create a non-NVD advisory with a summary - Advisory.objects.create( + adv = Advisory.objects.create( summary="Test vulnerability summary", created_by="other_importer", 
date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC), - aliases=["CVE-2024-1234"], + unique_content_id="Test", ) + adv.aliases.add(alias) + # Run the pipeline pipeline = PopulateVulnerabilitySummariesPipeline() - pipeline.fill_missing_summaries() + pipeline.populate_missing_summaries() # Check that the vulnerability still has no summary vulnerability.refresh_from_db() @@ -130,23 +133,27 @@ def test_multiple_matching_advisories(self): alias = Alias.objects.create(alias="CVE-2024-1234", vulnerability=vulnerability) # Create two NVD advisories with the same alias - Advisory.objects.create( + adv1 = Advisory.objects.create( summary="First matching advisory", created_by="nvd_importer", date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC), - aliases=["CVE-2024-1234"], + unique_content_id="Test", ) - Advisory.objects.create( + adv1.aliases.add(alias) + + adv2 = Advisory.objects.create( summary="Second matching advisory", created_by="nvd_importer", date_collected=datetime.datetime(2024, 1, 2, tzinfo=pytz.UTC), - aliases=["CVE-2024-1234"], + unique_content_id="Test-1", ) + adv2.aliases.add(alias) + # Run the pipeline pipeline = PopulateVulnerabilitySummariesPipeline() - pipeline.fill_missing_summaries() + pipeline.populate_missing_summaries() # Check that the vulnerability now has the most recent summary vulnerability.refresh_from_db() From 2ceb046af1c252cca711c828ebbcc99e30e3a611 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Mon, 31 Mar 2025 13:03:25 +0530 Subject: [PATCH 7/7] Do bulk update Signed-off-by: Tushar Goel --- vulnerabilities/improvers/__init__.py | 4 ++-- ... 
=> populate_vulnerability_summary_pipeline.py} | 14 +++++++++++--- ...est_populate_vulnerability_summary_pipeline.py} | 6 +++--- 3 files changed, 16 insertions(+), 8 deletions(-) rename vulnerabilities/pipelines/{fill_vulnerability_summary_pipeline.py => populate_vulnerability_summary_pipeline.py} (83%) rename vulnerabilities/tests/pipelines/{test_fill_vulnerability_summary_pipeline.py => test_populate_vulnerability_summary_pipeline.py} (96%) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index fd343850f..9e36ce5f0 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -17,8 +17,8 @@ from vulnerabilities.pipelines import enhance_with_exploitdb from vulnerabilities.pipelines import enhance_with_kev from vulnerabilities.pipelines import enhance_with_metasploit -from vulnerabilities.pipelines import fill_vulnerability_summary_pipeline from vulnerabilities.pipelines import flag_ghost_packages +from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline from vulnerabilities.pipelines import remove_duplicate_advisories IMPROVERS_REGISTRY = [ @@ -48,7 +48,7 @@ collect_commits.CollectFixCommitsPipeline, add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline, remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline, - fill_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, + populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, ] IMPROVERS_REGISTRY = { diff --git a/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py b/vulnerabilities/pipelines/populate_vulnerability_summary_pipeline.py similarity index 83% rename from vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py rename to vulnerabilities/pipelines/populate_vulnerability_summary_pipeline.py index c16ea4103..fb458efd5 100644 --- a/vulnerabilities/pipelines/fill_vulnerability_summary_pipeline.py +++ 
b/vulnerabilities/pipelines/populate_vulnerability_summary_pipeline.py @@ -36,13 +36,15 @@ def populate_missing_summaries(self): progress = LoopProgress(total_iterations=vulnerabilities_qs.count(), logger=self.log) + vulnerabilities_to_be_updated = [] + for vulnerability in progress.iter(vulnerabilities_qs.iterator()): cve_alias = vulnerability.aliases.filter(alias__startswith="CVE-").first() if not cve_alias: self.log( f"Vulnerability {vulnerability.vulnerability_id} has no CVE alias", - level=logging.INFO, + level=logging.DEBUG, ) continue @@ -54,10 +56,16 @@ def populate_missing_summaries(self): best_advisory = matching_advisories.order_by("-date_collected").first() # Note: we filtered above to only get non-empty summaries vulnerability.summary = best_advisory.summary - vulnerability.save() + vulnerabilities_to_be_updated.append(vulnerability) self.log( f"Updated summary for vulnerability {vulnerability.vulnerability_id}", level=logging.INFO, ) else: - self.log(f"No advisory found for alias {cve_alias}", level=logging.INFO) + self.log(f"No advisory found for alias {cve_alias}", level=logging.DEBUG) + Vulnerability.objects.bulk_update(vulnerabilities_to_be_updated, ["summary"]) + self.log( + f"Successfully populated {len(vulnerabilities_to_be_updated)} vulnerabilities with summary", + level=logging.INFO, + ) + self.log("Pipeline completed", level=logging.INFO) diff --git a/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py b/vulnerabilities/tests/pipelines/test_populate_vulnerability_summary_pipeline.py similarity index 96% rename from vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py rename to vulnerabilities/tests/pipelines/test_populate_vulnerability_summary_pipeline.py index 6bc590f84..d8f3ad944 100644 --- a/vulnerabilities/tests/pipelines/test_fill_vulnerability_summary_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_populate_vulnerability_summary_pipeline.py @@ -16,16 +16,16 @@ from 
vulnerabilities.models import Advisory from vulnerabilities.models import Alias from vulnerabilities.models import Vulnerability -from vulnerabilities.pipelines.fill_vulnerability_summary_pipeline import ( +from vulnerabilities.pipelines.populate_vulnerability_summary_pipeline import ( PopulateVulnerabilitySummariesPipeline, ) -class FillVulnerabilitySummariesPipelineTest(TestCase): +class PopulateVulnerabilitySummariesPipelineTest(TestCase): def setUp(self): self.data = Path(__file__).parent.parent / "test_data" - def test_fill_missing_summaries_from_nvd(self): + def test_populate_missing_summaries_from_nvd(self): """ Test that vulnerabilities without summaries get them from NVD advisories. """