From 98aacc500029265015f0f1c8107ec4b7d2aa611f Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 23 Apr 2025 16:12:22 +0530 Subject: [PATCH 01/44] Add AdvisoryV2 models Signed-off-by: Tushar Goel --- vulnerabilities/importer.py | 116 ++++++++++++++++ vulnerabilities/models.py | 254 ++++++++++++++++++++++++++++++++++++ 2 files changed, 370 insertions(+) diff --git a/vulnerabilities/importer.py b/vulnerabilities/importer.py index 759ec9330..f62aa0444 100644 --- a/vulnerabilities/importer.py +++ b/vulnerabilities/importer.py @@ -145,6 +145,54 @@ def from_url(cls, url): return cls(url=url) +@dataclasses.dataclass(eq=True) +@functools.total_ordering +class ReferenceV2: + reference_id: str = "" + reference_type: str = "" + url: str = "" + + def __post_init__(self): + if not self.url: + raise TypeError("Reference must have a url") + if self.reference_id and not isinstance(self.reference_id, str): + self.reference_id = str(self.reference_id) + + def __lt__(self, other): + if not isinstance(other, Reference): + return NotImplemented + return self._cmp_key() < other._cmp_key() + + # TODO: Add cache + def _cmp_key(self): + return (self.reference_id, self.reference_type, self.url) + + def to_dict(self): + """Return a normalized dictionary representation""" + return { + "reference_id": self.reference_id, + "reference_type": self.reference_type, + "url": self.url + } + + @classmethod + def from_dict(cls, ref: dict): + return cls( + reference_id=str(ref["reference_id"]), + reference_type=ref.get("reference_type") or "", + url=ref["url"], + ) + + @classmethod + def from_url(cls, url): + reference_id = get_reference_id(url) + if "GHSA-" in reference_id.upper(): + return cls(reference_id=reference_id, url=url) + if is_cve(reference_id): + return cls(url=url, reference_id=reference_id.upper()) + return cls(url=url) + + class UnMergeablePackageError(Exception): """ Raised when a package cannot be merged with another one. @@ -356,6 +404,74 @@ def from_dict(cls, advisory_data): return cls(**transformed) +@dataclasses.dataclass(order=True) +class AdvisoryDataV2: + """ + This data class expresses the contract between data sources and the import runner. 
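+
+    For illustration only, a minimal instance could look like the following
+    (hypothetical values; ReferenceV2 requires a url):
+
+        AdvisoryDataV2(
+            aliases=["CVE-2020-2233"],
+            summary="Example summary",
+            references=[ReferenceV2(url="https://example.com/advisory")],
+            url="https://example.com/advisory",
+        )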
+ + If a vulnerability_id is present then: + summary or affected_packages or references must be present + otherwise + either affected_package or references should be present + + date_published must be aware datetime + """ + + aliases: List[str] = dataclasses.field(default_factory=list) + summary: Optional[str] = "" + affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) + references: List[ReferenceV2] = dataclasses.field(default_factory=list) + date_published: Optional[datetime.datetime] = None + weaknesses: List[int] = dataclasses.field(default_factory=list) + severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list) + url: Optional[str] = None + + def __post_init__(self): + if self.date_published and not self.date_published.tzinfo: + logger.warning(f"AdvisoryData with no tzinfo: {self!r}") + if self.summary: + self.summary = self.clean_summary(self.summary) + + def clean_summary(self, summary): + # https://nvd.nist.gov/vuln/detail/CVE-2013-4314 + # https://github.com/cms-dev/cms/issues/888#issuecomment-516977572 + summary = summary.strip() + if summary: + summary = summary.replace("\x00", "\uFFFD") + return summary + + def to_dict(self): + return { + "aliases": self.aliases, + "summary": self.summary, + "affected_packages": [pkg.to_dict() for pkg in self.affected_packages], + "references": [ref.to_dict() for ref in self.references], + "date_published": self.date_published.isoformat() if self.date_published else None, + "weaknesses": self.weaknesses, + "url": self.url if self.url else "", + } + + @classmethod + def from_dict(cls, advisory_data): + date_published = advisory_data["date_published"] + transformed = { + "aliases": advisory_data["aliases"], + "summary": advisory_data["summary"], + "affected_packages": [ + AffectedPackage.from_dict(pkg) + for pkg in advisory_data["affected_packages"] + if pkg is not None + ], + "references": [Reference.from_dict(ref) for ref in advisory_data["references"]], + "date_published": datetime.datetime.fromisoformat(date_published) + if date_published + else None, + "weaknesses": advisory_data["weaknesses"], + "url": advisory_data.get("url") or None, + } + return cls(**transformed) + + class NoLicenseError(Exception): pass diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index c4ccbd1fa..7d11222f6 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2333,9 +2333,263 @@ class AdvisoryToDo(models.Model): class Meta: unique_together = ("related_advisories_id", "issue_type") + +class AdvisorySeverity(models.Model): + url = models.URLField( + max_length=1024, + null=True, + help_text="URL to the vulnerability severity", + db_index=True, + ) + + scoring_system_choices = tuple( + (system.identifier, system.name) for system in SCORING_SYSTEMS.values() + ) + + scoring_system = models.CharField( + max_length=50, + choices=scoring_system_choices, + help_text="Identifier for the scoring system used. Available choices are: {} ".format( + ",\n".join(f"{sid}: {sname}" for sid, sname in scoring_system_choices) + ), + ) + + value = models.CharField(max_length=50, help_text="Example: 9.0, Important, High") + + scoring_elements = models.CharField( + max_length=150, + null=True, + help_text="Supporting scoring elements used to compute the score values. 
" + "For example a CVSS vector string as used to compute a CVSS score.", + ) + + published_at = models.DateTimeField( + blank=True, null=True, help_text="UTC Date of publication of the vulnerability severity" + ) + + objects = BaseQuerySet.as_manager() + + class Meta: + ordering = ["url", "scoring_system", "value"] + + +class AdvisoryWeakness(models.Model): + """ + A weakness is a software weakness that is associated with a vulnerability. + """ + + cwe_id = models.IntegerField(help_text="CWE id") + vulnerabilities = models.ManyToManyField(Vulnerability, related_name="weaknesses") + + cwe_by_id = {} + + def get_cwe(self, cwe_id): + if not self.cwe_by_id: + db = Database() + for weakness in db.get_cwes(): + self.cwe_by_id[str(weakness.cwe_id)] = weakness + return self.cwe_by_id[cwe_id] + + @property + def cwe(self): + return f"CWE-{self.cwe_id}" + + @property + def weakness(self): + """ + Return a queryset of Weakness for this vulnerability. + """ + try: + weakness = self.get_cwe(str(self.cwe_id)) + return weakness + except Exception as e: + logger.warning(f"Could not find CWE {self.cwe_id}: {e}") + + @property + def name(self): + """Return the weakness's name.""" + return self.weakness.name if self.weakness else "" + + @property + def description(self): + """Return the weakness's description.""" + return self.weakness.description if self.weakness else "" + + def to_dict(self): + return {"cwe_id": self.cwe_id, "name": self.name, "description": self.description} + + +class AdvisoryReference(models.Model): + url = models.URLField( + max_length=1024, + help_text="URL to the vulnerability reference", + unique=True, + ) + + ADVISORY = "advisory" + EXPLOIT = "exploit" + MAILING_LIST = "mailing_list" + BUG = "bug" + OTHER = "other" + + REFERENCE_TYPES = [ + (ADVISORY, "Advisory"), + (EXPLOIT, "Exploit"), + (MAILING_LIST, "Mailing List"), + (BUG, "Bug"), + (OTHER, "Other"), + ] + + reference_type = models.CharField(max_length=20, choices=REFERENCE_TYPES, blank=True) + + reference_id = models.CharField( + max_length=200, + help_text="An optional reference ID, such as DSA-4465-1 when available", + blank=True, + db_index=True, + ) + + class Meta: + ordering = ["reference_id", "url", "reference_type"] + + def __str__(self): + reference_id = f" {self.reference_id}" if self.reference_id else "" + return f"{self.url}{reference_id}" + + @property + def is_cpe(self): + """ + Return True if this is a CPE reference. + """ + return self.reference_id.startswith("cpe") + + +class AdvisoryAlias(models.Model): + alias = models.CharField( + max_length=50, + unique=True, + blank=False, + null=False, + help_text="An alias is a unique vulnerability identifier in some database, " + "such as CVE-2020-2233", + ) + + class Meta: + ordering = ["alias"] + + def __str__(self): + return self.alias + + @cached_property + def url(self): + """ + Create a URL for the alias. 
+ """ + alias: str = self.alias + if alias.startswith("CVE"): + return f"https://nvd.nist.gov/vuln/detail/{alias}" + + if alias.startswith("GHSA"): + return f"https://github.com/advisories/{alias}" + + if alias.startswith("NPM-"): + id = alias.lstrip("NPM-") + return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json" + + +class AdvisoryV2(models.Model): + """ + An advisory represents data directly obtained from upstream transformed + into structured data + """ + + advisory_id = models.CharField( + max_length=50, + blank=False, + null=False, + unique=False, + help_text="An advisory is a unique vulnerability identifier in some database, " + "such as CVE-2020-2233", + ) + + unique_content_id = models.CharField( + max_length=64, + blank=False, + null=False, + unique=True, + help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex", + ) + summary = models.TextField( + blank=True, + ) + aliases = models.ManyToManyField( + AdvisoryAlias, + related_name="advisories", + help_text="A list of serializable Alias objects", + ) + affected_packages = models.JSONField( + blank=True, default=list, help_text="A list of serializable AffectedPackage objects" + ) + references = models.ManyToManyField( + AdvisoryReference, + related_name="advisories", + help_text="A list of serializable Reference objects", + ) + severities = models.ManyToManyField( + AdvisorySeverity, + related_name="advisories", + help_text="A list of vulnerability severities associated with this advisory.", + ) + weaknesses = models.ManyToManyField( + AdvisoryWeakness, + related_name="advisories", + help_text="A list of software weaknesses associated with this advisory.", + ) + date_published = models.DateTimeField( + blank=True, null=True, help_text="UTC Date of publication of the advisory" + ) + date_collected = models.DateTimeField(help_text="UTC Date on which the advisory was collected") + date_imported = models.DateTimeField( + blank=True, null=True, help_text="UTC Date on which the advisory was imported" + ) + created_by = models.CharField( + max_length=100, + help_text="Fully qualified name of the importer prefixed with the" + "module name importing the advisory. 
Eg:" + "vulnerabilities.pipeline.nginx_importer.NginxImporterPipeline", + ) + url = models.URLField( + blank=False, + null=False, + help_text="Link to the advisory on the upstream website", + ) + + objects = AdvisoryQuerySet.as_manager() + + class Meta: + ordering = ["date_published", "unique_content_id"] + def save(self, *args, **kwargs): self.full_clean() return super().save(*args, **kwargs) + + def to_advisory_data(self) -> "AdvisoryDataV2": + from vulnerabilities.importer import AdvisoryDataV2 + from vulnerabilities.importer import AffectedPackage + from vulnerabilities.importer import ReferenceV2 + + return AdvisoryDataV2( + aliases=[item.alias for item in self.aliases.all()], + summary=self.summary, + affected_packages=[ + AffectedPackage.from_dict(pkg) for pkg in self.affected_packages if pkg + ], + references=[ReferenceV2.from_dict(ref) for ref in self.references], + date_published=self.date_published, + weaknesses=self.weaknesses, + severities=self.severities, + url=self.url, + ) class ToDoRelatedAdvisory(models.Model): From b83fa7349caff21f8ff7b9124467987090c5183d Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 23 Apr 2025 16:12:40 +0530 Subject: [PATCH 02/44] Do formatting changes Signed-off-by: Tushar Goel --- vulnerabilities/importer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/importer.py b/vulnerabilities/importer.py index f62aa0444..c1127667f 100644 --- a/vulnerabilities/importer.py +++ b/vulnerabilities/importer.py @@ -172,7 +172,7 @@ def to_dict(self): return { "reference_id": self.reference_id, "reference_type": self.reference_type, - "url": self.url + "url": self.url, } @classmethod From 781514494b900a32f6502ff85c12bb900af25544 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 24 Apr 2025 15:28:23 +0530 Subject: [PATCH 03/44] Add migrations Signed-off-by: Tushar Goel --- ...soryreference_advisoryseverity_and_more.py | 251 ++++++++++++++++++ vulnerabilities/models.py | 1 - 2 files changed, 251 insertions(+), 1 deletion(-) create mode 100644 vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py diff --git a/vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py b/vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py new file mode 100644 index 000000000..b24edc836 --- /dev/null +++ b/vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py @@ -0,0 +1,251 @@ +# Generated by Django 4.2.20 on 2025-04-24 09:58 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0091_alter_advisory_unique_together_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="AdvisoryAlias", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "alias", + models.CharField( + help_text="An alias is a unique vulnerability identifier in some database, such as CVE-2020-2233", + max_length=50, + unique=True, + ), + ), + ], + options={ + "ordering": ["alias"], + }, + ), + migrations.CreateModel( + name="AdvisoryReference", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "url", + models.URLField( + help_text="URL to the vulnerability reference", max_length=1024, unique=True + ), + ), + ( + "reference_type", + models.CharField( + blank=True, + choices=[ + 
("advisory", "Advisory"), + ("exploit", "Exploit"), + ("mailing_list", "Mailing List"), + ("bug", "Bug"), + ("other", "Other"), + ], + max_length=20, + ), + ), + ( + "reference_id", + models.CharField( + blank=True, + db_index=True, + help_text="An optional reference ID, such as DSA-4465-1 when available", + max_length=200, + ), + ), + ], + options={ + "ordering": ["reference_id", "url", "reference_type"], + }, + ), + migrations.CreateModel( + name="AdvisorySeverity", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "url", + models.URLField( + db_index=True, + help_text="URL to the vulnerability severity", + max_length=1024, + null=True, + ), + ), + ( + "scoring_system", + models.CharField( + choices=[ + ("cvssv2", "CVSSv2 Base Score"), + ("cvssv3", "CVSSv3 Base Score"), + ("cvssv3.1", "CVSSv3.1 Base Score"), + ("cvssv4", "CVSSv4 Base Score"), + ("rhbs", "RedHat Bugzilla severity"), + ("rhas", "RedHat Aggregate severity"), + ("archlinux", "Archlinux Vulnerability Group Severity"), + ("cvssv3.1_qr", "CVSSv3.1 Qualitative Severity Rating"), + ("generic_textual", "Generic textual severity rating"), + ("apache_httpd", "Apache Httpd Severity"), + ("apache_tomcat", "Apache Tomcat Severity"), + ("epss", "Exploit Prediction Scoring System"), + ("ssvc", "Stakeholder-Specific Vulnerability Categorization"), + ], + help_text="Identifier for the scoring system used. Available choices are: cvssv2: CVSSv2 Base Score,\ncvssv3: CVSSv3 Base Score,\ncvssv3.1: CVSSv3.1 Base Score,\ncvssv4: CVSSv4 Base Score,\nrhbs: RedHat Bugzilla severity,\nrhas: RedHat Aggregate severity,\narchlinux: Archlinux Vulnerability Group Severity,\ncvssv3.1_qr: CVSSv3.1 Qualitative Severity Rating,\ngeneric_textual: Generic textual severity rating,\napache_httpd: Apache Httpd Severity,\napache_tomcat: Apache Tomcat Severity,\nepss: Exploit Prediction Scoring System,\nssvc: Stakeholder-Specific Vulnerability Categorization ", + max_length=50, + ), + ), + ( + "value", + models.CharField(help_text="Example: 9.0, Important, High", max_length=50), + ), + ( + "scoring_elements", + models.CharField( + help_text="Supporting scoring elements used to compute the score values. 
For example a CVSS vector string as used to compute a CVSS score.", + max_length=150, + null=True, + ), + ), + ( + "published_at", + models.DateTimeField( + blank=True, + help_text="UTC Date of publication of the vulnerability severity", + null=True, + ), + ), + ], + options={ + "ordering": ["url", "scoring_system", "value"], + }, + ), + migrations.CreateModel( + name="AdvisoryWeakness", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ("cwe_id", models.IntegerField(help_text="CWE id")), + ], + ), + migrations.CreateModel( + name="AdvisoryV2", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "advisory_id", + models.CharField( + help_text="An advisory is a unique vulnerability identifier in some database, such as CVE-2020-2233", + max_length=50, + ), + ), + ( + "unique_content_id", + models.CharField( + help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex", + max_length=64, + unique=True, + ), + ), + ("summary", models.TextField(blank=True)), + ( + "affected_packages", + models.JSONField( + blank=True, + default=list, + help_text="A list of serializable AffectedPackage objects", + ), + ), + ( + "date_published", + models.DateTimeField( + blank=True, help_text="UTC Date of publication of the advisory", null=True + ), + ), + ( + "date_collected", + models.DateTimeField(help_text="UTC Date on which the advisory was collected"), + ), + ( + "date_imported", + models.DateTimeField( + blank=True, + help_text="UTC Date on which the advisory was imported", + null=True, + ), + ), + ( + "created_by", + models.CharField( + help_text="Fully qualified name of the importer prefixed with themodule name importing the advisory. 
Eg:vulnerabilities.pipeline.nginx_importer.NginxImporterPipeline", + max_length=100, + ), + ), + ("url", models.URLField(help_text="Link to the advisory on the upstream website")), + ( + "aliases", + models.ManyToManyField( + help_text="A list of serializable Alias objects", + related_name="advisories", + to="vulnerabilities.advisoryalias", + ), + ), + ( + "references", + models.ManyToManyField( + help_text="A list of serializable Reference objects", + related_name="advisories", + to="vulnerabilities.advisoryreference", + ), + ), + ( + "severities", + models.ManyToManyField( + help_text="A list of vulnerability severities associated with this advisory.", + related_name="advisories", + to="vulnerabilities.advisoryseverity", + ), + ), + ( + "weaknesses", + models.ManyToManyField( + help_text="A list of software weaknesses associated with this advisory.", + related_name="advisories", + to="vulnerabilities.advisoryweakness", + ), + ), + ], + options={ + "ordering": ["date_published", "unique_content_id"], + }, + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 7d11222f6..bb8981d42 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2379,7 +2379,6 @@ class AdvisoryWeakness(models.Model): """ cwe_id = models.IntegerField(help_text="CWE id") - vulnerabilities = models.ManyToManyField(Vulnerability, related_name="weaknesses") cwe_by_id = {} From c6963cc15e26194f7b8df2c780a6a0c7a56c3fce Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 24 Apr 2025 16:19:02 +0530 Subject: [PATCH 04/44] Add model changes and support new advisory ingestion Signed-off-by: Tushar Goel --- vulnerabilities/importer.py | 1 + vulnerabilities/pipelines/__init__.py | 22 ++++++--- .../pipelines/alpine_linux_importer.py | 24 ++++++++++ vulnerabilities/pipes/advisory.py | 45 +++++++++++++++++++ vulnerabilities/utils.py | 5 ++- 5 files changed, 89 insertions(+), 8 deletions(-) diff --git a/vulnerabilities/importer.py b/vulnerabilities/importer.py index c1127667f..230f6a5dd 100644 --- a/vulnerabilities/importer.py +++ b/vulnerabilities/importer.py @@ -417,6 +417,7 @@ class AdvisoryDataV2: date_published must be aware datetime """ + advisory_id: str = "" aliases: List[str] = dataclasses.field(default_factory=list) summary: Optional[str] = "" affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index d41b05321..875e6eced 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -21,11 +21,13 @@ from aboutcode.pipeline import humanize_time from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.models import Advisory from vulnerabilities.models import PipelineRun from vulnerabilities.pipes.advisory import import_advisory from vulnerabilities.pipes.advisory import insert_advisory +from vulnerabilities.pipes.advisory import insert_advisory_v2 from vulnerabilities.utils import classproperty module_logger = logging.getLogger(__name__) @@ -207,12 +209,20 @@ def collect_and_store_advisories(self): progress = LoopProgress(total_iterations=estimated_advisory_count, logger=self.log) for advisory in progress.iter(self.collect_advisories()): - if _obj := insert_advisory( - advisory=advisory, - pipeline_id=self.pipeline_id, - logger=self.log, - ): - collected_advisory_count += 1 + if isinstance(advisory, 
AdvisoryData): + if _obj := insert_advisory( + advisory=advisory, + pipeline_id=self.pipeline_id, + logger=self.log, + ): + collected_advisory_count += 1 + if isinstance(advisory, AdvisoryDataV2): + if _obj := insert_advisory_v2( + advisory=advisory, + pipeline_id=self.pipeline_id, + logger=self.log, + ): + collected_advisory_count += 1 self.log(f"Successfully collected {collected_advisory_count:,d} advisories") diff --git a/vulnerabilities/pipelines/alpine_linux_importer.py b/vulnerabilities/pipelines/alpine_linux_importer.py index 5657ee4d2..fbdb01188 100644 --- a/vulnerabilities/pipelines/alpine_linux_importer.py +++ b/vulnerabilities/pipelines/alpine_linux_importer.py @@ -19,6 +19,7 @@ from univers.versions import AlpineLinuxVersion from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.importer import AffectedPackage from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.references import WireSharkReference @@ -288,3 +289,26 @@ def load_advisories( aliases=aliases, url=url, ) + + if any(is_cve(alias) for alias in aliases): + advisory_id = next((alias for alias in aliases if is_cve(alias)), None) + aliases.remove(advisory_id) + yield AdvisoryDataV2( + references=references, + affected_packages=affected_packages, + url=url, + advisory_id=advisory_id, + aliases=aliases, + ) + + else: + aliases.sort() + advisory_id = aliases[0] + aliases = aliases[1:] + yield AdvisoryDataV2( + references=references, + affected_packages=affected_packages, + url=url, + advisory_id=advisory_id, + aliases=aliases, + ) diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index 46f8b1ed3..419b37ca3 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -20,8 +20,11 @@ from django.db.models.query import QuerySet from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.models import Advisory +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import AffectedByPackageRelatedVulnerability from vulnerabilities.models import Alias from vulnerabilities.models import FixingPackageRelatedVulnerability @@ -38,6 +41,12 @@ def get_or_create_aliases(aliases: List) -> QuerySet: return Alias.objects.filter(alias__in=aliases) +def get_or_create_aliases_v2(aliases: List) -> QuerySet: + for alias in aliases: + AdvisoryAlias.objects.get_or_create(alias=alias) + return AdvisoryAlias.objects.filter(alias__in=aliases) + + def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = None): from vulnerabilities.utils import compute_content_id @@ -76,6 +85,42 @@ def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = return advisory_obj +def insert_advisory_v2(advisory: AdvisoryDataV2, pipeline_id: str, logger: Callable = None): + from vulnerabilities.utils import compute_content_id + + advisory_obj = None + aliases = get_or_create_aliases_v2(aliases=advisory.aliases) + content_id = compute_content_id(advisory_data=advisory) + try: + default_data = { + "summary": advisory.summary, + "date_published": advisory.date_published, + "created_by": pipeline_id, + "date_collected": datetime.now(timezone.utc), + "advisory_id": advisory.advisory_id, + } + + advisory_obj, _ = AdvisoryV2.objects.get_or_create( + unique_content_id=content_id, + 
url=advisory.url, + defaults=default_data, + ) + advisory_obj.aliases.add(*aliases) + except Advisory.MultipleObjectsReturned: + logger.error( + f"Multiple Advisories returned: unique_content_id: {content_id}, url: {advisory.url}, advisory: {advisory!r}" + ) + raise + except Exception as e: + if logger: + logger( + f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + + return advisory_obj + + @transaction.atomic def import_advisory( advisory: Advisory, diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 52104b556..53e253ffd 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -39,7 +39,7 @@ from univers.version_range import NginxVersionRange from univers.version_range import VersionRange -from aboutcode.hashid import build_vcid # NOQA +from aboutcode.hashid import build_vcid logger = logging.getLogger(__name__) @@ -595,6 +595,7 @@ def compute_content_id(advisory_data): # Normalize fields from vulnerabilities.importer import AdvisoryData + from vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.models import Advisory if isinstance(advisory_data, Advisory): @@ -609,7 +610,7 @@ def compute_content_id(advisory_data): } normalized_data["url"] = advisory_data.url - elif isinstance(advisory_data, AdvisoryData): + elif isinstance(advisory_data, AdvisoryData) or isinstance(advisory_data, AdvisoryDataV2): normalized_data = { "aliases": normalize_list(advisory_data.aliases), "summary": normalize_text(advisory_data.summary), From fff60d377a39f2b72505e49599b404e287d8142e Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 2 May 2025 18:59:09 +0530 Subject: [PATCH 05/44] Add V2Pipelines Signed-off-by: Tushar Goel --- vulnerabilities/importer.py | 5 +- vulnerabilities/importers/__init__.py | 8 +- vulnerabilities/management/commands/import.py | 5 +- ...e_advisoryv2_affected_packages_and_more.py | 132 +++++++ vulnerabilities/models.py | 117 +++++- vulnerabilities/pipelines/__init__.py | 98 ++++- .../pipelines/v2_importers/nvd_importer.py | 336 ++++++++++++++++++ vulnerabilities/pipes/advisory.py | 88 ++++- 8 files changed, 767 insertions(+), 22 deletions(-) create mode 100644 vulnerabilities/migrations/0093_packagev2_remove_advisoryv2_affected_packages_and_more.py create mode 100644 vulnerabilities/pipelines/v2_importers/nvd_importer.py diff --git a/vulnerabilities/importer.py b/vulnerabilities/importer.py index 230f6a5dd..9cef5e0fa 100644 --- a/vulnerabilities/importer.py +++ b/vulnerabilities/importer.py @@ -55,6 +55,7 @@ class VulnerabilitySeverity: value: str scoring_elements: str = "" published_at: Optional[datetime.datetime] = None + url: Optional[str] = None def to_dict(self): data = { @@ -350,12 +351,15 @@ class AdvisoryData: date_published must be aware datetime """ + advisory_id: str = "" aliases: List[str] = dataclasses.field(default_factory=list) summary: Optional[str] = "" affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) references: List[Reference] = dataclasses.field(default_factory=list) + references_v2: List[ReferenceV2] = dataclasses.field(default_factory=list) date_published: Optional[datetime.datetime] = None weaknesses: List[int] = dataclasses.field(default_factory=list) + severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list) url: Optional[str] = None def __post_init__(self): @@ -424,7 +428,6 @@ class AdvisoryDataV2: references: List[ReferenceV2] = 
dataclasses.field(default_factory=list) date_published: Optional[datetime.datetime] = None weaknesses: List[int] = dataclasses.field(default_factory=list) - severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list) url: Optional[str] = None def __post_init__(self): diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index f0d9532ab..24c64cfbc 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -34,6 +34,7 @@ from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 from vulnerabilities.pipelines import alpine_linux_importer from vulnerabilities.pipelines import github_importer from vulnerabilities.pipelines import gitlab_importer @@ -42,8 +43,10 @@ from vulnerabilities.pipelines import nvd_importer from vulnerabilities.pipelines import pypa_importer from vulnerabilities.pipelines import pysec_importer +from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2 IMPORTERS_REGISTRY = [ + nvd_importer_v2.NVDImporterPipeline, nvd_importer.NVDImporterPipeline, github_importer.GitHubAPIImporterPipeline, gitlab_importer.GitLabImporterPipeline, @@ -81,6 +84,9 @@ ] IMPORTERS_REGISTRY = { - x.pipeline_id if issubclass(x, VulnerableCodeBaseImporterPipeline) else x.qualified_name: x + x.pipeline_id + if issubclass(x, VulnerableCodeBaseImporterPipeline) + or issubclass(x, VulnerableCodeBaseImporterPipelineV2) + else x.qualified_name: x for x in IMPORTERS_REGISTRY } diff --git a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py index f4876b11a..78ec8bb0a 100644 --- a/vulnerabilities/management/commands/import.py +++ b/vulnerabilities/management/commands/import.py @@ -14,6 +14,7 @@ from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importers import IMPORTERS_REGISTRY from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 class Command(BaseCommand): @@ -57,7 +58,9 @@ def import_data(self, importers): failed_importers = [] for importer in importers: - if issubclass(importer, VulnerableCodeBaseImporterPipeline): + if issubclass(importer, VulnerableCodeBaseImporterPipeline) or issubclass( + importer, VulnerableCodeBaseImporterPipelineV2 + ): self.stdout.write(f"Importing data using {importer.pipeline_id}") status, error = importer().execute() if status != 0: diff --git a/vulnerabilities/migrations/0093_packagev2_remove_advisoryv2_affected_packages_and_more.py b/vulnerabilities/migrations/0093_packagev2_remove_advisoryv2_affected_packages_and_more.py new file mode 100644 index 000000000..f421e67fe --- /dev/null +++ b/vulnerabilities/migrations/0093_packagev2_remove_advisoryv2_affected_packages_and_more.py @@ -0,0 +1,132 @@ +# Generated by Django 4.2.20 on 2025-05-02 08:56 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0092_advisoryalias_advisoryreference_advisoryseverity_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="PackageV2", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "type", + models.CharField( + blank=True, + help_text="A short code to 
identify the type of this package. For example: gem for a Rubygem, docker for a container, pypi for a Python Wheel or Egg, maven for a Maven Jar, deb for a Debian package, etc.", + max_length=16, + ), + ), + ( + "namespace", + models.CharField( + blank=True, + help_text="Package name prefix, such as Maven groupid, Docker image owner, GitHub user or organization, etc.", + max_length=255, + ), + ), + ( + "name", + models.CharField(blank=True, help_text="Name of the package.", max_length=100), + ), + ( + "version", + models.CharField( + blank=True, help_text="Version of the package.", max_length=100 + ), + ), + ( + "qualifiers", + models.CharField( + blank=True, + help_text="Extra qualifying data for a package such as the name of an OS, architecture, distro, etc.", + max_length=1024, + ), + ), + ( + "subpath", + models.CharField( + blank=True, + help_text="Extra subpath within a package, relative to the package root.", + max_length=200, + ), + ), + ( + "package_url", + models.CharField( + db_index=True, + help_text="The Package URL for this package.", + max_length=1000, + ), + ), + ( + "plain_package_url", + models.CharField( + db_index=True, + help_text="The Package URL for this package without qualifiers and subpath.", + max_length=1000, + ), + ), + ( + "is_ghost", + models.BooleanField( + db_index=True, + default=False, + help_text="True if the package does not exist in the upstream package manager or its repository.", + ), + ), + ( + "risk_score", + models.DecimalField( + decimal_places=1, + help_text="Risk score between 0.00 and 10.00, where higher values indicate greater vulnerability risk for the package.", + max_digits=3, + null=True, + ), + ), + ( + "version_rank", + models.IntegerField( + db_index=True, + default=0, + help_text="Rank of the version to support ordering by version. 
Rank zero means the rank has not been defined yet", + ), + ), + ], + options={ + "abstract": False, + }, + ), + migrations.RemoveField( + model_name="advisoryv2", + name="affected_packages", + ), + migrations.AddField( + model_name="advisoryv2", + name="affecting_packages", + field=models.ManyToManyField( + help_text="A list of packages that are affected by this advisory.", + related_name="fixing_advisories", + to="vulnerabilities.packagev2", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="fixed_by_packages", + field=models.ManyToManyField( + help_text="A list of packages that are reported by this advisory.", + related_name="affected_by_advisories", + to="vulnerabilities.packagev2", + ), + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index bb8981d42..e2d2e1c12 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2526,9 +2526,6 @@ class AdvisoryV2(models.Model): related_name="advisories", help_text="A list of serializable Alias objects", ) - affected_packages = models.JSONField( - blank=True, default=list, help_text="A list of serializable AffectedPackage objects" - ) references = models.ManyToManyField( AdvisoryReference, related_name="advisories", @@ -2563,6 +2560,18 @@ class AdvisoryV2(models.Model): help_text="Link to the advisory on the upstream website", ) + affecting_packages = models.ManyToManyField( + "PackageV2", + related_name="fixing_advisories", + help_text="A list of packages that are affected by this advisory.", + ) + + fixed_by_packages = models.ManyToManyField( + "PackageV2", + related_name="affected_by_advisories", + help_text="A list of packages that are reported by this advisory.", + ) + objects = AdvisoryQuerySet.as_manager() class Meta: @@ -2604,3 +2613,105 @@ class ToDoRelatedAdvisory(models.Model): class Meta: unique_together = ("todo", "advisory") +class PackageV2(PackageURLMixin): + """ + A software package with related vulnerabilities. + """ + + package_url = models.CharField( + max_length=1000, + null=False, + help_text="The Package URL for this package.", + db_index=True, + ) + + plain_package_url = models.CharField( + max_length=1000, + null=False, + help_text="The Package URL for this package without qualifiers and subpath.", + db_index=True, + ) + + is_ghost = models.BooleanField( + default=False, + help_text="True if the package does not exist in the upstream package manager or its repository.", + db_index=True, + ) + + risk_score = models.DecimalField( + null=True, + max_digits=3, + decimal_places=1, + help_text="Risk score between 0.00 and 10.00, where higher values " + "indicate greater vulnerability risk for the package.", + ) + + version_rank = models.IntegerField( + help_text="Rank of the version to support ordering by version. Rank " + "zero means the rank has not been defined yet", + default=0, + db_index=True, + ) + + def __str__(self): + return self.package_url + + @property + def purl(self): + return self.package_url + + def save(self, *args, **kwargs): + """ + Save, normalizing PURL fields. 
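+
+        For illustration (hypothetical values), saving
+        PackageV2(type="pypi", name="django", version="4.2.1") would store
+        "pkg:pypi/django@4.2.1" in both package_url and plain_package_url,
+        since there are no qualifiers or subpath to strip.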
+ """ + purl = PackageURL( + type=self.type, + namespace=self.namespace, + name=self.name, + version=self.version, + qualifiers=self.qualifiers, + subpath=self.subpath, + ) + + # We re-parse the purl to ensure name and namespace + # are set correctly + normalized = normalize_purl(purl=purl) + + for name, value in purl_to_dict(normalized).items(): + setattr(self, name, value) + + self.package_url = str(normalized) + plain_purl = utils.plain_purl(normalized) + self.plain_package_url = str(plain_purl) + super().save(*args, **kwargs) + + @property + def calculate_version_rank(self): + """ + Calculate and return the `version_rank` for a package that does not have one. + If this package already has a `version_rank`, return it. + + The calculated rank will be interpolated between two packages that have + `version_rank` values and are closest to this package in terms of version order. + """ + + group_packages = Package.objects.filter( + type=self.type, + namespace=self.namespace, + name=self.name, + ) + + if any(p.version_rank == 0 for p in group_packages): + sorted_packages = sorted(group_packages, key=lambda p: self.version_class(p.version)) + for rank, package in enumerate(sorted_packages, start=1): + package.version_rank = rank + Package.objects.bulk_update(sorted_packages, fields=["version_rank"]) + return self.version_rank + + def get_or_create_from_purl(self, purl: Union[PackageURL, str]): + """ + Return a new or existing Package given a ``purl`` PackageURL object or PURL string. + """ + package, is_created = Package.objects.get_or_create(**purl_to_dict(purl=purl)) + + return package, is_created diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 875e6eced..41e481c26 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -21,10 +21,10 @@ from aboutcode.pipeline import humanize_time from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.models import Advisory from vulnerabilities.models import PipelineRun +from vulnerabilities.models import PackageV2 from vulnerabilities.pipes.advisory import import_advisory from vulnerabilities.pipes.advisory import insert_advisory from vulnerabilities.pipes.advisory import insert_advisory_v2 @@ -216,13 +216,6 @@ def collect_and_store_advisories(self): logger=self.log, ): collected_advisory_count += 1 - if isinstance(advisory, AdvisoryDataV2): - if _obj := insert_advisory_v2( - advisory=advisory, - pipeline_id=self.pipeline_id, - logger=self.log, - ): - collected_advisory_count += 1 self.log(f"Successfully collected {collected_advisory_count:,d} advisories") @@ -258,3 +251,92 @@ def import_advisory(self, advisory: Advisory) -> int: f"Failed to import advisory: {advisory!r} with error {e!r}:\n{traceback_format_exc()}", level=logging.ERROR, ) + + +class VulnerableCodeBaseImporterPipelineV2(VulnerableCodePipeline): + """ + Base importer pipeline for importing advisories. + + Uses: + Subclass this Pipeline and implement ``advisories_count`` and ``collect_advisories`` + method. Also override the ``steps`` and ``advisory_confidence`` as needed. + """ + + pipeline_id = None # Unique Pipeline ID, this should be the name of pipeline module. 
+ license_url = None + spdx_license_expression = None + repo_url = None + importer_name = None + advisory_confidence = MAX_CONFIDENCE + + @classmethod + def steps(cls): + return ( + cls.collect_and_store_advisories, + cls.import_new_advisories, + ) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + """ + Yield AdvisoryData for importer pipeline. + + Populate the `self.collected_advisories_count` field and yield AdvisoryData + """ + raise NotImplementedError + + def advisories_count(self) -> int: + """ + Return the estimated AdvisoryData to be yielded by ``collect_advisories``. + + Used by ``collect_and_store_advisories`` to log the progress of advisory collection. + """ + raise NotImplementedError + + def collect_and_store_advisories(self): + collected_advisory_count = 0 + estimated_advisory_count = self.advisories_count() + + if estimated_advisory_count > 0: + self.log(f"Collecting {estimated_advisory_count:,d} advisories") + + progress = LoopProgress(total_iterations=estimated_advisory_count, logger=self.log) + for advisory in progress.iter(self.collect_advisories()): + if _obj := insert_advisory_v2( + advisory=advisory, + pipeline_id=self.pipeline_id, + get_advisory_packages=self.get_advisory_packages, + logger=self.log, + ): + collected_advisory_count += 1 + + self.log(f"Successfully collected {collected_advisory_count:,d} advisories") + + def get_advisory_packages(self, advisory_data: AdvisoryData) -> list: + """ + Return the list of packages for the given advisory. + + Used by ``import_advisory`` to get the list of packages for the advisory. + """ + from vulnerabilities.improvers import default + + affected_purls = [] + fixed_purls = [] + for affected_package in advisory_data.affected_packages: + package_affected_purls, package_fixed_purls = default.get_exact_purls( + affected_package=affected_package + ) + affected_purls.extend(package_affected_purls) + fixed_purls.extend(package_fixed_purls) + + vulnerable_packages = [] + fixed_packages = [] + + for affected_purl in affected_purls: + vulnerable_package, _ = PackageV2.objects.get_or_create_from_purl(purl=affected_purl) + vulnerable_packages.append(vulnerable_package) + + for fixed_purl in fixed_purls: + fixed_package, _ = PackageV2.objects.get_or_create_from_purl(purl=fixed_purl) + fixed_packages.append(fixed_package) + + return vulnerable_packages, fixed_packages diff --git a/vulnerabilities/pipelines/v2_importers/nvd_importer.py b/vulnerabilities/pipelines/v2_importers/nvd_importer.py new file mode 100644 index 000000000..ddb291a8f --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/nvd_importer.py @@ -0,0 +1,336 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import gzip +import json +import logging +from datetime import date +from traceback import format_exc as traceback_format_exc +from typing import Iterable + +import attr +import requests +from dateutil import parser as dateparser + +from vulnerabilities import severity_systems +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.utils import get_cwe_id +from vulnerabilities.utils import get_item + + +class NVDImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """Collect advisories from NVD.""" + + pipeline_id = "nvd_importer_v2" + + # See https://github.com/nexB/vulnerablecode/issues/665 for follow up + spdx_license_expression = ( + "LicenseRef-scancode-us-govt-public-domain AND LicenseRef-scancode-cve-tou" + ) + license_url = "https://nvd.nist.gov/general/FAQ-Sections/General-FAQs#faqLink7" + notice = """ + See https://nvd.nist.gov/general/FAQ-Sections/General-FAQs#faqLink7 + All NVD data is freely available from our data feeds + (https://nvd.nist.gov/vuln/data-feeds). There are no fees, licensing + restrictions, or even a requirement to register. All NIST publications are + available in the public domain according to Title 17 of the United States + Code. Acknowledgment of the NVD when using our information is appreciated. + In addition, please email nvd@nist.gov to let us know how the information is + being used + + See also https://cve.mitre.org/about/termsofuse.html + Terms of Use + LICENSE + [...] + CVE Usage: MITRE hereby grants you a perpetual, worldwide, non-exclusive, no- + charge, royalty-free, irrevocable copyright license to reproduce, prepare + derivative works of, publicly display, publicly perform, sublicense, and + distribute Common Vulnerabilities and Exposures (CVE®). Any copy you make for + such purposes is authorized provided that you reproduce MITRE's copyright + designation and this license in any such copy. DISCLAIMERS + + ALL DOCUMENTS AND THE INFORMATION CONTAINED THEREIN PROVIDED BY MITRE ARE + PROVIDED ON AN "AS IS" BASIS AND THE CONTRIBUTOR, THE ORGANIZATION HE/SHE + REPRESENTS OR IS SPONSORED BY (IF ANY), THE MITRE CORPORATION, ITS BOARD OF + TRUSTEES, OFFICERS, AGENTS, AND EMPLOYEES, DISCLAIM ALL WARRANTIES, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION THEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
+ """ + importer_name = "NVD Importer" + + @classmethod + def steps(cls): + return (cls.collect_and_store_advisories,) + + def advisories_count(self): + url = "https://services.nvd.nist.gov/rest/json/cves/2.0?resultsPerPage=1" + + advisory_count = 0 + try: + response = requests.get(url) + response.raise_for_status() + data = response.json() + except requests.HTTPError as http_err: + self.log( + f"HTTP error occurred: {http_err} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + return advisory_count + + advisory_count = data.get("totalResults", 0) + return advisory_count + + def collect_advisories(self) -> Iterable[AdvisoryData]: + for _year, cve_data in fetch_cve_data_1_1(logger=self.log): + yield from to_advisories(cve_data=cve_data) + + +# Isolating network calls for simplicity of testing +def fetch(url, logger=None): + if logger: + logger(f"Fetching `{url}`") + gz_file = requests.get(url) + data = gzip.decompress(gz_file.content) + try: + data = data.decode("utf-8") + except UnicodeDecodeError: + logger(f"Failed to decode data from {url}") + return {} + return json.loads(data) + + +def fetch_cve_data_1_1(starting_year=2025, logger=None): + """ + Yield tuples of (year, lists of CVE mappings) from the NVD, one for each + year since ``starting_year`` defaulting to 2002. + """ + current_year = date.today().year + # NVD json feeds start from 2002. + for year in range(starting_year, current_year + 1): + download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz" + yield year, fetch(url=download_url, logger=logger) + + +def to_advisories(cve_data): + """ + Yield AdvisoryData objects from a CVE json feed. + """ + for cve_item in CveItem.from_cve_data(cve_data=cve_data): + if cve_item.is_related_to_hardware or not cve_item.cve_id: + continue + yield cve_item.to_advisory() + + +@attr.attributes +class CveItem: + cve_item = attr.attrib(default=attr.Factory(dict), type=dict) + + @classmethod + def to_advisories(cls, cve_data, skip_hardware=True): + """ + Yield AdvisoryData objects from ``cve_data`` data for CVE JSON 1.1feed. + Skip hardware + """ + for cve_item in CveItem.from_cve_data(cve_data=cve_data, skip_hardware=skip_hardware): + yield cve_item.to_advisory() + + @classmethod + def from_cve_data(cls, cve_data, skip_hardware=True): + """ + Yield CVE items mapping from a cve_data list of CVE mappings from the NVD. + """ + for cve_item in cve_data.get("CVE_Items") or []: + if not cve_item: + continue + if not isinstance(cve_item, dict): + raise ValueError(f"cve_item: {cve_item!r} is not a mapping") + cve_item = cls(cve_item=cve_item) + if skip_hardware and cve_item.is_related_to_hardware: + continue + yield cve_item + + @property + def cve_id(self): + return self.cve_item["cve"]["CVE_data_meta"]["ID"] + + @property + def summary(self): + """ + Return a descriptive summary. + """ + # In 99% of cases len(cve_item['cve']['description']['description_data']) == 1 , so + # this usually returns cve_item['cve']['description']['description_data'][0]['value'] + # In the remaining 1% cases this returns the longest summary. + # FIXME: we should retun the full description WITH the summry as the first line instead + summaries = [] + for desc in get_item(self.cve_item, "cve", "description", "description_data") or []: + if desc.get("value"): + summaries.append(desc["value"]) + return max(summaries, key=len) if summaries else None + + @property + def cpes(self): + """ + Return a list of unique CPE strings for this CVE. 
+ """ + # FIXME: we completely ignore the configurations here + cpes = [] + for node in get_item(self.cve_item, "configurations", "nodes") or []: + for cpe_data in node.get("cpe_match") or []: + cpe23_uri = cpe_data.get("cpe23Uri") + if cpe23_uri and cpe23_uri not in cpes: + cpes.append(cpe23_uri) + return cpes + + @property + def severities(self): + """ + Return a list of VulnerabilitySeverity for this CVE. + """ + severities = [] + impact = self.cve_item.get("impact") or {} + base_metric_v4 = impact.get("baseMetricV4") or {} + if base_metric_v4: + cvss_v4 = base_metric_v4.get("cvssV4") or {} + vs = VulnerabilitySeverity( + system=severity_systems.CVSSV4, + value=str(cvss_v4.get("baseScore") or ""), + scoring_elements=str(cvss_v4.get("vectorString") or ""), + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + ) + severities.append(vs) + + base_metric_v3 = impact.get("baseMetricV3") or {} + if base_metric_v3: + cvss_v3 = get_item(base_metric_v3, "cvssV3") + version = cvss_v3.get("version") + system = None + if version == "3.1": + system = severity_systems.CVSSV31 + else: + system = severity_systems.CVSSV3 + vs = VulnerabilitySeverity( + system=system, + value=str(cvss_v3.get("baseScore") or ""), + scoring_elements=str(cvss_v3.get("vectorString") or ""), + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + ) + severities.append(vs) + + base_metric_v2 = impact.get("baseMetricV2") or {} + if base_metric_v2: + cvss_v2 = base_metric_v2.get("cvssV2") or {} + vs = VulnerabilitySeverity( + system=severity_systems.CVSSV2, + value=str(cvss_v2.get("baseScore") or ""), + scoring_elements=str(cvss_v2.get("vectorString") or ""), + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + ) + severities.append(vs) + + return severities + + @property + def reference_urls(self): + """ + Return a list unique of reference URLs. + """ + # FIXME: we should also collect additional data from the references such as tags and ids + + urls = [] + for reference in get_item(self.cve_item, "cve", "references", "reference_data") or []: + ref_url = reference.get("url") + if ref_url and ref_url.startswith(("http", "ftp")) and ref_url not in urls: + urls.append(ref_url) + return urls + + @property + def references(self): + """ + Return a list of AdvisoryReference. + """ + # FIXME: we should also collect additional data from the references such as tags and ids + references = [] + + # we track each CPE as a reference for now + for cpe in self.cpes: + cpe_url = f"https://nvd.nist.gov/vuln/search/results?adv_search=true&isCpeNameSearch=true&query={cpe}" + references.append(Reference(reference_id=cpe, url=cpe_url)) + + # FIXME: we also add the CVE proper as a reference, but is this correct? + references.append( + Reference( + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + reference_id=self.cve_id, + ) + ) + + # clean to remove dupes for the CVE id proper + ref_urls = [ + ru + for ru in self.reference_urls + if ru != f"https://nvd.nist.gov/vuln/detail/{self.cve_id}" + ] + references.extend([Reference(url=url) for url in ref_urls]) + + return references + + @property + def is_related_to_hardware(self): + """ + Return True if this CVE item is for hardware (as opposed to software). 
+ """ + return any(is_related_to_hardware(cpe) for cpe in self.cpes) + + @property + def weaknesses(self): + """ + Return a list of CWE IDs like: [119, 189] + """ + weaknesses = [] + for weaknesses_item in ( + get_item(self.cve_item, "cve", "problemtype", "problemtype_data") or [] + ): + weaknesses_description = weaknesses_item.get("description") or [] + for weaknesses_value in weaknesses_description: + cwe_id = ( + weaknesses_value.get("value") if weaknesses_value.get("lang") == "en" else None + ) + if cwe_id in ["NVD-CWE-Other", "NVD-CWE-noinfo"] or not cwe_id: + continue # Skip Invalid CWE + weaknesses.append(get_cwe_id(cwe_id)) + return weaknesses + + def to_advisory(self): + """ + Return an AdvisoryData object from this CVE item + """ + return AdvisoryData( + advisory_id=self.cve_id, + aliases=[], + summary=self.summary, + references_v2=self.references, + date_published=dateparser.parse(self.cve_item.get("publishedDate")), + weaknesses=self.weaknesses, + severities=self.severities, + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + ) + + +def is_related_to_hardware(cpe): + """ + Return True if the ``cpe`` is related to hardware. + """ + cpe_comps = cpe.split(":") + # CPE follow the format cpe:cpe_version:product_type:vendor:product + return len(cpe_comps) > 2 and cpe_comps[2] == "h" diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index 419b37ca3..3cee9bf1c 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -20,11 +20,13 @@ from django.db.models.query import QuerySet from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.models import Advisory from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisorySeverity from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import AdvisoryWeakness from vulnerabilities.models import AffectedByPackageRelatedVulnerability from vulnerabilities.models import Alias from vulnerabilities.models import FixingPackageRelatedVulnerability @@ -41,10 +43,59 @@ def get_or_create_aliases(aliases: List) -> QuerySet: return Alias.objects.filter(alias__in=aliases) -def get_or_create_aliases_v2(aliases: List) -> QuerySet: - for alias in aliases: - AdvisoryAlias.objects.get_or_create(alias=alias) - return AdvisoryAlias.objects.filter(alias__in=aliases) +from django.db.models import Q + + +def get_or_create_advisory_aliases(aliases: List[str]) -> List[AdvisoryAlias]: + existing = AdvisoryAlias.objects.filter(alias__in=aliases) + existing_aliases = {a.alias for a in existing} + + to_create = [AdvisoryAlias(alias=alias) for alias in aliases if alias not in existing_aliases] + AdvisoryAlias.objects.bulk_create(to_create, ignore_conflicts=True) + + return list(AdvisoryAlias.objects.filter(alias__in=aliases)) + + +def get_or_create_advisory_references(references: List) -> List[AdvisoryReference]: + reference_ids = [ref.reference_id for ref in references] + existing = AdvisoryReference.objects.filter(reference_id__in=reference_ids) + existing_ids = {r.reference_id for r in existing} + + to_create = [ + AdvisoryReference(reference_id=ref.reference_id, url=ref.url) + for ref in references + if ref.reference_id not in existing_ids + ] + AdvisoryReference.objects.bulk_create(to_create, ignore_conflicts=True) + + return 
list(AdvisoryReference.objects.filter(reference_id__in=reference_ids)) + + +def get_or_create_advisory_severities(severities: List) -> QuerySet: + severity_objs = [] + for severity in severities: + published_at = str(severity.published_at) if severity.published_at else None + sev, _ = AdvisorySeverity.objects.get_or_create( + scoring_system=severity.system.identifier, + value=severity.value, + scoring_elements=severity.scoring_elements, + defaults={ + "published_at": published_at, + }, + url=severity.url, + ) + severity_objs.append(sev) + return AdvisorySeverity.objects.filter(id__in=[severity.id for severity in severity_objs]) + + +def get_or_create_advisory_weaknesses(weaknesses: List[str]) -> List[AdvisoryWeakness]: + existing = AdvisoryWeakness.objects.filter(cwe_id__in=weaknesses) + existing_ids = {w.cwe_id for w in existing} + + to_create = [AdvisoryWeakness(cwe_id=w) for w in weaknesses if w not in existing_ids] + AdvisoryWeakness.objects.bulk_create(to_create, ignore_conflicts=True) + + return list(AdvisoryWeakness.objects.filter(cwe_id__in=weaknesses)) def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = None): @@ -85,12 +136,21 @@ def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = return advisory_obj -def insert_advisory_v2(advisory: AdvisoryDataV2, pipeline_id: str, logger: Callable = None): +def insert_advisory_v2( + advisory: AdvisoryData, + pipeline_id: str, + get_advisory_packages: Callable, + logger: Callable = None, +): from vulnerabilities.utils import compute_content_id advisory_obj = None - aliases = get_or_create_aliases_v2(aliases=advisory.aliases) + aliases = get_or_create_advisory_aliases(aliases=advisory.aliases) + references = get_or_create_advisory_references(references=advisory.references_v2) + severities = get_or_create_advisory_severities(severities=advisory.severities) + weaknesses = get_or_create_advisory_weaknesses(weaknesses=advisory.weaknesses) content_id = compute_content_id(advisory_data=advisory) + fixed_by_packages, affecting_packages = get_advisory_packages(advisory_data=advisory) try: default_data = { "summary": advisory.summary, @@ -105,7 +165,19 @@ def insert_advisory_v2(advisory: AdvisoryDataV2, pipeline_id: str, logger: Calla url=advisory.url, defaults=default_data, ) - advisory_obj.aliases.add(*aliases) + related_fields = { + "aliases": aliases, + "references": references, + "severities": severities, + "weaknesses": weaknesses, + "fixed_by_packages": fixed_by_packages, + "affecting_packages": affecting_packages, + } + + for field_name, values in related_fields.items(): + if values: + getattr(advisory_obj, field_name).add(*values) + except Advisory.MultipleObjectsReturned: logger.error( f"Multiple Advisories returned: unique_content_id: {content_id}, url: {advisory.url}, advisory: {advisory!r}" From 8731fa567901b3048c456942d8c9c385bb043899 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 2 May 2025 19:04:46 +0530 Subject: [PATCH 06/44] Revert alpine linux importer Signed-off-by: Tushar Goel --- .../pipelines/alpine_linux_importer.py | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/vulnerabilities/pipelines/alpine_linux_importer.py b/vulnerabilities/pipelines/alpine_linux_importer.py index fbdb01188..5657ee4d2 100644 --- a/vulnerabilities/pipelines/alpine_linux_importer.py +++ b/vulnerabilities/pipelines/alpine_linux_importer.py @@ -19,7 +19,6 @@ from univers.versions import AlpineLinuxVersion from vulnerabilities.importer import AdvisoryData -from 
vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.importer import AffectedPackage from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.references import WireSharkReference @@ -289,26 +288,3 @@ def load_advisories( aliases=aliases, url=url, ) - - if any(is_cve(alias) for alias in aliases): - advisory_id = next((alias for alias in aliases if is_cve(alias)), None) - aliases.remove(advisory_id) - yield AdvisoryDataV2( - references=references, - affected_packages=affected_packages, - url=url, - advisory_id=advisory_id, - aliases=aliases, - ) - - else: - aliases.sort() - advisory_id = aliases[0] - aliases = aliases[1:] - yield AdvisoryDataV2( - references=references, - affected_packages=affected_packages, - url=url, - advisory_id=advisory_id, - aliases=aliases, - ) From a5b45705ade9c4a0849ff1a4da5d8bfe813fc2b8 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 2 May 2025 19:12:37 +0530 Subject: [PATCH 07/44] Fix tests Signed-off-by: Tushar Goel --- vulnerabilities/importers/curl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/importers/curl.py b/vulnerabilities/importers/curl.py index a7f5e86fa..7cbc3208e 100644 --- a/vulnerabilities/importers/curl.py +++ b/vulnerabilities/importers/curl.py @@ -97,7 +97,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData: ... ] ... } >>> parse_advisory_data(raw_data) - AdvisoryData(aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], url='https://curl.se/docs/CVE-2024-2379.json') + AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json') """ 
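The doctest above reflects the fields added to AdvisoryData in this series (advisory_id, references_v2, severities). As a minimal illustration only, the expanded dataclass can be constructed directly; the defaults shown for the new fields are read off the doctest output and are otherwise assumptions, and this snippet is not part of the curl importer itself.

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import Reference

# Build an advisory similar to the one in the doctest; new fields fall back to defaults.
advisory = AdvisoryData(
    aliases=["CVE-2024-2379"],
    summary="QUIC certificate check bypass with wolfSSL",
    references=[Reference(url="https://curl.se/docs/CVE-2024-2379.html")],
    url="https://curl.se/docs/CVE-2024-2379.json",
)
assert advisory.advisory_id == ""  # empty when an importer does not set an id
assert advisory.references_v2 == []  # V2 references default to an empty list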
affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else [] From 9100c94eb3cbc8d25982c1331c529371b6ba97ab Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 2 May 2025 19:38:06 +0530 Subject: [PATCH 08/44] Refactor compute content ID Signed-off-by: Tushar Goel --- vulnerabilities/utils.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 53e253ffd..2843703c3 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -610,18 +610,27 @@ def compute_content_id(advisory_data): } normalized_data["url"] = advisory_data.url - elif isinstance(advisory_data, AdvisoryData) or isinstance(advisory_data, AdvisoryDataV2): + elif isinstance(advisory_data, AdvisoryData): normalized_data = { "aliases": normalize_list(advisory_data.aliases), "summary": normalize_text(advisory_data.summary), "affected_packages": [ pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg ], - "references": [ - ref.to_dict() for ref in normalize_list(advisory_data.references) if ref - ], "weaknesses": normalize_list(advisory_data.weaknesses), } + if advisory_data.references_v2: + advisory_data["references"]= [ + ref.to_dict() for ref in normalize_list(advisory_data.references_v2) if ref + ] + advisory_data["severities"] = [ + sev.to_dict() for sev in normalize_list(advisory_data.severities) if sev + ] + if advisory_data.references: + advisory_data["references"]= [ + ref.to_dict() for ref in normalize_list(advisory_data.references) if ref + ] + normalized_data["url"] = advisory_data.url normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True) From e3d6582d638751720a66e6fdace27a06aa22e8f0 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 2 May 2025 19:38:42 +0530 Subject: [PATCH 09/44] Formatting changes Signed-off-by: Tushar Goel --- vulnerabilities/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 2843703c3..8d7d2040b 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -620,14 +620,14 @@ def compute_content_id(advisory_data): "weaknesses": normalize_list(advisory_data.weaknesses), } if advisory_data.references_v2: - advisory_data["references"]= [ + advisory_data["references"] = [ ref.to_dict() for ref in normalize_list(advisory_data.references_v2) if ref ] advisory_data["severities"] = [ sev.to_dict() for sev in normalize_list(advisory_data.severities) if sev ] if advisory_data.references: - advisory_data["references"]= [ + advisory_data["references"] = [ ref.to_dict() for ref in normalize_list(advisory_data.references) if ref ] From 26c6f264187b78002745d2915fff94a1e3bde860 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 2 May 2025 19:40:40 +0530 Subject: [PATCH 10/44] Fix errors in compute content ID Signed-off-by: Tushar Goel --- vulnerabilities/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 8d7d2040b..e35531e31 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -620,14 +620,14 @@ def compute_content_id(advisory_data): "weaknesses": normalize_list(advisory_data.weaknesses), } if advisory_data.references_v2: - advisory_data["references"] = [ + normalized_data["references"] = [ ref.to_dict() for ref in normalize_list(advisory_data.references_v2) if ref ] - advisory_data["severities"] = [ + 
normalized_data["severities"] = [ sev.to_dict() for sev in normalize_list(advisory_data.severities) if sev ] if advisory_data.references: - advisory_data["references"] = [ + normalized_data["references"] = [ ref.to_dict() for ref in normalize_list(advisory_data.references) if ref ] From 65be982423daa8f1c81e942fcd532cf117b00117 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 6 May 2025 21:17:33 +0530 Subject: [PATCH 11/44] Add github pipeline Signed-off-by: Tushar Goel --- vulnerabilities/importers/__init__.py | 2 + vulnerabilities/models.py | 38 +- vulnerabilities/pipelines/__init__.py | 165 +++++++- .../pipelines/v2_importers/github_importer.py | 391 ++++++++++++++++++ vulnerabilities/pipes/advisory.py | 2 +- vulnerabilities/utils.py | 7 +- 6 files changed, 589 insertions(+), 16 deletions(-) create mode 100644 vulnerabilities/pipelines/v2_importers/github_importer.py diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 24c64cfbc..0de162094 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -43,10 +43,12 @@ from vulnerabilities.pipelines import nvd_importer from vulnerabilities.pipelines import pypa_importer from vulnerabilities.pipelines import pysec_importer +from vulnerabilities.pipelines.v2_importers import github_importer as github_importer_v2 from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2 IMPORTERS_REGISTRY = [ nvd_importer_v2.NVDImporterPipeline, + github_importer_v2.GitHubAPIImporterPipeline, nvd_importer.NVDImporterPipeline, github_importer.GitHubAPIImporterPipeline, gitlab_importer.GitLabImporterPipeline, diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index e2d2e1c12..31ae170bc 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -172,6 +172,7 @@ def with_package_counts(self): ) +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilitySeverity(models.Model): url = models.URLField( max_length=1024, @@ -211,6 +212,7 @@ class Meta: ordering = ["url", "scoring_system", "value"] +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilityStatusType(models.IntegerChoices): """List of vulnerability statuses.""" @@ -219,6 +221,7 @@ class VulnerabilityStatusType(models.IntegerChoices): INVALID = 3, "Invalid" +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Vulnerability(models.Model): """ A software vulnerability with a unique identifier and alternate ``aliases``. 
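For context on the compute_content_id() changes above: the normalized dictionary is serialized with sorted keys and compact separators so that equivalent advisories always produce the same content id. A rough sketch of that idea follows; the SHA-256 digest is an assumption, since the actual digest step is outside the hunks shown.

import hashlib
import json

# Deterministic serialization: sorted keys and compact separators keep the JSON
# byte-for-byte stable across runs, so the digest changes only when content changes.
normalized_data = {
    "aliases": ["CVE-2024-2379"],
    "summary": "QUIC certificate check bypass with wolfSSL",
    "affected_packages": [],
    "references": [],
    "weaknesses": [297],
    "url": "https://curl.se/docs/CVE-2024-2379.json",
}
normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True)
content_id = hashlib.sha256(normalized_json.encode("utf-8")).hexdigest()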
@@ -511,6 +514,7 @@ def get_cwes(self): Database.get_cwes = get_cwes +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Weakness(models.Model): """ A Common Weakness Enumeration model @@ -557,6 +561,7 @@ def to_dict(self): return {"cwe_id": self.cwe_id, "name": self.name, "description": self.description} +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilityReferenceQuerySet(BaseQuerySet): def for_cpe(self): """ @@ -565,6 +570,7 @@ def for_cpe(self): return self.filter(reference_id__startswith="cpe") +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilityReference(models.Model): """ A reference to a vulnerability such as a security advisory from a Linux distribution or language @@ -622,6 +628,7 @@ def is_cpe(self): return self.reference_id.startswith("cpe") +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilityRelatedReference(models.Model): """ A reference related to a vulnerability. @@ -642,6 +649,7 @@ class Meta: ordering = ["vulnerability", "reference"] +# FIXME: Remove when migration from Vulnerability to Advisory is completed class PackageQuerySet(BaseQuerySet, PackageURLQuerySet): def get_fixed_by_package_versions(self, purl: PackageURL, fix=True): """ @@ -808,6 +816,7 @@ def get_purl_query_lookups(purl): return purl_to_dict(plain_purl, with_empty=False) +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Package(PackageURLMixin): """ A software package with related vulnerabilities. @@ -1136,6 +1145,7 @@ def affecting_vulns(self): ) +# FIXME: Remove when migration from Vulnerability to Advisory is completed class PackageRelatedVulnerabilityBase(models.Model): """ Abstract base class for package-vulnerability relations. @@ -1232,11 +1242,13 @@ def add_package_vulnerability_changelog(self, advisory): ) +# FIXME: Remove when migration from Vulnerability to Advisory is completed class FixingPackageRelatedVulnerability(PackageRelatedVulnerabilityBase): class Meta(PackageRelatedVulnerabilityBase.Meta): verbose_name_plural = "Fixing Package Related Vulnerabilities" +# FIXME: Remove when migration from Vulnerability to Advisory is completed class AffectedByPackageRelatedVulnerability(PackageRelatedVulnerabilityBase): severities = models.ManyToManyField( @@ -1258,6 +1270,7 @@ def for_cve(self): return self.filter(alias__startswith="CVE") +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Alias(models.Model): """ An alias is a unique vulnerability identifier in some database, such as @@ -1315,6 +1328,7 @@ class AdvisoryQuerySet(BaseQuerySet): pass +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Advisory(models.Model): """ An advisory represents data directly obtained from upstream transformed @@ -2572,6 +2586,8 @@ class AdvisoryV2(models.Model): help_text="A list of packages that are reported by this advisory.", ) + # TODO: Add Advisory Status + objects = AdvisoryQuerySet.as_manager() class Meta: @@ -2613,6 +2629,18 @@ class ToDoRelatedAdvisory(models.Model): class Meta: unique_together = ("todo", "advisory") + + +class PackageQuerySetV2(BaseQuerySet, PackageURLQuerySet): + def get_or_create_from_purl(self, purl: Union[PackageURL, str]): + """ + Return a new or existing Package given a ``purl`` PackageURL object or PURL string. 
+ """ + package, is_created = PackageV2.objects.get_or_create(**purl_to_dict(purl=purl)) + + return package, is_created + + class PackageV2(PackageURLMixin): """ A software package with related vulnerabilities. @@ -2685,6 +2713,8 @@ def save(self, *args, **kwargs): self.plain_package_url = str(plain_purl) super().save(*args, **kwargs) + objects = PackageQuerySetV2.as_manager() + @property def calculate_version_rank(self): """ @@ -2707,11 +2737,3 @@ def calculate_version_rank(self): package.version_rank = rank Package.objects.bulk_update(sorted_packages, fields=["version_rank"]) return self.version_rank - - def get_or_create_from_purl(self, purl: Union[PackageURL, str]): - """ - Return a new or existing Package given a ``purl`` PackageURL object or PURL string. - """ - package, is_created = Package.objects.get_or_create(**purl_to_dict(purl=purl)) - - return package, is_created diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 41e481c26..304c9bef3 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -15,12 +15,17 @@ from traceback import format_exc as traceback_format_exc from typing import Iterable from typing import List +from typing import Optional from aboutcode.pipeline import LoopProgress from aboutcode.pipeline import PipelineDefinition from aboutcode.pipeline import humanize_time +from fetchcode import package_versions +from packageurl import PackageURL from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import UnMergeablePackageError from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.models import Advisory from vulnerabilities.models import PipelineRun @@ -28,7 +33,11 @@ from vulnerabilities.pipes.advisory import import_advisory from vulnerabilities.pipes.advisory import insert_advisory from vulnerabilities.pipes.advisory import insert_advisory_v2 +from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage from vulnerabilities.utils import classproperty +from vulnerabilities.utils import get_affected_packages_by_patched_package +from vulnerabilities.utils import nearest_patched_package +from vulnerabilities.utils import resolve_version_range module_logger = logging.getLogger(__name__) @@ -268,13 +277,12 @@ class VulnerableCodeBaseImporterPipelineV2(VulnerableCodePipeline): repo_url = None importer_name = None advisory_confidence = MAX_CONFIDENCE + ignorable_versions = [] + unfurl_version_ranges = False @classmethod def steps(cls): - return ( - cls.collect_and_store_advisories, - cls.import_new_advisories, - ) + return (cls.collect_and_store_advisories,) def collect_advisories(self) -> Iterable[AdvisoryData]: """ @@ -328,6 +336,15 @@ def get_advisory_packages(self, advisory_data: AdvisoryData) -> list: affected_purls.extend(package_affected_purls) fixed_purls.extend(package_fixed_purls) + + if self.unfurl_version_ranges: + vulnerable_pvs, fixed_pvs = self.get_impacted_packages( + affected_packages=advisory_data.affected_packages, + advisory_date_published=advisory_data.date_published, + ) + affected_purls.extend(vulnerable_pvs) + fixed_purls.extend(fixed_pvs) + vulnerable_packages = [] fixed_packages = [] @@ -340,3 +357,143 @@ def get_advisory_packages(self, advisory_data: AdvisoryData) -> list: fixed_packages.append(fixed_package) return vulnerable_packages, fixed_packages + + def get_published_package_versions( + self, package_url: PackageURL, until: Optional[datetime] = 
None + ) -> List[str]: + """ + Return a list of versions published before `until` for the `package_url` + """ + versions = package_versions.versions(str(package_url)) + versions_before_until = [] + for version in versions or []: + if until and version.release_date and version.release_date > until: + continue + versions_before_until.append(version.value) + + return versions_before_until + + def get_impacted_packages(self, affected_packages, advisory_date_published): + """ + Return a tuple of lists of affected and fixed PackageURLs + """ + if not affected_packages: + return [], [] + + mergable = True + + # TODO: We should never had the exception in first place + try: + purl, affected_version_ranges, fixed_versions = AffectedPackage.merge(affected_packages) + except UnMergeablePackageError: + self.log(f"Cannot merge with different purls {affected_packages!r}", logging.ERROR) + mergable = False + + if not mergable: + for affected_package in affected_packages: + purl = affected_package.package + affected_version_range = affected_package.affected_version_range + fixed_version = affected_package.fixed_version + pkg_type = purl.type + pkg_namespace = purl.namespace + pkg_name = purl.name + if not affected_version_range and fixed_version: + # FIXME: Handle the receving end to address the concern of looping the data + return [], [ + PackageURL( + type=pkg_type, + namespace=pkg_namespace, + name=pkg_name, + version=str(fixed_version), + ) + ] + else: + # FIXME: Handle the receving end to address the concern of looping the data + valid_versions = self.get_published_package_versions( + package_url=purl, until=advisory_date_published + ) + return self.resolve_package_versions( + affected_version_range=affected_version_range, + pkg_type=pkg_type, + pkg_namespace=pkg_namespace, + pkg_name=pkg_name, + valid_versions=valid_versions, + ) + + else: + pkg_type = purl.type + pkg_namespace = purl.namespace + pkg_name = purl.name + pkg_qualifiers = purl.qualifiers + fixed_purls = [ + PackageURL( + type=pkg_type, + namespace=pkg_namespace, + name=pkg_name, + version=str(version), + qualifiers=pkg_qualifiers, + ) + for version in fixed_versions + ] + if not affected_version_ranges: + return [], fixed_purls + else: + valid_versions = self.get_published_package_versions( + package_url=purl, until=advisory_date_published + ) + for affected_version_range in affected_version_ranges: + return self.resolve_package_versions( + affected_version_range=affected_version_range, + pkg_type=pkg_type, + pkg_namespace=pkg_namespace, + pkg_name=pkg_name, + valid_versions=valid_versions, + ) + + def resolve_package_versions( + self, + affected_version_range, + pkg_type, + pkg_namespace, + pkg_name, + valid_versions, + ): + """ + Return a tuple of lists of ``affected_packages`` and ``fixed_packages`` PackageURL for the given `affected_version_range` and `valid_versions`. 
+ + ``valid_versions`` are the valid version listed on the package registry for that package + + """ + aff_vers, unaff_vers = resolve_version_range( + affected_version_range=affected_version_range, + ignorable_versions=self.ignorable_versions, + package_versions=valid_versions, + ) + + affected_purls = list( + self.expand_verion_range_to_purls(pkg_type, pkg_namespace, pkg_name, aff_vers) + ) + + unaffected_purls = list( + self.expand_verion_range_to_purls(pkg_type, pkg_namespace, pkg_name, unaff_vers) + ) + + fixed_packages = [] + affected_packages = [] + + patched_packages = nearest_patched_package( + vulnerable_packages=affected_purls, resolved_packages=unaffected_purls + ) + + for (fixed_package, affected_purls,) in get_affected_packages_by_patched_package( + patched_packages + ).items(): + if fixed_package: + fixed_packages.append(fixed_package) + affected_packages.extend(affected_purls) + + return affected_packages, fixed_packages + + def expand_verion_range_to_purls(self, pkg_type, pkg_namespace, pkg_name, versions): + for version in versions: + yield PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version) diff --git a/vulnerabilities/pipelines/v2_importers/github_importer.py b/vulnerabilities/pipelines/v2_importers/github_importer.py new file mode 100644 index 000000000..074b5b37e --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/github_importer.py @@ -0,0 +1,391 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import logging +from traceback import format_exc as traceback_format_exc +from typing import Callable +from typing import Iterable +from typing import List +from typing import Optional + +from cwe2.database import Database +from dateutil import parser as dateparser +from packageurl import PackageURL +from univers.version_range import RANGE_CLASS_BY_SCHEMES +from univers.version_range import build_range_from_github_advisory_constraint + +from vulnerabilities import severity_systems +from vulnerabilities import utils +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.utils import dedupe +from vulnerabilities.utils import get_cwe_id +from vulnerabilities.utils import get_item + + +class GitHubAPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """Collect GitHub advisories.""" + + pipeline_id = "github_importer_v2" + + spdx_license_expression = "CC-BY-4.0" + license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md" + importer_name = "GHSA Importer" + + unfurl_version_ranges = True + ignorable_versions = frozenset( + [ + "0.1-bulbasaur", + "0.1-charmander", + "0.3m1", + "0.3m2", + "0.3m3", + "0.3m4", + "0.3m5", + "0.4m1", + "0.4m2", + "0.4m3", + "0.4m4", + "0.4m5", + "0.5m1", + "0.5m2", + "0.5m3", + "0.5m4", + "0.5m5", + "0.6m1", + "0.6m2", + "0.6m3", + "0.6m4", + "0.6m5", + "0.6m6", + "0.7.10p1", + "0.7.11p1", + "0.7.11p2", + "0.7.11p3", + "0.8.1p1", + "0.8.3p1", + "0.8.4p1", + "0.8.4p2", + "0.8.6p1", + "0.8.7p1", + "0.9-doduo", + "0.9-eevee", + "0.9-fearow", + "0.9-gyarados", + "0.9-horsea", + "0.9-ivysaur", + "2013-01-21T20:33:09+0100", + "2013-01-23T17:11:52+0100", + "2013-02-01T20:50:46+0100", + "2013-02-02T19:59:03+0100", + "2013-02-02T20:23:17+0100", + "2013-02-08T17:40:57+0000", + "2013-03-27T16:32:26+0100", + "2013-05-09T12:47:53+0200", + "2013-05-10T17:55:56+0200", + "2013-05-14T20:16:05+0200", + "2013-06-01T10:32:51+0200", + "2013-07-19T09:11:08+0000", + "2013-08-12T21:48:56+0200", + "2013-09-11T19-27-10", + "2013-12-23T17-51-15", + "2014-01-12T15-52-10", + "2.0.1rc2-git", + "3.0.0b3-", + "3.0b6dev-r41684", + "-class.-jw.util.version.Version-", + "vulnerabilities", + ] + ) + + @classmethod + def steps(cls): + return (cls.collect_and_store_advisories,) + + package_type_by_github_ecosystem = { + # "MAVEN": "maven", + # "NUGET": "nuget", + # "COMPOSER": "composer", + # "PIP": "pypi", + # "RUBYGEMS": "gem", + # "NPM": "npm", + "RUST": "cargo", + # "GO": "golang", + } + + def advisories_count(self): + advisory_query = """ + query{ + securityVulnerabilities(first: 0, ecosystem: %s) { + totalCount + } + } + """ + advisory_counts = 0 + for ecosystem in self.package_type_by_github_ecosystem.keys(): + graphql_query = {"query": advisory_query % (ecosystem)} + response = utils.fetch_github_graphql_query(graphql_query) + advisory_counts += get_item(response, "data", "securityVulnerabilities", "totalCount") + return advisory_counts + + def collect_advisories(self) -> Iterable[AdvisoryData]: + + # TODO: We will try to gather more info from GH API + # Check https://github.com/nexB/vulnerablecode/issues/1039#issuecomment-1366458885 + # Check https://github.com/nexB/vulnerablecode/issues/645 + # set of all possible values of first '%s' = {'MAVEN','COMPOSER', 'NUGET', 'RUBYGEMS', 'PYPI', 'NPM', 'RUST'} + # 
second '%s' is interesting, it will have the value '' for the first request, + advisory_query = """ + query{ + securityVulnerabilities(first: 100, ecosystem: %s, %s) { + edges { + node { + advisory { + identifiers { + type + value + } + summary + references { + url + } + severity + cwes(first: 10){ + nodes { + cweId + } + } + publishedAt + } + firstPatchedVersion{ + identifier + } + package { + name + } + vulnerableVersionRange + } + } + pageInfo { + hasNextPage + endCursor + } + } + } + """ + for ecosystem, package_type in self.package_type_by_github_ecosystem.items(): + end_cursor_exp = "" + while True: + graphql_query = {"query": advisory_query % (ecosystem, end_cursor_exp)} + response = utils.fetch_github_graphql_query(graphql_query) + + page_info = get_item(response, "data", "securityVulnerabilities", "pageInfo") + end_cursor = get_item(page_info, "endCursor") + if end_cursor: + end_cursor = f'"{end_cursor}"' + end_cursor_exp = f"after: {end_cursor}" + + yield from process_response(response, package_type=package_type) + + if not get_item(page_info, "hasNextPage"): + break + + +def get_purl(pkg_type: str, github_name: str, logger: Callable = None) -> Optional[PackageURL]: + """ + Return a PackageURL by splitting the `github_name` using the `pkg_type` + convention. Return None and log an error if we can not split or it is an + unknown package type. + + For example:: + >>> expected = PackageURL(type='maven', namespace='org.apache.commons', name='commons-lang3') + >>> assert get_purl("maven", "org.apache.commons:commons-lang3") == expected + + >>> expected = PackageURL(type="composer", namespace="foo", name="bar") + >>> assert get_purl("composer", "foo/bar") == expected + """ + if pkg_type == "maven": + if ":" not in github_name: + if logger: + logger(f"get_purl: Invalid maven package name {github_name}", level=logging.ERROR) + return + ns, _, name = github_name.partition(":") + return PackageURL(type=pkg_type, namespace=ns, name=name) + + if pkg_type in ("composer", "npm"): + if "/" not in github_name: + return PackageURL(type=pkg_type, name=github_name) + vendor, _, name = github_name.partition("/") + return PackageURL(type=pkg_type, namespace=vendor, name=name) + + if pkg_type in ("nuget", "pypi", "gem", "golang", "npm", "cargo"): + return PackageURL(type=pkg_type, name=github_name) + + if logger: + logger(f"get_purl: Unknown package type {pkg_type}", level=logging.ERROR) + + +def process_response( + resp: dict, package_type: str, logger: Callable = None +) -> Iterable[AdvisoryData]: + """ + Yield `AdvisoryData` by taking `resp` and `ecosystem` as input + """ + vulnerabilities = get_item(resp, "data", "securityVulnerabilities", "edges") or [] + if not vulnerabilities: + if logger: + logger( + f"No vulnerabilities found for package_type: {package_type!r} in response: {resp!r}", + level=logging.ERROR, + ) + return + + for vulnerability in vulnerabilities: + aliases = [] + affected_packages = [] + github_advisory = get_item(vulnerability, "node") + if not github_advisory: + if logger: + logger(f"No node found in {vulnerability!r}", level=logging.ERROR) + continue + + advisory = get_item(github_advisory, "advisory") + if not advisory: + if logger: + logger(f"No advisory found in {github_advisory!r}", level=logging.ERROR) + continue + + summary = get_item(advisory, "summary") or "" + + references = get_item(advisory, "references") or [] + if references: + urls = (ref["url"] for ref in references) + references = [Reference.from_url(u) for u in urls] + + date_published = get_item(advisory, 
"publishedAt") + if date_published: + date_published = dateparser.parse(date_published) + + name = get_item(github_advisory, "package", "name") + if name: + purl = get_purl(pkg_type=package_type, github_name=name, logger=logger) + if purl: + affected_range = get_item(github_advisory, "vulnerableVersionRange") + fixed_version = get_item(github_advisory, "firstPatchedVersion", "identifier") + if affected_range: + try: + affected_range = build_range_from_github_advisory_constraint( + package_type, affected_range + ) + except Exception as e: + if logger: + logger( + f"Could not parse affected range {affected_range!r} {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + affected_range = None + if fixed_version: + try: + fixed_version = RANGE_CLASS_BY_SCHEMES[package_type].version_class( + fixed_version + ) + except Exception as e: + if logger: + logger( + f"Invalid fixed version {fixed_version!r} {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + fixed_version = None + if affected_range or fixed_version: + affected_packages.append( + AffectedPackage( + package=purl, + affected_version_range=affected_range, + fixed_version=fixed_version, + ) + ) + identifiers = get_item(advisory, "identifiers") or [] + ghsa_id = "" + severities = [] + for identifier in identifiers: + value = identifier["value"] + identifier_type = identifier["type"] + aliases.append(value) + # attach the GHSA with severity score + if identifier_type == "GHSA": + # Each Node has only one GHSA, hence exit after attaching + # score to this GHSA + ghsa_id = value + for ref in references: + if ref.reference_id == value: + severity = get_item(advisory, "severity") + if severity: + severities = [ + VulnerabilitySeverity( + system=severity_systems.CVSS31_QUALITY, + value=severity, + url=ref.url, + ) + ] + + elif identifier_type == "CVE": + pass + else: + if logger: + logger( + f"Unknown identifier type {identifier_type!r} and value {value!r}", + level=logging.ERROR, + ) + + weaknesses = get_cwes_from_github_advisory(advisory, logger) + + advisory_id = None + + aliases = sorted(dedupe(aliases)) + + advisory_id = ghsa_id or aliases[0] + + aliases.remove(advisory_id) + + yield AdvisoryData( + advisory_id=ghsa_id, + aliases=aliases, + summary=summary, + references_v2=references, + severities=severities, + affected_packages=affected_packages, + date_published=date_published, + weaknesses=weaknesses, + url=f"https://github.com/advisories/{ghsa_id}", + ) + + +def get_cwes_from_github_advisory(advisory, logger=None) -> List[int]: + """ + Return the cwe-id list from advisory ex: [ 522 ] + by extracting the cwe_list from advisory ex: [{'cweId': 'CWE-522'}] + then remove the CWE- from string and convert it to integer 522 and Check if the CWE in CWE-Database + """ + weaknesses = [] + db = Database() + cwe_list = get_item(advisory, "cwes", "nodes") or [] + for cwe_item in cwe_list: + cwe_string = get_item(cwe_item, "cweId") + if cwe_string: + cwe_id = get_cwe_id(cwe_string) + try: + db.get(cwe_id) + weaknesses.append(cwe_id) + except Exception as e: + if logger: + logger(f"Invalid CWE id {e!r} \n {traceback_format_exc()}", level=logging.ERROR) + return weaknesses diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index 3cee9bf1c..fd8e87acc 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -150,7 +150,7 @@ def insert_advisory_v2( severities = get_or_create_advisory_severities(severities=advisory.severities) weaknesses = 
get_or_create_advisory_weaknesses(weaknesses=advisory.weaknesses) content_id = compute_content_id(advisory_data=advisory) - fixed_by_packages, affecting_packages = get_advisory_packages(advisory_data=advisory) + affecting_packages, fixed_by_packages = get_advisory_packages(advisory_data=advisory) try: default_data = { "summary": advisory.summary, diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index e35531e31..fb0d5f8bf 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -287,9 +287,10 @@ def get_affected_packages_by_patched_package( """ affected_packages_by_patched_package = defaultdict(list) for package in affected_packages: - affected_packages_by_patched_package[package.patched_package].append( - package.vulnerable_package - ) + if package.vulnerable_package: + affected_packages_by_patched_package[package.patched_package].append( + package.vulnerable_package + ) return affected_packages_by_patched_package From c3242c02ecc770b5f2401f624c1e1d58efad4afa Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 21 May 2025 12:38:35 +0530 Subject: [PATCH 12/44] Add V2 pipelines Signed-off-by: Tushar Goel --- vulnerabilities/importers/__init__.py | 6 + ...soryreference_advisoryseverity_and_more.py | 149 +++++++- ...93_alter_advisoryreference_reference_id.py | 23 ++ ...e_advisoryv2_affected_packages_and_more.py | 132 ------- vulnerabilities/models.py | 35 +- vulnerabilities/pipelines/__init__.py | 46 ++- .../v2_importers/apache_httpd_pipeline_v2.py | 260 ++++++++++++++ .../pipelines/v2_importers/npm_importer.py | 174 ++++++++++ .../v2_importers/vulnrichment_importer.py | 321 ++++++++++++++++++ 9 files changed, 988 insertions(+), 158 deletions(-) create mode 100644 vulnerabilities/migrations/0093_alter_advisoryreference_reference_id.py delete mode 100644 vulnerabilities/migrations/0093_packagev2_remove_advisoryv2_affected_packages_and_more.py create mode 100644 vulnerabilities/pipelines/v2_importers/apache_httpd_pipeline_v2.py create mode 100644 vulnerabilities/pipelines/v2_importers/npm_importer.py create mode 100644 vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 0de162094..d0b046b8b 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -43,12 +43,18 @@ from vulnerabilities.pipelines import nvd_importer from vulnerabilities.pipelines import pypa_importer from vulnerabilities.pipelines import pysec_importer +from vulnerabilities.pipelines.v2_importers import apache_httpd_pipeline_v2 as apache_httpd_v2 from vulnerabilities.pipelines.v2_importers import github_importer as github_importer_v2 +from vulnerabilities.pipelines.v2_importers import npm_importer as npm_importer_v2 from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2 +from vulnerabilities.pipelines.v2_importers import vulnrichment_importer as vulnrichment_importer_v2 IMPORTERS_REGISTRY = [ nvd_importer_v2.NVDImporterPipeline, github_importer_v2.GitHubAPIImporterPipeline, + npm_importer_v2.NpmImporterPipeline, + vulnrichment_importer_v2.VulnrichImporterPipeline, + apache_httpd_v2.ApacheHTTPDImporterPipeline, nvd_importer.NVDImporterPipeline, github_importer.GitHubAPIImporterPipeline, gitlab_importer.GitLabImporterPipeline, diff --git a/vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py b/vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py 
index b24edc836..22e2ab6c3 100644 --- a/vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py +++ b/vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.20 on 2025-04-24 09:58 +# Generated by Django 4.2.20 on 2025-05-21 05:33 from django.db import migrations, models @@ -152,6 +152,103 @@ class Migration(migrations.Migration): ("cwe_id", models.IntegerField(help_text="CWE id")), ], ), + migrations.CreateModel( + name="PackageV2", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "type", + models.CharField( + blank=True, + help_text="A short code to identify the type of this package. For example: gem for a Rubygem, docker for a container, pypi for a Python Wheel or Egg, maven for a Maven Jar, deb for a Debian package, etc.", + max_length=16, + ), + ), + ( + "namespace", + models.CharField( + blank=True, + help_text="Package name prefix, such as Maven groupid, Docker image owner, GitHub user or organization, etc.", + max_length=255, + ), + ), + ( + "name", + models.CharField(blank=True, help_text="Name of the package.", max_length=100), + ), + ( + "version", + models.CharField( + blank=True, help_text="Version of the package.", max_length=100 + ), + ), + ( + "qualifiers", + models.CharField( + blank=True, + help_text="Extra qualifying data for a package such as the name of an OS, architecture, distro, etc.", + max_length=1024, + ), + ), + ( + "subpath", + models.CharField( + blank=True, + help_text="Extra subpath within a package, relative to the package root.", + max_length=200, + ), + ), + ( + "package_url", + models.CharField( + db_index=True, + help_text="The Package URL for this package.", + max_length=1000, + ), + ), + ( + "plain_package_url", + models.CharField( + db_index=True, + help_text="The Package URL for this package without qualifiers and subpath.", + max_length=1000, + ), + ), + ( + "is_ghost", + models.BooleanField( + db_index=True, + default=False, + help_text="True if the package does not exist in the upstream package manager or its repository.", + ), + ), + ( + "risk_score", + models.DecimalField( + decimal_places=1, + help_text="Risk score between 0.00 and 10.00, where higher values indicate greater vulnerability risk for the package.", + max_digits=3, + null=True, + ), + ), + ( + "version_rank", + models.IntegerField( + db_index=True, + default=0, + help_text="Rank of the version to support ordering by version. 
Rank zero means the rank has not been defined yet", + ), + ), + ], + options={ + "abstract": False, + }, + ), migrations.CreateModel( name="AdvisoryV2", fields=[ @@ -177,14 +274,6 @@ class Migration(migrations.Migration): ), ), ("summary", models.TextField(blank=True)), - ( - "affected_packages", - models.JSONField( - blank=True, - default=list, - help_text="A list of serializable AffectedPackage objects", - ), - ), ( "date_published", models.DateTimeField( @@ -211,6 +300,40 @@ class Migration(migrations.Migration): ), ), ("url", models.URLField(help_text="Link to the advisory on the upstream website")), + ( + "status", + models.IntegerField( + choices=[(1, "Published"), (2, "Disputed"), (3, "Invalid")], default=1 + ), + ), + ( + "exploitability", + models.DecimalField( + blank=True, + decimal_places=1, + help_text="Exploitability indicates the likelihood that a vulnerability in a software package could be used by malicious actors to compromise systems, applications, or networks. This metric is determined automatically based on the discovery of known exploits.", + max_digits=2, + null=True, + ), + ), + ( + "weighted_severity", + models.DecimalField( + blank=True, + decimal_places=1, + help_text="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10.", + max_digits=3, + null=True, + ), + ), + ( + "affecting_packages", + models.ManyToManyField( + help_text="A list of packages that are affected by this advisory.", + related_name="fixing_advisories", + to="vulnerabilities.packagev2", + ), + ), ( "aliases", models.ManyToManyField( @@ -219,6 +342,14 @@ class Migration(migrations.Migration): to="vulnerabilities.advisoryalias", ), ), + ( + "fixed_by_packages", + models.ManyToManyField( + help_text="A list of packages that are reported by this advisory.", + related_name="affected_by_advisories", + to="vulnerabilities.packagev2", + ), + ), ( "references", models.ManyToManyField( diff --git a/vulnerabilities/migrations/0093_alter_advisoryreference_reference_id.py b/vulnerabilities/migrations/0093_alter_advisoryreference_reference_id.py new file mode 100644 index 000000000..9230cb4fa --- /dev/null +++ b/vulnerabilities/migrations/0093_alter_advisoryreference_reference_id.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.20 on 2025-05-21 06:40 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0092_advisoryalias_advisoryreference_advisoryseverity_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="advisoryreference", + name="reference_id", + field=models.CharField( + blank=True, + db_index=True, + help_text="An optional reference ID, such as DSA-4465-1 when available", + max_length=500, + ), + ), + ] diff --git a/vulnerabilities/migrations/0093_packagev2_remove_advisoryv2_affected_packages_and_more.py b/vulnerabilities/migrations/0093_packagev2_remove_advisoryv2_affected_packages_and_more.py deleted file mode 100644 index f421e67fe..000000000 --- a/vulnerabilities/migrations/0093_packagev2_remove_advisoryv2_affected_packages_and_more.py +++ /dev/null @@ -1,132 +0,0 @@ -# Generated by Django 4.2.20 on 2025-05-02 08:56 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", "0092_advisoryalias_advisoryreference_advisoryseverity_and_more"), - ] - - operations = [ - migrations.CreateModel( - name="PackageV2", - fields=[ - ( - "id", - models.AutoField( - 
auto_created=True, primary_key=True, serialize=False, verbose_name="ID" - ), - ), - ( - "type", - models.CharField( - blank=True, - help_text="A short code to identify the type of this package. For example: gem for a Rubygem, docker for a container, pypi for a Python Wheel or Egg, maven for a Maven Jar, deb for a Debian package, etc.", - max_length=16, - ), - ), - ( - "namespace", - models.CharField( - blank=True, - help_text="Package name prefix, such as Maven groupid, Docker image owner, GitHub user or organization, etc.", - max_length=255, - ), - ), - ( - "name", - models.CharField(blank=True, help_text="Name of the package.", max_length=100), - ), - ( - "version", - models.CharField( - blank=True, help_text="Version of the package.", max_length=100 - ), - ), - ( - "qualifiers", - models.CharField( - blank=True, - help_text="Extra qualifying data for a package such as the name of an OS, architecture, distro, etc.", - max_length=1024, - ), - ), - ( - "subpath", - models.CharField( - blank=True, - help_text="Extra subpath within a package, relative to the package root.", - max_length=200, - ), - ), - ( - "package_url", - models.CharField( - db_index=True, - help_text="The Package URL for this package.", - max_length=1000, - ), - ), - ( - "plain_package_url", - models.CharField( - db_index=True, - help_text="The Package URL for this package without qualifiers and subpath.", - max_length=1000, - ), - ), - ( - "is_ghost", - models.BooleanField( - db_index=True, - default=False, - help_text="True if the package does not exist in the upstream package manager or its repository.", - ), - ), - ( - "risk_score", - models.DecimalField( - decimal_places=1, - help_text="Risk score between 0.00 and 10.00, where higher values indicate greater vulnerability risk for the package.", - max_digits=3, - null=True, - ), - ), - ( - "version_rank", - models.IntegerField( - db_index=True, - default=0, - help_text="Rank of the version to support ordering by version. 
Rank zero means the rank has not been defined yet", - ), - ), - ], - options={ - "abstract": False, - }, - ), - migrations.RemoveField( - model_name="advisoryv2", - name="affected_packages", - ), - migrations.AddField( - model_name="advisoryv2", - name="affecting_packages", - field=models.ManyToManyField( - help_text="A list of packages that are affected by this advisory.", - related_name="fixing_advisories", - to="vulnerabilities.packagev2", - ), - ), - migrations.AddField( - model_name="advisoryv2", - name="fixed_by_packages", - field=models.ManyToManyField( - help_text="A list of packages that are reported by this advisory.", - related_name="affected_by_advisories", - to="vulnerabilities.packagev2", - ), - ), - ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 31ae170bc..def38e80f 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2456,7 +2456,7 @@ class AdvisoryReference(models.Model): reference_type = models.CharField(max_length=20, choices=REFERENCE_TYPES, blank=True) reference_id = models.CharField( - max_length=200, + max_length=500, help_text="An optional reference ID, such as DSA-4465-1 when available", blank=True, db_index=True, @@ -2586,7 +2586,38 @@ class AdvisoryV2(models.Model): help_text="A list of packages that are reported by this advisory.", ) - # TODO: Add Advisory Status + status = models.IntegerField( + choices=VulnerabilityStatusType.choices, default=VulnerabilityStatusType.PUBLISHED + ) + + exploitability = models.DecimalField( + null=True, + blank=True, + max_digits=2, + decimal_places=1, + help_text="Exploitability indicates the likelihood that a vulnerability in a software package could be used by malicious actors to compromise systems, " + "applications, or networks. This metric is determined automatically based on the discovery of known exploits.", + ) + + weighted_severity = models.DecimalField( + null=True, + blank=True, + max_digits=3, + decimal_places=1, + help_text="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10.", + ) + + @property + def risk_score(self): + """ + Risk expressed as a number ranging from 0 to 10. + Risk is calculated from weighted severity and exploitability values. 
+ It is the maximum value of (the weighted severity multiplied by its exploitability) or 10 + Risk = min(weighted severity * exploitability, 10) + """ + if self.exploitability and self.weighted_severity: + risk_score = min(float(self.exploitability * self.weighted_severity), 10.0) + return round(risk_score, 1) objects = AdvisoryQuerySet.as_manager() diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 304c9bef3..c7ebe3be7 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -309,6 +309,9 @@ def collect_and_store_advisories(self): progress = LoopProgress(total_iterations=estimated_advisory_count, logger=self.log) for advisory in progress.iter(self.collect_advisories()): + if advisory is None: + self.log("Advisory is None, skipping") + continue if _obj := insert_advisory_v2( advisory=advisory, pipeline_id=self.pipeline_id, @@ -336,7 +339,6 @@ def get_advisory_packages(self, advisory_data: AdvisoryData) -> list: affected_purls.extend(package_affected_purls) fixed_purls.extend(package_fixed_purls) - if self.unfurl_version_ranges: vulnerable_pvs, fixed_pvs = self.get_impacted_packages( affected_packages=advisory_data.affected_packages, @@ -364,7 +366,13 @@ def get_published_package_versions( """ Return a list of versions published before `until` for the `package_url` """ - versions = package_versions.versions(str(package_url)) + try: + versions = package_versions.versions(str(package_url)) + except Exception as e: + self.log( + f"Failed to fetch versions for package {str(package_url)} {e!r}", + level=logging.ERROR, + ) versions_before_until = [] for version in versions or []: if until and version.release_date and version.release_date > until: @@ -390,6 +398,8 @@ def get_impacted_packages(self, affected_packages, advisory_date_published): mergable = False if not mergable: + vulnerable_packages = [] + fixed_packages = [] for affected_package in affected_packages: purl = affected_package.package affected_version_range = affected_package.affected_version_range @@ -398,28 +408,28 @@ def get_impacted_packages(self, affected_packages, advisory_date_published): pkg_namespace = purl.namespace pkg_name = purl.name if not affected_version_range and fixed_version: - # FIXME: Handle the receving end to address the concern of looping the data - return [], [ + fixed_packages.append( PackageURL( type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=str(fixed_version), ) - ] + ) else: - # FIXME: Handle the receving end to address the concern of looping the data valid_versions = self.get_published_package_versions( package_url=purl, until=advisory_date_published ) - return self.resolve_package_versions( + affected_pvs, fixed_pvs = self.resolve_package_versions( affected_version_range=affected_version_range, pkg_type=pkg_type, pkg_namespace=pkg_namespace, pkg_name=pkg_name, valid_versions=valid_versions, ) - + vulnerable_packages.extend(affected_pvs) + fixed_packages.extend(fixed_pvs) + return vulnerable_packages, fixed_packages else: pkg_type = purl.type pkg_namespace = purl.namespace @@ -441,14 +451,19 @@ def get_impacted_packages(self, affected_packages, advisory_date_published): valid_versions = self.get_published_package_versions( package_url=purl, until=advisory_date_published ) + vulnerable_packages = [] + fixed_packages = [] for affected_version_range in affected_version_ranges: - return self.resolve_package_versions( + vulnerable_pvs, fixed_pvs = self.resolve_package_versions( 
affected_version_range=affected_version_range, pkg_type=pkg_type, pkg_namespace=pkg_namespace, pkg_name=pkg_name, valid_versions=valid_versions, ) + vulnerable_packages.extend(vulnerable_pvs) + fixed_packages.extend(fixed_pvs) + return vulnerable_packages, fixed_packages def resolve_package_versions( self, @@ -462,7 +477,7 @@ def resolve_package_versions( Return a tuple of lists of ``affected_packages`` and ``fixed_packages`` PackageURL for the given `affected_version_range` and `valid_versions`. ``valid_versions`` are the valid version listed on the package registry for that package - + """ aff_vers, unaff_vers = resolve_version_range( affected_version_range=affected_version_range, @@ -482,12 +497,13 @@ def resolve_package_versions( affected_packages = [] patched_packages = nearest_patched_package( - vulnerable_packages=affected_purls, resolved_packages=unaffected_purls - ) + vulnerable_packages=affected_purls, resolved_packages=unaffected_purls + ) - for (fixed_package, affected_purls,) in get_affected_packages_by_patched_package( - patched_packages - ).items(): + for ( + fixed_package, + affected_purls, + ) in get_affected_packages_by_patched_package(patched_packages).items(): if fixed_package: fixed_packages.append(fixed_package) affected_packages.extend(affected_purls) diff --git a/vulnerabilities/pipelines/v2_importers/apache_httpd_pipeline_v2.py b/vulnerabilities/pipelines/v2_importers/apache_httpd_pipeline_v2.py new file mode 100644 index 000000000..2c430d967 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/apache_httpd_pipeline_v2.py @@ -0,0 +1,260 @@ +import logging +import re +import urllib.parse +from typing import Iterable + +import requests +from bs4 import BeautifulSoup +from packageurl import PackageURL +from univers.version_constraint import VersionConstraint +from univers.version_range import ApacheVersionRange +from univers.versions import SemverVersion + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.severity_systems import APACHE_HTTPD +from vulnerabilities.utils import create_weaknesses_list +from vulnerabilities.utils import cwe_regex +from vulnerabilities.utils import get_item + +logger = logging.getLogger(__name__) + + +def fetch_links(url): + links = [] + data = requests.get(url).content + soup = BeautifulSoup(data, features="lxml") + for tag in soup.find_all("a"): + link = tag.get("href") + if not link.endswith("json"): + continue + links.append(urllib.parse.urljoin(url, link)) + return links + + +def get_weaknesses(cve_data): + """ + Extract CWE IDs from CVE data. + + Args: + cve_data (dict): The CVE data in a dictionary format. + + Returns: + List[int]: A list of unique CWE IDs. + + Examples: + >>> mock_cve_data1 = { + ... "containers": { + ... "cna": { + ... "providerMetadata": { + ... "orgId": "f0158376-9dc2-43b6-827c-5f631a4d8d09" + ... }, + ... "title": "mod_macro buffer over-read", + ... "problemTypes": [ + ... { + ... "descriptions": [ + ... { + ... "description": "CWE-125 Out-of-bounds Read", + ... "lang": "en", + ... "cweId": "CWE-125", + ... "type": "CWE" + ... } + ... ] + ... } + ... ] + ... } + ... } + ... } + >>> mock_cve_data2 = { + ... "data_type": "CVE", + ... "data_format": "MITRE", + ... "data_version": "4.0", + ... "generator": { + ... "engine": "Vulnogram 0.0.9" + ... 
}, + ... "CVE_data_meta": { + ... "ID": "CVE-2022-28614", + ... "ASSIGNER": "security@apache.org", + ... "TITLE": "read beyond bounds via ap_rwrite() ", + ... "STATE": "PUBLIC" + ... }, + ... "problemtype": { + ... "problemtype_data": [ + ... { + ... "description": [ + ... { + ... "lang": "eng", + ... "value": "CWE-190 Integer Overflow or Wraparound" + ... } + ... ] + ... }, + ... { + ... "description": [ + ... { + ... "lang": "eng", + ... "value": "CWE-200 Exposure of Sensitive Information to an Unauthorized Actor" + ... } + ... ] + ... } + ... ] + ... } + ... } + + >>> get_weaknesses(mock_cve_data1) + [125] + + >>> get_weaknesses(mock_cve_data2) + [190, 200] + """ + alias = get_item(cve_data, "CVE_data_meta", "ID") + cwe_strings = [] + if alias: + problemtype_data = get_item(cve_data, "problemtype", "problemtype_data") or [] + for problem in problemtype_data: + for desc in problem.get("description", []): + value = desc.get("value", "") + cwe_id_string_list = re.findall(cwe_regex, value) + cwe_strings.extend(cwe_id_string_list) + else: + problemTypes = cve_data.get("containers", {}).get("cna", {}).get("problemTypes", []) + descriptions = problemTypes[0].get("descriptions", []) if len(problemTypes) > 0 else [] + for description in descriptions: + cwe_id_string = description.get("cweId", "") + cwe_strings.append(cwe_id_string) + + weaknesses = create_weaknesses_list(cwe_strings) + return weaknesses + + +class ApacheHTTPDImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + pipeline_id = "apache_httpd_importer_v2" + spdx_license_expression = "Apache-2.0" + license_url = "https://www.apache.org/licenses/LICENSE-2.0" + importer_name = "Apache HTTPD Importer" + base_url = "https://httpd.apache.org/security/json/" + + links = [] + + @classmethod + def steps(cls): + return (cls.collect_and_store_advisories,) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + if not self.links: + self.links = fetch_links(self.base_url) + for link in self.links: + data = requests.get(link).json() + yield self.to_advisory(data) + + def advisories_count(self) -> int: + """Count the number of advisories available in the JSON files.""" + if not self.links: + self.links = fetch_links(self.base_url) + return len(self.links) + + def to_advisory(self, data): + alias = get_item(data, "CVE_data_meta", "ID") + if not alias: + alias = get_item(data, "cveMetadata", "cveId") + descriptions = get_item(data, "description", "description_data") or [] + description = None + for desc in descriptions: + if desc.get("lang") == "eng": + description = desc.get("value") + break + + severities = [] + impacts = data.get("impact") or [] + for impact in impacts: + value = impact.get("other") + if value: + severities.append( + VulnerabilitySeverity( + system=APACHE_HTTPD, + value=value, + scoring_elements="", + ) + ) + break + reference = Reference( + reference_id=alias, + url=urllib.parse.urljoin(self.base_url, f"{alias}.json"), + ) + + versions_data = [] + for vendor in get_item(data, "affects", "vendor", "vendor_data") or []: + for products in get_item(vendor, "product", "product_data") or []: + for version_data in get_item(products, "version", "version_data") or []: + versions_data.append(version_data) + + fixed_versions = [] + for timeline_object in data.get("timeline") or []: + timeline_value = timeline_object.get("value") + if "release" in timeline_value: + split_timeline_value = timeline_value.split(" ") + if "never" in timeline_value: + continue + if "release" in split_timeline_value[-1]: + 
fixed_versions.append(split_timeline_value[0]) + if "release" in split_timeline_value[0]: + fixed_versions.append(split_timeline_value[-1]) + + affected_packages = [] + affected_version_range = self.to_version_ranges(versions_data, fixed_versions) + if affected_version_range: + affected_packages.append( + AffectedPackage( + package=PackageURL( + type="apache", + name="httpd", + ), + affected_version_range=affected_version_range, + ) + ) + + weaknesses = get_weaknesses(data) + + return AdvisoryData( + advisory_id=alias, + aliases=[], + summary=description or "", + affected_packages=affected_packages, + references_v2=[reference], + weaknesses=weaknesses, + url=reference.url, + severities=severities, + ) + + def to_version_ranges(self, versions_data, fixed_versions): + constraints = [] + for version_data in versions_data: + version_value = version_data["version_value"] + range_expression = version_data["version_affected"] + if range_expression not in {"<=", ">=", "?=", "!<", "="}: + raise ValueError(f"unknown comparator found! {range_expression}") + comparator_by_range_expression = { + ">=": ">=", + "!<": ">=", + "<=": "<=", + "=": "=", + } + comparator = comparator_by_range_expression.get(range_expression) + if comparator: + constraints.append( + VersionConstraint(comparator=comparator, version=SemverVersion(version_value)) + ) + + for fixed_version in fixed_versions: + # The VersionConstraint method `invert()` inverts the fixed_version's comparator, + # enabling inclusion of multiple fixed versions with the `affected_version_range` values. + constraints.append( + VersionConstraint( + comparator="=", + version=SemverVersion(fixed_version), + ).invert() + ) + + return ApacheVersionRange(constraints=constraints) diff --git a/vulnerabilities/pipelines/v2_importers/npm_importer.py b/vulnerabilities/pipelines/v2_importers/npm_importer.py new file mode 100644 index 000000000..0f61aacb6 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/npm_importer.py @@ -0,0 +1,174 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
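Regarding to_version_ranges() in the Apache HTTPD pipeline above: the invert() call flips a fixed version's "=" constraint so that fixed releases are excluded from the affected range. A small sketch of the resulting range is below; it reuses the same univers classes the pipeline imports, and the exact rendering of the inverted comparator is an assumption.

from univers.version_constraint import VersionConstraint
from univers.version_range import ApacheVersionRange
from univers.versions import SemverVersion

# One affected constraint plus one inverted (excluded) fixed release,
# mirroring how to_version_ranges() assembles its constraint list.
constraints = [
    VersionConstraint(comparator="<=", version=SemverVersion("2.4.48")),
    VersionConstraint(comparator="=", version=SemverVersion("2.4.49")).invert(),
]
affected_range = ApacheVersionRange(constraints=constraints)
# Releases up to 2.4.48 fall inside the range; the fixed 2.4.49 release does not.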
+# + +# Author: Navonil Das (@NavonilDas) + +from pathlib import Path +from typing import Iterable + +import pytz +from dateutil.parser import parse +from fetchcode.vcs import fetch_via_vcs +from packageurl import PackageURL +from univers.version_range import NpmVersionRange + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.severity_systems import CVSSV2 +from vulnerabilities.severity_systems import CVSSV3 +from vulnerabilities.utils import build_description +from vulnerabilities.utils import load_json + + +class NpmImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """Collect advisories from nodejs GitHub repository.""" + + pipeline_id = "npm_importer_v2" + + spdx_license_expression = "MIT" + license_url = "https://github.com/nodejs/security-wg/blob/main/LICENSE.md" + repo_url = "git+https://github.com/nodejs/security-wg" + importer_name = "Npm Importer" + + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.clean_downloads, + ) + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self): + vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm" + return sum(1 for _ in vuln_directory.glob("*.json")) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm" + + for advisory in vuln_directory.glob("*.json"): + yield self.to_advisory_data(advisory) + + def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]: + if file.name == "index.json": + self.log(f"Skipping {file.name} file") + return + data = load_json(file) + id = data.get("id") + description = data.get("overview") or "" + summary = data.get("title") or "" + # TODO: Take care of description + date_published = None + if isinstance(data.get("created_at"), str): + date_published = parse(data.get("created_at")).replace(tzinfo=pytz.UTC) + references = [] + cvss_vector = data.get("cvss_vector") + cvss_score = data.get("cvss_score") + severities = [] + if cvss_vector and cvss_vector.startswith("CVSS:3.0/"): + severities.append( + VulnerabilitySeverity( + system=CVSSV3, + value=cvss_score, + ) + ) + if cvss_vector and cvss_vector.startswith("CVSS:2.0/"): + severities.append( + VulnerabilitySeverity( + system=CVSSV2, + value=cvss_score, + ) + ) + if not id: + self.log(f"Advisory ID not found in {file}") + return + + advisory_reference = Reference( + url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json", + reference_id=id, + ) + + for ref in data.get("references") or []: + references.append( + Reference( + url=ref, + severities=severities, + ) + ) + + if advisory_reference not in references: + references.append(advisory_reference) + + package_name = data.get("module_name") + affected_packages = [] + if package_name: + affected_packages.append(self.get_affected_package(data, package_name)) + advsisory_aliases = data.get("cves") or [] + + return AdvisoryData( + advisory_id=f"NODESEC-NPM-{id}", + aliases=advsisory_aliases, + summary=build_description(summary=summary, description=description), + date_published=date_published, + affected_packages=affected_packages, + references_v2=references, + severities=severities, + 
url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json", + ) + + def get_affected_package(self, data, package_name): + affected_version_range = None + unaffected_version_range = None + fixed_version = None + + vulnerable_range = data.get("vulnerable_versions") or "" + patched_range = data.get("patched_versions") or "" + + # https://github.com/nodejs/security-wg/blob/cfaa51cc5c83f01eea61b69658f7bc76a77c5979/vuln/npm/213.json#L14 + if vulnerable_range == "<=99.999.99999": + vulnerable_range = "*" + if vulnerable_range: + affected_version_range = NpmVersionRange.from_native(vulnerable_range) + + # https://github.com/nodejs/security-wg/blob/cfaa51cc5c83f01eea61b69658f7bc76a77c5979/vuln/npm/213.json#L15 + if patched_range == "<0.0.0": + patched_range = None + if patched_range: + unaffected_version_range = NpmVersionRange.from_native(patched_range) + + # We only store single fixed versions and not a range of fixed versions + # If there is a single constraint in the unaffected_version_range + # having comparator as ">=" then we store that as the fixed version + if unaffected_version_range and len(unaffected_version_range.constraints) == 1: + constraint = unaffected_version_range.constraints[0] + if constraint.comparator == ">=": + fixed_version = constraint.version + + return AffectedPackage( + package=PackageURL( + type="npm", + name=package_name, + ), + affected_version_range=affected_version_range, + fixed_version=fixed_version, + ) + + def clean_downloads(self): + if self.vcs_response: + self.log(f"Removing cloned repository") + self.vcs_response.delete() + + def on_failure(self): + self.clean_downloads() diff --git a/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py new file mode 100644 index 000000000..afaf06af2 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py @@ -0,0 +1,321 @@ +import json +import logging +import re +from pathlib import Path +from typing import Iterable + +import dateparser +from fetchcode.vcs import fetch_via_vcs + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.models import VulnerabilityReference +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.severity_systems import SCORING_SYSTEMS +from vulnerabilities.utils import get_advisory_url +from vulnerabilities.utils import get_cwe_id +from vulnerabilities.utils import get_reference_id + +logger = logging.getLogger(__name__) + + +class VulnrichImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + pipeline_id = "vulnrichment_importer_v2" + spdx_license_expression = "CC0-1.0" + license_url = "https://github.com/cisagov/vulnrichment/blob/develop/LICENSE" + repo_url = "git+https://github.com/cisagov/vulnrichment.git" + importer_name = "Vulnrichment" + + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.clean_downloads, + ) + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self): + vuln_directory = Path(self.vcs_response.dest_dir) + return sum(1 for _ in vuln_directory.glob("*.json")) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + base_path = Path(self.vcs_response.dest_dir) + for file_path in base_path.glob("**/**/*.json"): + if not file_path.name.startswith("CVE-"): + 
continue + with open(file_path) as f: + raw_data = json.load(f) + advisory_url = get_advisory_url( + file=file_path, + base_path=base_path, + url="https://github.com/cisagov/vulnrichment/blob/develop/", + ) + yield self.parse_cve_advisory(raw_data, advisory_url) + + def parse_cve_advisory(self, raw_data, advisory_url): + cve_metadata = raw_data.get("cveMetadata", {}) + cve_id = cve_metadata.get("cveId") + state = cve_metadata.get("state") + + date_published = cve_metadata.get("datePublished") + if date_published: + date_published = dateparser.parse(date_published) + + # Extract containers + containers = raw_data.get("containers", {}) + cna_data = containers.get("cna", {}) + adp_data = containers.get("adp", {}) + + # Extract descriptions + summary = "" + description_list = cna_data.get("descriptions", []) + for description_dict in description_list: + if not description_dict.get("lang") in ["en", "en-US"]: + continue + summary = description_dict.get("value") + + # Extract metrics + severities = [] + metrics = cna_data.get("metrics", []) + [ + adp_metrics for data in adp_data for adp_metrics in data.get("metrics", []) + ] + + vulnrichment_scoring_system = { + "cvssV4_0": SCORING_SYSTEMS["cvssv4"], + "cvssV3_1": SCORING_SYSTEMS["cvssv3.1"], + "cvssV3_0": SCORING_SYSTEMS["cvssv3"], + "cvssV2_0": SCORING_SYSTEMS["cvssv2"], + "other": { + "ssvc": SCORING_SYSTEMS["ssvc"], + }, # ignore kev + } + + for metric in metrics: + for metric_type, metric_value in metric.items(): + if metric_type not in vulnrichment_scoring_system: + continue + + if metric_type == "other": + other_types = metric_value.get("type") + if other_types == "ssvc": + content = metric_value.get("content", {}) + vector_string, decision = ssvc_calculator(content) + scoring_system = vulnrichment_scoring_system[metric_type][other_types] + severity = VulnerabilitySeverity( + system=scoring_system, value=decision, scoring_elements=vector_string + ) + severities.append(severity) + # ignore kev + else: + vector_string = metric_value.get("vectorString") + base_score = metric_value.get("baseScore") + scoring_system = vulnrichment_scoring_system[metric_type] + severity = VulnerabilitySeverity( + system=scoring_system, value=base_score, scoring_elements=vector_string + ) + severities.append(severity) + + # Extract references cpes and ignore affected products + cpes = set() + for affected_product in cna_data.get("affected", []): + if type(affected_product) != dict: + continue + cpes.update(affected_product.get("cpes") or []) + + references = [] + for ref in cna_data.get("references", []): + # https://github.com/CVEProject/cve-schema/blob/main/schema/tags/reference-tags.json + # We removed all unwanted reference types and set the default reference type to 'OTHER'. 
+ ref_type = VulnerabilityReference.OTHER + vul_ref_types = { + "exploit": VulnerabilityReference.EXPLOIT, + "issue-tracking": VulnerabilityReference.BUG, + "mailing-list": VulnerabilityReference.MAILING_LIST, + "third-party-advisory": VulnerabilityReference.ADVISORY, + "vendor-advisory": VulnerabilityReference.ADVISORY, + "vdb-entry": VulnerabilityReference.ADVISORY, + } + + for tag_type in ref.get("tags", []): + if tag_type in vul_ref_types: + ref_type = vul_ref_types.get(tag_type) + + url = ref.get("url") + reference = Reference( + reference_id=get_reference_id(url), + url=url, + reference_type=ref_type, + ) + + references.append(reference) + + cpes_ref = [ + Reference( + reference_id=cpe, + reference_type=VulnerabilityReference.OTHER, + url=f"https://nvd.nist.gov/vuln/search/results?adv_search=true&isCpeNameSearch=true&query={cpe}", + ) + for cpe in sorted(list(cpes)) + ] + references.extend(cpes_ref) + + weaknesses = set() + for problem_type in cna_data.get("problemTypes", []): + descriptions = problem_type.get("descriptions", []) + for description in descriptions: + cwe_id = description.get("cweId") + if cwe_id: + weaknesses.add(get_cwe_id(cwe_id)) + + description_text = description.get("description") + if description_text: + pattern = r"CWE-(\d+)" + match = re.search(pattern, description_text) + if match: + weaknesses.add(int(match.group(1))) + + print(f"cve_id: {cve_id}") + print(f"date_published: {date_published}") + print(references) + print(f"summary: {summary}") + print(f"weaknesses: {weaknesses}") + print(f"advisory_url: {advisory_url}") + print(f"severities: {severities}") + + return AdvisoryData( + advisory_id=cve_id, + aliases=[], + summary=summary, + references_v2=references, + date_published=date_published, + weaknesses=sorted(weaknesses), + url=advisory_url, + severities=severities, + ) + + def clean_downloads(self): + if self.vcs_response: + self.log("Removing cloned repository") + self.vcs_response.delete() + + def on_failure(self): + self.clean_downloads() + + +def ssvc_calculator(ssvc_data): + """ + Return the ssvc vector and the decision value + """ + options = ssvc_data.get("options", []) + timestamp = ssvc_data.get("timestamp") + + # Extract the options into a dictionary + options_dict = {k: v.lower() for option in options for k, v in option.items()} + + # We copied the table value from this link. 
+ # https://www.cisa.gov/sites/default/files/publications/cisa-ssvc-guide%20508c.pdf + + # Determining Mission and Well-Being Impact Value + mission_well_being_table = { + # (Mission Prevalence, Public Well-being Impact) : "Mission & Well-being" + ("minimal", "minimal"): "low", + ("minimal", "material"): "medium", + ("minimal", "irreversible"): "high", + ("support", "minimal"): "medium", + ("support", "material"): "medium", + ("support", "irreversible"): "high", + ("essential", "minimal"): "high", + ("essential", "material"): "high", + ("essential", "irreversible"): "high", + } + + if "Mission Prevalence" not in options_dict: + options_dict["Mission Prevalence"] = "minimal" + + if "Public Well-being Impact" not in options_dict: + options_dict["Public Well-being Impact"] = "material" + + options_dict["Mission & Well-being"] = mission_well_being_table[ + (options_dict["Mission Prevalence"], options_dict["Public Well-being Impact"]) + ] + + decision_key = ( + options_dict.get("Exploitation"), + options_dict.get("Automatable"), + options_dict.get("Technical Impact"), + options_dict.get("Mission & Well-being"), + ) + + decision_points = { + "Exploitation": {"E": {"none": "N", "poc": "P", "active": "A"}}, + "Automatable": {"A": {"no": "N", "yes": "Y"}}, + "Technical Impact": {"T": {"partial": "P", "total": "T"}}, + "Public Well-being Impact": {"B": {"minimal": "M", "material": "A", "irreversible": "I"}}, + "Mission Prevalence": {"P": {"minimal": "M", "support": "S", "essential": "E"}}, + "Mission & Well-being": {"M": {"low": "L", "medium": "M", "high": "H"}}, + } + + # Create the SSVC vector + ssvc_vector = "SSVCv2/" + for key, value_map in options_dict.items(): + options_key = decision_points.get(key) + for lhs, rhs_map in options_key.items(): + ssvc_vector += f"{lhs}:{rhs_map.get(value_map)}/" + + # "Decision": {"D": {"Track": "T", "Track*": "R", "Attend": "A", "Act": "C"}}, + decision_values = {"Track": "T", "Track*": "R", "Attend": "A", "Act": "C"} + + decision_lookup = { + ("none", "no", "partial", "low"): "Track", + ("none", "no", "partial", "medium"): "Track", + ("none", "no", "partial", "high"): "Track", + ("none", "no", "total", "low"): "Track", + ("none", "no", "total", "medium"): "Track", + ("none", "no", "total", "high"): "Track*", + ("none", "yes", "partial", "low"): "Track", + ("none", "yes", "partial", "medium"): "Track", + ("none", "yes", "partial", "high"): "Attend", + ("none", "yes", "total", "low"): "Track", + ("none", "yes", "total", "medium"): "Track", + ("none", "yes", "total", "high"): "Attend", + ("poc", "no", "partial", "low"): "Track", + ("poc", "no", "partial", "medium"): "Track", + ("poc", "no", "partial", "high"): "Track*", + ("poc", "no", "total", "low"): "Track", + ("poc", "no", "total", "medium"): "Track*", + ("poc", "no", "total", "high"): "Attend", + ("poc", "yes", "partial", "low"): "Track", + ("poc", "yes", "partial", "medium"): "Track", + ("poc", "yes", "partial", "high"): "Attend", + ("poc", "yes", "total", "low"): "Track", + ("poc", "yes", "total", "medium"): "Track*", + ("poc", "yes", "total", "high"): "Attend", + ("active", "no", "partial", "low"): "Track", + ("active", "no", "partial", "medium"): "Track", + ("active", "no", "partial", "high"): "Attend", + ("active", "no", "total", "low"): "Track", + ("active", "no", "total", "medium"): "Attend", + ("active", "no", "total", "high"): "Act", + ("active", "yes", "partial", "low"): "Attend", + ("active", "yes", "partial", "medium"): "Attend", + ("active", "yes", "partial", "high"): "Act", + ("active", "yes", 
"total", "low"): "Attend", + ("active", "yes", "total", "medium"): "Act", + ("active", "yes", "total", "high"): "Act", + } + + decision = decision_lookup.get(decision_key, "") + + if decision: + ssvc_vector += f"D:{decision_values.get(decision)}/" + + if timestamp: + timestamp_formatted = dateparser.parse(timestamp).strftime("%Y-%m-%dT%H:%M:%SZ") + + ssvc_vector += f"{timestamp_formatted}/" + return ssvc_vector, decision From c417743d1a9907672c3e5186258cf4505a29dd65 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 21 May 2025 14:09:48 +0530 Subject: [PATCH 13/44] Rename pipelines Signed-off-by: Tushar Goel --- vulnerabilities/importers/__init__.py | 2 +- .../{apache_httpd_pipeline_v2.py => apache_httpd_importer.py} | 0 vulnerabilities/pipelines/v2_importers/nvd_importer.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename vulnerabilities/pipelines/v2_importers/{apache_httpd_pipeline_v2.py => apache_httpd_importer.py} (100%) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index d0b046b8b..7d18b88a0 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -43,7 +43,7 @@ from vulnerabilities.pipelines import nvd_importer from vulnerabilities.pipelines import pypa_importer from vulnerabilities.pipelines import pysec_importer -from vulnerabilities.pipelines.v2_importers import apache_httpd_pipeline_v2 as apache_httpd_v2 +from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2 from vulnerabilities.pipelines.v2_importers import github_importer as github_importer_v2 from vulnerabilities.pipelines.v2_importers import npm_importer as npm_importer_v2 from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2 diff --git a/vulnerabilities/pipelines/v2_importers/apache_httpd_pipeline_v2.py b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py similarity index 100% rename from vulnerabilities/pipelines/v2_importers/apache_httpd_pipeline_v2.py rename to vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py diff --git a/vulnerabilities/pipelines/v2_importers/nvd_importer.py b/vulnerabilities/pipelines/v2_importers/nvd_importer.py index ddb291a8f..c9105c276 100644 --- a/vulnerabilities/pipelines/v2_importers/nvd_importer.py +++ b/vulnerabilities/pipelines/v2_importers/nvd_importer.py @@ -109,7 +109,7 @@ def fetch(url, logger=None): return json.loads(data) -def fetch_cve_data_1_1(starting_year=2025, logger=None): +def fetch_cve_data_1_1(starting_year=2002, logger=None): """ Yield tuples of (year, lists of CVE mappings) from the NVD, one for each year since ``starting_year`` defaulting to 2002. 
From f01fe25601e77b9cfa471f6458a132f1bc531624 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 21 May 2025 14:54:45 +0530 Subject: [PATCH 14/44] Add V2 importer pipelines Signed-off-by: Tushar Goel --- vulnerabilities/importers/__init__.py | 6 + vulnerabilities/importers/osv.py | 85 +++++ .../pipelines/v2_importers/gitlab_importer.py | 315 ++++++++++++++++++ .../pipelines/v2_importers/pypa_importer.py | 73 ++++ .../pipelines/v2_importers/pysec_importer.py | 65 ++++ 5 files changed, 544 insertions(+) create mode 100644 vulnerabilities/pipelines/v2_importers/gitlab_importer.py create mode 100644 vulnerabilities/pipelines/v2_importers/pypa_importer.py create mode 100644 vulnerabilities/pipelines/v2_importers/pysec_importer.py diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 7d18b88a0..60f9a77f4 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -47,6 +47,9 @@ from vulnerabilities.pipelines.v2_importers import github_importer as github_importer_v2 from vulnerabilities.pipelines.v2_importers import npm_importer as npm_importer_v2 from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2 +from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2 +from vulnerabilities.pipelines.v2_importers import pysec_importer as pysec_importer_v2 +from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2 from vulnerabilities.pipelines.v2_importers import vulnrichment_importer as vulnrichment_importer_v2 IMPORTERS_REGISTRY = [ @@ -56,6 +59,9 @@ vulnrichment_importer_v2.VulnrichImporterPipeline, apache_httpd_v2.ApacheHTTPDImporterPipeline, nvd_importer.NVDImporterPipeline, + pypa_importer_v2.PyPaImporterPipeline, + gitlab_importer_v2.GitLabImporterPipeline, + pysec_importer_v2.PyPIImporterPipeline, github_importer.GitHubAPIImporterPipeline, gitlab_importer.GitLabImporterPipeline, github_osv.GithubOSVImporter, diff --git a/vulnerabilities/importers/osv.py b/vulnerabilities/importers/osv.py index 19867cda5..01f2d8023 100644 --- a/vulnerabilities/importers/osv.py +++ b/vulnerabilities/importers/osv.py @@ -107,6 +107,74 @@ def parse_advisory_data( ) +def parse_advisory_data_v2( + raw_data: dict, supported_ecosystems, advisory_url: str +) -> Optional[AdvisoryData]: + """ + Return an AdvisoryData build from a ``raw_data`` mapping of OSV advisory and + a ``supported_ecosystem`` string. 
+ """ + advisory_id = raw_data.get("id") or "" + if not advisory_id: + logger.error(f"Missing advisory id in OSV data: {raw_data}") + return None + summary = raw_data.get("summary") or "" + details = raw_data.get("details") or "" + summary = build_description(summary=summary, description=details) + aliases = raw_data.get("aliases") or [] + + date_published = get_published_date(raw_data=raw_data) + severities = list(get_severities(raw_data=raw_data)) + references = get_references_v2(raw_data=raw_data) + + affected_packages = [] + + for affected_pkg in raw_data.get("affected") or []: + purl = get_affected_purl(affected_pkg=affected_pkg, raw_id=advisory_id) + + if not purl or purl.type not in supported_ecosystems: + logger.error(f"Unsupported package type: {affected_pkg!r} in OSV: {advisory_id!r}") + continue + + affected_version_range = get_affected_version_range( + affected_pkg=affected_pkg, + raw_id=advisory_id, + supported_ecosystem=purl.type, + ) + + for fixed_range in affected_pkg.get("ranges") or []: + fixed_version = get_fixed_versions( + fixed_range=fixed_range, raw_id=advisory_id, supported_ecosystem=purl.type + ) + + for version in fixed_version: + affected_packages.append( + AffectedPackage( + package=purl, + affected_version_range=affected_version_range, + fixed_version=version, + ) + ) + database_specific = raw_data.get("database_specific") or {} + cwe_ids = database_specific.get("cwe_ids") or [] + weaknesses = list(map(get_cwe_id, cwe_ids)) + + if advisory_id in aliases: + aliases.remove(advisory_id) + + return AdvisoryData( + advisory_id=advisory_id, + aliases=aliases, + summary=summary, + references_v2=references, + severities=severities, + affected_packages=affected_packages, + date_published=date_published, + weaknesses=weaknesses, + url=advisory_url, + ) + + def extract_fixed_versions(fixed_range) -> Iterable[str]: """ Return a list of fixed version strings given a ``fixed_range`` mapping of @@ -187,6 +255,23 @@ def get_references(raw_data, severities) -> List[Reference]: return references +def get_references_v2(raw_data) -> List[Reference]: + """ + Return a list Reference extracted from a mapping of OSV ``raw_data`` given a + ``severities`` list of VulnerabilitySeverity. + """ + references = [] + for ref in raw_data.get("references") or []: + if not ref: + continue + url = ref["url"] + if not url: + logger.error(f"Reference without URL : {ref!r} for OSV id: {raw_data['id']!r}") + continue + references.append(Reference(url=ref["url"])) + return references + + def get_affected_purl(affected_pkg, raw_id): """ Return an affected PackageURL or None given a mapping of ``affected_pkg`` diff --git a/vulnerabilities/pipelines/v2_importers/gitlab_importer.py b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py new file mode 100644 index 000000000..0088f6819 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py @@ -0,0 +1,315 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import logging +import traceback +from pathlib import Path +from typing import Iterable +from typing import List +from typing import Tuple + +import pytz +import saneyaml +from dateutil import parser as dateparser +from fetchcode.vcs import fetch_via_vcs +from packageurl import PackageURL +from univers.version_range import RANGE_CLASS_BY_SCHEMES +from univers.version_range import VersionRange +from univers.version_range import from_gitlab_native +from univers.versions import Version + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.utils import build_description +from vulnerabilities.utils import get_advisory_url +from vulnerabilities.utils import get_cwe_id + + +class GitLabImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """Collect advisory from GitLab Advisory Database (Open Source Edition).""" + + pipeline_id = "gitlab_importer" + + spdx_license_expression = "MIT" + license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE" + importer_name = "GitLab Importer" + repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/" + + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.clean_downloads, + ) + + purl_type_by_gitlab_scheme = { + "conan": "conan", + "gem": "gem", + # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742 + # "go": "golang", + "maven": "maven", + "npm": "npm", + "nuget": "nuget", + "packagist": "composer", + "pypi": "pypi", + } + + gitlab_scheme_by_purl_type = {v: k for k, v in purl_type_by_gitlab_scheme.items()} + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self): + root = Path(self.vcs_response.dest_dir) + return sum(1 for _ in root.rglob("*.yml")) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + base_path = Path(self.vcs_response.dest_dir) + + for file_path in base_path.rglob("*.yml"): + if file_path.parent == base_path: + continue + + gitlab_type, _, _ = parse_advisory_path( + base_path=base_path, + file_path=file_path, + ) + + if gitlab_type not in self.purl_type_by_gitlab_scheme: + # self.log( + # f"Unknown package type {gitlab_type!r} in {file_path!r}", + # level=logging.ERROR, + # ) + continue + + yield parse_gitlab_advisory( + file=file_path, + base_path=base_path, + gitlab_scheme_by_purl_type=self.gitlab_scheme_by_purl_type, + purl_type_by_gitlab_scheme=self.purl_type_by_gitlab_scheme, + logger=self.log, + ) + + def clean_downloads(self): + if self.vcs_response: + self.log(f"Removing cloned repository") + self.vcs_response.delete() + + def on_failure(self): + self.clean_downloads() + + +def parse_advisory_path(base_path: Path, file_path: Path) -> Tuple[str, str, str]: + """ + Parse a gitlab advisory file and return a 3-tuple of: + (gitlab_type, package_slug, vulnerability_id) + + For example:: + + >>> base_path = Path("/tmp/tmpi1klhpmd/checkout") + >>> file_path=Path("/tmp/tmpi1klhpmd/checkout/pypi/gradio/CVE-2021-43831.yml") + >>> parse_advisory_path(base_path=base_path, file_path=file_path) + ('pypi', 'gradio', 'CVE-2021-43831') + + >>> file_path=Path("/tmp/tmpi1klhpmd/checkout/nuget/github.com/beego/beego/v2/nuget/CVE-2021-43831.yml") + >>> parse_advisory_path(base_path=base_path, file_path=file_path) + ('nuget', 
'github.com/beego/beego/v2/nuget', 'CVE-2021-43831') + + >>> file_path = Path("/tmp/tmpi1klhpmd/checkout/npm/@express/beego/beego/v2/CVE-2021-43831.yml") + >>> parse_advisory_path(base_path=base_path, file_path=file_path) + ('npm', '@express/beego/beego/v2', 'CVE-2021-43831') + """ + relative_path_segments = file_path.relative_to(base_path).parts + gitlab_type = relative_path_segments[0] + vuln_id = file_path.stem + package_slug = "/".join(relative_path_segments[1:-1]) + + return gitlab_type, package_slug, vuln_id + + +def get_purl(package_slug, purl_type_by_gitlab_scheme, logger): + """ + Return a PackageURL object from a package slug + """ + parts = [p for p in package_slug.strip("/").split("/") if p] + gitlab_scheme = parts[0] + purl_type = purl_type_by_gitlab_scheme[gitlab_scheme] + if gitlab_scheme == "go": + name = "/".join(parts[1:]) + return PackageURL(type=purl_type, namespace=None, name=name) + # if package slug is of the form: + # "nuget/NuGet.Core" + if len(parts) == 2: + name = parts[1] + return PackageURL(type=purl_type, name=name) + # if package slug is of the form: + # "nuget/github.com/beego/beego/v2/nuget" + if len(parts) >= 3: + name = parts[-1] + namespace = "/".join(parts[1:-1]) + return PackageURL(type=purl_type, namespace=namespace, name=name) + logger(f"get_purl: package_slug can not be parsed: {package_slug!r}", level=logging.ERROR) + return + + +def extract_affected_packages( + affected_version_range: VersionRange, + fixed_versions: List[Version], + purl: PackageURL, +) -> Iterable[AffectedPackage]: + """ + Yield AffectedPackage objects, one for each fixed_version + + In case of gitlab advisory data we get a list of fixed_versions and a affected_version_range. + Since we can not determine which package fixes which range. + We store the all the fixed_versions with the same affected_version_range in the advisory. + Later the advisory data is used to be inferred in the GitLabBasicImprover. + """ + for fixed_version in fixed_versions: + yield AffectedPackage( + package=purl, + fixed_version=fixed_version, + affected_version_range=affected_version_range, + ) + + +def parse_gitlab_advisory( + file, base_path, gitlab_scheme_by_purl_type, purl_type_by_gitlab_scheme, logger +): + """ + Parse a Gitlab advisory file and return an AdvisoryData or None. + These files are YAML. There is a JSON schema documented at + https://gitlab.com/gitlab-org/advisories-community/-/blob/main/ci/schema/schema.json + + Sample YAML file: + --- + identifier: "GMS-2018-26" + package_slug: "packagist/amphp/http" + title: "Incorrect header injection check" + description: "amphp/http isn't properly protected against HTTP header injection." 
+ pubdate: "2018-03-15" + affected_range: "<1.0.1" + fixed_versions: + - "v1.0.1" + urls: + - "https://github.com/amphp/http/pull/4" + cwe_ids: + - "CWE-1035" + - "CWE-937" + identifiers: + - "GMS-2018-26" + """ + with open(file) as f: + gitlab_advisory = saneyaml.load(f) + if not isinstance(gitlab_advisory, dict): + logger( + f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}", + level=logging.ERROR, + ) + return + + # refer to schema here https://gitlab.com/gitlab-org/advisories-community/-/blob/main/ci/schema/schema.json + aliases = gitlab_advisory.get("identifiers") + advisory_id = gitlab_advisory.get("identifier") + if advisory_id in aliases: + aliases.remove(advisory_id) + summary = build_description(gitlab_advisory.get("title"), gitlab_advisory.get("description")) + urls = gitlab_advisory.get("urls") + references = [Reference.from_url(u) for u in urls] + + cwe_ids = gitlab_advisory.get("cwe_ids") or [] + cwe_list = list(map(get_cwe_id, cwe_ids)) + + date_published = dateparser.parse(gitlab_advisory.get("pubdate")) + date_published = date_published.replace(tzinfo=pytz.UTC) + package_slug = gitlab_advisory.get("package_slug") + advisory_url = get_advisory_url( + file=file, + base_path=base_path, + url="https://gitlab.com/gitlab-org/advisories-community/-/blob/main/", + ) + purl: PackageURL = get_purl( + package_slug=package_slug, + purl_type_by_gitlab_scheme=purl_type_by_gitlab_scheme, + logger=logger, + ) + if not purl: + logger( + f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}", level=logging.ERROR + ) + return AdvisoryData( + aliases=aliases, + summary=summary, + references=references, + date_published=date_published, + url=advisory_url, + ) + affected_version_range = None + fixed_versions = gitlab_advisory.get("fixed_versions") or [] + affected_range = gitlab_advisory.get("affected_range") + gitlab_native_schemes = set(["pypi", "gem", "npm", "go", "packagist", "conan"]) + vrc: VersionRange = RANGE_CLASS_BY_SCHEMES[purl.type] + gitlab_scheme = gitlab_scheme_by_purl_type[purl.type] + try: + if affected_range: + if gitlab_scheme in gitlab_native_schemes: + affected_version_range = from_gitlab_native( + gitlab_scheme=gitlab_scheme, string=affected_range + ) + else: + affected_version_range = vrc.from_native(affected_range) + except Exception as e: + logger( + f"parse_yaml_file: affected_range is not parsable: {affected_range!r} for: {purl!s} error: {e!r}\n {traceback.format_exc()}", + level=logging.ERROR, + ) + + parsed_fixed_versions = [] + for fixed_version in fixed_versions: + try: + fixed_version = vrc.version_class(fixed_version) + parsed_fixed_versions.append(fixed_version) + except Exception as e: + logger( + f"parse_yaml_file: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}", + level=logging.ERROR, + ) + + if parsed_fixed_versions: + affected_packages = list( + extract_affected_packages( + affected_version_range=affected_version_range, + fixed_versions=parsed_fixed_versions, + purl=purl, + ) + ) + else: + if not affected_version_range: + affected_packages = [] + else: + affected_packages = [ + AffectedPackage( + package=purl, + affected_version_range=affected_version_range, + ) + ] + return AdvisoryData( + advisory_id=advisory_id, + aliases=aliases, + summary=summary, + references_v2=references, + date_published=date_published, + affected_packages=affected_packages, + weaknesses=cwe_list, + url=advisory_url, + ) diff --git 
a/vulnerabilities/pipelines/v2_importers/pypa_importer.py b/vulnerabilities/pipelines/v2_importers/pypa_importer.py new file mode 100644 index 000000000..ef20691a3 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/pypa_importer.py @@ -0,0 +1,73 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from pathlib import Path +from typing import Iterable + +import saneyaml +from fetchcode.vcs import fetch_via_vcs + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importers.osv import parse_advisory_data_v2 +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.utils import get_advisory_url + + +class PyPaImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """Collect advisories from PyPA GitHub repository.""" + + pipeline_id = "pypa_importer_v2" + + spdx_license_expression = "CC-BY-4.0" + license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" + repo_url = "git+https://github.com/pypa/advisory-database" + importer_name = "Pypa Importer" + + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.clean_downloads, + ) + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self): + vulns_directory = Path(self.vcs_response.dest_dir) / "vulns" + return sum(1 for _ in vulns_directory.rglob("*.yaml")) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + from vulnerabilities.importers.osv import parse_advisory_data + + base_directory = Path(self.vcs_response.dest_dir) + vulns_directory = base_directory / "vulns" + + for advisory in vulns_directory.rglob("*.yaml"): + advisory_url = get_advisory_url( + file=advisory, + base_path=base_directory, + url="https://github.com/pypa/advisory-database/blob/main/", + ) + advisory_dict = saneyaml.load(advisory.read_text()) + yield parse_advisory_data_v2( + raw_data=advisory_dict, + supported_ecosystems=["pypi"], + advisory_url=advisory_url, + ) + + def clean_downloads(self): + if self.vcs_response: + self.log(f"Removing cloned repository") + self.vcs_response.delete() + + def on_failure(self): + self.clean_downloads() diff --git a/vulnerabilities/pipelines/v2_importers/pysec_importer.py b/vulnerabilities/pipelines/v2_importers/pysec_importer.py new file mode 100644 index 000000000..6d09e02b2 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/pysec_importer.py @@ -0,0 +1,65 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# +import json +import logging +from io import BytesIO +from typing import Iterable +from zipfile import ZipFile + +import requests + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline, VulnerableCodeBaseImporterPipelineV2 + + +class PyPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """Collect advisories from PyPI.""" + + pipeline_id = "pysec_importer" + + license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" + url = "https://osv-vulnerabilities.storage.googleapis.com/PyPI/all.zip" + spdx_license_expression = "CC-BY-4.0" + importer_name = "PyPI Importer" + + @classmethod + def steps(cls): + return ( + cls.fetch_zip, + cls.collect_and_store_advisories, + ) + + def fetch_zip(self): + self.log(f"Fetching `{self.url}`") + self.advisory_zip = requests.get(self.url).content + + def advisories_count(self) -> int: + with ZipFile(BytesIO(self.advisory_zip)) as zip: + advisory_count = sum(1 for file in zip.namelist() if file.startswith("PYSEC-")) + return advisory_count + + def collect_advisories(self) -> Iterable[AdvisoryData]: + """Yield AdvisoryData using a zipped data dump of OSV data""" + from vulnerabilities.importers.osv import parse_advisory_data_v2 + + with ZipFile(BytesIO(self.advisory_zip)) as zip_file: + for file_name in zip_file.namelist(): + if not file_name.startswith("PYSEC-"): + self.log( + f"Unsupported PyPI advisory data file: {file_name}", + level=logging.ERROR, + ) + continue + with zip_file.open(file_name) as f: + vul_info = json.load(f) + yield parse_advisory_data_v2( + raw_data=vul_info, + supported_ecosystems=["pypi"], + advisory_url=self.url, + ) From e0a91a034793b24421299b1e53ddd6b553e17cd3 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 21 May 2025 14:58:41 +0530 Subject: [PATCH 15/44] Rename pipelines Signed-off-by: Tushar Goel --- vulnerabilities/pipelines/v2_importers/gitlab_importer.py | 2 +- vulnerabilities/pipelines/v2_importers/pysec_importer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vulnerabilities/pipelines/v2_importers/gitlab_importer.py b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py index 0088f6819..b642b9611 100644 --- a/vulnerabilities/pipelines/v2_importers/gitlab_importer.py +++ b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py @@ -36,7 +36,7 @@ class GitLabImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """Collect advisory from GitLab Advisory Database (Open Source Edition).""" - pipeline_id = "gitlab_importer" + pipeline_id = "gitlab_importer_v2" spdx_license_expression = "MIT" license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE" diff --git a/vulnerabilities/pipelines/v2_importers/pysec_importer.py b/vulnerabilities/pipelines/v2_importers/pysec_importer.py index 6d09e02b2..b4d7d768e 100644 --- a/vulnerabilities/pipelines/v2_importers/pysec_importer.py +++ b/vulnerabilities/pipelines/v2_importers/pysec_importer.py @@ -21,7 +21,7 @@ class PyPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """Collect advisories from PyPI.""" - pipeline_id = "pysec_importer" + pipeline_id = "pysec_importer_v2" license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" url = "https://osv-vulnerabilities.storage.googleapis.com/PyPI/all.zip" From 981172d5ae767ab5718e8bdd19968dcbeb37984b Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 21 May 2025 15:00:51 +0530 Subject: [PATCH 16/44] Reorder importers in registry Signed-off-by: Tushar Goel 
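A minimal sketch of how the parse_advisory_data_v2 helper added to vulnerabilities/importers/osv.py is driven by these OSV-based pipelines (an illustrative example built on a hypothetical PYSEC record, not data from the repository):

from vulnerabilities.importers.osv import parse_advisory_data_v2

# Hypothetical, minimal OSV record shaped like a PYSEC advisory from the PyPI zip dump.
raw = {
    "id": "PYSEC-0000-00",
    "summary": "Example issue",
    "details": "Longer example details.",
    "aliases": ["CVE-2021-00000", "PYSEC-0000-00"],
    "published": "2021-01-01T00:00:00Z",
    "references": [{"url": "https://example.com/advisory"}],
}

advisory = parse_advisory_data_v2(
    raw_data=raw,
    supported_ecosystems=["pypi"],
    advisory_url="https://osv-vulnerabilities.storage.googleapis.com/PyPI/all.zip",
)
# advisory.advisory_id == "PYSEC-0000-00"; the self-referencing alias is dropped so only
# "CVE-2021-00000" remains in advisory.aliases, and with no "affected" entries the
# affected_packages list is empty.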
--- vulnerabilities/importers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 60f9a77f4..ff0135d1c 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -58,10 +58,10 @@ npm_importer_v2.NpmImporterPipeline, vulnrichment_importer_v2.VulnrichImporterPipeline, apache_httpd_v2.ApacheHTTPDImporterPipeline, - nvd_importer.NVDImporterPipeline, pypa_importer_v2.PyPaImporterPipeline, gitlab_importer_v2.GitLabImporterPipeline, pysec_importer_v2.PyPIImporterPipeline, + nvd_importer.NVDImporterPipeline, github_importer.GitHubAPIImporterPipeline, gitlab_importer.GitLabImporterPipeline, github_osv.GithubOSVImporter, From cff7a254e91541a371d683ab001f46344faa1888 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 21 May 2025 15:11:59 +0530 Subject: [PATCH 17/44] Fix tests Signed-off-by: Tushar Goel --- vulnerabilities/importers/__init__.py | 2 +- .../pipelines/v2_importers/pysec_importer.py | 3 +- ...security_advisories-importer-expected.json | 66 +++++++++---------- ...security_advisories-importer-expected.json | 2 +- 4 files changed, 37 insertions(+), 36 deletions(-) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index ff0135d1c..651d46f1f 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -45,11 +45,11 @@ from vulnerabilities.pipelines import pysec_importer from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2 from vulnerabilities.pipelines.v2_importers import github_importer as github_importer_v2 +from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2 from vulnerabilities.pipelines.v2_importers import npm_importer as npm_importer_v2 from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2 from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2 from vulnerabilities.pipelines.v2_importers import pysec_importer as pysec_importer_v2 -from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2 from vulnerabilities.pipelines.v2_importers import vulnrichment_importer as vulnrichment_importer_v2 IMPORTERS_REGISTRY = [ diff --git a/vulnerabilities/pipelines/v2_importers/pysec_importer.py b/vulnerabilities/pipelines/v2_importers/pysec_importer.py index b4d7d768e..42a72d04e 100644 --- a/vulnerabilities/pipelines/v2_importers/pysec_importer.py +++ b/vulnerabilities/pipelines/v2_importers/pysec_importer.py @@ -15,7 +15,8 @@ import requests from vulnerabilities.importer import AdvisoryData -from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline, VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 class PyPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): diff --git a/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json b/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json index 9e760590f..bbcbdb66e 100644 --- a/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json +++ b/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json @@ -971,39 +971,6 @@ "date_published": null, "weaknesses": [] }, - { - "unique_content_id": 
"8f54462a45ac49635f660b6fb755d5e05cdbc34ebaa565e38ca20c522579ce7f", - "summary": "Vulnerabilities with Windows 8.3 filename pseudonyms", - "affected_packages": [ - { - "package": { - "name": "nginx", - "type": "nginx", - "subpath": "", - "version": "", - "namespace": "", - "qualifiers": "os=windows" - }, - "fixed_version": "0.8.33", - "affected_version_range": "vers:nginx/>=0.7.52|<=0.8.32" - }, - { - "package": { - "name": "nginx", - "type": "nginx", - "subpath": "", - "version": "", - "namespace": "", - "qualifiers": "os=windows" - }, - "fixed_version": "0.7.65", - "affected_version_range": "vers:nginx/>=0.7.52|<=0.8.32" - } - ], - "references": [], - "date_published": null, - "weaknesses": [] - }, { "unique_content_id": "92ce767b8cea36271d33c119cb6f706f64f5aba7335cca6791eca90a87f48de1", "summary": "Vulnerabilities with Windows file default stream", @@ -1536,6 +1503,39 @@ "date_published": null, "weaknesses": [] }, + { + "unique_content_id": "cd771d4c853d0a5fd6bbc62866fe638c0b41f21f600889447bbaf3b6cd4bad7b", + "summary": "Vulnerabilities with Windows 8.3 filename pseudonyms", + "affected_packages": [ + { + "package": { + "name": "nginx", + "type": "nginx", + "subpath": "", + "version": "", + "namespace": "", + "qualifiers": "os=windows" + }, + "fixed_version": "0.8.33", + "affected_version_range": "vers:nginx/>=0.7.52|<=0.8.32" + }, + { + "package": { + "name": "nginx", + "type": "nginx", + "subpath": "", + "version": "", + "namespace": "", + "qualifiers": "os=windows" + }, + "fixed_version": "0.7.65", + "affected_version_range": "vers:nginx/>=0.7.52|<=0.8.32" + } + ], + "references": [], + "date_published": null, + "weaknesses": [] + }, { "unique_content_id": "de7a819f87c93c708251b734406d2b9916fce494ab3987be40ca37426b0c2044", "summary": "Buffer underflow vulnerability", diff --git a/vulnerabilities/tests/test_data/openssl/security_advisories-importer-expected.json b/vulnerabilities/tests/test_data/openssl/security_advisories-importer-expected.json index 60722cd00..2dcf7dd1c 100644 --- a/vulnerabilities/tests/test_data/openssl/security_advisories-importer-expected.json +++ b/vulnerabilities/tests/test_data/openssl/security_advisories-importer-expected.json @@ -3614,7 +3614,7 @@ "weaknesses": [] }, { - "unique_content_id": "4ee23c143c0a01cd7035e1646adaf2222725ad2c96447ffc524eb79d1ac532dd", + "unique_content_id": "c160af01903483f7380e4bbecd41bc2346069c1c931eb2be9cc45643daf51937", "summary": "OpenSSL has added support for TLS_FALLBACK_SCSV to allow applications to block the ability for a MITM attacker to force a protocol downgrade. Some client applications (such as browsers) will reconnect using a downgraded protocol to work around interoperability bugs in older servers. This could be exploited by an active man-in-the-middle to downgrade connections to SSL 3.0 even if both sides of the connection support higher protocols. SSL 3.0 contains a number of weaknesses including POODLE (CVE-2014-3566). 
See also https://tools.ietf.org/html/draft-ietf-tls-downgrade-scsv-00 and https://www.openssl.org/~bodo/ssl-poodle.pdf", "affected_packages": [ { From 0dca10915252c25f190b91572333d9ba2af31973 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 21 May 2025 15:23:52 +0530 Subject: [PATCH 18/44] Fix tests Signed-off-by: Tushar Goel --- ...security_advisories-importer-expected.json | 66 +++++++++---------- ...security_advisories-importer-expected.json | 2 +- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json b/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json index bbcbdb66e..9e760590f 100644 --- a/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json +++ b/vulnerabilities/tests/test_data/nginx/security_advisories-importer-expected.json @@ -971,6 +971,39 @@ "date_published": null, "weaknesses": [] }, + { + "unique_content_id": "8f54462a45ac49635f660b6fb755d5e05cdbc34ebaa565e38ca20c522579ce7f", + "summary": "Vulnerabilities with Windows 8.3 filename pseudonyms", + "affected_packages": [ + { + "package": { + "name": "nginx", + "type": "nginx", + "subpath": "", + "version": "", + "namespace": "", + "qualifiers": "os=windows" + }, + "fixed_version": "0.8.33", + "affected_version_range": "vers:nginx/>=0.7.52|<=0.8.32" + }, + { + "package": { + "name": "nginx", + "type": "nginx", + "subpath": "", + "version": "", + "namespace": "", + "qualifiers": "os=windows" + }, + "fixed_version": "0.7.65", + "affected_version_range": "vers:nginx/>=0.7.52|<=0.8.32" + } + ], + "references": [], + "date_published": null, + "weaknesses": [] + }, { "unique_content_id": "92ce767b8cea36271d33c119cb6f706f64f5aba7335cca6791eca90a87f48de1", "summary": "Vulnerabilities with Windows file default stream", @@ -1503,39 +1536,6 @@ "date_published": null, "weaknesses": [] }, - { - "unique_content_id": "cd771d4c853d0a5fd6bbc62866fe638c0b41f21f600889447bbaf3b6cd4bad7b", - "summary": "Vulnerabilities with Windows 8.3 filename pseudonyms", - "affected_packages": [ - { - "package": { - "name": "nginx", - "type": "nginx", - "subpath": "", - "version": "", - "namespace": "", - "qualifiers": "os=windows" - }, - "fixed_version": "0.8.33", - "affected_version_range": "vers:nginx/>=0.7.52|<=0.8.32" - }, - { - "package": { - "name": "nginx", - "type": "nginx", - "subpath": "", - "version": "", - "namespace": "", - "qualifiers": "os=windows" - }, - "fixed_version": "0.7.65", - "affected_version_range": "vers:nginx/>=0.7.52|<=0.8.32" - } - ], - "references": [], - "date_published": null, - "weaknesses": [] - }, { "unique_content_id": "de7a819f87c93c708251b734406d2b9916fce494ab3987be40ca37426b0c2044", "summary": "Buffer underflow vulnerability", diff --git a/vulnerabilities/tests/test_data/openssl/security_advisories-importer-expected.json b/vulnerabilities/tests/test_data/openssl/security_advisories-importer-expected.json index 2dcf7dd1c..60722cd00 100644 --- a/vulnerabilities/tests/test_data/openssl/security_advisories-importer-expected.json +++ b/vulnerabilities/tests/test_data/openssl/security_advisories-importer-expected.json @@ -3614,7 +3614,7 @@ "weaknesses": [] }, { - "unique_content_id": "c160af01903483f7380e4bbecd41bc2346069c1c931eb2be9cc45643daf51937", + "unique_content_id": "4ee23c143c0a01cd7035e1646adaf2222725ad2c96447ffc524eb79d1ac532dd", "summary": "OpenSSL has added support for TLS_FALLBACK_SCSV to allow applications to block the ability for a MITM attacker to force a protocol 
downgrade. Some client applications (such as browsers) will reconnect using a downgraded protocol to work around interoperability bugs in older servers. This could be exploited by an active man-in-the-middle to downgrade connections to SSL 3.0 even if both sides of the connection support higher protocols. SSL 3.0 contains a number of weaknesses including POODLE (CVE-2014-3566). See also https://tools.ietf.org/html/draft-ietf-tls-downgrade-scsv-00 and https://www.openssl.org/~bodo/ssl-poodle.pdf", "affected_packages": [ { From 94c0efbffe5ae02268930648d8f9d1305706ff25 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 21 May 2025 15:23:59 +0530 Subject: [PATCH 19/44] Fix tests Signed-off-by: Tushar Goel --- vulnerabilities/utils.py | 44 ++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index fb0d5f8bf..0bfd5825e 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -612,25 +612,33 @@ def compute_content_id(advisory_data): normalized_data["url"] = advisory_data.url elif isinstance(advisory_data, AdvisoryData): - normalized_data = { - "aliases": normalize_list(advisory_data.aliases), - "summary": normalize_text(advisory_data.summary), - "affected_packages": [ - pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg - ], - "weaknesses": normalize_list(advisory_data.weaknesses), - } if advisory_data.references_v2: - normalized_data["references"] = [ - ref.to_dict() for ref in normalize_list(advisory_data.references_v2) if ref - ] - normalized_data["severities"] = [ - sev.to_dict() for sev in normalize_list(advisory_data.severities) if sev - ] - if advisory_data.references: - normalized_data["references"] = [ - ref.to_dict() for ref in normalize_list(advisory_data.references) if ref - ] + normalized_data = { + "aliases": normalize_list(advisory_data.aliases), + "summary": normalize_text(advisory_data.summary), + "affected_packages": [ + pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg + ], + "references": [ + ref.to_dict() for ref in normalize_list(advisory_data.references_v2) if ref + ], + "severities": [ + sev.to_dict() for sev in normalize_list(advisory_data.severities) if sev + ], + "weaknesses": normalize_list(advisory_data.weaknesses), + } + elif advisory_data.references or advisory_data.references == []: + normalized_data = { + "aliases": normalize_list(advisory_data.aliases), + "summary": normalize_text(advisory_data.summary), + "affected_packages": [ + pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg + ], + "references": [ + ref.to_dict() for ref in normalize_list(advisory_data.references) if ref + ], + "weaknesses": normalize_list(advisory_data.weaknesses), + } normalized_data["url"] = advisory_data.url From 21d951ccde3bfe8c0b46aa974b276e7fc2b77139 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 21 May 2025 15:31:36 +0530 Subject: [PATCH 20/44] Add tests for apache HTTPD importer pipeline Signed-off-by: Tushar Goel --- .../test_apache_httpd_importer_pipeline_v2.py | 152 ++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py diff --git a/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py new file mode 100644 index 000000000..70ca00688 --- /dev/null +++ 
b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py @@ -0,0 +1,152 @@ +import pytest +import requests + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import ApacheHTTPDImporterPipeline +from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import fetch_links +from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import get_weaknesses +from vulnerabilities.severity_systems import APACHE_HTTPD + + +# Dummy responses +class DummyResponseContent: + def __init__(self, content_bytes): + self.content = content_bytes + + +class DummyResponseJSON: + def __init__(self, json_data): + self._json = json_data + + def json(self): + return self._json + + +# Tests for fetch_links +@pytest.fixture(autouse=True) +def no_requests(monkeypatch): + # Ensure other tests don't hit real HTTP + monkeypatch.setattr( + requests, + "get", + lambda url: (_ for _ in ()).throw(AssertionError(f"Unexpected HTTP GET call to {url}")), + ) + + +def test_fetch_links_filters_and_resolves(monkeypatch): + html = """ + + A1 + A2 + TXT + + """ + base_url = "https://example.com/base/" + # Monkeypatch HTTP GET for HTML + def fake_get(url): + assert url == base_url + return DummyResponseContent(html.encode("utf-8")) + + monkeypatch.setattr(requests, "get", fake_get) + links = fetch_links(base_url) + assert len(links) == 2 + assert links == [ + "https://example.com/base/advisory1.json", + "https://example.com/json/advisory2.json", + ] + + +# Tests for get_weaknesses +def test_get_weaknesses_with_cna_structure(): + mock_data = { + "containers": {"cna": {"problemTypes": [{"descriptions": [{"cweId": "CWE-125"}]}]}} + } + result = get_weaknesses(mock_data) + assert result == [125] + + +def test_get_weaknesses_with_data_meta_structure(): + mock_data = { + "CVE_data_meta": {"ID": "CVE-2020-0001"}, + "problemtype": { + "problemtype_data": [ + {"description": [{"value": "CWE-190 Integer Overflow"}]}, + {"description": [{"value": "CWE-200 Some Issue"}]}, + ] + }, + } + result = get_weaknesses(mock_data) + assert set(result) == {190, 200} + + +# Tests for ApacheHTTPDImporterPipeline +class DummyPipeline(ApacheHTTPDImporterPipeline): + # Expose protected methods for testing + pass + + +@pytest.fixture +def pipeline(monkeypatch): + pipe = DummyPipeline() + # Prevent real HTTP in fetch_links + monkeypatch.setattr( + "vulnerabilities.pipelines.v2_importers.apache_httpd_importer.fetch_links", lambda url: ["u1", "u2"] + ) + return pipe + + +def test_advisories_count(monkeypatch, pipeline): + # Should use mocked links + count = pipeline.advisories_count() + assert count == 2 + + +def test_collect_advisories_and_to_advisory(monkeypatch, pipeline): + # Prepare two dummy JSONs + sample1 = { + "CVE_data_meta": {"ID": "CVE-1"}, + "description": {"description_data": [{"lang": "eng", "value": "Test desc"}]}, + "impact": [{"other": "5.0"}], + "affects": {"vendor": {"vendor_data": []}}, + "timeline": [], + } + sample2 = { + "cveMetadata": {"cveId": "CVE-2"}, + "description": {"description_data": [{"lang": "eng", "value": "Other desc"}]}, + "impact": [{"other": "7.5"}], + "affects": {"vendor": {"vendor_data": []}}, + "timeline": [], + } + # Monkeypatch requests.get to return JSON + def fake_get(u): + if u == "u1": + return DummyResponseJSON(sample1) + elif u == "u2": + return DummyResponseJSON(sample2) + else: + raise AssertionError(f"Unexpected URL {u}") + + monkeypatch.setattr(requests, "get", fake_get) + advisories = 
list(pipeline.collect_advisories()) + assert len(advisories) == 2 + # Validate first advisory + adv1 = advisories[0] + assert isinstance(adv1, AdvisoryData) + assert adv1.advisory_id == "CVE-1" + assert adv1.summary == "Test desc" + assert adv1.severities and adv1.severities[0].value == "5.0" + assert adv1.url.endswith("CVE-1.json") + # Validate second advisory + adv2 = advisories[1] + assert adv2.advisory_id == "CVE-2" + assert adv2.summary == "Other desc" + assert adv2.severities[0].value == "7.5" + + +# Test version range conversion error +def test_to_version_ranges_unknown_comparator(pipeline): + # version_data with bad comparator + versions_data = [{"version_value": "1.0.0", "version_affected": "<>"}] + fixed_versions = [] + with pytest.raises(ValueError): + pipeline.to_version_ranges(versions_data, fixed_versions) From 23f770dd2a6344877ea49aee53e8c268f091a25f Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 21 May 2025 15:38:27 +0530 Subject: [PATCH 21/44] Add tests for npm importer pipeline Signed-off-by: Tushar Goel --- .../test_apache_httpd_importer_pipeline_v2.py | 3 +- .../test_npm_importer_pipeline_v2.py | 119 ++++++++++++++++++ 2 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py diff --git a/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py index 70ca00688..6b0f847c4 100644 --- a/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py +++ b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py @@ -90,7 +90,8 @@ def pipeline(monkeypatch): pipe = DummyPipeline() # Prevent real HTTP in fetch_links monkeypatch.setattr( - "vulnerabilities.pipelines.v2_importers.apache_httpd_importer.fetch_links", lambda url: ["u1", "u2"] + "vulnerabilities.pipelines.v2_importers.apache_httpd_importer.fetch_links", + lambda url: ["u1", "u2"], ) return pipe diff --git a/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py new file mode 100644 index 000000000..a7cd86166 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py @@ -0,0 +1,119 @@ +import json +from types import SimpleNamespace + +import pytz +from packageurl import PackageURL +from univers.version_range import NpmVersionRange +from univers.versions import SemverVersion + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines.v2_importers.npm_importer import NpmImporterPipeline +from vulnerabilities.severity_systems import CVSSV2 +from vulnerabilities.severity_systems import CVSSV3 + + +def test_clone(monkeypatch): + import vulnerabilities.pipelines.v2_importers.npm_importer as npm_mod + + dummy = SimpleNamespace(dest_dir="dummy", delete=lambda: None) + # Patch the name in the npm_importer module, not fetchcode.vcs + monkeypatch.setattr(npm_mod, "fetch_via_vcs", lambda url: dummy) + + p = NpmImporterPipeline() + p.clone() + + assert p.vcs_response is dummy + + +def test_clean_downloads_and_on_failure(): + called = {} + + def delete(): + called["deleted"] = True + + dummy = SimpleNamespace(dest_dir="dummy", delete=delete) + p = NpmImporterPipeline() + p.vcs_response = dummy + p.clean_downloads() + assert called.get("deleted", False) + called.clear() + p.on_failure() + assert called.get("deleted", False) + + +def test_advisories_count_and_collect(tmp_path): + base = tmp_path + vuln_dir = 
base / "vuln" / "npm" + vuln_dir.mkdir(parents=True) + (vuln_dir / "index.json").write_text("{}") + (vuln_dir / "001.json").write_text(json.dumps({"id": "001"})) + p = NpmImporterPipeline() + p.vcs_response = SimpleNamespace(dest_dir=str(base), delete=lambda: None) + assert p.advisories_count() == 2 + advisories = list(p.collect_advisories()) + # Should yield None for index.json and one AdvisoryData + real = [a for a in advisories if isinstance(a, AdvisoryData)] + assert len(real) == 1 + assert real[0].advisory_id == "NODESEC-NPM-001" + + +def test_to_advisory_data_skips_index(tmp_path): + p = NpmImporterPipeline() + file = tmp_path / "index.json" + file.write_text("{}") + assert p.to_advisory_data(file) is None + + +def test_to_advisory_data_full(tmp_path): + data = { + "id": "123", + "overview": "desc", + "title": "ti", + "created_at": "2021-01-01T00:00:00Z", + "cvss_vector": "CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", + "cvss_score": "9.8", + "references": ["http://ref1"], + "module_name": "mypkg", + "vulnerable_versions": "<=1.2.3", + "patched_versions": ">=1.2.4", + "cves": ["CVE-123", "CVE-124"], + } + file = tmp_path / "123.json" + file.write_text(json.dumps(data)) + p = NpmImporterPipeline() + adv = p.to_advisory_data(file) + assert isinstance(adv, AdvisoryData) + assert adv.advisory_id == "NODESEC-NPM-123" + assert "ti" in adv.summary and "desc" in adv.summary + assert adv.date_published.tzinfo == pytz.UTC + assert len(adv.severities) == 1 and adv.severities[0].system == CVSSV3 + urls = [r.url for r in adv.references_v2] + assert "http://ref1" in urls + assert f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/123.json" in urls + pkg = adv.affected_packages[0] + assert pkg.package == PackageURL(type="npm", name="mypkg") + assert isinstance(pkg.affected_version_range, NpmVersionRange) + assert pkg.fixed_version == SemverVersion("1.2.4") + assert set(adv.aliases) == {"CVE-123", "CVE-124"} + + +def test_to_advisory_data_cvss_v2(tmp_path): + data = {"id": "124", "cvss_vector": "CVSS:2.0/AV:N/AC:L/Au:N/C:P/I:P/A:P", "cvss_score": "5.5"} + file = tmp_path / "124.json" + file.write_text(json.dumps(data)) + p = NpmImporterPipeline() + adv = p.to_advisory_data(file) + assert len(adv.severities) == 1 and adv.severities[0].system == CVSSV2 + + +def test_get_affected_package_special_and_standard(): + p = NpmImporterPipeline() + pkg = p.get_affected_package( + {"vulnerable_versions": "<=99.999.99999", "patched_versions": "<0.0.0"}, "pkg" + ) + assert isinstance(pkg.affected_version_range, NpmVersionRange) + assert pkg.fixed_version is None + data2 = {"vulnerable_versions": "<=2.0.0", "patched_versions": ">=2.0.1"} + pkg2 = p.get_affected_package(data2, "pkg2") + assert isinstance(pkg2.affected_version_range, NpmVersionRange) + assert pkg2.fixed_version == SemverVersion("2.0.1") From cf6cb8383a7657e7fa07f5bc7c54b43e59aa6cd4 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 21 May 2025 19:37:46 +0530 Subject: [PATCH 22/44] Add tests for github importer pipeline Signed-off-by: Tushar Goel --- .../pipelines/test_github_importer_v2.py | 185 ++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 vulnerabilities/tests/pipelines/test_github_importer_v2.py diff --git a/vulnerabilities/tests/pipelines/test_github_importer_v2.py b/vulnerabilities/tests/pipelines/test_github_importer_v2.py new file mode 100644 index 000000000..dfae43db8 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_github_importer_v2.py @@ -0,0 +1,185 @@ +import pytest +from unittest.mock import 
patch, MagicMock +from datetime import datetime + +from vulnerabilities.importer import AdvisoryData, AffectedPackage, Reference, VulnerabilitySeverity +from vulnerabilities.utils import get_item +from packageurl import PackageURL +from univers.version_constraint import VersionConstraint +from univers.version_range import RANGE_CLASS_BY_SCHEMES +from univers.versions import SemverVersion +from vulnerabilities.pipelines.v2_importers.github_importer import GitHubAPIImporterPipeline +from vulnerabilities.pipelines.v2_importers.github_importer import get_cwes_from_github_advisory, get_purl + +@pytest.fixture +def mock_fetch(): + with patch("vulnerabilities.pipelines.v2_importers.github_importer.utils.fetch_github_graphql_query") as mock: + yield mock + + +def test_advisories_count(mock_fetch): + # Mock the GraphQL query response for advisory count + mock_fetch.return_value = { + "data": { + "securityVulnerabilities": { + "totalCount": 10 + } + } + } + + pipeline = GitHubAPIImporterPipeline() + + count = pipeline.advisories_count() + + # Assert that the count is correct + assert count == 10 + + +def test_collect_advisories(mock_fetch): + # Mock advisory data for GitHub + advisory_data = { + "data": { + "securityVulnerabilities": { + "edges": [ + { + "node": { + "advisory": { + "identifiers": [ + {"type": "GHSA", "value": "GHSA-1234-ABCD"} + ], + "summary": "Sample advisory description", + "references": [{"url": "https://github.com/advisories/GHSA-1234-ABCD"}], + "severity": "HIGH", + "cwes": { + "nodes": [{"cweId": "CWE-123"}] + }, + "publishedAt": "2023-01-01T00:00:00Z" + }, + "firstPatchedVersion": {"identifier": "1.2.3"}, + "package": {"name": "example-package"}, + "vulnerableVersionRange": ">=1.0.0,<=1.2.0" + } + } + ], + "pageInfo": { + "hasNextPage": False, + "endCursor": None + } + } + } + } + + # Mock the response from GitHub GraphQL query + mock_fetch.return_value = advisory_data + + # Instantiate the pipeline + pipeline = GitHubAPIImporterPipeline() + + # Collect advisories + advisories = list(pipeline.collect_advisories()) + + # Check if advisories were correctly parsed + assert len(advisories) == 1 + advisory = advisories[0] + + # Validate advisory fields + assert advisory.advisory_id == "GHSA-1234-ABCD" + assert advisory.summary == "Sample advisory description" + assert advisory.url == "https://github.com/advisories/GHSA-1234-ABCD" + assert len(advisory.references_v2) == 1 + assert advisory.references_v2[0].reference_id == "GHSA-1234-ABCD" + assert advisory.severities[0].value == "HIGH" + + # Validate affected package and version range + affected_package = advisory.affected_packages[0] + assert isinstance(affected_package.package, PackageURL) + assert affected_package.package.name == "example-package" + + # Check CWE extraction + assert advisory.weaknesses == [123] + + +def test_get_purl(mock_fetch): + # Test for package URL generation + result = get_purl("cargo", "example/package-name") + + # Validate that the correct PackageURL is generated + assert isinstance(result, PackageURL) + assert result.type == "cargo" + assert result.namespace == None + assert result.name == "example/package-name" + + +def test_process_response(mock_fetch): + # Mock advisory data as input for the process_response function + advisory_data = { + "data": { + "securityVulnerabilities": { + "edges": [ + { + "node": { + "advisory": { + "identifiers": [ + {"type": "GHSA", "value": "GHSA-5678-EFGH"} + ], + "summary": "Another advisory", + "references": [{"url": "https://github.com/advisories/GHSA-5678-EFGH"}], 
+ "severity": "MEDIUM", + "cwes": { + "nodes": [{"cweId": "CWE-200"}] + }, + "publishedAt": "2023-02-01T00:00:00Z" + }, + "firstPatchedVersion": {"identifier": "2.0.0"}, + "package": {"name": "another-package"}, + "vulnerableVersionRange": ">=2.0.0,<=3.0.0" + } + } + ], + "pageInfo": { + "hasNextPage": False, + "endCursor": None + } + } + } + } + + # Mock the response from GitHub GraphQL query + mock_fetch.return_value = advisory_data + + # Process the mock response + result = list(GitHubAPIImporterPipeline().collect_advisories()) + + # Check the results + assert len(result) == 1 + advisory = result[0] + + # Validate the advisory data + assert advisory.advisory_id == "GHSA-5678-EFGH" + assert advisory.summary == "Another advisory" + assert advisory.url == "https://github.com/advisories/GHSA-5678-EFGH" + + # Check CWE extraction + assert advisory.weaknesses == [200] + + +def test_get_cwes_from_github_advisory(mock_fetch): + # Mock CWEs extraction from GitHub advisory + advisory_data = { + "cwes": { + "nodes": [{"cweId": "CWE-522"}] + } + } + + cwes = get_cwes_from_github_advisory(advisory_data) + + # Validate the CWE ID extraction + assert cwes == [522] + + +def test_invalid_package_type_in_get_purl(mock_fetch): + # Test for invalid package type + result = get_purl("invalidpkg", "example/package-name") + + # Assert that None is returned for an invalid package type + assert result is None From f10a26c2f324af692f3d44e0911822dbbe68b193 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 21 May 2025 19:57:47 +0530 Subject: [PATCH 23/44] Add tests for pysec importer Signed-off-by: Tushar Goel --- .../pipelines/test_github_importer_v2.py | 113 +++++++------- .../tests/pipelines/test_pysec_v2_importer.py | 141 ++++++++++++++++++ 2 files changed, 192 insertions(+), 62 deletions(-) create mode 100644 vulnerabilities/tests/pipelines/test_pysec_v2_importer.py diff --git a/vulnerabilities/tests/pipelines/test_github_importer_v2.py b/vulnerabilities/tests/pipelines/test_github_importer_v2.py index dfae43db8..e6470eadf 100644 --- a/vulnerabilities/tests/pipelines/test_github_importer_v2.py +++ b/vulnerabilities/tests/pipelines/test_github_importer_v2.py @@ -1,36 +1,39 @@ -import pytest -from unittest.mock import patch, MagicMock from datetime import datetime +from unittest.mock import MagicMock +from unittest.mock import patch -from vulnerabilities.importer import AdvisoryData, AffectedPackage, Reference, VulnerabilitySeverity -from vulnerabilities.utils import get_item +import pytest from packageurl import PackageURL from univers.version_constraint import VersionConstraint from univers.version_range import RANGE_CLASS_BY_SCHEMES from univers.versions import SemverVersion + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity from vulnerabilities.pipelines.v2_importers.github_importer import GitHubAPIImporterPipeline -from vulnerabilities.pipelines.v2_importers.github_importer import get_cwes_from_github_advisory, get_purl +from vulnerabilities.pipelines.v2_importers.github_importer import get_cwes_from_github_advisory +from vulnerabilities.pipelines.v2_importers.github_importer import get_purl +from vulnerabilities.utils import get_item + @pytest.fixture def mock_fetch(): - with patch("vulnerabilities.pipelines.v2_importers.github_importer.utils.fetch_github_graphql_query") as mock: + with patch( + 
"vulnerabilities.pipelines.v2_importers.github_importer.utils.fetch_github_graphql_query" + ) as mock: yield mock def test_advisories_count(mock_fetch): # Mock the GraphQL query response for advisory count - mock_fetch.return_value = { - "data": { - "securityVulnerabilities": { - "totalCount": 10 - } - } - } - + mock_fetch.return_value = {"data": {"securityVulnerabilities": {"totalCount": 10}}} + pipeline = GitHubAPIImporterPipeline() - + count = pipeline.advisories_count() - + # Assert that the count is correct assert count == 10 @@ -44,44 +47,39 @@ def test_collect_advisories(mock_fetch): { "node": { "advisory": { - "identifiers": [ - {"type": "GHSA", "value": "GHSA-1234-ABCD"} - ], + "identifiers": [{"type": "GHSA", "value": "GHSA-1234-ABCD"}], "summary": "Sample advisory description", - "references": [{"url": "https://github.com/advisories/GHSA-1234-ABCD"}], + "references": [ + {"url": "https://github.com/advisories/GHSA-1234-ABCD"} + ], "severity": "HIGH", - "cwes": { - "nodes": [{"cweId": "CWE-123"}] - }, - "publishedAt": "2023-01-01T00:00:00Z" + "cwes": {"nodes": [{"cweId": "CWE-123"}]}, + "publishedAt": "2023-01-01T00:00:00Z", }, "firstPatchedVersion": {"identifier": "1.2.3"}, "package": {"name": "example-package"}, - "vulnerableVersionRange": ">=1.0.0,<=1.2.0" + "vulnerableVersionRange": ">=1.0.0,<=1.2.0", } } ], - "pageInfo": { - "hasNextPage": False, - "endCursor": None - } + "pageInfo": {"hasNextPage": False, "endCursor": None}, } } } # Mock the response from GitHub GraphQL query mock_fetch.return_value = advisory_data - + # Instantiate the pipeline pipeline = GitHubAPIImporterPipeline() - + # Collect advisories advisories = list(pipeline.collect_advisories()) - + # Check if advisories were correctly parsed assert len(advisories) == 1 advisory = advisories[0] - + # Validate advisory fields assert advisory.advisory_id == "GHSA-1234-ABCD" assert advisory.summary == "Sample advisory description" @@ -89,12 +87,12 @@ def test_collect_advisories(mock_fetch): assert len(advisory.references_v2) == 1 assert advisory.references_v2[0].reference_id == "GHSA-1234-ABCD" assert advisory.severities[0].value == "HIGH" - + # Validate affected package and version range affected_package = advisory.affected_packages[0] assert isinstance(affected_package.package, PackageURL) assert affected_package.package.name == "example-package" - + # Check CWE extraction assert advisory.weaknesses == [123] @@ -102,13 +100,13 @@ def test_collect_advisories(mock_fetch): def test_get_purl(mock_fetch): # Test for package URL generation result = get_purl("cargo", "example/package-name") - + # Validate that the correct PackageURL is generated assert isinstance(result, PackageURL) assert result.type == "cargo" assert result.namespace == None assert result.name == "example/package-name" - + def test_process_response(mock_fetch): # Mock advisory data as input for the process_response function @@ -119,60 +117,51 @@ def test_process_response(mock_fetch): { "node": { "advisory": { - "identifiers": [ - {"type": "GHSA", "value": "GHSA-5678-EFGH"} - ], + "identifiers": [{"type": "GHSA", "value": "GHSA-5678-EFGH"}], "summary": "Another advisory", - "references": [{"url": "https://github.com/advisories/GHSA-5678-EFGH"}], + "references": [ + {"url": "https://github.com/advisories/GHSA-5678-EFGH"} + ], "severity": "MEDIUM", - "cwes": { - "nodes": [{"cweId": "CWE-200"}] - }, - "publishedAt": "2023-02-01T00:00:00Z" + "cwes": {"nodes": [{"cweId": "CWE-200"}]}, + "publishedAt": "2023-02-01T00:00:00Z", }, "firstPatchedVersion": 
{"identifier": "2.0.0"}, "package": {"name": "another-package"}, - "vulnerableVersionRange": ">=2.0.0,<=3.0.0" + "vulnerableVersionRange": ">=2.0.0,<=3.0.0", } } ], - "pageInfo": { - "hasNextPage": False, - "endCursor": None - } + "pageInfo": {"hasNextPage": False, "endCursor": None}, } } } - + # Mock the response from GitHub GraphQL query mock_fetch.return_value = advisory_data - + # Process the mock response result = list(GitHubAPIImporterPipeline().collect_advisories()) - + # Check the results assert len(result) == 1 advisory = result[0] - + # Validate the advisory data assert advisory.advisory_id == "GHSA-5678-EFGH" assert advisory.summary == "Another advisory" assert advisory.url == "https://github.com/advisories/GHSA-5678-EFGH" - + # Check CWE extraction assert advisory.weaknesses == [200] def test_get_cwes_from_github_advisory(mock_fetch): # Mock CWEs extraction from GitHub advisory - advisory_data = { - "cwes": { - "nodes": [{"cweId": "CWE-522"}] - } - } + advisory_data = {"cwes": {"nodes": [{"cweId": "CWE-522"}]}} cwes = get_cwes_from_github_advisory(advisory_data) - + # Validate the CWE ID extraction assert cwes == [522] @@ -180,6 +169,6 @@ def test_get_cwes_from_github_advisory(mock_fetch): def test_invalid_package_type_in_get_purl(mock_fetch): # Test for invalid package type result = get_purl("invalidpkg", "example/package-name") - + # Assert that None is returned for an invalid package type assert result is None diff --git a/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py b/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py new file mode 100644 index 000000000..51bb03daf --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py @@ -0,0 +1,141 @@ +import json +from io import BytesIO +from unittest.mock import MagicMock +from unittest.mock import patch +from zipfile import BadZipFile +from zipfile import ZipFile + +import pytest + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.pipelines.v2_importers.pysec_importer import ( + PyPIImporterPipeline, # Path to the PyPI Importer +) + + +@pytest.fixture +def mock_zip_data(): + # Create mock zip data for testing + zip_buffer = BytesIO() + with ZipFile(zip_buffer, mode="w") as zip_file: + # Create a sample advisory file inside the zip + advisory_data = { + "advisory_id": "PYSEC-1234", + "summary": "Sample PyPI advisory", + "references": [{"url": "https://pypi.org/advisory/PYSEC-1234"}], + "package": {"name": "example-package"}, + "affected_versions": ">=1.0.0,<=2.0.0", + } + # Save the sample advisory as a JSON file + with zip_file.open("PYSEC-1234.json", "w") as f: + f.write(json.dumps(advisory_data).encode("utf-8")) + zip_buffer.seek(0) + return zip_buffer + + +@pytest.fixture +def mock_requests_get(): + with patch("requests.get") as mock: + yield mock + + +def test_fetch_zip(mock_requests_get, mock_zip_data): + # Mock the `requests.get` to return the mock zip data + mock_requests_get.return_value.content = mock_zip_data.read() + + pipeline = PyPIImporterPipeline() + + # Call the `fetch_zip` method + pipeline.fetch_zip() + + # Reset the position of mock_zip_data to 0 before comparing + mock_zip_data.seek(0) + + # Verify that the zip file content is correctly assigned + assert pipeline.advisory_zip == mock_zip_data.read() + + +def test_advisories_count(mock_requests_get, mock_zip_data): + # Mock the `requests.get` to return 
the mock zip data + mock_requests_get.return_value.content = mock_zip_data.read() + + pipeline = PyPIImporterPipeline() + + # Fetch the zip data + pipeline.fetch_zip() + + # Test advisories count + count = pipeline.advisories_count() + + # Verify that it correctly counts the number of advisory files starting with 'PYSEC-' + assert count == 1 + + +def test_collect_advisories(mock_requests_get, mock_zip_data): + # Mock the `requests.get` to return the mock zip data + mock_requests_get.return_value.content = mock_zip_data.read() + + pipeline = PyPIImporterPipeline() + + # Fetch the zip data + pipeline.fetch_zip() + + # Mock the `parse_advisory_data_v2` function to return a dummy AdvisoryData + with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse: + mock_parse.return_value = AdvisoryData( + advisory_id="PYSEC-1234", + summary="Sample PyPI advisory", + references_v2=[{"url": "https://pypi.org/advisory/PYSEC-1234"}], + affected_packages=[], + weaknesses=[], + url="https://pypi.org/advisory/PYSEC-1234", + ) + + # Call the `collect_advisories` method + advisories = list(pipeline.collect_advisories()) + + # Ensure we have 1 advisory + assert len(advisories) == 1 + + # Verify advisory data + advisory = advisories[0] + assert advisory.advisory_id == "PYSEC-1234" + assert advisory.summary == "Sample PyPI advisory" + assert advisory.url == "https://pypi.org/advisory/PYSEC-1234" + + +def test_collect_advisories_invalid_file(mock_requests_get, mock_zip_data): + # Create a mock zip with an invalid file name + zip_buffer = BytesIO() + with ZipFile(zip_buffer, mode="w") as zip_file: + zip_file.writestr("INVALID_FILE.txt", "Invalid content") + + zip_buffer.seek(0) + mock_requests_get.return_value.content = zip_buffer.read() + + pipeline = PyPIImporterPipeline() + + # Fetch the zip data + pipeline.fetch_zip() + + # Mock the `parse_advisory_data_v2` function + with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse: + mock_parse.return_value = AdvisoryData( + advisory_id="PYSEC-1234", + summary="Sample PyPI advisory", + references_v2=[{"url": "https://pypi.org/advisory/PYSEC-1234"}], + affected_packages=[], + weaknesses=[], + url="https://pypi.org/advisory/PYSEC-1234", + ) + + # Call the `collect_advisories` method and check the logging for invalid file + with patch( + "vulnerabilities.pipelines.VulnerableCodeBaseImporterPipelineV2.log" + ) as mock_log: + advisories = list(pipeline.collect_advisories()) + + # Ensure no advisories were yielded due to the invalid file + assert len(advisories) == 0 From 4530e9ef8a0c84cb21709936d50276dacc6b59c9 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 21 May 2025 20:01:42 +0530 Subject: [PATCH 24/44] Add license header files Signed-off-by: Tushar Goel --- .../v2_importers/apache_httpd_importer.py | 9 +++++++++ .../v2_importers/vulnrichment_importer.py | 8 -------- .../test_apache_httpd_importer_pipeline_v2.py | 9 +++++++++ .../pipelines/test_github_importer_v2.py | 20 +++++++++---------- .../test_npm_importer_pipeline_v2.py | 9 +++++++++ .../tests/pipelines/test_pysec_v2_importer.py | 4 ---- 6 files changed, 37 insertions(+), 22 deletions(-) diff --git a/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py index 2c430d967..1fd0c5039 100644 --- a/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py +++ b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py @@ -1,3 +1,12 @@ +# +# Copyright (c) nexB Inc. and others. 
All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + import logging import re import urllib.parse diff --git a/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py index afaf06af2..1f0f8975c 100644 --- a/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py +++ b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py @@ -179,14 +179,6 @@ def parse_cve_advisory(self, raw_data, advisory_url): if match: weaknesses.add(int(match.group(1))) - print(f"cve_id: {cve_id}") - print(f"date_published: {date_published}") - print(references) - print(f"summary: {summary}") - print(f"weaknesses: {weaknesses}") - print(f"advisory_url: {advisory_url}") - print(f"severities: {severities}") - return AdvisoryData( advisory_id=cve_id, aliases=[], diff --git a/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py index 6b0f847c4..24f08b2d7 100644 --- a/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py +++ b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py @@ -1,3 +1,12 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + import pytest import requests diff --git a/vulnerabilities/tests/pipelines/test_github_importer_v2.py b/vulnerabilities/tests/pipelines/test_github_importer_v2.py index e6470eadf..ec3ab5a04 100644 --- a/vulnerabilities/tests/pipelines/test_github_importer_v2.py +++ b/vulnerabilities/tests/pipelines/test_github_importer_v2.py @@ -1,17 +1,17 @@ -from datetime import datetime -from unittest.mock import MagicMock +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + from unittest.mock import patch import pytest from packageurl import PackageURL -from univers.version_constraint import VersionConstraint -from univers.version_range import RANGE_CLASS_BY_SCHEMES -from univers.versions import SemverVersion - -from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Reference -from vulnerabilities.importer import VulnerabilitySeverity + from vulnerabilities.pipelines.v2_importers.github_importer import GitHubAPIImporterPipeline from vulnerabilities.pipelines.v2_importers.github_importer import get_cwes_from_github_advisory from vulnerabilities.pipelines.v2_importers.github_importer import get_purl diff --git a/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py index a7cd86166..7063174b6 100644 --- a/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py +++ b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py @@ -1,3 +1,12 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + import json from types import SimpleNamespace diff --git a/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py b/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py index 51bb03daf..33c716889 100644 --- a/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py +++ b/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py @@ -1,15 +1,11 @@ import json from io import BytesIO -from unittest.mock import MagicMock from unittest.mock import patch -from zipfile import BadZipFile from zipfile import ZipFile import pytest from vulnerabilities.importer import AdvisoryData -from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline -from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 from vulnerabilities.pipelines.v2_importers.pysec_importer import ( PyPIImporterPipeline, # Path to the PyPI Importer ) From 9679305b2190795eb271fbba92154e82eebf9a24 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 23 May 2025 15:43:18 +0530 Subject: [PATCH 25/44] Add tests for Pypa importer Signed-off-by: Tushar Goel --- .../pipelines/v2_importers/pypa_importer.py | 3 +- .../test_apache_httpd_importer_pipeline_v2.py | 1 - .../test_pypa_v2_importer_pipeline.py | 173 ++++++++++++++++++ 3 files changed, 174 insertions(+), 3 deletions(-) create mode 100644 vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py diff --git a/vulnerabilities/pipelines/v2_importers/pypa_importer.py b/vulnerabilities/pipelines/v2_importers/pypa_importer.py index ef20691a3..741f500c4 100644 --- a/vulnerabilities/pipelines/v2_importers/pypa_importer.py +++ b/vulnerabilities/pipelines/v2_importers/pypa_importer.py @@ -14,7 +14,6 @@ from fetchcode.vcs import fetch_via_vcs from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importers.osv import parse_advisory_data_v2 from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 from vulnerabilities.utils import get_advisory_url @@ -46,7 +45,7 @@ def advisories_count(self): return sum(1 for _ in vulns_directory.rglob("*.yaml")) def collect_advisories(self) -> Iterable[AdvisoryData]: 
- from vulnerabilities.importers.osv import parse_advisory_data + from vulnerabilities.importers.osv import parse_advisory_data_v2 base_directory = Path(self.vcs_response.dest_dir) vulns_directory = base_directory / "vulns" diff --git a/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py index 24f08b2d7..94454c473 100644 --- a/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py +++ b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py @@ -14,7 +14,6 @@ from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import ApacheHTTPDImporterPipeline from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import fetch_links from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import get_weaknesses -from vulnerabilities.severity_systems import APACHE_HTTPD # Dummy responses diff --git a/vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py new file mode 100644 index 000000000..20aa63387 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py @@ -0,0 +1,173 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest +import saneyaml + +from vulnerabilities.importer import AdvisoryData + + +@pytest.fixture +def mock_vcs_response(): + # Mock the vcs_response from fetch_via_vcs + mock_response = MagicMock() + mock_response.dest_dir = "/mock/repo" + mock_response.delete = MagicMock() + return mock_response + + +@pytest.fixture +def mock_fetch_via_vcs(mock_vcs_response): + with patch("vulnerabilities.pipelines.v2_importers.pypa_importer.fetch_via_vcs") as mock: + mock.return_value = mock_vcs_response + yield mock + + +@pytest.fixture +def mock_pathlib(tmp_path): + # Mock the Path structure to simulate the `vulns` directory and advisory files + vulns_dir = tmp_path / "vulns" + vulns_dir.mkdir() + + advisory_file = vulns_dir / "CVE-2021-1234.yaml" + advisory_file.write_text( + """ + id: CVE-2021-1234 + summary: Sample PyPI vulnerability + references: + - https://pypi.org/advisory/CVE-2021-1234 + """ + ) + return vulns_dir + + +def test_clone(mock_fetch_via_vcs, mock_vcs_response): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Test the `clone` method to ensure it calls `fetch_via_vcs` + pipeline = PyPaImporterPipeline() + pipeline.clone() + + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + assert pipeline.vcs_response == mock_vcs_response + + +def test_advisories_count(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Mock `vcs_response.dest_dir` to point to the temporary directory + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + pipeline = PyPaImporterPipeline() + + # Call clone() to set the vcs_response attribute + pipeline.clone() + 
mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + + count = pipeline.advisories_count() + + # Check that the count matches the number of YAML files in the `vulns` directory + assert count == 1 + + +def test_collect_advisories(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Mock `vcs_response.dest_dir` to point to the temporary directory + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + # Mock `parse_advisory_data` to return an AdvisoryData object + with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse: + mock_parse.return_value = AdvisoryData( + advisory_id="CVE-2021-1234", + summary="Sample PyPI vulnerability", + references_v2=[{"url": "https://pypi.org/advisory/CVE-2021-1234"}], + affected_packages=[], + weaknesses=[], + url="https://pypi.org/advisory/CVE-2021-1234", + ) + + pipeline = PyPaImporterPipeline() + pipeline.clone() + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + advisories = list(pipeline.collect_advisories()) + + # Ensure that advisories are parsed correctly + assert len(advisories) == 1 + advisory = advisories[0] + assert advisory.advisory_id == "CVE-2021-1234" + assert advisory.summary == "Sample PyPI vulnerability" + assert advisory.url == "https://pypi.org/advisory/CVE-2021-1234" + + +def test_clean_downloads(mock_vcs_response): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Test the `clean_downloads` method to ensure the repository is deleted + pipeline = PyPaImporterPipeline() + pipeline.vcs_response = mock_vcs_response + + pipeline.clean_downloads() + + mock_vcs_response.delete.assert_called_once() + + +def test_on_failure(mock_vcs_response): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Test the `on_failure` method to ensure `clean_downloads` is called on failure + pipeline = PyPaImporterPipeline() + pipeline.vcs_response = mock_vcs_response + + with patch.object(pipeline, "clean_downloads") as mock_clean: + pipeline.on_failure() + + mock_clean.assert_called_once() + + +def test_collect_advisories_with_invalid_yaml(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Create an invalid YAML file + invalid_file = mock_pathlib / "invalid_file.yaml" + invalid_file.write_text("invalid_yaml") + + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse: + # Mock parse_advisory_data to raise an error on invalid YAML + mock_parse.side_effect = saneyaml.YAMLError("Invalid YAML") + + pipeline = PyPaImporterPipeline() + pipeline.clone() + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + with pytest.raises(saneyaml.YAMLError): + list(pipeline.collect_advisories()) + + +def test_advisories_count_empty(mock_vcs_response, mock_fetch_via_vcs): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Mock an empty 'vulns' directory + mock_vcs_response.dest_dir = "/mock/empty_repo" + pipeline = 
PyPaImporterPipeline() + pipeline.clone() + # Test that advisories_count returns 0 for an empty directory + count = pipeline.advisories_count() + assert count == 0 From c5258b945f81c5b8a8fdfbddf462d2e2590217d0 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 23 May 2025 15:56:59 +0530 Subject: [PATCH 26/44] Add tests for vulnrichment importer pipeline v2 Signed-off-by: Tushar Goel --- .../test_vulnrichment_v2_importer.py | 205 ++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py diff --git a/vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py b/vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py new file mode 100644 index 000000000..f926058c2 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py @@ -0,0 +1,205 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import json +from pathlib import Path +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines.v2_importers.vulnrichment_importer import VulnrichImporterPipeline + + +@pytest.fixture +def mock_vcs_response(): + # Mock the vcs_response from fetch_via_vcs + mock_response = MagicMock() + mock_response.dest_dir = "/mock/repo" + mock_response.delete = MagicMock() + return mock_response + + +@pytest.fixture +def mock_fetch_via_vcs(mock_vcs_response): + with patch( + "vulnerabilities.pipelines.v2_importers.vulnrichment_importer.fetch_via_vcs" + ) as mock: + mock.return_value = mock_vcs_response + yield mock + + +@pytest.fixture +def mock_pathlib(tmp_path): + # Create a mock filesystem with a 'vulns' directory and JSON files + vulns_dir = tmp_path / "vulns" + vulns_dir.mkdir() + + advisory_file = vulns_dir / "CVE-2021-1234.json" + advisory_file.write_text( + json.dumps( + { + "cveMetadata": { + "cveId": "CVE-2021-1234", + "state": "PUBLIC", + "datePublished": "2021-01-01", + }, + "containers": { + "cna": { + "descriptions": [{"lang": "en", "value": "Sample PyPI vulnerability"}], + "metrics": [ + { + "cvssV4_0": { + "baseScore": 7.5, + "vectorString": "AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H", + } + } + ], + "affected": [{"cpes": ["cpe:/a:example:package"]}], + "references": [{"url": "https://example.com", "tags": ["exploit"]}], + } + }, + } + ) + ) + return vulns_dir + + +def test_clone(mock_fetch_via_vcs, mock_vcs_response): + # Test the `clone` method to ensure the repository is cloned correctly + pipeline = VulnrichImporterPipeline() + pipeline.clone() + + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + assert pipeline.vcs_response == mock_vcs_response + + +def test_advisories_count(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + pipeline = VulnrichImporterPipeline() + pipeline.clone() + count = pipeline.advisories_count() + + assert count == 0 + + +def test_collect_advisories(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + # Mock `vcs_response.dest_dir` to point to the temporary directory + 
mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + # Mock `parse_cve_advisory` to return an AdvisoryData object + with patch( + "vulnerabilities.pipelines.v2_importers.vulnrichment_importer.VulnrichImporterPipeline.parse_cve_advisory" + ) as mock_parse: + mock_parse.return_value = AdvisoryData( + advisory_id="CVE-2021-1234", + summary="Sample PyPI vulnerability", + references_v2=[{"url": "https://example.com"}], + affected_packages=[], + weaknesses=[], + url="https://example.com", + severities=[ + VulnerabilitySeverity( + system="cvssv4", + value=7.5, + scoring_elements="AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H", + ) + ], + ) + + pipeline = VulnrichImporterPipeline() + pipeline.clone() + advisories = list(pipeline.collect_advisories()) + + # Ensure that advisories are parsed correctly + assert len(advisories) == 1 + advisory = advisories[0] + assert advisory.advisory_id == "CVE-2021-1234" + assert advisory.summary == "Sample PyPI vulnerability" + assert advisory.url == "https://example.com" + + +def test_clean_downloads(mock_vcs_response, mock_fetch_via_vcs): + # Test the `clean_downloads` method to ensure the repository is deleted + pipeline = VulnrichImporterPipeline() + pipeline.clone() + pipeline.vcs_response = mock_vcs_response + + pipeline.clean_downloads() + + mock_vcs_response.delete.assert_called_once() + + +def test_on_failure(mock_vcs_response, mock_fetch_via_vcs): + pipeline = VulnrichImporterPipeline() + pipeline.clone() + pipeline.vcs_response = mock_vcs_response + + with patch.object(pipeline, "clean_downloads") as mock_clean: + pipeline.on_failure() + + mock_clean.assert_called_once() + + +def test_parse_cve_advisory(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + from vulnerabilities.pipelines.v2_importers.vulnrichment_importer import ( + VulnrichImporterPipeline, + ) + + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + raw_data = { + "cveMetadata": {"cveId": "CVE-2021-1234", "state": "PUBLIC", "datePublished": "2021-01-01"}, + "containers": { + "cna": { + "descriptions": [{"lang": "en", "value": "Sample PyPI vulnerability"}], + "metrics": [ + { + "cvssV4_0": { + "baseScore": 7.5, + "vectorString": "AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H", + } + } + ], + "affected": [{"cpes": ["cpe:/a:example:package"]}], + "references": [{"url": "https://example.com", "tags": ["exploit"]}], + } + }, + } + advisory_url = "https://github.com/cisagov/vulnrichment/blob/develop/CVE-2021-1234.json" + + pipeline = VulnrichImporterPipeline() + pipeline.clone() + advisory = pipeline.parse_cve_advisory(raw_data, advisory_url) + + assert advisory.advisory_id == "CVE-2021-1234" + assert advisory.summary == "Sample PyPI vulnerability" + assert advisory.url == advisory_url + assert len(advisory.severities) == 1 + assert advisory.severities[0].value == 7.5 + + +def test_collect_advisories_with_invalid_json(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + invalid_file = mock_pathlib / "invalid_file.json" + invalid_file.write_text("invalid_json") + + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + with patch( + "vulnerabilities.pipelines.v2_importers.vulnrichment_importer.VulnrichImporterPipeline.parse_cve_advisory" + ) as mock_parse: + mock_parse.side_effect = json.JSONDecodeError("Invalid JSON", "", 0) + + pipeline = VulnrichImporterPipeline() + pipeline.clone() + with pytest.raises(json.JSONDecodeError): + list(pipeline.collect_advisories()) From 1de5e2a6b40ceb4f6855cae849a0b2a665062a4d Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 28 May 2025 17:47:56 +0530 
Subject: [PATCH 27/44] Add UI for V2 Signed-off-by: Tushar Goel --- vulnerabilities/forms.py | 9 + vulnerabilities/improvers/__init__.py | 17 +- ...soryreference_advisoryseverity_and_more.py | 12 +- ...93_alter_advisoryreference_reference_id.py | 23 -- ...alter_advisoryv2_datasource_id_and_more.py | 145 ++++++++ vulnerabilities/models.py | 328 +++++++++++++++- vulnerabilities/pipelines/__init__.py | 15 +- .../v2_importers/apache_httpd_importer.py | 66 ++++ .../pipelines/v2_importers/github_importer.py | 6 +- .../pipelines/v2_importers/gitlab_importer.py | 16 +- .../pipelines/v2_importers/npm_importer.py | 2 + .../pipelines/v2_importers/nvd_importer.py | 2 +- .../pipelines/v2_importers/pypa_importer.py | 2 + .../pipelines/v2_importers/pysec_importer.py | 3 +- .../v2_improvers/compute_package_risk.py | 143 +++++++ .../computer_package_version_rank.py | 93 +++++ .../v2_improvers/enhance_with_exploitdb.py | 169 +++++++++ .../v2_improvers/enhance_with_kev.py | 103 ++++++ .../v2_improvers/enhance_with_metasploit.py | 126 +++++++ .../v2_improvers/flag_ghost_packages.py | 104 ++++++ vulnerabilities/pipes/advisory.py | 2 +- vulnerabilities/risk.py | 6 +- vulnerabilities/templates/index_v2.html | 33 ++ .../templates/package_details_v2.html | 349 ++++++++++++++++++ .../templates/package_search_box_v2.html | 48 +++ vulnerabilities/templates/packages_v2.html | 84 +++++ vulnerabilities/utils.py | 10 +- vulnerabilities/views.py | 104 +++++- vulnerablecode/urls.py | 22 +- 29 files changed, 1978 insertions(+), 64 deletions(-) delete mode 100644 vulnerabilities/migrations/0093_alter_advisoryreference_reference_id.py create mode 100644 vulnerabilities/migrations/0093_alter_advisoryv2_datasource_id_and_more.py create mode 100644 vulnerabilities/pipelines/v2_improvers/compute_package_risk.py create mode 100644 vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py create mode 100644 vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py create mode 100644 vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py create mode 100644 vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py create mode 100644 vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py create mode 100644 vulnerabilities/templates/index_v2.html create mode 100644 vulnerabilities/templates/package_details_v2.html create mode 100644 vulnerabilities/templates/package_search_box_v2.html create mode 100644 vulnerabilities/templates/packages_v2.html diff --git a/vulnerabilities/forms.py b/vulnerabilities/forms.py index 74a10340c..1e714a44b 100644 --- a/vulnerabilities/forms.py +++ b/vulnerabilities/forms.py @@ -36,6 +36,15 @@ class VulnerabilitySearchForm(forms.Form): ) +class AdvisorySearchForm(forms.Form): + + search = forms.CharField( + required=True, + widget=forms.TextInput( + attrs={"placeholder": "Advisory id or alias such as CVE or GHSA"} + ), + ) + class ApiUserCreationForm(forms.ModelForm): """ Support a simplified creation for API-only users directly from the UI. 
diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 08cce6ff9..520ef36c3 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -21,6 +21,16 @@ from vulnerabilities.pipelines import flag_ghost_packages from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline from vulnerabilities.pipelines import remove_duplicate_advisories +from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2 +from vulnerabilities.pipelines.v2_improvers import ( + computer_package_version_rank as compute_version_rank_v2, +) +from vulnerabilities.pipelines.v2_improvers import enhance_with_exploitdb as exploitdb_v2 +from vulnerabilities.pipelines.v2_improvers import enhance_with_kev as enhance_with_kev_v2 +from vulnerabilities.pipelines.v2_improvers import ( + enhance_with_metasploit as enhance_with_metasploit_v2, +) +from vulnerabilities.pipelines.v2_improvers import flag_ghost_packages as flag_ghost_packages_v2 IMPROVERS_REGISTRY = [ valid_versions.GitHubBasicImprover, @@ -50,7 +60,12 @@ add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline, remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline, populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, - compute_advisory_todo.ComputeToDo, + exploitdb_v2.ExploitDBImproverPipeline, + enhance_with_kev_v2.VulnerabilityKevPipeline, + flag_ghost_packages_v2.FlagGhostPackagePipeline, + enhance_with_metasploit_v2.MetasploitImproverPipeline, + compute_package_risk_v2.ComputePackageRiskPipeline, + compute_version_rank_v2.ComputeVersionRankPipeline, ] IMPROVERS_REGISTRY = { diff --git a/vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py b/vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py index 22e2ab6c3..101eec8b4 100644 --- a/vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py +++ b/vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.20 on 2025-05-21 05:33 +# Generated by Django 4.2.20 on 2025-05-27 10:43 from django.db import migrations, models @@ -67,7 +67,7 @@ class Migration(migrations.Migration): blank=True, db_index=True, help_text="An optional reference ID, such as DSA-4465-1 when available", - max_length=200, + max_length=500, ), ), ], @@ -261,7 +261,7 @@ class Migration(migrations.Migration): ( "advisory_id", models.CharField( - help_text="An advisory is a unique vulnerability identifier in some database, such as CVE-2020-2233", + help_text="An advisory is a unique vulnerability identifier in some database, such as PYSEC-2020-2233", max_length=50, ), ), @@ -293,7 +293,7 @@ class Migration(migrations.Migration): ), ), ( - "created_by", + "datasource_ID", models.CharField( help_text="Fully qualified name of the importer prefixed with themodule name importing the advisory. 
Eg:vulnerabilities.pipeline.nginx_importer.NginxImporterPipeline", max_length=100, @@ -330,7 +330,7 @@ class Migration(migrations.Migration): "affecting_packages", models.ManyToManyField( help_text="A list of packages that are affected by this advisory.", - related_name="fixing_advisories", + related_name="affected_by_advisories", to="vulnerabilities.packagev2", ), ), @@ -346,7 +346,7 @@ class Migration(migrations.Migration): "fixed_by_packages", models.ManyToManyField( help_text="A list of packages that are reported by this advisory.", - related_name="affected_by_advisories", + related_name="fixing_advisorues", to="vulnerabilities.packagev2", ), ), diff --git a/vulnerabilities/migrations/0093_alter_advisoryreference_reference_id.py b/vulnerabilities/migrations/0093_alter_advisoryreference_reference_id.py deleted file mode 100644 index 9230cb4fa..000000000 --- a/vulnerabilities/migrations/0093_alter_advisoryreference_reference_id.py +++ /dev/null @@ -1,23 +0,0 @@ -# Generated by Django 4.2.20 on 2025-05-21 06:40 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", "0092_advisoryalias_advisoryreference_advisoryseverity_and_more"), - ] - - operations = [ - migrations.AlterField( - model_name="advisoryreference", - name="reference_id", - field=models.CharField( - blank=True, - db_index=True, - help_text="An optional reference ID, such as DSA-4465-1 when available", - max_length=500, - ), - ), - ] diff --git a/vulnerabilities/migrations/0093_alter_advisoryv2_datasource_id_and_more.py b/vulnerabilities/migrations/0093_alter_advisoryv2_datasource_id_and_more.py new file mode 100644 index 000000000..003a79ffd --- /dev/null +++ b/vulnerabilities/migrations/0093_alter_advisoryv2_datasource_id_and_more.py @@ -0,0 +1,145 @@ +# Generated by Django 4.2.20 on 2025-05-27 12:30 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0092_advisoryalias_advisoryreference_advisoryseverity_and_more"), + ] + + operations = [ + migrations.AlterField( + model_name="advisoryv2", + name="datasource_ID", + field=models.CharField( + help_text="Fully qualified name of the importer prefixed with themodule name importing the advisory. 
Eg:nginx_importer_v2", + max_length=100, + ), + ), + migrations.AlterField( + model_name="advisoryv2", + name="fixed_by_packages", + field=models.ManyToManyField( + help_text="A list of packages that are reported by this advisory.", + related_name="fixing_advisories", + to="vulnerabilities.packagev2", + ), + ), + migrations.CreateModel( + name="AdvisoryExploit", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "date_added", + models.DateField( + blank=True, + help_text="The date the vulnerability was added to an exploit catalog.", + null=True, + ), + ), + ( + "description", + models.TextField( + blank=True, + help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description", + null=True, + ), + ), + ( + "required_action", + models.TextField( + blank=True, + help_text="The required action to address the vulnerability, typically to apply vendor updates or apply vendor mitigations or to discontinue use.", + null=True, + ), + ), + ( + "due_date", + models.DateField( + blank=True, + help_text="The date the required action is due, which applies to all USA federal civilian executive branch (FCEB) agencies, but all organizations are strongly encouraged to execute the required action", + null=True, + ), + ), + ( + "notes", + models.TextField( + blank=True, + help_text="Additional notes and resources about the vulnerability, often a URL to vendor instructions.", + null=True, + ), + ), + ( + "known_ransomware_campaign_use", + models.BooleanField( + default=False, + help_text="Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign; \n or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.", + ), + ), + ( + "source_date_published", + models.DateField( + blank=True, + help_text="The date that the exploit was published or disclosed.", + null=True, + ), + ), + ( + "exploit_type", + models.TextField( + blank=True, + help_text="The type of the exploit as provided by the original upstream data source.", + null=True, + ), + ), + ( + "platform", + models.TextField( + blank=True, + help_text="The platform associated with the exploit as provided by the original upstream data source.", + null=True, + ), + ), + ( + "source_date_updated", + models.DateField( + blank=True, + help_text="The date the exploit was updated in the original upstream data source.", + null=True, + ), + ), + ( + "data_source", + models.TextField( + blank=True, + help_text="The source of the exploit information, such as CISA KEV, exploitdb, metaspoit, or others.", + null=True, + ), + ), + ( + "source_url", + models.URLField( + blank=True, + help_text="The URL to the exploit as provided in the original upstream data source.", + null=True, + ), + ), + ( + "advisory", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="exploits", + to="vulnerabilities.advisoryv2", + ), + ), + ], + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index def38e80f..2e00adbc2 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1127,7 +1127,6 @@ def fixing_vulnerabilities(self): """ Return a queryset of Vulnerabilities that are fixed by this package. 
""" - print("A") return self.fixed_by_vulnerabilities.all() @property @@ -1325,7 +1324,18 @@ def url(self): class AdvisoryQuerySet(BaseQuerySet): - pass + + def search(query): + """ + This function will take a string as an input, the string could be an alias or an advisory ID or + something in the advisory description. + """ + return AdvisoryV2.objects.filter( + Q(advisory_id__icontains=query) + | Q(aliases__alias__icontains=query) + | Q(summary__icontains=query) + | Q(references__url__icontains=query) + ).distinct() # FIXME: Remove when migration from Vulnerability to Advisory is completed @@ -2522,7 +2532,7 @@ class AdvisoryV2(models.Model): null=False, unique=False, help_text="An advisory is a unique vulnerability identifier in some database, " - "such as CVE-2020-2233", + "such as PYSEC-2020-2233", ) unique_content_id = models.CharField( @@ -2562,11 +2572,12 @@ class AdvisoryV2(models.Model): date_imported = models.DateTimeField( blank=True, null=True, help_text="UTC Date on which the advisory was imported" ) - created_by = models.CharField( + # TODO: Rename to datasource ID + datasource_ID = models.CharField( max_length=100, help_text="Fully qualified name of the importer prefixed with the" "module name importing the advisory. Eg:" - "vulnerabilities.pipeline.nginx_importer.NginxImporterPipeline", + "nginx_importer_v2", ) url = models.URLField( blank=False, @@ -2576,13 +2587,13 @@ class AdvisoryV2(models.Model): affecting_packages = models.ManyToManyField( "PackageV2", - related_name="fixing_advisories", + related_name="affected_by_advisories", help_text="A list of packages that are affected by this advisory.", ) fixed_by_packages = models.ManyToManyField( "PackageV2", - related_name="affected_by_advisories", + related_name="fixing_advisories", help_text="A list of packages that are reported by this advisory.", ) @@ -2621,6 +2632,7 @@ def risk_score(self): objects = AdvisoryQuerySet.as_manager() + class Meta: ordering = ["date_published", "unique_content_id"] @@ -2646,6 +2658,15 @@ def to_advisory_data(self) -> "AdvisoryDataV2": url=self.url, ) + @property + def get_aliases(self): + """ + Return a queryset of all Aliases for this vulnerability. + """ + return self.aliases.all() + + alias = get_aliases + class ToDoRelatedAdvisory(models.Model): todo = models.ForeignKey( @@ -2663,6 +2684,56 @@ class Meta: class PackageQuerySetV2(BaseQuerySet, PackageURLQuerySet): + + def search(self, query: str = None): + """ + Return a Package queryset searching for the ``query``. + Make a best effort approach to find matching packages either based + on exact purl, partial purl or just name and namespace. + """ + query = query and query.strip() + if not query: + return self.none() + qs = self + + try: + # if it's a valid purl, try to parse it and use it as is + purl = str(utils.plain_purl(query)) + qs = qs.filter(package_url__istartswith=purl) + except ValueError: + # otherwise use query as a plain string + qs = qs.filter(package_url__icontains=query) + return qs.order_by("package_url") + + def with_vulnerability_counts(self): + return self.annotate( + vulnerability_count=Count( + "affected_by_advisories", + ), + patched_vulnerability_count=Count( + "fixing_advisories", + ), + ) + + def get_fixed_by_package_versions(self, purl: PackageURL, fix=True): + """ + Return a queryset of all the package versions of this `package` that fix any vulnerability. + If `fix` is False, return all package versions whether or not they fix a vulnerability. 
+ """ + filter_dict = { + "name": purl.name, + "namespace": purl.namespace, + "type": purl.type, + "qualifiers": purl.qualifiers, + "subpath": purl.subpath, + } + + if fix: + filter_dict["fixing_advisories__isnull"] = False + + # TODO: why do we need distinct + return PackageV2.objects.filter(**filter_dict).distinct() + def get_or_create_from_purl(self, purl: Union[PackageURL, str]): """ Return a new or existing Package given a ``purl`` PackageURL object or PURL string. @@ -2671,6 +2742,38 @@ def get_or_create_from_purl(self, purl: Union[PackageURL, str]): return package, is_created + def only_vulnerable(self): + return self._vulnerable(True) + + def only_non_vulnerable(self): + return self._vulnerable(False).filter(is_ghost=False) + + def for_purl(self, purl): + """ + Return a queryset matching the ``purl`` Package URL. + """ + return self.filter(package_url=purl) + + def for_purls(self, purls=()): + """ + Return a queryset of Packages matching a list of PURLs. + """ + return self.filter(package_url__in=purls).distinct() + + def _vulnerable(self, vulnerable=True): + """ + Filter to select only vulnerable or non-vulnearble packages. + """ + return self.with_is_vulnerable().filter(is_vulnerable=vulnerable) + + def with_is_vulnerable(self): + """ + Annotate Package with ``is_vulnerable`` boolean attribute. + """ + return self.annotate( + is_vulnerable=Exists(AdvisoryV2.objects.filter(affecting_packages__pk=OuterRef("pk"))) + ) + class PackageV2(PackageURLMixin): """ @@ -2756,7 +2859,7 @@ def calculate_version_rank(self): `version_rank` values and are closest to this package in terms of version order. """ - group_packages = Package.objects.filter( + group_packages = PackageV2.objects.filter( type=self.type, namespace=self.namespace, name=self.name, @@ -2766,5 +2869,212 @@ def calculate_version_rank(self): sorted_packages = sorted(group_packages, key=lambda p: self.version_class(p.version)) for rank, package in enumerate(sorted_packages, start=1): package.version_rank = rank - Package.objects.bulk_update(sorted_packages, fields=["version_rank"]) + PackageV2.objects.bulk_update(sorted_packages, fields=["version_rank"]) return self.version_rank + + @property + def fixed_package_details(self): + """ + Return a mapping of vulnerabilities that affect this package and the next and + latest non-vulnerable versions. + """ + package_details = {} + package_details["purl"] = PackageURL.from_string(self.purl) + + next_non_vulnerable, latest_non_vulnerable = self.get_non_vulnerable_versions() + package_details["next_non_vulnerable"] = next_non_vulnerable + package_details["latest_non_vulnerable"] = latest_non_vulnerable + + package_details["advisories"] = self.get_affecting_vulnerabilities() + + return package_details + + def get_non_vulnerable_versions(self): + """ + Return a tuple of the next and latest non-vulnerable versions as Package instance. + Return a tuple of (None, None) if there is no non-vulnerable version. 
+ """ + if self.version_rank == 0: + self.calculate_version_rank + non_vulnerable_versions = PackageV2.objects.get_fixed_by_package_versions( + self, fix=False + ).only_non_vulnerable() + + later_non_vulnerable_versions = non_vulnerable_versions.filter( + version_rank__gt=self.version_rank + ) + + later_non_vulnerable_versions = list(later_non_vulnerable_versions) + + if later_non_vulnerable_versions: + sorted_versions = later_non_vulnerable_versions + next_non_vulnerable = sorted_versions[0] + latest_non_vulnerable = sorted_versions[-1] + return next_non_vulnerable, latest_non_vulnerable + + return None, None + + @cached_property + def version_class(self): + range_class = RANGE_CLASS_BY_SCHEMES.get(self.type) + return range_class.version_class if range_class else Version + + def get_absolute_url(self): + """ + Return this Vulnerability details absolute URL. + """ + return reverse("package_details_v2", args=[self.purl]) + + @cached_property + def current_version(self): + return self.version_class(self.version) + + def get_affecting_vulnerabilities(self): + """ + Return a list of vulnerabilities that affect this package together with information regarding + the versions that fix the vulnerabilities. + """ + if self.version_rank == 0: + self.calculate_version_rank + package_details_advs = [] + + fixed_by_packages = PackageV2.objects.get_fixed_by_package_versions(self, fix=True) + + package_advisories = self.affected_by_advisories.prefetch_related( + Prefetch( + "fixed_by_packages", + queryset=fixed_by_packages, + to_attr="fixed_packages", + ) + ) + + for adv in package_advisories: + package_details_advs.append({"advisory": adv}) + later_fixed_packages = [] + + for fixed_pkg in adv.fixed_by_packages.all(): + if fixed_pkg not in fixed_by_packages: + continue + fixed_version = self.version_class(fixed_pkg.version) + if fixed_version > self.current_version: + later_fixed_packages.append(fixed_pkg) + + next_fixed_package_vulns = [] + + sort_fixed_by_packages_by_version = [] + if later_fixed_packages: + sort_fixed_by_packages_by_version = sorted( + later_fixed_packages, key=lambda p: p.version_rank + ) + + fixed_by_pkgs = [] + + for vuln_details in package_details_advs: + if vuln_details["advisory"] != adv: + continue + vuln_details["fixed_by_purl"] = [] + vuln_details["fixed_by_purl_advisories"] = [] + + for fixed_by_pkg in sort_fixed_by_packages_by_version: + fixed_by_package_details = {} + fixed_by_purl = PackageURL.from_string(fixed_by_pkg.purl) + next_fixed_package_vulns = list(fixed_by_pkg.affected_by_advisories.all()) + + fixed_by_package_details["fixed_by_purl"] = fixed_by_purl + fixed_by_package_details["fixed_by_purl_advisories"] = next_fixed_package_vulns + fixed_by_pkgs.append(fixed_by_package_details) + + vuln_details["fixed_by_package_details"] = fixed_by_pkgs + + return package_details_advs + + +class AdvisoryExploit(models.Model): + """ + A vulnerability exploit is code used to + take advantage of a security flaw for unauthorized access or malicious activity. 
+ """ + + advisory = models.ForeignKey( + AdvisoryV2, + related_name="exploits", + on_delete=models.CASCADE, + ) + + date_added = models.DateField( + null=True, + blank=True, + help_text="The date the vulnerability was added to an exploit catalog.", + ) + + description = models.TextField( + null=True, + blank=True, + help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description", + ) + + required_action = models.TextField( + null=True, + blank=True, + help_text="The required action to address the vulnerability, typically to " + "apply vendor updates or apply vendor mitigations or to discontinue use.", + ) + + due_date = models.DateField( + null=True, + blank=True, + help_text="The date the required action is due, which applies" + " to all USA federal civilian executive branch (FCEB) agencies, " + "but all organizations are strongly encouraged to execute the required action", + ) + + notes = models.TextField( + null=True, + blank=True, + help_text="Additional notes and resources about the vulnerability," + " often a URL to vendor instructions.", + ) + + known_ransomware_campaign_use = models.BooleanField( + default=False, + help_text="""Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign; + or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.""", + ) + + source_date_published = models.DateField( + null=True, blank=True, help_text="The date that the exploit was published or disclosed." + ) + + exploit_type = models.TextField( + null=True, + blank=True, + help_text="The type of the exploit as provided by the original upstream data source.", + ) + + platform = models.TextField( + null=True, + blank=True, + help_text="The platform associated with the exploit as provided by the original upstream data source.", + ) + + source_date_updated = models.DateField( + null=True, + blank=True, + help_text="The date the exploit was updated in the original upstream data source.", + ) + + data_source = models.TextField( + null=True, + blank=True, + help_text="The source of the exploit information, such as CISA KEV, exploitdb, metaspoit, or others.", + ) + + source_url = models.URLField( + null=True, + blank=True, + help_text="The URL to the exploit as provided in the original upstream data source.", + ) + + @property + def get_known_ransomware_campaign_use_type(self): + return "Known" if self.known_ransomware_campaign_use else "Unknown" diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index c7ebe3be7..c0662bbb1 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -366,20 +366,21 @@ def get_published_package_versions( """ Return a list of versions published before `until` for the `package_url` """ + versions_before_until = [] try: versions = package_versions.versions(str(package_url)) + for version in versions or []: + if until and version.release_date and version.release_date > until: + continue + versions_before_until.append(version.value) + + return versions_before_until except Exception as e: self.log( f"Failed to fetch versions for package {str(package_url)} {e!r}", level=logging.ERROR, ) - versions_before_until = [] - for version in versions or []: - if until and version.release_date and version.release_date > until: - continue - versions_before_until.append(version.value) - - return versions_before_until + return [] def get_impacted_packages(self, affected_packages, 
advisory_date_published): """ diff --git a/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py index 1fd0c5039..5c3296a81 100644 --- a/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py +++ b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py @@ -147,6 +147,72 @@ class ApacheHTTPDImporterPipeline(VulnerableCodeBaseImporterPipelineV2): links = [] + ignorable_versions = frozenset( + [ + "AGB_BEFORE_AAA_CHANGES", + "APACHE_1_2b1", + "APACHE_1_2b10", + "APACHE_1_2b11", + "APACHE_1_2b2", + "APACHE_1_2b3", + "APACHE_1_2b4", + "APACHE_1_2b5", + "APACHE_1_2b6", + "APACHE_1_2b7", + "APACHE_1_2b8", + "APACHE_1_2b9", + "APACHE_1_3_PRE_NT", + "APACHE_1_3a1", + "APACHE_1_3b1", + "APACHE_1_3b2", + "APACHE_1_3b3", + "APACHE_1_3b5", + "APACHE_1_3b6", + "APACHE_1_3b7", + "APACHE_2_0_2001_02_09", + "APACHE_2_0_52_WROWE_RC1", + "APACHE_2_0_ALPHA", + "APACHE_2_0_ALPHA_2", + "APACHE_2_0_ALPHA_3", + "APACHE_2_0_ALPHA_4", + "APACHE_2_0_ALPHA_5", + "APACHE_2_0_ALPHA_6", + "APACHE_2_0_ALPHA_7", + "APACHE_2_0_ALPHA_8", + "APACHE_2_0_ALPHA_9", + "APACHE_2_0_BETA_CANDIDATE_1", + "APACHE_BIG_SYMBOL_RENAME_POST", + "APACHE_BIG_SYMBOL_RENAME_PRE", + "CHANGES", + "HTTPD_LDAP_1_0_0", + "INITIAL", + "MOD_SSL_2_8_3", + "PCRE_3_9", + "POST_APR_SPLIT", + "PRE_APR_CHANGES", + "STRIKER_2_0_51_RC1", + "STRIKER_2_0_51_RC2", + "STRIKER_2_1_0_RC1", + "WROWE_2_0_43_PRE1", + "apache-1_3-merge-1-post", + "apache-1_3-merge-1-pre", + "apache-1_3-merge-2-post", + "apache-1_3-merge-2-pre", + "apache-apr-merge-3", + "apache-doc-split-01", + "dg_last_1_2_doc_merge", + "djg-apache-nspr-07", + "djg_nspr_split", + "moving_to_httpd_module", + "mpm-3", + "mpm-merge-1", + "mpm-merge-2", + "post_ajp_proxy", + "pre_ajp_proxy", + ] + ) + unfurl_version_ranges = True + @classmethod def steps(cls): return (cls.collect_and_store_advisories,) diff --git a/vulnerabilities/pipelines/v2_importers/github_importer.py b/vulnerabilities/pipelines/v2_importers/github_importer.py index 074b5b37e..aff1d27ba 100644 --- a/vulnerabilities/pipelines/v2_importers/github_importer.py +++ b/vulnerabilities/pipelines/v2_importers/github_importer.py @@ -36,7 +36,7 @@ class GitHubAPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """Collect GitHub advisories.""" pipeline_id = "github_importer_v2" - + label = "GitHub" spdx_license_expression = "CC-BY-4.0" license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md" importer_name = "GHSA Importer" @@ -117,8 +117,8 @@ def steps(cls): # "COMPOSER": "composer", # "PIP": "pypi", # "RUBYGEMS": "gem", - # "NPM": "npm", - "RUST": "cargo", + "NPM": "npm", + # "RUST": "cargo", # "GO": "golang", } diff --git a/vulnerabilities/pipelines/v2_importers/gitlab_importer.py b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py index b642b9611..0001580cb 100644 --- a/vulnerabilities/pipelines/v2_importers/gitlab_importer.py +++ b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py @@ -37,12 +37,14 @@ class GitLabImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """Collect advisory from GitLab Advisory Database (Open Source Edition).""" pipeline_id = "gitlab_importer_v2" - + label = "GitLab" spdx_license_expression = "MIT" license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE" importer_name = "GitLab Importer" repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/" + unfurl_version_ranges = True + @classmethod def steps(cls): return ( @@ -52,15 +54,15 
@@ def steps(cls): ) purl_type_by_gitlab_scheme = { - "conan": "conan", - "gem": "gem", + # "conan": "conan", + # "gem": "gem", # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742 # "go": "golang", - "maven": "maven", + # "maven": "maven", "npm": "npm", - "nuget": "nuget", - "packagist": "composer", - "pypi": "pypi", + # "nuget": "nuget", + # "packagist": "composer", + # "pypi": "pypi", } gitlab_scheme_by_purl_type = {v: k for k, v in purl_type_by_gitlab_scheme.items()} diff --git a/vulnerabilities/pipelines/v2_importers/npm_importer.py b/vulnerabilities/pipelines/v2_importers/npm_importer.py index 0f61aacb6..098895e75 100644 --- a/vulnerabilities/pipelines/v2_importers/npm_importer.py +++ b/vulnerabilities/pipelines/v2_importers/npm_importer.py @@ -39,6 +39,8 @@ class NpmImporterPipeline(VulnerableCodeBaseImporterPipelineV2): repo_url = "git+https://github.com/nodejs/security-wg" importer_name = "Npm Importer" + unfurl_version_ranges = True + @classmethod def steps(cls): return ( diff --git a/vulnerabilities/pipelines/v2_importers/nvd_importer.py b/vulnerabilities/pipelines/v2_importers/nvd_importer.py index c9105c276..c4d0c09c5 100644 --- a/vulnerabilities/pipelines/v2_importers/nvd_importer.py +++ b/vulnerabilities/pipelines/v2_importers/nvd_importer.py @@ -31,7 +31,7 @@ class NVDImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """Collect advisories from NVD.""" pipeline_id = "nvd_importer_v2" - + label = "NVD" # See https://github.com/nexB/vulnerablecode/issues/665 for follow up spdx_license_expression = ( "LicenseRef-scancode-us-govt-public-domain AND LicenseRef-scancode-cve-tou" diff --git a/vulnerabilities/pipelines/v2_importers/pypa_importer.py b/vulnerabilities/pipelines/v2_importers/pypa_importer.py index 741f500c4..63449926c 100644 --- a/vulnerabilities/pipelines/v2_importers/pypa_importer.py +++ b/vulnerabilities/pipelines/v2_importers/pypa_importer.py @@ -28,6 +28,8 @@ class PyPaImporterPipeline(VulnerableCodeBaseImporterPipelineV2): repo_url = "git+https://github.com/pypa/advisory-database" importer_name = "Pypa Importer" + unfurl_version_ranges = True + @classmethod def steps(cls): return ( diff --git a/vulnerabilities/pipelines/v2_importers/pysec_importer.py b/vulnerabilities/pipelines/v2_importers/pysec_importer.py index 42a72d04e..5b39af4af 100644 --- a/vulnerabilities/pipelines/v2_importers/pysec_importer.py +++ b/vulnerabilities/pipelines/v2_importers/pysec_importer.py @@ -15,7 +15,6 @@ import requests from vulnerabilities.importer import AdvisoryData -from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 @@ -29,6 +28,8 @@ class PyPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): spdx_license_expression = "CC-BY-4.0" importer_name = "PyPI Importer" + unfurl_version_ranges = True + @classmethod def steps(cls): return ( diff --git a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py new file mode 100644 index 000000000..85f31dd8e --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py @@ -0,0 +1,143 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. 
+# See https://aboutcode.org for more information about nexB OSS projects. +# +from aboutcode.pipeline import LoopProgress + +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.risk import compute_package_risk +from vulnerabilities.risk import compute_vulnerability_risk_factors + + +class ComputePackageRiskPipeline(VulnerableCodePipeline): + """ + Compute risk score for packages. + + See https://github.com/aboutcode-org/vulnerablecode/issues/1543 + """ + + pipeline_id = "compute_package_risk_v2" + license_expression = None + + @classmethod + def steps(cls): + return ( + cls.compute_and_store_vulnerability_risk_score, + cls.compute_and_store_package_risk_score, + ) + + def compute_and_store_vulnerability_risk_score(self): + affected_advisories = ( + AdvisoryV2.objects.filter(affecting_packages__isnull=False) + .prefetch_related( + "references", + "severities", + "exploits", + ) + .distinct() + ) + + self.log( + f"Calculating risk for {affected_advisories.count():,d} vulnerability with a affected packages records" + ) + + progress = LoopProgress(total_iterations=affected_advisories.count(), logger=self.log) + + updatables = [] + updated_vulnerability_count = 0 + batch_size = 5000 + + for advisory in progress.iter(affected_advisories.paginated(per_page=batch_size)): + severities = advisory.severities.all() + references = advisory.references.all() + exploits = advisory.exploits.all() + + weighted_severity, exploitability = compute_vulnerability_risk_factors( + references=references, + severities=severities, + exploits=exploits, + ) + advisory.weighted_severity = weighted_severity + advisory.exploitability = exploitability + print( + f"Computed risk for {advisory.advisory_id} with weighted_severity={weighted_severity} and exploitability={exploitability}" + ) + updatables.append(advisory) + + if len(updatables) >= batch_size: + updated_vulnerability_count += bulk_update( + model=AdvisoryV2, + items=updatables, + fields=["weighted_severity", "exploitability"], + logger=self.log, + ) + + updated_vulnerability_count += bulk_update( + model=AdvisoryV2, + items=updatables, + fields=["weighted_severity", "exploitability"], + logger=self.log, + ) + + self.log( + f"Successfully added risk score for {updated_vulnerability_count:,d} vulnerability" + ) + + def compute_and_store_package_risk_score(self): + affected_packages = ( + PackageV2.objects.filter(affected_by_advisories__isnull=False) + ).distinct() + + self.log(f"Calculating risk for {affected_packages.count():,d} affected package records") + + progress = LoopProgress( + total_iterations=affected_packages.count(), + logger=self.log, + progress_step=5, + ) + + updatables = [] + updated_package_count = 0 + batch_size = 10000 + + for package in progress.iter(affected_packages.paginated(per_page=batch_size)): + risk_score = compute_package_risk(package) + + if not risk_score: + continue + + package.risk_score = risk_score + updatables.append(package) + + if len(updatables) >= batch_size: + updated_package_count += bulk_update( + model=PackageV2, + items=updatables, + fields=["risk_score"], + logger=self.log, + ) + updated_package_count += bulk_update( + model=PackageV2, + items=updatables, + fields=["risk_score"], + logger=self.log, + ) + self.log(f"Successfully added risk score for {updated_package_count:,d} package") + + +def bulk_update(model, items, fields, logger): + item_count = 0 + if items: + try: + 
model.objects.bulk_update(objs=items, fields=fields) + item_count += len(items) + except Exception as e: + logger(f"Error updating {model.__name__}: {e}") + items.clear() + return item_count diff --git a/vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py b/vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py new file mode 100644 index 000000000..dd10a1695 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py @@ -0,0 +1,93 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from itertools import groupby + +from aboutcode.pipeline import LoopProgress +from django.db import transaction +from univers.version_range import RANGE_CLASS_BY_SCHEMES +from univers.versions import Version + +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class ComputeVersionRankPipeline(VulnerableCodePipeline): + """ + A pipeline to compute and assign version ranks for all packages. + """ + + pipeline_id = "compute_version_rank_v2" + license_expression = None + + @classmethod + def steps(cls): + return (cls.compute_and_store_version_rank,) + + def compute_and_store_version_rank(self): + """ + Compute and assign version ranks to all packages. + """ + groups = PackageV2.objects.only("type", "namespace", "name").order_by( + "type", "namespace", "name" + ) + + def key(package): + return package.type, package.namespace, package.name + + groups = groupby(groups, key=key) + + groups = [(list(x), list(y)) for x, y in groups] + + total_groups = len(groups) + self.log(f"Calculating `version_rank` for {total_groups:,d} groups of packages.") + + progress = LoopProgress( + total_iterations=total_groups, + logger=self.log, + progress_step=5, + ) + + for group, packages in progress.iter(groups): + type, namespace, name = group + if type not in RANGE_CLASS_BY_SCHEMES: + continue + self.update_version_rank_for_group(packages) + + self.log("Successfully populated `version_rank` for all packages.") + + @transaction.atomic + def update_version_rank_for_group(self, packages): + """ + Update the `version_rank` for all packages in a specific group. + """ + + # Sort the packages by version + sorted_packages = self.sort_packages_by_version(packages) + + # Assign version ranks + updates = [] + for rank, package in enumerate(sorted_packages, start=1): + package.version_rank = rank + updates.append(package) + + # Bulk update to save the ranks + PackageV2.objects.bulk_update(updates, fields=["version_rank"]) + + def sort_packages_by_version(self, packages): + """ + Sort packages by version using `version_class`. + """ + + if not packages: + return [] + version_class = RANGE_CLASS_BY_SCHEMES.get(packages[0].type).version_class + if not version_class: + version_class = Version + return sorted(packages, key=lambda p: version_class(p.version)) diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py new file mode 100644 index 000000000..c306502d8 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py @@ -0,0 +1,169 @@ +# +# Copyright (c) nexB Inc. and others. 
All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import csv +import io +import logging +from traceback import format_exc as traceback_format_exc + +import requests +from aboutcode.pipeline import LoopProgress +from dateutil import parser as dateparser +from django.db import DataError + +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryExploit +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class ExploitDBImproverPipeline(VulnerableCodePipeline): + """ + ExploitDB Improver Pipeline: Fetch ExploitDB data, iterate over it to find the vulnerability with + the specified alias, and create or update the ref and ref-type accordingly. + """ + + pipeline_id = "enhance_with_exploitdb_v2" + spdx_license_expression = "GPL-2.0" + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_exploit, + ) + + def fetch_exploits(self): + exploit_db_url = ( + "https://gitlab.com/exploit-database/exploitdb/-/raw/main/files_exploits.csv" + ) + self.log(f"Fetching {exploit_db_url}") + + try: + response = requests.get(exploit_db_url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the Exploit-DB Exploits: {exploit_db_url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + + self.exploit_data = io.StringIO(response.text) + + def add_exploit(self): + + csvreader = csv.DictReader(self.exploit_data) + + raw_data = list(csvreader) + fetched_exploit_count = len(raw_data) + + vulnerability_exploit_count = 0 + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + + for row in progress.iter(raw_data): + vulnerability_exploit_count += add_vulnerability_exploit(row, self.log) + + self.log(f"Successfully added {vulnerability_exploit_count:,d} exploit-db advisory exploit") + + +def add_vulnerability_exploit(row, logger): + advisories = set() + + aliases = row["codes"].split(";") if row["codes"] else [] + + if not aliases: + return 0 + + for raw_alias in aliases: + try: + if alias := AdvisoryAlias.objects.get(alias=raw_alias): + for adv in alias.advisories.all(): + advisories.add(adv) + else: + advs = AdvisoryV2.objects.filter(advisory_id=raw_alias) + for adv in advs: + advisories.add(adv) + except AdvisoryAlias.DoesNotExist: + continue + + if not advisories: + logger(f"No advisory found for aliases {aliases}") + return 0 + + date_added = parse_date(row["date_added"]) + source_date_published = parse_date(row["date_published"]) + source_date_updated = parse_date(row["date_updated"]) + + for advisory in advisories: + add_exploit_references(row["codes"], row["source_url"], row["file"], advisory, logger) + try: + AdvisoryExploit.objects.update_or_create( + advisory=advisory, + data_source="Exploit-DB", + defaults={ + "date_added": date_added, + "description": row["description"], + "known_ransomware_campaign_use": row["verified"], + "source_date_published": source_date_published, + "exploit_type": row["type"], + "platform": row["platform"], + 
"source_date_updated": source_date_updated, + "source_url": row["source_url"], + }, + ) + except DataError as e: + logger( + f"Failed to Create the Vulnerability Exploit-DB with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + return 1 + + +def add_exploit_references(ref_id, direct_url, path, adv, logger): + url_map = { + "file_url": f"https://gitlab.com/exploit-database/exploitdb/-/blob/main/{path}", + "direct_url": direct_url, + } + + for key, url in url_map.items(): + if url: + try: + ref, created = AdvisoryReference.objects.update_or_create( + url=url, + defaults={ + "reference_id": ref_id, + "reference_type": AdvisoryReference.EXPLOIT, + }, + ) + + if created: + ref.advisories.add(adv) + ref.save() + logger(f"Created {ref} for {adv} with {key}={url}") + + except DataError as e: + logger( + f"Failed to Create the Vulnerability Reference For Exploit-DB with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + + +def parse_date(date_string): + if date_string: + try: + date_obj = dateparser.parse(date_string).date() + return date_obj.strftime("%Y-%m-%d") + except (ValueError, TypeError, Exception) as e: + logging.error( + f"Error while parsing ExploitDB date '{date_string}' with error {e!r}:\n{traceback_format_exc()}" + ) + return diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py new file mode 100644 index 000000000..486d79232 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py @@ -0,0 +1,103 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +from traceback import format_exc as traceback_format_exc + +import requests +from aboutcode.pipeline import LoopProgress + +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryExploit +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class VulnerabilityKevPipeline(VulnerableCodePipeline): + """ + Known Exploited Vulnerabilities Pipeline: Retrieve KEV data, iterate through it to identify vulnerabilities + by their associated aliases, and create or update the corresponding Exploit instances. 
+ """ + + pipeline_id = "enhance_with_kev_v2" + license_expression = None + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_exploits, + ) + + def fetch_exploits(self): + kev_url = "https://raw.githubusercontent.com/aboutcode-org/aboutcode-mirror-kev/refs/heads/main/known_exploited_vulnerabilities.json" + self.log(f"Fetching {kev_url}") + + try: + response = requests.get(kev_url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the KEV Exploits: {kev_url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + self.kev_data = response.json() + + def add_exploits(self): + fetched_exploit_count = self.kev_data.get("count") + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + + vulnerability_exploit_count = 0 + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + + for record in progress.iter(self.kev_data.get("vulnerabilities", [])): + vulnerability_exploit_count += add_vulnerability_exploit( + kev_vul=record, + logger=self.log, + ) + + self.log(f"Successfully added {vulnerability_exploit_count:,d} kev exploit") + + +def add_vulnerability_exploit(kev_vul, logger): + cve_id = kev_vul.get("cveID") + + if not cve_id: + return 0 + + advisories = set() + try: + if alias := AdvisoryAlias.objects.get(alias=cve_id): + for adv in alias.advisories.all(): + advisories.add(adv) + else: + advs = AdvisoryV2.objects.filter(advisory_id=cve_id) + for adv in advs: + advisories.add(adv) + except AdvisoryAlias.DoesNotExist: + logger(f"No advisory found for aliases {cve_id}") + return 0 + + for advisory in advisories: + AdvisoryExploit.objects.update_or_create( + advisory=advisory, + data_source="KEV", + defaults={ + "description": kev_vul["shortDescription"], + "date_added": kev_vul["dateAdded"], + "required_action": kev_vul["requiredAction"], + "due_date": kev_vul["dueDate"], + "notes": kev_vul["notes"], + "known_ransomware_campaign_use": True + if kev_vul["knownRansomwareCampaignUse"] == "Known" + else False, + }, + ) + return 1 diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py new file mode 100644 index 000000000..fbfea5150 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py @@ -0,0 +1,126 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +from traceback import format_exc as traceback_format_exc + +import requests +import saneyaml +from aboutcode.pipeline import LoopProgress +from dateutil import parser as dateparser + +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryExploit +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class MetasploitImproverPipeline(VulnerableCodePipeline): + """ + Metasploit Exploits Pipeline: Retrieve Metasploit data, iterate through it to identify vulnerabilities + by their associated aliases, and create or update the corresponding Exploit instances. 
+ """ + + pipeline_id = "enhance_with_metasploit_v2" + spdx_license_expression = "BSD-3-clause" + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_advisory_exploits, + ) + + def fetch_exploits(self): + url = "https://raw.githubusercontent.com/rapid7/metasploit-framework/master/db/modules_metadata_base.json" + self.log(f"Fetching {url}") + try: + response = requests.get(url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the Metasploit Exploits: {url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + + self.metasploit_data = response.json() + + def add_advisory_exploits(self): + fetched_exploit_count = len(self.metasploit_data) + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + + vulnerability_exploit_count = 0 + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + for _, record in progress.iter(self.metasploit_data.items()): + vulnerability_exploit_count += add_advisory_exploit( + record=record, + logger=self.log, + ) + self.log(f"Successfully added {vulnerability_exploit_count:,d} vulnerability exploit") + + +def add_advisory_exploit(record, logger): + advisories = set() + references = record.get("references", []) + + interesting_references = [ + ref for ref in references if not ref.startswith("OSVDB") and not ref.startswith("URL-") + ] + + if not interesting_references: + return 0 + + for ref in interesting_references: + try: + if alias := AdvisoryAlias.objects.get(alias=ref): + for adv in alias.advisories.all(): + advisories.add(adv) + else: + advs = AdvisoryV2.objects.filter(advisory_id=ref) + for adv in advs: + advisories.add(adv) + except AdvisoryAlias.DoesNotExist: + continue + + if not advisories: + logger(f"No advisories found for aliases {interesting_references}") + return 0 + + description = record.get("description", "") + notes = record.get("notes", {}) + platform = record.get("platform") + + source_url = "" + if path := record.get("path"): + source_url = f"https://github.com/rapid7/metasploit-framework/tree/master{path}" + source_date_published = None + + if disclosure_date := record.get("disclosure_date"): + try: + source_date_published = dateparser.parse(disclosure_date).date() + except ValueError as e: + logger( + f"Error while parsing date {disclosure_date} with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + + for advisory in advisories: + AdvisoryExploit.objects.update_or_create( + advisory=advisory, + data_source="Metasploit", + defaults={ + "description": description, + "notes": saneyaml.dump(notes), + "source_date_published": source_date_published, + "platform": platform, + "source_url": source_url, + }, + ) + return 1 diff --git a/vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py b/vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py new file mode 100644 index 000000000..8a4825df4 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py @@ -0,0 +1,104 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import logging +from itertools import groupby +from traceback import format_exc as traceback_format_exc + +from aboutcode.pipeline import LoopProgress +from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS +from fetchcode.package_versions import versions +from packageurl import PackageURL + +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class FlagGhostPackagePipeline(VulnerableCodePipeline): + """Detect and flag packages that do not exist upstream.""" + + pipeline_id = "flag_ghost_packages_v2" + + @classmethod + def steps(cls): + return (cls.flag_ghost_packages,) + + def flag_ghost_packages(self): + detect_and_flag_ghost_packages(logger=self.log) + + +def detect_and_flag_ghost_packages(logger=None): + """Check if packages are available upstream. If not, mark them as ghost package.""" + interesting_packages_qs = ( + PackageV2.objects.order_by("type", "namespace", "name") + .filter(type__in=FETCHCODE_SUPPORTED_ECOSYSTEMS) + .filter(qualifiers="") + .filter(subpath="") + ) + + distinct_packages_count = ( + interesting_packages_qs.values("type", "namespace", "name") + .distinct("type", "namespace", "name") + .count() + ) + + grouped_packages = groupby( + interesting_packages_qs.paginated(), + key=lambda pkg: (pkg.type, pkg.namespace, pkg.name), + ) + + ghost_package_count = 0 + progress = LoopProgress(total_iterations=distinct_packages_count, logger=logger) + for type_namespace_name, packages in progress.iter(grouped_packages): + ghost_package_count += flag_ghost_packages( + base_purl=PackageURL(*type_namespace_name), + packages=packages, + logger=logger, + ) + + if logger: + logger(f"Successfully flagged {ghost_package_count:,d} ghost Packages") + + +def flag_ghost_packages(base_purl, packages, logger=None): + """ + Check if `packages` are available upstream. + If not, update `is_ghost` to `True`. + Return the number of packages flagged as ghost. 
+ """ + known_versions = get_versions(purl=base_purl, logger=logger) + # Skip if encounter error while fetching known versions + if known_versions is None: + return 0 + + ghost_packages = 0 + for pkg in packages: + pkg.is_ghost = False + if pkg.version.lstrip("vV") not in known_versions: + pkg.is_ghost = True + ghost_packages += 1 + + if logger: + logger(f"Flagging ghost package {pkg.purl!s}", level=logging.DEBUG) + pkg.save() + + return ghost_packages + + +def get_versions(purl, logger=None): + """Return set of known versions for the given purl.""" + try: + return {v.value.lstrip("vV") for v in versions(str(purl))} + except Exception as e: + if logger: + logger( + f"Error while fetching known versions for {purl!s}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + return diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index fd8e87acc..9fbe8ce24 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -155,7 +155,7 @@ def insert_advisory_v2( default_data = { "summary": advisory.summary, "date_published": advisory.date_published, - "created_by": pipeline_id, + "datasource_ID": pipeline_id, "date_collected": datetime.now(timezone.utc), "advisory_id": advisory.advisory_id, } diff --git a/vulnerabilities/risk.py b/vulnerabilities/risk.py index a4508a03f..326fca462 100644 --- a/vulnerabilities/risk.py +++ b/vulnerabilities/risk.py @@ -36,6 +36,8 @@ def get_weighted_severity(severities): score_list = [] for severity in severities: + if not severity.url: + continue parsed_url = urlparse(severity.url) severity_source = parsed_url.netloc.replace("www.", "", 1) weight = WEIGHT_CONFIG.get(severity_source, DEFAULT_WEIGHT) @@ -104,8 +106,8 @@ def compute_package_risk(package): and determining the associated risk. """ result = [] - for relation in package.affectedbypackagerelatedvulnerability_set.all(): - if risk := relation.vulnerability.risk_score: + for advisory in package.affected_by_advisories.all(): + if risk := advisory.risk_score: result.append(float(risk)) if not result: diff --git a/vulnerabilities/templates/index_v2.html b/vulnerabilities/templates/index_v2.html new file mode 100644 index 000000000..962b5f79f --- /dev/null +++ b/vulnerabilities/templates/index_v2.html @@ -0,0 +1,33 @@ +{% extends "base.html" %} +{% load widget_tweaks %} + +{% block title %} +VulnerableCode Home +{% endblock %} + +{% block content %} +
+
+
+
+ {% include "package_search_box_v2.html" %} +
+
+
+

+ VulnerableCode aggregates software + vulnerabilities from multiple public advisory sources + and presents their details along with their affected + packages and fixed-by packages identified by + Package URLs (PURLs). +

+

+ What's new in this Release: + + Check out latest updates here! + +

+
+
+
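
The package search box included on this home page is served by the new PackageSearchV2 view, which delegates to PackageQuerySetV2.search() from models.py earlier in this patch. A minimal sketch of how that lookup behaves, assuming a populated database; the purls below are only illustrative:

from vulnerabilities.models import PackageV2

# A string that parses as a purl is normalized with utils.plain_purl() and
# matched with istartswith, so searching a bare package purl returns all of
# its known versions.
by_purl = PackageV2.objects.search("pkg:pypi/django")

# Anything that does not parse as a purl falls back to an icontains match on
# the stored package_url string.
by_text = PackageV2.objects.search("django")

for pkg in by_purl[:5]:
    print(pkg.purl)
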
+{% endblock %} \ No newline at end of file diff --git a/vulnerabilities/templates/package_details_v2.html b/vulnerabilities/templates/package_details_v2.html new file mode 100644 index 000000000..32d54fc37 --- /dev/null +++ b/vulnerabilities/templates/package_details_v2.html @@ -0,0 +1,349 @@ +{% extends "base.html" %} +{% load humanize %} +{% load widget_tweaks %} +{% load static %} +{% load url_filters %} + +{% block title %} +VulnerableCode Package Details - {{ package.purl }} +{% endblock %} + +{% block content %} +
+ {% include "package_search_box_v2.html"%} +
+ +{% if package %} +
+
+
+
+ Package details: + {{ package.purl }} + +
+
+ +
+ +
+ +
+
+
+ {% if affected_by_advisories|length != 0 %} +
+ {% else %} +
+ {% endif %} + + + + + + + {% if package.is_ghost %} + + + + + {% endif %} + +
+ + purl + + + {{ fixed_package_details.purl.to_string }} +
+ Tags + + + Ghost + +
+
+ {% if affected_by_advisories|length != 0 %} + +
+ + + + + + + + + + + + + + + +
+ Next non-vulnerable version + + {% if fixed_package_details.next_non_vulnerable.version %} + {{ fixed_package_details.next_non_vulnerable.version }} + {% else %} + None. + {% endif %} +
+ Latest non-vulnerable version + + {% if fixed_package_details.latest_non_vulnerable.version %} + {{ fixed_package_details.latest_non_vulnerable.version }} + {% else %} + None. + {% endif %} +
+ + Risk + + + {% if package.risk_score %} + {{ package.risk_score }} + {% endif %} +
+
+ + {% endif %} + +
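
The "Next non-vulnerable version", "Latest non-vulnerable version" and "Risk" rows above are filled from PackageV2.get_non_vulnerable_versions() and PackageV2.risk_score (the latter is populated by the compute_package_risk_v2 pipeline included in this patch series). A small sketch of the equivalent ORM calls; the purl is illustrative:

from vulnerabilities.models import PackageV2

package = PackageV2.objects.for_purl("pkg:pypi/jinja2@2.10").first()
if package:
    # Both values are PackageV2 instances, or None when every later version
    # is still vulnerable (see get_non_vulnerable_versions() in models.py).
    next_ok, latest_ok = package.get_non_vulnerable_versions()
    print(next_ok and next_ok.version, latest_ok and latest_ok.version)

    # Set by the compute_package_risk_v2 pipeline; empty until it has run.
    print(package.risk_score)
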
+
+ Advisories affecting this package ({{ affected_by_advisories|length }}) +
+ + + + + + + + + + + + {% for advisory in affected_by_advisories %} + + + + + + {% empty %} + + + + {% endfor %} + +
AdvisorySummaryFixed by
+ {{advisory.advisory_id }} +
+ Aliases: +
+ {% for alias in advisory.alias %} + {% if alias.url %} + {{ alias }} +
+ {% else %} + {{ alias }} +
+ {% endif %} + {% endfor %} +
+ {{ advisory.summary }} + + {% if package.purl == fixed_package_details.purl.to_string %} + {% for key, value in fixed_package_details.items %} + {% if key == "advisories" %} + {% for vuln in value %} + {% if vuln.advisory.advisory_id == advisory.advisory_id %} + {% if vuln.fixed_by_package_details is None %} + There are no reported fixed by versions. + {% else %} + {% for fixed_pkg in vuln.fixed_by_package_details %} +
+ {% if fixed_pkg.fixed_by_purl_advisories|length == 0 %} + {{ fixed_pkg.fixed_by_purl.version }} +
+ Affected by 0 other advisories. + {% else %} + {{ fixed_pkg.fixed_by_purl.version }} + {% if fixed_pkg.fixed_by_purl_advisories|length != 1 %} +
+
                                                                    Affected by {{ fixed_pkg.fixed_by_purl_advisories|length }} other
                                                                    advisories.
                                                                {% else %}
+ Affected by {{ fixed_pkg.fixed_by_purl_advisories|length }} other + advisory. + {% endif %} + + + {% endif %} +
+ {% endfor %} + {% endif %} + {% endif %} + {% endfor %} + {% endif %} + {% endfor %} + {% endif %} +
+ This package is not known to be affected by advisories. +
+
+ +
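
The "Fixed by" column in the table above is rendered from PackageV2.fixed_package_details, the property assembled in models.py earlier in this patch. A rough, self-contained sketch of the shape of that mapping; the purls are made up and model instances are replaced by None/[] placeholders:

from packageurl import PackageURL

# Approximate structure returned by PackageV2.fixed_package_details; key names
# follow the property and get_affecting_vulnerabilities() shown in models.py.
example_details = {
    "purl": PackageURL.from_string("pkg:pypi/flask@1.0"),
    "next_non_vulnerable": None,        # a PackageV2 instance, or None
    "latest_non_vulnerable": None,      # a PackageV2 instance, or None
    "advisories": [
        {
            "advisory": None,           # an AdvisoryV2 affecting this package
            "fixed_by_purl": [],
            "fixed_by_purl_advisories": [],
            "fixed_by_package_details": [
                {
                    "fixed_by_purl": PackageURL.from_string("pkg:pypi/flask@1.1"),
                    # advisories that still affect the fixing version
                    "fixed_by_purl_advisories": [],
                },
            ],
        },
    ],
}
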
+
+ Advisories fixed by this package ({{ fixing_advisories|length }}) +
+ + + + + + + + + + + {% for advisory in fixing_advisories %} + + + + + + {% empty %} + + + + {% endfor %} + +
AdvisorySummaryAliases
+ {{ advisory.advisory_id }} + + {{ advisory.summary }} + + {% for alias in advisory.alias %} + {% if alias.url %} + {{ alias }} +
+ {% else %} + {{ alias }} +
+ {% endif %} + {% endfor %} +
+ This package is not known to fix any advisories. +
+ +
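
The Aliases column in both tables iterates advisory.alias, the property added to AdvisoryV2 in models.py above (an alias of get_aliases). A minimal ORM sketch; the advisory id is only an example borrowed from the model's help text:

from vulnerabilities.models import AdvisoryV2

advisory = AdvisoryV2.objects.filter(advisory_id="PYSEC-2020-2233").first()
if advisory:
    # advisory.alias is the same queryset as advisory.aliases.all()
    for alias in advisory.alias:
        print(alias.alias)
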
+
+
+
+ + +
+
+
+
+ +{% endif %} +{% endblock %} diff --git a/vulnerabilities/templates/package_search_box_v2.html b/vulnerabilities/templates/package_search_box_v2.html new file mode 100644 index 000000000..e78d400e6 --- /dev/null +++ b/vulnerabilities/templates/package_search_box_v2.html @@ -0,0 +1,48 @@ +{% load widget_tweaks %} +
+
+ Search for packages + +
+
+
+
+
+
+ {{ package_search_form.search|add_class:"input" }} +
+
+ +
+
+
+
+
+
diff --git a/vulnerabilities/templates/packages_v2.html b/vulnerabilities/templates/packages_v2.html new file mode 100644 index 000000000..fe2b05abe --- /dev/null +++ b/vulnerabilities/templates/packages_v2.html @@ -0,0 +1,84 @@ +{% extends "base.html" %} +{% load humanize %} +{% load widget_tweaks %} + +{% block title %} +VulnerableCode Package Search +{% endblock %} + +{% block content %} +
+ {% include "package_search_box_v2.html" %} +
+ +{% if search %} +
+
+
+
+ {{ page_obj.paginator.count|intcomma }} results +
+ {% if is_paginated %} + {% include 'includes/pagination.html' with page_obj=page_obj %} + {% endif %} +
+
+
+ +
+
+ + + + + + + + + + {% for package in page_obj %} + + + + + + {% empty %} + + + + {% endfor %} + +
+ + Package URL + + + + Affected by vulnerabilities + + + + Fixing vulnerabilities + +
+ {{ package.purl }} + {{ package.vulnerability_count }}{{ package.patched_vulnerability_count }}
+ No Package found. +
+
+ + {% if is_paginated %} + {% include 'includes/pagination.html' with page_obj=page_obj %} + {% endif %} + +
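
The two count columns in the results table above come from PackageQuerySetV2.with_vulnerability_counts(), whose annotations follow the affecting_packages and fixed_by_packages related names on AdvisoryV2. A short sketch, assuming the search purl is illustrative:

from vulnerabilities.models import PackageV2

results = PackageV2.objects.search("pkg:npm/lodash").with_vulnerability_counts()
for pkg in results:
    print(pkg.purl, pkg.vulnerability_count, pkg.patched_vulnerability_count)

# Related helpers on the same queryset (see models.py earlier in this patch):
vulnerable_only = PackageV2.objects.only_vulnerable()
non_vulnerable = PackageV2.objects.only_non_vulnerable()
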
+{% endif %} +{% endblock %} diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 0bfd5825e..9d2806bf6 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -249,6 +249,11 @@ def fetch_github_graphql_query(graphql_query: dict): response = _get_gh_response(gh_token=gh_token, graphql_query=graphql_query) + if not response: + msg = "No response received from GitHub API." + logger.error(msg) + raise GraphQLError(msg) + message = response.get("message") if message and message == "Bad credentials": raise GitHubTokenError(f"Invalid GitHub token: {message}") @@ -266,7 +271,10 @@ def _get_gh_response(gh_token, graphql_query): """ endpoint = "https://api.github.com/graphql" headers = {"Authorization": f"bearer {gh_token}"} - return requests.post(endpoint, headers=headers, json=graphql_query).json() + try: + return requests.post(endpoint, headers=headers, json=graphql_query).json() + except Exception as e: + logger.error(f"Failed to fetch data from GitHub GraphQL API: {e}") def dedupe(original: List) -> List: diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index e6fb95a94..e9318877f 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -30,6 +30,7 @@ from vulnerabilities import models from vulnerabilities.forms import AdminLoginForm from vulnerabilities.forms import ApiUserCreationForm +from vulnerabilities.forms import AdvisorySearchForm from vulnerabilities.forms import PackageSearchForm from vulnerabilities.forms import PipelineSchedulePackageForm from vulnerabilities.forms import VulnerabilitySearchForm @@ -71,6 +72,35 @@ def get_queryset(self, query=None): ) +class PackageSearchV2(ListView): + model = models.PackageV2 + template_name = "packages_v2.html" + ordering = ["type", "namespace", "name", "version"] + paginate_by = PAGE_SIZE + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + request_query = self.request.GET + context["package_search_form"] = PackageSearchForm(request_query) + context["search"] = request_query.get("search") + return context + + def get_queryset(self, query=None): + """ + Return a Package queryset for the ``query``. + Make a best effort approach to find matching packages either based + on exact purl, partial purl or just name and namespace. 
+ """ + query = query or self.request.GET.get("search") or "" + return ( + self.model.objects.search(query) + .with_vulnerability_counts() + .prefetch_related() + .order_by("package_url") + ) + + + class VulnerabilitySearch(ListView): model = models.Vulnerability template_name = "vulnerabilities.html" @@ -80,7 +110,25 @@ class VulnerabilitySearch(ListView): def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) request_query = self.request.GET - context["vulnerability_search_form"] = VulnerabilitySearchForm(request_query) + context["advisory_search_form"] = AdvisorySearchForm(request_query) + context["search"] = request_query.get("search") + return context + + def get_queryset(self, query=None): + query = query or self.request.GET.get("search") or "" + return self.model.objects.search(query=query).with_package_counts() + + +class AdvisorySearch(ListView): + model = models.AdvisoryV2 + template_name = "vulnerabilities.html" + ordering = ["advisory_id"] + paginate_by = PAGE_SIZE + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + request_query = self.request.GET + context["advisory_search_form"] = VulnerabilitySearchForm(request_query) context["search"] = request_query.get("search") return context @@ -130,6 +178,47 @@ def get_object(self, queryset=None): return package +class PackageV2Details(DetailView): + model = models.PackageV2 + template_name = "package_details_v2.html" + slug_url_kwarg = "purl" + slug_field = "purl" + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + package = self.object + context["package"] = package + context["affected_by_advisories"] = package.affected_by_advisories.order_by("advisory_id") + # Ghost package should not fix any vulnerability. 
+ context["fixing_advisories"] = ( + None if package.is_ghost else package.fixing_advisories.order_by("advisory_id") + ) + context["package_search_form"] = PackageSearchForm(self.request.GET) + context["fixed_package_details"] = package.fixed_package_details + + # context["history"] = list(package.history) + return context + + def get_object(self, queryset=None): + if queryset is None: + queryset = self.get_queryset() + + purl = self.kwargs.get(self.slug_url_kwarg) + if purl: + queryset = queryset.for_purl(purl) + else: + cls = self.__class__.__name__ + raise AttributeError( + f"Package details view {cls} must be called with a purl, " f"but got: {purl!r}" + ) + + try: + package = queryset.get() + except queryset.model.DoesNotExist: + raise Http404(f"No Package found for purl: {purl}") + return package + + class VulnerabilityDetails(DetailView): model = models.Vulnerability template_name = "vulnerability_details.html" @@ -245,6 +334,19 @@ def get(self, request): return render(request=request, template_name=self.template_name, context=context) +class HomePageV2(View): + template_name = "index_v2.html" + + def get(self, request): + request_query = request.GET + context = { + "vulnerability_search_form": AdvisorySearchForm(request_query), + "package_search_form": PackageSearchForm(request_query), + "release_url": f"https://github.com/aboutcode-org/vulnerablecode/releases/tag/v{VULNERABLECODE_VERSION}", + } + return render(request=request, template_name=self.template_name, context=context) + + email_template = """ Dear VulnerableCode.io user: diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index 45a03a28d..6810d4900 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -32,6 +32,11 @@ from vulnerabilities.views import PipelineRunDetailView from vulnerabilities.views import PipelineRunListView from vulnerabilities.views import PipelineScheduleListView +from vulnerabilities.views import ApiUserCreateView, HomePageV2, PackageSearchV2 +from vulnerabilities.views import HomePage +from vulnerabilities.views import PackageDetails +from vulnerabilities.views import PackageSearch +from vulnerabilities.views import PackageV2Details from vulnerabilities.views import VulnerabilityDetails from vulnerabilities.views import VulnerabilityPackagesDetails from vulnerabilities.views import VulnerabilitySearch @@ -88,15 +93,30 @@ def __init__(self, *args, **kwargs): name="run-details", ), path( - "packages/search/", + "v2", + HomePageV2.as_view(), + name="home", + ), + path( + "packages/search", PackageSearch.as_view(), name="package_search", ), + path( + "packages/v2/search", + PackageSearchV2.as_view(), + name="package_search_v2", + ), re_path( r"^packages/(?Ppkg:.+)$", PackageDetails.as_view(), name="package_details", ), + re_path( + r"^packages/v2/(?Ppkg:.+)$", + PackageV2Details.as_view(), + name="package_details_v2", + ), path( "vulnerabilities/search/", VulnerabilitySearch.as_view(), From fd3165c4f2f130c0961c3cbb9309f4b418d6f792 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 28 May 2025 19:10:05 +0530 Subject: [PATCH 28/44] Fix tests Signed-off-by: Tushar Goel --- vulnerabilities/forms.py | 5 +- ...oryreference_advisoryseverity_and_more.py} | 133 +++++++++++++++- ...alter_advisoryv2_datasource_id_and_more.py | 145 ------------------ vulnerabilities/models.py | 3 - vulnerabilities/pipelines/__init__.py | 2 +- vulnerabilities/views.py | 5 +- vulnerablecode/urls.py | 12 +- 7 files changed, 139 insertions(+), 166 deletions(-) rename 
vulnerabilities/migrations/{0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py => 0093_advisoryalias_advisoryreference_advisoryseverity_and_more.py} (74%) delete mode 100644 vulnerabilities/migrations/0093_alter_advisoryv2_datasource_id_and_more.py diff --git a/vulnerabilities/forms.py b/vulnerabilities/forms.py index 1e714a44b..7d955ac37 100644 --- a/vulnerabilities/forms.py +++ b/vulnerabilities/forms.py @@ -40,11 +40,10 @@ class AdvisorySearchForm(forms.Form): search = forms.CharField( required=True, - widget=forms.TextInput( - attrs={"placeholder": "Advisory id or alias such as CVE or GHSA"} - ), + widget=forms.TextInput(attrs={"placeholder": "Advisory id or alias such as CVE or GHSA"}), ) + class ApiUserCreationForm(forms.ModelForm): """ Support a simplified creation for API-only users directly from the UI. diff --git a/vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py b/vulnerabilities/migrations/0093_advisoryalias_advisoryreference_advisoryseverity_and_more.py similarity index 74% rename from vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py rename to vulnerabilities/migrations/0093_advisoryalias_advisoryreference_advisoryseverity_and_more.py index 101eec8b4..74de631fd 100644 --- a/vulnerabilities/migrations/0092_advisoryalias_advisoryreference_advisoryseverity_and_more.py +++ b/vulnerabilities/migrations/0093_advisoryalias_advisoryreference_advisoryseverity_and_more.py @@ -1,12 +1,13 @@ -# Generated by Django 4.2.20 on 2025-05-27 10:43 +# Generated by Django 4.2.20 on 2025-05-28 13:31 from django.db import migrations, models +import django.db.models.deletion class Migration(migrations.Migration): dependencies = [ - ("vulnerabilities", "0091_alter_advisory_unique_together_and_more"), + ("vulnerabilities", "0092_pipelineschedule_pipelinerun"), ] operations = [ @@ -249,6 +250,16 @@ class Migration(migrations.Migration): "abstract": False, }, ), + migrations.AlterField( + model_name="pipelineschedule", + name="is_active", + field=models.BooleanField( + db_index=True, + default=True, + help_text="When set to True, this Pipeline is active. When set to False, this Pipeline is inactive and not run.", + null=True, + ), + ), migrations.CreateModel( name="AdvisoryV2", fields=[ @@ -295,7 +306,7 @@ class Migration(migrations.Migration): ( "datasource_ID", models.CharField( - help_text="Fully qualified name of the importer prefixed with themodule name importing the advisory. Eg:vulnerabilities.pipeline.nginx_importer.NginxImporterPipeline", + help_text="Fully qualified name of the importer prefixed with themodule name importing the advisory. 
Eg:nginx_importer_v2", max_length=100, ), ), @@ -346,7 +357,7 @@ class Migration(migrations.Migration): "fixed_by_packages", models.ManyToManyField( help_text="A list of packages that are reported by this advisory.", - related_name="fixing_advisorues", + related_name="fixing_advisories", to="vulnerabilities.packagev2", ), ), @@ -379,4 +390,118 @@ class Migration(migrations.Migration): "ordering": ["date_published", "unique_content_id"], }, ), + migrations.CreateModel( + name="AdvisoryExploit", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "date_added", + models.DateField( + blank=True, + help_text="The date the vulnerability was added to an exploit catalog.", + null=True, + ), + ), + ( + "description", + models.TextField( + blank=True, + help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description", + null=True, + ), + ), + ( + "required_action", + models.TextField( + blank=True, + help_text="The required action to address the vulnerability, typically to apply vendor updates or apply vendor mitigations or to discontinue use.", + null=True, + ), + ), + ( + "due_date", + models.DateField( + blank=True, + help_text="The date the required action is due, which applies to all USA federal civilian executive branch (FCEB) agencies, but all organizations are strongly encouraged to execute the required action", + null=True, + ), + ), + ( + "notes", + models.TextField( + blank=True, + help_text="Additional notes and resources about the vulnerability, often a URL to vendor instructions.", + null=True, + ), + ), + ( + "known_ransomware_campaign_use", + models.BooleanField( + default=False, + help_text="Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign; \n or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.", + ), + ), + ( + "source_date_published", + models.DateField( + blank=True, + help_text="The date that the exploit was published or disclosed.", + null=True, + ), + ), + ( + "exploit_type", + models.TextField( + blank=True, + help_text="The type of the exploit as provided by the original upstream data source.", + null=True, + ), + ), + ( + "platform", + models.TextField( + blank=True, + help_text="The platform associated with the exploit as provided by the original upstream data source.", + null=True, + ), + ), + ( + "source_date_updated", + models.DateField( + blank=True, + help_text="The date the exploit was updated in the original upstream data source.", + null=True, + ), + ), + ( + "data_source", + models.TextField( + blank=True, + help_text="The source of the exploit information, such as CISA KEV, exploitdb, metaspoit, or others.", + null=True, + ), + ), + ( + "source_url", + models.URLField( + blank=True, + help_text="The URL to the exploit as provided in the original upstream data source.", + null=True, + ), + ), + ( + "advisory", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="exploits", + to="vulnerabilities.advisoryv2", + ), + ), + ], + ), ] diff --git a/vulnerabilities/migrations/0093_alter_advisoryv2_datasource_id_and_more.py b/vulnerabilities/migrations/0093_alter_advisoryv2_datasource_id_and_more.py deleted file mode 100644 index 003a79ffd..000000000 --- a/vulnerabilities/migrations/0093_alter_advisoryv2_datasource_id_and_more.py +++ /dev/null @@ -1,145 +0,0 @@ -# Generated by Django 4.2.20 on 2025-05-27 12:30 - -from 
django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", "0092_advisoryalias_advisoryreference_advisoryseverity_and_more"), - ] - - operations = [ - migrations.AlterField( - model_name="advisoryv2", - name="datasource_ID", - field=models.CharField( - help_text="Fully qualified name of the importer prefixed with themodule name importing the advisory. Eg:nginx_importer_v2", - max_length=100, - ), - ), - migrations.AlterField( - model_name="advisoryv2", - name="fixed_by_packages", - field=models.ManyToManyField( - help_text="A list of packages that are reported by this advisory.", - related_name="fixing_advisories", - to="vulnerabilities.packagev2", - ), - ), - migrations.CreateModel( - name="AdvisoryExploit", - fields=[ - ( - "id", - models.AutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name="ID" - ), - ), - ( - "date_added", - models.DateField( - blank=True, - help_text="The date the vulnerability was added to an exploit catalog.", - null=True, - ), - ), - ( - "description", - models.TextField( - blank=True, - help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description", - null=True, - ), - ), - ( - "required_action", - models.TextField( - blank=True, - help_text="The required action to address the vulnerability, typically to apply vendor updates or apply vendor mitigations or to discontinue use.", - null=True, - ), - ), - ( - "due_date", - models.DateField( - blank=True, - help_text="The date the required action is due, which applies to all USA federal civilian executive branch (FCEB) agencies, but all organizations are strongly encouraged to execute the required action", - null=True, - ), - ), - ( - "notes", - models.TextField( - blank=True, - help_text="Additional notes and resources about the vulnerability, often a URL to vendor instructions.", - null=True, - ), - ), - ( - "known_ransomware_campaign_use", - models.BooleanField( - default=False, - help_text="Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign; \n or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.", - ), - ), - ( - "source_date_published", - models.DateField( - blank=True, - help_text="The date that the exploit was published or disclosed.", - null=True, - ), - ), - ( - "exploit_type", - models.TextField( - blank=True, - help_text="The type of the exploit as provided by the original upstream data source.", - null=True, - ), - ), - ( - "platform", - models.TextField( - blank=True, - help_text="The platform associated with the exploit as provided by the original upstream data source.", - null=True, - ), - ), - ( - "source_date_updated", - models.DateField( - blank=True, - help_text="The date the exploit was updated in the original upstream data source.", - null=True, - ), - ), - ( - "data_source", - models.TextField( - blank=True, - help_text="The source of the exploit information, such as CISA KEV, exploitdb, metaspoit, or others.", - null=True, - ), - ), - ( - "source_url", - models.URLField( - blank=True, - help_text="The URL to the exploit as provided in the original upstream data source.", - null=True, - ), - ), - ( - "advisory", - models.ForeignKey( - on_delete=django.db.models.deletion.CASCADE, - related_name="exploits", - to="vulnerabilities.advisoryv2", - ), - ), - ], - ), - ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py 
index 2e00adbc2..955a70bf5 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1324,7 +1324,6 @@ def url(self): class AdvisoryQuerySet(BaseQuerySet): - def search(query): """ This function will take a string as an input, the string could be an alias or an advisory ID or @@ -2632,7 +2631,6 @@ def risk_score(self): objects = AdvisoryQuerySet.as_manager() - class Meta: ordering = ["date_published", "unique_content_id"] @@ -2684,7 +2682,6 @@ class Meta: class PackageQuerySetV2(BaseQuerySet, PackageURLQuerySet): - def search(self, query: str = None): """ Return a Package queryset searching for the ``query``. diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index c0662bbb1..89d942f21 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -28,8 +28,8 @@ from vulnerabilities.importer import UnMergeablePackageError from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.models import Advisory -from vulnerabilities.models import PipelineRun from vulnerabilities.models import PackageV2 +from vulnerabilities.models import PipelineRun from vulnerabilities.pipes.advisory import import_advisory from vulnerabilities.pipes.advisory import insert_advisory from vulnerabilities.pipes.advisory import insert_advisory_v2 diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index e9318877f..21e87172d 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -29,8 +29,8 @@ from vulnerabilities import models from vulnerabilities.forms import AdminLoginForm -from vulnerabilities.forms import ApiUserCreationForm from vulnerabilities.forms import AdvisorySearchForm +from vulnerabilities.forms import ApiUserCreationForm from vulnerabilities.forms import PackageSearchForm from vulnerabilities.forms import PipelineSchedulePackageForm from vulnerabilities.forms import VulnerabilitySearchForm @@ -100,7 +100,6 @@ def get_queryset(self, query=None): ) - class VulnerabilitySearch(ListView): model = models.Vulnerability template_name = "vulnerabilities.html" @@ -110,7 +109,7 @@ class VulnerabilitySearch(ListView): def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) request_query = self.request.GET - context["advisory_search_form"] = AdvisorySearchForm(request_query) + context["vulnerability_search_form"] = VulnerabilitySearchForm(request_query) context["search"] = request_query.get("search") return context diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index 6810d4900..374c8fe1f 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -27,16 +27,14 @@ from vulnerabilities.views import AdminLoginView from vulnerabilities.views import ApiUserCreateView from vulnerabilities.views import HomePage +from vulnerabilities.views import HomePageV2 from vulnerabilities.views import PackageDetails from vulnerabilities.views import PackageSearch +from vulnerabilities.views import PackageSearchV2 +from vulnerabilities.views import PackageV2Details from vulnerabilities.views import PipelineRunDetailView from vulnerabilities.views import PipelineRunListView from vulnerabilities.views import PipelineScheduleListView -from vulnerabilities.views import ApiUserCreateView, HomePageV2, PackageSearchV2 -from vulnerabilities.views import HomePage -from vulnerabilities.views import PackageDetails -from vulnerabilities.views import PackageSearch -from vulnerabilities.views import PackageV2Details from vulnerabilities.views import 
VulnerabilityDetails from vulnerabilities.views import VulnerabilityPackagesDetails from vulnerabilities.views import VulnerabilitySearch @@ -98,12 +96,12 @@ def __init__(self, *args, **kwargs): name="home", ), path( - "packages/search", + "packages/search/", PackageSearch.as_view(), name="package_search", ), path( - "packages/v2/search", + "packages/v2/search/", PackageSearchV2.as_view(), name="package_search_v2", ), From a206e6cb919bceffcc3791f3e191528f691a3650 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 28 May 2025 19:12:27 +0530 Subject: [PATCH 29/44] Fix tests Signed-off-by: Tushar Goel --- .../v2_improvers/compute_package_risk.py | 2 +- vulnerabilities/risk.py | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py index 85f31dd8e..c8cf4233d 100644 --- a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py +++ b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py @@ -11,7 +11,7 @@ from vulnerabilities.models import AdvisoryV2 from vulnerabilities.models import PackageV2 from vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.risk import compute_package_risk +from vulnerabilities.risk import compute_package_risk_v2 from vulnerabilities.risk import compute_vulnerability_risk_factors diff --git a/vulnerabilities/risk.py b/vulnerabilities/risk.py index 326fca462..6b1dca4b6 100644 --- a/vulnerabilities/risk.py +++ b/vulnerabilities/risk.py @@ -101,6 +101,22 @@ def compute_vulnerability_risk_factors(references, severities, exploits): def compute_package_risk(package): + """ + Calculate the risk for a package by iterating over all vulnerabilities that affects this package + and determining the associated risk. + """ + result = [] + for relation in package.affectedbypackagerelatedvulnerability_set.all(): + if risk := relation.vulnerability.risk_score: + result.append(float(risk)) + + if not result: + return + + return round(max(result), 1) + + +def compute_package_risk_v2(package): """ Calculate the risk for a package by iterating over all vulnerabilities that affects this package and determining the associated risk. 
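+    # Example with hypothetical scores: if the risk scores collected for a package are
+    # [3.0, 7.5], the function returns round(max([3.0, 7.5]), 1) == 7.5; it returns None
+    # when no risk score is found.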
@@ -113,4 +129,4 @@ def compute_package_risk(package): if not result: return - return round(max(result), 1) + return round(max(result), 1) \ No newline at end of file From d26b9bacdad9abf65b01e382ca5e34519f4638de Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 28 May 2025 19:14:25 +0530 Subject: [PATCH 30/44] Fix tests Signed-off-by: Tushar Goel --- vulnerabilities/risk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/risk.py b/vulnerabilities/risk.py index 6b1dca4b6..56f19171e 100644 --- a/vulnerabilities/risk.py +++ b/vulnerabilities/risk.py @@ -129,4 +129,4 @@ def compute_package_risk_v2(package): if not result: return - return round(max(result), 1) \ No newline at end of file + return round(max(result), 1) From c9bdad82cce64c0835b4692212025d9888360ab9 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 3 Jun 2025 18:53:34 +0530 Subject: [PATCH 31/44] Add Advisory Detail View Signed-off-by: Tushar Goel --- vulnerabilities/models.py | 70 ++ .../templates/advisory_detail.html | 624 ++++++++++++++++++ .../templates/advisory_package_details.html | 88 +++ .../templates/package_details_v2.html | 60 +- vulnerabilities/views.py | 166 ++++- vulnerablecode/urls.py | 12 +- 6 files changed, 992 insertions(+), 28 deletions(-) create mode 100644 vulnerabilities/templates/advisory_detail.html create mode 100644 vulnerabilities/templates/advisory_package_details.html diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 955a70bf5..e45b6997e 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2638,6 +2638,17 @@ def save(self, *args, **kwargs): self.full_clean() return super().save(*args, **kwargs) + @property + def get_status_label(self): + label_by_status = {choice[0]: choice[1] for choice in VulnerabilityStatusType.choices} + return label_by_status.get(self.status) or VulnerabilityStatusType.PUBLISHED.label + + def get_absolute_url(self): + """ + Return this Vulnerability details absolute URL. + """ + return reverse("advisory_details", args=[self.id]) + def to_advisory_data(self) -> "AdvisoryDataV2": from vulnerabilities.importer import AdvisoryDataV2 from vulnerabilities.importer import AffectedPackage @@ -2662,6 +2673,65 @@ def get_aliases(self): Return a queryset of all Aliases for this vulnerability. 
""" return self.aliases.all() + + def aggregate_fixed_and_affected_packages(self): + from vulnerabilities.utils import get_purl_version_class + + sorted_fixed_by_packages = self.fixed_by_packages.filter(is_ghost=False).order_by( + "type", "namespace", "name", "qualifiers", "subpath" + ) + + if sorted_fixed_by_packages: + sorted_fixed_by_packages.first().calculate_version_rank + + sorted_affected_packages = self.affecting_packages.all() + + if sorted_affected_packages: + sorted_affected_packages.first().calculate_version_rank + + grouped_fixed_by_packages = { + key: list(group) + for key, group in groupby( + sorted_fixed_by_packages, + key=attrgetter("type", "namespace", "name", "qualifiers", "subpath"), + ) + } + + all_affected_fixed_by_matches = [] + + for sorted_affected_package in sorted_affected_packages: + affected_fixed_by_matches = { + "affected_package": sorted_affected_package, + "matched_fixed_by_packages": [], + } + + # Build the key to find matching group + key = ( + sorted_affected_package.type, + sorted_affected_package.namespace, + sorted_affected_package.name, + sorted_affected_package.qualifiers, + sorted_affected_package.subpath, + ) + + # Get matching group from pre-grouped fixed_by_packages + matching_fixed_packages = grouped_fixed_by_packages.get(key, []) + + # Get version classes for comparison + affected_version_class = get_purl_version_class(sorted_affected_package) + affected_version = affected_version_class(sorted_affected_package.version) + + # Compare versions and filter valid matches + matched_fixed_by_packages = [ + fixed_by_package.purl + for fixed_by_package in matching_fixed_packages + if get_purl_version_class(fixed_by_package)(fixed_by_package.version) + > affected_version + ] + + affected_fixed_by_matches["matched_fixed_by_packages"] = matched_fixed_by_packages + all_affected_fixed_by_matches.append(affected_fixed_by_matches) + return sorted_fixed_by_packages, sorted_affected_packages, all_affected_fixed_by_matches alias = get_aliases diff --git a/vulnerabilities/templates/advisory_detail.html b/vulnerabilities/templates/advisory_detail.html new file mode 100644 index 000000000..5a6fbe2b1 --- /dev/null +++ b/vulnerabilities/templates/advisory_detail.html @@ -0,0 +1,624 @@ +{% extends "base.html" %} +{% load humanize %} +{% load widget_tweaks %} +{% load static %} +{% load show_cvss %} +{% load url_filters %} + +{% block title %} +VulnerableCode Advisory Details - {{ advisory.advisory_id }} +{% endblock %} + +{% block content %} + +{% if advisory %} +
+
+
+
+ Advisory details: + + {{ advisory.advisory_id }} + +
+
+ + +
+
+
+ + + + + + + + + + + + + + + + + + + {% if severity_score_range %} + + + + {% endif %} + + + + + + + + + + + + + + + + + + + + + + + + + +
Advisory ID{{ advisory.advisory_id }}
URL + {{advisory.url}} +
Aliases + {% for alias in aliases %} + {% if alias.url %} + {{ alias }} + {% else %} + {{ alias }} + {% endif %} +
+ {% endfor %} +
Summary{{ advisory.summary }} +
Severity score range{{ severity_score_range }} +
Status{{ status }}
+ Exploitability + {{ advisory.exploitability }} +
Weighted Severity + {{ advisory.weighted_severity }} +
Risk + {{ advisory.risk_score }} +
Affected and Fixed Packages + + Package Details + +
+
+ Weaknesses ({{ weaknesses|length }}) +
+
+ + {% for weakness in weaknesses %} + + + + + + {% empty %} + + + + {% endfor %} +
CWE-{{ weakness.cwe_id }} + + {{ weakness.name }} + +
+ There are no known CWE. +
+
+
+
+ + +
+ + + + + + + {% for severity in severities %} + + + + + + {% empty %} + + + + {% endfor %} +
System Score Found at
{{ severity.scoring_system }}{{ severity.value }} + {{ severity.url }} +
+ There are no known severity scores. +
+
+ +
+ + + + + + + + + {% for ref in references %} + + {% if ref.reference_id %} + + {% else %} + + {% endif %} + + {% if ref.reference_type %} + + {% else %} + + {% endif %} + + + + {% empty %} + + + + {% endfor %} +
Reference id Reference type URL
{{ ref.reference_id }}{{ ref.get_reference_type_display }}{{ ref.url }}
+ There are no known references. +
+
+ +
+ {% for exploit in advisory.exploits.all %} + + + + + + + + {% if exploit.date_added %} + + + + + {% endif %} + {% if exploit.description %} + + + + + {% endif %} + {% if exploit.required_action %} + + + + + {% endif %} + {% if exploit.due_date %} + + + + + {% endif %} + {% if exploit.notes %} + + + + + {% endif %} + {% if exploit.known_ransomware_campaign_use is not None %} + + + + + {% endif %} + {% if exploit.source_date_published %} + + + + + {% endif %} + {% if exploit.exploit_type %} + + + + + {% endif %} + {% if exploit.platform %} + + + + + {% endif %} + {% if exploit.source_date_updated %} + + + + + {% endif %} + + {% if exploit.source_url %} + + + + + {% endif %} + +
Data source {{ exploit.data_source }}
+ + Date added + + {{ exploit.date_added }}
+ + Description + + {{ exploit.description }}
+ + Required action + + {{ exploit.required_action }}
+ + Due date + + {{ exploit.due_date }}
+ + Note + +
{{ exploit.notes }}
+ + Ransomware campaign use + + {{ exploit.known_ransomware_campaign_use|yesno:"Known,Unknown" }}
+ + Source publication date + + {{ exploit.source_date_published }}
+ + Exploit type + + {{ exploit.exploit_type }}
+ + Platform + + {{ exploit.platform }}
+ + Source update date + + {{ exploit.source_date_updated }}
+ + Source URL + + {{ exploit.source_url }}
+ {% empty %} + + + No exploits are available. + + + {% endfor %} +
+ +
+ {% for severity_vector in severity_vectors %} + {% if severity_vector.vector.version == '2.0' %} + Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }} + + + + + + + + + + + + + + + + + + + +
Exploitability (E)Access Vector (AV)Access Complexity (AC)Authentication (Au)Confidentiality Impact (C)Integrity Impact (I)Availability Impact (A)
{{ severity_vector.vector.exploitability|cvss_printer:"high,functional,unproven,proof_of_concept,not_defined" }}{{ severity_vector.vector.accessVector|cvss_printer:"local,adjacent_network,network" }}{{ severity_vector.vector.accessComplexity|cvss_printer:"high,medium,low" }}{{ severity_vector.vector.authentication|cvss_printer:"multiple,single,none" }}{{ severity_vector.vector.confidentialityImpact|cvss_printer:"none,partial,complete" }}{{ severity_vector.vector.integrityImpact|cvss_printer:"none,partial,complete" }}{{ severity_vector.vector.availabilityImpact|cvss_printer:"none,partial,complete" }}
+ {% elif severity_vector.vector.version == '3.1' or severity_vector.vector.version == '3.0'%} + Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }} + + + + + + + + + + + + + + + + + + + + + +
Attack Vector (AV)Attack Complexity (AC)Privileges Required (PR)User Interaction (UI)Scope (S)Confidentiality Impact (C)Integrity Impact (I)Availability Impact (A)
{{ severity_vector.vector.attackVector|cvss_printer:"network,adjacent_network,local,physical"}}{{ severity_vector.vector.attackComplexity|cvss_printer:"low,high" }}{{ severity_vector.vector.privilegesRequired|cvss_printer:"none,low,high" }}{{ severity_vector.vector.userInteraction|cvss_printer:"none,required"}}{{ severity_vector.vector.scope|cvss_printer:"unchanged,changed" }}{{ severity_vector.vector.confidentialityImpact|cvss_printer:"high,low,none" }}{{ severity_vector.vector.integrityImpact|cvss_printer:"high,low,none" }}{{ severity_vector.vector.availabilityImpact|cvss_printer:"high,low,none" }}
+ {% elif severity_vector.vector.version == '4' %} + Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Attack Vector (AV)Attack Complexity (AC)Attack Requirements (AT)Privileges Required (PR)User Interaction (UI)Vulnerable System Impact Confidentiality (VC)Vulnerable System Impact Integrity (VI)Vulnerable System Impact Availability (VA)Subsequent System Impact Confidentiality (SC)Subsequent System Impact Integrity (SI)Subsequent System Impact Availability (SA)
{{ severity_vector.vector.attackVector|cvss_printer:"network,adjacent,local,physical"}}{{ severity_vector.vector.attackComplexity|cvss_printer:"low,high" }}{{ severity_vector.vector.attackRequirement|cvss_printer:"none,present" }}{{ severity_vector.vector.privilegesRequired|cvss_printer:"none,low,high" }}{{ severity_vector.vector.userInteraction|cvss_printer:"none,passive,active"}}{{ severity_vector.vector.vulnerableSystemImpactConfidentiality|cvss_printer:"high,low,none" }}{{ severity_vector.vector.vulnerableSystemImpactIntegrity|cvss_printer:"high,low,none" }}{{ severity_vector.vector.vulnerableSystemImpactAvailability|cvss_printer:"high,low,none" }}{{ severity_vector.vector.subsequentSystemImpactConfidentiality|cvss_printer:"high,low,none" }}{{ severity_vector.vector.subsequentSystemImpactIntegrity|cvss_printer:"high,low,none" }}{{ severity_vector.vector.subsequentSystemImpactAvailability|cvss_printer:"high,low,none" }}
+ {% elif severity_vector.vector.version == 'ssvc' %} +
+ Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }} +
+ {% endif %} + {% empty %} + + + There are no known vectors. + + + {% endfor %} +
+ + +
+ {% if epss_data %} +
+ Exploit Prediction Scoring System (EPSS) +
+ + + + + + + + + + + {% if epss_data.published_at %} + + + + + {% endif %} + +
+ + Percentile + + {{ epss_data.percentile }}
+ + EPSS Score + + {{ epss_data.score }}
+ + Published At + + {{ epss_data.published_at }}
+ {% else %} +

No EPSS data available for this advisory.

+ {% endif %} +
+ + +
+
+
+{% endif %} + + + + + +{% endblock %} \ No newline at end of file diff --git a/vulnerabilities/templates/advisory_package_details.html b/vulnerabilities/templates/advisory_package_details.html new file mode 100644 index 000000000..0f4c71044 --- /dev/null +++ b/vulnerabilities/templates/advisory_package_details.html @@ -0,0 +1,88 @@ +{% extends "base.html" %} +{% load humanize %} +{% load widget_tweaks %} +{% load static %} +{% load show_cvss %} +{% load url_filters %} + +{% block title %} +VulnerableCode Advisory Package Details - {{ advisory.advisory_id }} +{% endblock %} + +{% block content %} + +{% if advisory %} +
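+{# Context note: affected_packages, fixed_by_packages and all_affected_fixed_by_matches are built by AdvisoryPackagesDetails.get_context_data via AdvisoryV2.aggregate_fixed_and_affected_packages (both defined elsewhere in this patch). #}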
+
+
+
+ Vulnerable and Fixing Package details for Advisory: + + {{ advisory.advisory_id }} + +
+
+
+ + + + + + + + + {% for package in affected_packages %} + + + + + {% empty %} + + + + {% endfor %} + +
AffectedFixed by
+ {{ package.purl }} + + + {% for match in all_affected_fixed_by_matches %} + {% if match.affected_package == package %} + {% if match.matched_fixed_by_packages|length > 0 %} + {% for pkg in match.matched_fixed_by_packages %} + {{ pkg }} +
+ {% endfor %} + {% else %} + There are no reported fixed by versions. + {% endif %} + {% endif %} + {% endfor %} + +
+ This advisory is not known to affect any packages. +
+
+
+
+{% endif %} + + + + + +{% endblock %} \ No newline at end of file diff --git a/vulnerabilities/templates/package_details_v2.html b/vulnerabilities/templates/package_details_v2.html index 32d54fc37..58312540a 100644 --- a/vulnerabilities/templates/package_details_v2.html +++ b/vulnerabilities/templates/package_details_v2.html @@ -92,7 +92,7 @@ {% if fixed_package_details.next_non_vulnerable.version %} - {{ fixed_package_details.next_non_vulnerable.version }} {% else %} None. @@ -105,27 +105,13 @@ {% if fixed_package_details.latest_non_vulnerable.version %} - {{ fixed_package_details.latest_non_vulnerable.version }} {% else %} None. {% endif %} - - - - Risk - - - - {% if package.risk_score %} - {{ package.risk_score }} - {% endif %} - - @@ -134,15 +120,17 @@
- Advisories affecting this package ({{ affected_by_advisories|length }}) + Vulnerabilities affecting this package ({{ affected_by_advisories|length }})
+ + - + @@ -150,9 +138,13 @@ {% for advisory in affected_by_advisories %} + + @@ -183,17 +181,17 @@ {{ fixed_pkg.fixed_by_purl.version }}
- Affected by 0 other advisories. + Subject of 0 other advisories. {% else %} {{ fixed_pkg.fixed_by_purl.version }} {% if fixed_pkg.fixed_by_purl_advisories|length != 1 %}
- Affected by {{ fixed_pkg.fixed_by_purl_advisories|length }} other + Subject of {{ fixed_pkg.fixed_by_purl_advisories|length }} other advisory. {% else %}
- Affected by {{ fixed_pkg.fixed_by_purl_advisories|length }} other + Subject of {{ fixed_pkg.fixed_by_purl_advisories|length }} other advisory. {% endif %} @@ -207,7 +205,7 @@
{% endfor %} @@ -244,13 +242,15 @@
- Advisories fixed by this package ({{ fixing_advisories|length }}) + Vulnerabilities fixed by this package ({{ fixing_advisories|length }})
AdvisorySourceDate Published SummaryFixed byFixed in package version
- {{advisory.advisory_id }} + + {{advisory.advisory_id }} +
+ {% if advisory.alias|length != 0 %} Aliases: + {% endif %}
{% for alias in advisory.alias %} {% if alias.url %} @@ -165,6 +157,12 @@ {% endif %} {% endfor %}
+ {{advisory.url}} + + {{advisory.date_published}} + {{ advisory.summary }}
- This package is not known to be affected by advisories. + This package is not known to be the subject of any advisories.
+ + @@ -259,7 +259,15 @@ {% for advisory in fixing_advisories %} + + - - - - - + diff --git a/vulnerabilities/templates/package_details_v2.html b/vulnerabilities/templates/package_details_v2.html index 58312540a..54cb8ffed 100644 --- a/vulnerabilities/templates/package_details_v2.html +++ b/vulnerabilities/templates/package_details_v2.html @@ -112,6 +112,14 @@ {% endif %} + + + +
AdvisorySourceDate Published Summary Aliases
- {{ advisory.advisory_id }} + + {{advisory.advisory_id }} + + + {{advisory.url}} + + {{advisory.date_published}} {{ advisory.summary }} diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 21e87172d..946637849 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -281,7 +281,11 @@ def get_context_data(self, **kwargs): for severity in valid_severities: try: - vector_values = SCORING_SYSTEMS[severity.scoring_system].get( + vector_values_system = SCORING_SYSTEMS[severity.scoring_system] + if not vector_values_system: + logging.error(f"Unknown scoring system: {severity.scoring_system}") + continue + vector_values = vector_values_system.get( severity.scoring_elements ) if vector_values: @@ -320,6 +324,114 @@ def get_context_data(self, **kwargs): return context +class AdvisoryDetails(DetailView): + model = models.AdvisoryV2 + template_name = "advisory_detail.html" + slug_url_kwarg = "id" + slug_field = "id" + + def get_queryset(self): + return ( + super() + .get_queryset() + .select_related() + .prefetch_related( + Prefetch( + "references", + queryset=models.AdvisoryReference.objects.only( + "reference_id", "reference_type", "url" + ), + ), + Prefetch( + "aliases", + queryset=models.AdvisoryAlias.objects.only("alias"), + ), + Prefetch( + "weaknesses", + queryset=models.AdvisoryWeakness.objects.only("cwe_id"), + ), + Prefetch( + "severities", + queryset=models.AdvisorySeverity.objects.only( + "scoring_system", "value", "url", "scoring_elements", "published_at" + ), + ), + Prefetch( + "exploits", + queryset=models.AdvisoryExploit.objects.only( + "data_source", "description", "required_action", "due_date", "notes" + ), + ), + ) + ) + + def get_context_data(self, **kwargs): + """ + Build context with preloaded QuerySets and minimize redundant queries. 
+ """ + context = super().get_context_data(**kwargs) + advisory = self.object + + # Pre-fetch and process data in Python instead of the template + weaknesses_present_in_db = [ + weakness_object + for weakness_object in advisory.weaknesses.all() + if weakness_object.weakness + ] + + valid_severities = self.object.severities.exclude(scoring_system=EPSS.identifier).filter( + scoring_elements__isnull=False, scoring_system__in=SCORING_SYSTEMS.keys() + ) + + severity_vectors = [] + + for severity in valid_severities: + try: + vector_values_system = SCORING_SYSTEMS.get(severity.scoring_system) + if not vector_values_system: + logging.error(f"Unknown scoring system: {severity.scoring_system}") + continue + if vector_values_system.identifier in ["cvssv3.1_qr"]: + continue + vector_values = vector_values_system.get( + severity.scoring_elements + ) + if vector_values: + severity_vectors.append({"vector": vector_values, "origin": severity.url}) + logging.error(f"Error processing scoring elements: {severity.scoring_elements}") + except ( + CVSS2MalformedError, + CVSS3MalformedError, + CVSS4MalformedError, + NotImplementedError, + ): + logging.error(f"CVSSMalformedError for {severity.scoring_elements}") + + epss_severity = advisory.severities.filter(scoring_system="epss").first() + epss_data = None + if epss_severity: + epss_data = { + "percentile": epss_severity.scoring_elements, + "score": epss_severity.value, + "published_at": epss_severity.published_at, + } + print(severity_vectors) + context.update( + { + "advisory": advisory, + "severities": list(advisory.severities.all()), + "severity_vectors": severity_vectors, + "references": list(advisory.references.all()), + "aliases": list(advisory.aliases.all()), + "weaknesses": weaknesses_present_in_db, + "status": advisory.get_status_label, + # "history": advisory.history, + "epss_data": epss_data, + } + ) + return context + + class HomePage(View): template_name = "index.html" @@ -454,6 +566,58 @@ def get_context_data(self, **kwargs): return context +class AdvisoryPackagesDetails(DetailView): + """ + View to display all packages affected by or fixing a specific vulnerability. + URL: /advisories/{id}/packages + """ + + model = models.AdvisoryV2 + template_name = "advisory_package_details.html" + slug_url_kwarg = "id" + slug_field = "id" + + def get_queryset(self): + """ + Prefetch and optimize related data to minimize database hits. + """ + return ( + super() + .get_queryset() + .prefetch_related( + Prefetch( + "affecting_packages", + queryset=models.PackageV2.objects.only("type", "namespace", "name", "version"), + ), + Prefetch( + "fixed_by_packages", + queryset=models.PackageV2.objects.only("type", "namespace", "name", "version"), + ), + ) + ) + + def get_context_data(self, **kwargs): + """ + Build context with preloaded QuerySets and minimize redundant queries. 
+ """ + context = super().get_context_data(**kwargs) + advisory = self.object + ( + sorted_fixed_by_packages, + sorted_affected_packages, + all_affected_fixed_by_matches, + ) = advisory.aggregate_fixed_and_affected_packages() + context.update( + { + "affected_packages": sorted_affected_packages, + "fixed_by_packages": sorted_fixed_by_packages, + "all_affected_fixed_by_matches": all_affected_fixed_by_matches, + "advisory": advisory, + } + ) + return context + + class PipelineScheduleListView(ListView, FormMixin): model = PipelineSchedule context_object_name = "schedule_list" diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index 374c8fe1f..6f1c6b586 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -24,7 +24,7 @@ from vulnerabilities.api_v2 import PackageV2ViewSet from vulnerabilities.api_v2 import PipelineScheduleV2ViewSet from vulnerabilities.api_v2 import VulnerabilityV2ViewSet -from vulnerabilities.views import AdminLoginView +from vulnerabilities.views import AdminLoginView, AdvisoryDetails, AdvisoryPackagesDetails from vulnerabilities.views import ApiUserCreateView from vulnerabilities.views import HomePage from vulnerabilities.views import HomePageV2 @@ -95,6 +95,11 @@ def __init__(self, *args, **kwargs): HomePageV2.as_view(), name="home", ), + path( + "advisories/", + AdvisoryDetails.as_view(), + name="advisory_details", + ), path( "packages/search/", PackageSearch.as_view(), @@ -130,6 +135,11 @@ def __init__(self, *args, **kwargs): VulnerabilityPackagesDetails.as_view(), name="vulnerability_package_details", ), + path( + "advisories//packages", + AdvisoryPackagesDetails.as_view(), + name="advisory_package_details", + ), path( "api/", include(api_router.urls), From 2983f7f895cb58388103b6284079b1d35b02b19b Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 6 Jun 2025 15:06:16 +0530 Subject: [PATCH 32/44] Fix risk score pipeline Signed-off-by: Tushar Goel --- vulnerabilities/pipelines/v2_improvers/compute_package_risk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py index c8cf4233d..55608f0d1 100644 --- a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py +++ b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py @@ -107,7 +107,7 @@ def compute_and_store_package_risk_score(self): batch_size = 10000 for package in progress.iter(affected_packages.paginated(per_page=batch_size)): - risk_score = compute_package_risk(package) + risk_score = compute_package_risk_v2(package) if not risk_score: continue From 0b30132bddee3ac4729955ce0381ac28a63ff718 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 6 Jun 2025 15:20:01 +0530 Subject: [PATCH 33/44] Fix tests Signed-off-by: Tushar Goel --- vulnerabilities/models.py | 6 +++--- vulnerabilities/views.py | 8 ++------ vulnerablecode/urls.py | 4 +++- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index e45b6997e..09cd28491 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2637,12 +2637,12 @@ class Meta: def save(self, *args, **kwargs): self.full_clean() return super().save(*args, **kwargs) - + @property def get_status_label(self): label_by_status = {choice[0]: choice[1] for choice in VulnerabilityStatusType.choices} return label_by_status.get(self.status) or VulnerabilityStatusType.PUBLISHED.label - + def get_absolute_url(self): """ Return this 
Vulnerability details absolute URL. @@ -2673,7 +2673,7 @@ def get_aliases(self): Return a queryset of all Aliases for this vulnerability. """ return self.aliases.all() - + def aggregate_fixed_and_affected_packages(self): from vulnerabilities.utils import get_purl_version_class diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index 946637849..71534f9fb 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -285,9 +285,7 @@ def get_context_data(self, **kwargs): if not vector_values_system: logging.error(f"Unknown scoring system: {severity.scoring_system}") continue - vector_values = vector_values_system.get( - severity.scoring_elements - ) + vector_values = vector_values_system.get(severity.scoring_elements) if vector_values: severity_vectors.append({"vector": vector_values, "origin": severity.url}) except ( @@ -393,9 +391,7 @@ def get_context_data(self, **kwargs): continue if vector_values_system.identifier in ["cvssv3.1_qr"]: continue - vector_values = vector_values_system.get( - severity.scoring_elements - ) + vector_values = vector_values_system.get(severity.scoring_elements) if vector_values: severity_vectors.append({"vector": vector_values, "origin": severity.url}) logging.error(f"Error processing scoring elements: {severity.scoring_elements}") diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index 6f1c6b586..55ba4fdd5 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -24,7 +24,9 @@ from vulnerabilities.api_v2 import PackageV2ViewSet from vulnerabilities.api_v2 import PipelineScheduleV2ViewSet from vulnerabilities.api_v2 import VulnerabilityV2ViewSet -from vulnerabilities.views import AdminLoginView, AdvisoryDetails, AdvisoryPackagesDetails +from vulnerabilities.views import AdminLoginView +from vulnerabilities.views import AdvisoryDetails +from vulnerabilities.views import AdvisoryPackagesDetails from vulnerabilities.views import ApiUserCreateView from vulnerabilities.views import HomePage from vulnerabilities.views import HomePageV2 From a731f32c2483c32240a5bdbf9e5b03d9bc219f39 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 26 Jun 2025 18:03:44 +0530 Subject: [PATCH 34/44] Change API design Signed-off-by: Tushar Goel --- vulnerabilities/api_v2.py | 382 ++++++++++++++++++ vulnerabilities/improvers/__init__.py | 2 + ...soryreference_advisoryseverity_and_more.py | 302 ++++++++++---- vulnerabilities/models.py | 145 ++++++- vulnerabilities/pipelines/__init__.py | 1 + .../v2_importers/apache_httpd_importer.py | 2 + .../v2_importers/elixir_security_importer.py | 120 ++++++ .../pipelines/v2_importers/gitlab_importer.py | 14 +- .../pipelines/v2_importers/npm_importer.py | 8 +- .../v2_importers/postgresql_importer.py | 155 +++++++ .../pipelines/v2_importers/pypa_importer.py | 2 +- .../pipelines/v2_importers/pysec_importer.py | 2 +- .../pipelines/v2_importers/ruby_importer.py | 159 ++++++++ .../v2_importers/vulnrichment_importer.py | 1 + .../pipelines/v2_improvers/collect_commits.py | 252 ++++++++++++ vulnerabilities/pipes/advisory.py | 13 +- .../templates/advisory_detail.html | 14 +- .../templates/package_details_v2.html | 12 +- vulnerablecode/urls.py | 4 + 19 files changed, 1457 insertions(+), 133 deletions(-) create mode 100644 vulnerabilities/pipelines/v2_importers/elixir_security_importer.py create mode 100644 vulnerabilities/pipelines/v2_importers/postgresql_importer.py create mode 100644 vulnerabilities/pipelines/v2_importers/ruby_importer.py create mode 100644 vulnerabilities/pipelines/v2_improvers/collect_commits.py 
diff --git a/vulnerabilities/api_v2.py b/vulnerabilities/api_v2.py index 4c2562216..4915dda63 100644 --- a/vulnerabilities/api_v2.py +++ b/vulnerabilities/api_v2.py @@ -24,8 +24,14 @@ from rest_framework.response import Response from rest_framework.reverse import reverse +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisorySeverity +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import AdvisoryWeakness from vulnerabilities.models import CodeFix +from vulnerabilities.models import CodeFixV2 from vulnerabilities.models import Package +from vulnerabilities.models import PackageV2 from vulnerabilities.models import PipelineRun from vulnerabilities.models import PipelineSchedule from vulnerabilities.models import Vulnerability @@ -44,6 +50,16 @@ class Meta: fields = ["cwe_id", "name", "description"] +class AdvisoryWeaknessSerializer(serializers.ModelSerializer): + cwe_id = serializers.CharField() + name = serializers.CharField() + description = serializers.CharField() + + class Meta: + model = AdvisoryWeakness + fields = ["cwe_id", "name", "description"] + + class VulnerabilityReferenceV2Serializer(serializers.ModelSerializer): url = serializers.CharField() reference_type = serializers.CharField() @@ -54,6 +70,29 @@ class Meta: fields = ["url", "reference_type", "reference_id"] +class AdvisoryReferenceSerializer(serializers.ModelSerializer): + url = serializers.CharField() + reference_type = serializers.CharField() + reference_id = serializers.CharField() + + class Meta: + model = AdvisoryReference + fields = ["url", "reference_type", "reference_id"] + + +class AdvisorySeveritySerializer(serializers.ModelSerializer): + class Meta: + model = AdvisorySeverity + fields = ["url", "value", "scoring_system", "scoring_elements", "published_at"] + + def to_representation(self, instance): + data = super().to_representation(instance) + published_at = data.get("published_at", None) + if not published_at: + data.pop("published_at") + return data + + class VulnerabilitySeverityV2Serializer(serializers.ModelSerializer): class Meta: model = VulnerabilitySeverity @@ -94,6 +133,32 @@ def get_aliases(self, obj): return [alias.alias for alias in obj.aliases.all()] +class AdvisoryV2Serializer(serializers.ModelSerializer): + aliases = serializers.SerializerMethodField() + weaknesses = AdvisoryWeaknessSerializer(many=True) + references = AdvisoryReferenceSerializer(many=True) + severities = AdvisorySeveritySerializer(many=True) + advisory_id = serializers.CharField(source="avid", read_only=True) + + class Meta: + model = AdvisoryV2 + fields = [ + "advisory_id", + "url", + "aliases", + "summary", + "severities", + "weaknesses", + "references", + "exploitability", + "weighted_severity", + "risk_score", + ] + + def get_aliases(self, obj): + return [alias.alias for alias in obj.aliases.all()] + + class VulnerabilityListSerializer(serializers.ModelSerializer): url = serializers.SerializerMethodField() @@ -233,6 +298,57 @@ def get_fixing_vulnerabilities(self, obj): return [vuln.vulnerability_id for vuln in obj.fixing_vulnerabilities.all()] +class AdvisoryPackageV2Serializer(serializers.ModelSerializer): + purl = serializers.CharField(source="package_url") + risk_score = serializers.FloatField(read_only=True) + affected_by_vulnerabilities = serializers.SerializerMethodField() + fixing_vulnerabilities = serializers.SerializerMethodField() + next_non_vulnerable_version = serializers.CharField(read_only=True) + latest_non_vulnerable_version = 
serializers.CharField(read_only=True) + + class Meta: + model = Package + fields = [ + "purl", + "affected_by_vulnerabilities", + "fixing_vulnerabilities", + "next_non_vulnerable_version", + "latest_non_vulnerable_version", + "risk_score", + ] + + def get_affected_by_vulnerabilities(self, obj): + """ + Return a dictionary with vulnerabilities as keys and their details, including fixed_by_packages. + """ + result = {} + request = self.context.get("request") + for adv in getattr(obj, "prefetched_affected_advisories", []): + fixed_by_package = adv.fixed_by_packages.first() + purl = None + if fixed_by_package: + purl = fixed_by_package.package_url + # Get code fixed for a vulnerability + code_fixes = CodeFixV2.objects.filter(advisory=adv).distinct() + code_fix_urls = [ + reverse("codefix-detail", args=[code_fix.id], request=request) + for code_fix in code_fixes + ] + + result[adv.avid] = { + "advisory_id": adv.avid, + "fixed_by_packages": purl, + "code_fixes": code_fix_urls, + } + return result + + def get_fixing_vulnerabilities(self, obj): + # Ghost package should not fix any vulnerability. + if obj.is_ghost: + return [] + return [adv.advisory_id for adv in obj.fixing_advisories.all()] + + class PackageurlListSerializer(serializers.Serializer): purls = serializers.ListField( child=serializers.CharField(), @@ -261,6 +377,12 @@ class PackageV2FilterSet(filters.FilterSet): purl = filters.CharFilter(field_name="package_url") +class AdvisoryPackageV2FilterSet(filters.FilterSet): + affected_by_vulnerability = filters.CharFilter(field_name="affected_by_advisory__advisory_id") + fixing_vulnerability = filters.CharFilter(field_name="fixing_advisories__advisory_id") + purl = filters.CharFilter(field_name="package_url") + + class PackageV2ViewSet(viewsets.ReadOnlyModelViewSet): queryset = Package.objects.all().prefetch_related( Prefetch( @@ -754,3 +876,263 @@ def get_permissions(self): if self.action not in ["list", "retrieve"]: return [IsAdminWithSessionAuth()] return super().get_permissions() + + +class AdvisoriesPackageV2ViewSet(viewsets.ReadOnlyModelViewSet): + queryset = PackageV2.objects.all().prefetch_related( + Prefetch( + "affected_by_advisories", + queryset=AdvisoryV2.objects.prefetch_related("fixed_by_packages"), + to_attr="prefetched_affected_advisories", + ) + ) + serializer_class = AdvisoryPackageV2Serializer + filter_backends = (filters.DjangoFilterBackend,) + filterset_class = AdvisoryPackageV2FilterSet + + def get_queryset(self): + queryset = super().get_queryset() + package_purls = self.request.query_params.getlist("purl") + affected_by_advisory = self.request.query_params.get("affected_by_advisory") + fixing_advisory = self.request.query_params.get("fixing_advisory") + if package_purls: + queryset = queryset.filter(package_url__in=package_purls) + if affected_by_advisory: + queryset = queryset.filter(affected_by_advisories__advisory_id=affected_by_advisory) + if fixing_advisory: + queryset = queryset.filter(fixing_advisories__advisory=fixing_advisory) + return queryset.with_is_vulnerable() + + def list(self, request, *args, **kwargs): + queryset = self.get_queryset() + # Apply pagination + page = self.paginate_queryset(queryset) + if page is not None: + # Collect only vulnerabilities for packages in the current page + advisories = set() + for package in page: + advisories.update(package.affected_by_advisories.all()) + advisories.update(package.fixing_advisories.all()) + + # Serialize the vulnerabilities with advisory_id and advisory label as keys + advisory_data = {f"{adv.avid}": 
AdvisoryV2Serializer(adv).data for adv in advisories} + + # Serialize the current page of packages + serializer = self.get_serializer(page, many=True) + data = serializer.data + print(data) + # Use 'self.get_paginated_response' to include pagination data + return self.get_paginated_response({"advisories": advisory_data, "packages": data}) + + # If pagination is not applied, collect vulnerabilities for all packages + advisories = set() + for package in queryset: + advisories.update(package.affected_by_vulnerabilities.all()) + advisories.update(package.fixing_vulnerabilities.all()) + + advisory_data = {f"{adv.avid}": AdvisoryV2Serializer(adv).data for adv in advisories} + + serializer = self.get_serializer(queryset, many=True) + data = serializer.data + return Response({"advisories": advisory_data, "packages": data}) + + @extend_schema( + request=PackageurlListSerializer, + responses={200: PackageV2Serializer(many=True)}, + ) + @action( + detail=False, + methods=["post"], + serializer_class=PackageurlListSerializer, + filter_backends=[], + pagination_class=None, + ) + def bulk_lookup(self, request): + """ + Return the response for exact PackageURLs requested for. + """ + serializer = self.serializer_class(data=request.data) + if not serializer.is_valid(): + return Response( + status=status.HTTP_400_BAD_REQUEST, + data={ + "error": serializer.errors, + "message": "A non-empty 'purls' list of PURLs is required.", + }, + ) + validated_data = serializer.validated_data + purls = validated_data.get("purls") + + # Fetch packages matching the provided purls + packages = PackageV2.objects.for_purls(purls).with_is_vulnerable() + + # Collect vulnerabilities associated with these packages + advisories = set() + for package in packages: + advisories.update(package.affected_by_advisories.all()) + advisories.update(package.fixing_advisories.all()) + + # Serialize vulnerabilities with vulnerability_id as keys + advisory_data = {adv.avid: AdvisoryV2Serializer(adv).data for adv in advisories} + + # Serialize packages + package_data = AdvisoryPackageV2Serializer( + packages, + many=True, + context={"request": request}, + ).data + + return Response( + { + "advisories": advisory_data, + "packages": package_data, + } + ) + + @extend_schema( + request=PackageBulkSearchRequestSerializer, + responses={200: PackageV2Serializer(many=True)}, + ) + @action( + detail=False, + methods=["post"], + serializer_class=PackageBulkSearchRequestSerializer, + filter_backends=[], + pagination_class=None, + ) + def bulk_search(self, request): + """ + Lookup for vulnerable packages using many Package URLs at once. 
+ """ + serializer = self.serializer_class(data=request.data) + if not serializer.is_valid(): + return Response( + status=status.HTTP_400_BAD_REQUEST, + data={ + "error": serializer.errors, + "message": "A non-empty 'purls' list of PURLs is required.", + }, + ) + validated_data = serializer.validated_data + purls = validated_data.get("purls") + purl_only = validated_data.get("purl_only", False) + plain_purl = validated_data.get("plain_purl", False) + + if plain_purl: + purl_objects = [PackageURL.from_string(purl) for purl in purls] + plain_purl_objects = [ + PackageURL( + type=purl.type, + namespace=purl.namespace, + name=purl.name, + version=purl.version, + ) + for purl in purl_objects + ] + plain_purls = [str(purl) for purl in plain_purl_objects] + + query = ( + PackageV2.objects.filter(plain_package_url__in=plain_purls) + .order_by("plain_package_url") + .distinct("plain_package_url") + .with_is_vulnerable() + ) + + packages = query + + # Collect vulnerabilities associated with these packages + advisories = set() + for package in packages: + advisories.update(package.affected_by_vulnerabilities.all()) + advisories.update(package.fixing_vulnerabilities.all()) + + advisory_data = {adv.avid: VulnerabilityV2Serializer(adv).data for adv in advisories} + + if not purl_only: + package_data = AdvisoryPackageV2Serializer( + packages, many=True, context={"request": request} + ).data + return Response( + { + "advisories": advisory_data, + "packages": package_data, + } + ) + + # Using order by and distinct because there will be + # many fully qualified purl for a single plain purl + vulnerable_purls = query.vulnerable().only("plain_package_url") + vulnerable_purls = [str(package.plain_package_url) for package in vulnerable_purls] + return Response(data=vulnerable_purls) + + query = PackageV2.objects.filter(package_url__in=purls).distinct().with_is_vulnerable() + packages = query + + # Collect vulnerabilities associated with these packages + advisories = set() + for package in packages: + advisories.update(package.affected_by_vulnerabilities.all()) + advisories.update(package.fixing_vulnerabilities.all()) + + advisory_data = {adv.advisory_id: AdvisoryV2Serializer(adv).data for adv in advisories} + + if not purl_only: + package_data = AdvisoryPackageV2Serializer( + packages, many=True, context={"request": request} + ).data + return Response( + { + "advisories": advisory_data, + "packages": package_data, + } + ) + + vulnerable_purls = query.vulnerable().only("package_url") + vulnerable_purls = [str(package.package_url) for package in vulnerable_purls] + return Response(data=vulnerable_purls) + + @action(detail=False, methods=["get"]) + def all(self, request): + """ + Return a list of Package URLs of vulnerable packages. + """ + vulnerable_purls = ( + PackageV2.objects.vulnerable() + .only("package_url") + .order_by("package_url") + .distinct() + .values_list("package_url", flat=True) + ) + return Response(vulnerable_purls) + + @extend_schema( + request=LookupRequestSerializer, + responses={200: PackageV2Serializer(many=True)}, + ) + @action( + detail=False, + methods=["post"], + serializer_class=LookupRequestSerializer, + filter_backends=[], + pagination_class=None, + ) + def lookup(self, request): + """ + Return the response for exact PackageURL requested for. 
+ """ + serializer = self.serializer_class(data=request.data) + if not serializer.is_valid(): + return Response( + status=status.HTTP_400_BAD_REQUEST, + data={ + "error": serializer.errors, + "message": "A 'purl' is required.", + }, + ) + validated_data = serializer.validated_data + purl = validated_data.get("purl") + + qs = self.get_queryset().for_purls([purl]).with_is_vulnerable() + return Response( + AdvisoryPackageV2Serializer(qs, many=True, context={"request": request}).data + ) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 520ef36c3..11b74ac44 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -21,6 +21,7 @@ from vulnerabilities.pipelines import flag_ghost_packages from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline from vulnerabilities.pipelines import remove_duplicate_advisories +from vulnerabilities.pipelines.v2_improvers import collect_commits as collect_commits_v2 from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2 from vulnerabilities.pipelines.v2_improvers import ( computer_package_version_rank as compute_version_rank_v2, @@ -66,6 +67,7 @@ enhance_with_metasploit_v2.MetasploitImproverPipeline, compute_package_risk_v2.ComputePackageRiskPipeline, compute_version_rank_v2.ComputeVersionRankPipeline, + collect_commits_v2.CollectFixCommitsPipeline, ] IMPROVERS_REGISTRY = { diff --git a/vulnerabilities/migrations/0093_advisoryalias_advisoryreference_advisoryseverity_and_more.py b/vulnerabilities/migrations/0093_advisoryalias_advisoryreference_advisoryseverity_and_more.py index 74de631fd..b83f69040 100644 --- a/vulnerabilities/migrations/0093_advisoryalias_advisoryreference_advisoryseverity_and_more.py +++ b/vulnerabilities/migrations/0093_advisoryalias_advisoryreference_advisoryseverity_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.20 on 2025-05-28 13:31 +# Generated by Django 4.2.20 on 2025-06-20 07:50 from django.db import migrations, models import django.db.models.deletion @@ -141,6 +141,95 @@ class Migration(migrations.Migration): "ordering": ["url", "scoring_system", "value"], }, ), + migrations.CreateModel( + name="AdvisoryV2", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "datasource_id", + models.CharField( + help_text="Unique ID for the datasource used for this advisory .e.g.: nginx_importer_v2", + max_length=100, + ), + ), + ( + "avid", + models.CharField( + help_text="Unique ID for the datasource used for this advisory .e.g.: pysec_importer_v2/PYSEC-2020-2233", + max_length=500, + ), + ), + ( + "advisory_id", + models.CharField( + help_text="An advisory is a unique vulnerability identifier in some database, such as PYSEC-2020-2233", + max_length=50, + ), + ), + ( + "unique_content_id", + models.CharField( + help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex", + max_length=64, + unique=True, + ), + ), + ("url", models.URLField(help_text="Link to the advisory on the upstream website")), + ("summary", models.TextField(blank=True)), + ( + "date_published", + models.DateTimeField( + blank=True, help_text="UTC Date of publication of the advisory", null=True + ), + ), + ( + "date_collected", + models.DateTimeField(help_text="UTC Date on which the advisory was collected"), + ), + ( + "date_imported", + models.DateTimeField( + blank=True, + help_text="UTC Date on which 
the advisory was imported", + null=True, + ), + ), + ( + "status", + models.IntegerField( + choices=[(1, "Published"), (2, "Disputed"), (3, "Invalid")], default=1 + ), + ), + ( + "exploitability", + models.DecimalField( + blank=True, + decimal_places=1, + help_text="Exploitability indicates the likelihood that a vulnerability in a software package could be used by malicious actors to compromise systems, applications, or networks. This metric is determined automatically based on the discovery of known exploits.", + max_digits=2, + null=True, + ), + ), + ( + "weighted_severity", + models.DecimalField( + blank=True, + decimal_places=1, + help_text="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10.", + max_digits=3, + null=True, + ), + ), + ], + options={ + "ordering": ["datasource_id", "advisory_id", "date_published", "unique_content_id"], + }, + ), migrations.CreateModel( name="AdvisoryWeakness", fields=[ @@ -250,18 +339,8 @@ class Migration(migrations.Migration): "abstract": False, }, ), - migrations.AlterField( - model_name="pipelineschedule", - name="is_active", - field=models.BooleanField( - db_index=True, - default=True, - help_text="When set to True, this Pipeline is active. When set to False, this Pipeline is inactive and not run.", - null=True, - ), - ), migrations.CreateModel( - name="AdvisoryV2", + name="CodeFixV2", fields=[ ( "id", @@ -270,126 +349,171 @@ class Migration(migrations.Migration): ), ), ( - "advisory_id", - models.CharField( - help_text="An advisory is a unique vulnerability identifier in some database, such as PYSEC-2020-2233", - max_length=50, + "commits", + models.JSONField( + blank=True, + default=list, + help_text="List of commit identifiers using VCS URLs associated with the code change.", ), ), ( - "unique_content_id", - models.CharField( - help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex", - max_length=64, - unique=True, + "pulls", + models.JSONField( + blank=True, + default=list, + help_text="List of pull request URLs associated with the code change.", ), ), - ("summary", models.TextField(blank=True)), ( - "date_published", - models.DateTimeField( - blank=True, help_text="UTC Date of publication of the advisory", null=True + "downloads", + models.JSONField( + blank=True, + default=list, + help_text="List of download URLs for the patched code.", ), ), ( - "date_collected", - models.DateTimeField(help_text="UTC Date on which the advisory was collected"), - ), - ( - "date_imported", - models.DateTimeField( + "patch", + models.TextField( blank=True, - help_text="UTC Date on which the advisory was imported", + help_text="The code change as a patch in unified diff format.", null=True, ), ), ( - "datasource_ID", - models.CharField( - help_text="Fully qualified name of the importer prefixed with themodule name importing the advisory. 
Eg:nginx_importer_v2", - max_length=100, - ), - ), - ("url", models.URLField(help_text="Link to the advisory on the upstream website")), - ( - "status", - models.IntegerField( - choices=[(1, "Published"), (2, "Disputed"), (3, "Invalid")], default=1 + "notes", + models.TextField( + blank=True, + help_text="Notes or instructions about this code change.", + null=True, ), ), ( - "exploitability", - models.DecimalField( + "references", + models.JSONField( blank=True, - decimal_places=1, - help_text="Exploitability indicates the likelihood that a vulnerability in a software package could be used by malicious actors to compromise systems, applications, or networks. This metric is determined automatically based on the discovery of known exploits.", - max_digits=2, - null=True, + default=list, + help_text="URL references related to this code change.", ), ), ( - "weighted_severity", - models.DecimalField( - blank=True, - decimal_places=1, - help_text="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10.", - max_digits=3, - null=True, + "is_reviewed", + models.BooleanField( + default=False, help_text="Indicates if this code change has been reviewed." ), ), ( - "affecting_packages", - models.ManyToManyField( - help_text="A list of packages that are affected by this advisory.", - related_name="affected_by_advisories", - to="vulnerabilities.packagev2", + "created_at", + models.DateTimeField( + auto_now_add=True, + help_text="Timestamp indicating when this code change was created.", ), ), ( - "aliases", - models.ManyToManyField( - help_text="A list of serializable Alias objects", - related_name="advisories", - to="vulnerabilities.advisoryalias", + "updated_at", + models.DateTimeField( + auto_now=True, + help_text="Timestamp indicating when this code change was last updated.", ), ), ( - "fixed_by_packages", - models.ManyToManyField( - help_text="A list of packages that are reported by this advisory.", - related_name="fixing_advisories", - to="vulnerabilities.packagev2", + "advisory", + models.ForeignKey( + help_text="The affected package version to which this code fix applies.", + on_delete=django.db.models.deletion.CASCADE, + related_name="code_fix_v2", + to="vulnerabilities.advisoryv2", ), ), ( - "references", - models.ManyToManyField( - help_text="A list of serializable Reference objects", - related_name="advisories", - to="vulnerabilities.advisoryreference", + "affected_package", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="code_fix_v2_affected", + to="vulnerabilities.packagev2", ), ), ( - "severities", - models.ManyToManyField( - help_text="A list of vulnerability severities associated with this advisory.", - related_name="advisories", - to="vulnerabilities.advisoryseverity", + "base_package_version", + models.ForeignKey( + blank=True, + help_text="The base package version to which this code change applies.", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="codechanges_v2", + to="vulnerabilities.packagev2", ), ), ( - "weaknesses", - models.ManyToManyField( - help_text="A list of software weaknesses associated with this advisory.", - related_name="advisories", - to="vulnerabilities.advisoryweakness", + "fixed_package", + models.ForeignKey( + blank=True, + help_text="The fixing package version with this code fix", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="code_fix_v2_fixed", + to="vulnerabilities.packagev2", ), ), ], options={ - 
"ordering": ["date_published", "unique_content_id"], + "abstract": False, }, ), + migrations.AddField( + model_name="advisoryv2", + name="affecting_packages", + field=models.ManyToManyField( + help_text="A list of packages that are affected by this advisory.", + related_name="affected_by_advisories", + to="vulnerabilities.packagev2", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="aliases", + field=models.ManyToManyField( + help_text="A list of serializable Alias objects", + related_name="advisories", + to="vulnerabilities.advisoryalias", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="fixed_by_packages", + field=models.ManyToManyField( + help_text="A list of packages that are reported by this advisory.", + related_name="fixing_advisories", + to="vulnerabilities.packagev2", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="references", + field=models.ManyToManyField( + help_text="A list of serializable Reference objects", + related_name="advisories", + to="vulnerabilities.advisoryreference", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="severities", + field=models.ManyToManyField( + help_text="A list of vulnerability severities associated with this advisory.", + related_name="advisories", + to="vulnerabilities.advisoryseverity", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="weaknesses", + field=models.ManyToManyField( + help_text="A list of software weaknesses associated with this advisory.", + related_name="advisories", + to="vulnerabilities.advisoryweakness", + ), + ), migrations.CreateModel( name="AdvisoryExploit", fields=[ @@ -504,4 +628,8 @@ class Migration(migrations.Migration): ), ], ), + migrations.AlterUniqueTogether( + name="advisoryv2", + unique_together={("datasource_id", "advisory_id", "unique_content_id")}, + ), ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 09cd28491..35c9f6de4 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1323,7 +1323,7 @@ def url(self): return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json" -class AdvisoryQuerySet(BaseQuerySet): +class AdvisoryV2QuerySet(BaseQuerySet): def search(query): """ This function will take a string as an input, the string could be an alias or an advisory ID or @@ -1337,6 +1337,20 @@ def search(query): ).distinct() +class AdvisoryQuerySet(BaseQuerySet): + def search(query): + """ + This function will take a string as an input, the string could be an alias or an advisory ID or + something in the advisory description. + """ + return Advisory.objects.filter( + Q(advisory_id__icontains=query) + | Q(aliases__alias__icontains=query) + | Q(summary__icontains=query) + | Q(references__url__icontains=query) + ).distinct() + + # FIXME: Remove when migration from Vulnerability to Advisory is completed class Advisory(models.Model): """ @@ -1820,6 +1834,60 @@ class Meta: abstract = True +class CodeChangeV2(models.Model): + """ + Abstract base model representing a change in code, either introducing or fixing a vulnerability. + This includes details about commits, patches, and related metadata. + + We are tracking commits, pulls and downloads as references to the code change. The goal is to + keep track and store the actual code patch in the ``patch`` field. When not available the patch + will be inferred from these references using improvers. 
+ """ + + commits = models.JSONField( + blank=True, + default=list, + help_text="List of commit identifiers using VCS URLs associated with the code change.", + ) + pulls = models.JSONField( + blank=True, + default=list, + help_text="List of pull request URLs associated with the code change.", + ) + downloads = models.JSONField( + blank=True, default=list, help_text="List of download URLs for the patched code." + ) + patch = models.TextField( + blank=True, null=True, help_text="The code change as a patch in unified diff format." + ) + base_package_version = models.ForeignKey( + "PackageV2", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="codechanges_v2", + help_text="The base package version to which this code change applies.", + ) + notes = models.TextField( + blank=True, null=True, help_text="Notes or instructions about this code change." + ) + references = models.JSONField( + blank=True, default=list, help_text="URL references related to this code change." + ) + is_reviewed = models.BooleanField( + default=False, help_text="Indicates if this code change has been reviewed." + ) + created_at = models.DateTimeField( + auto_now_add=True, help_text="Timestamp indicating when this code change was created." + ) + updated_at = models.DateTimeField( + auto_now=True, help_text="Timestamp indicating when this code change was last updated." + ) + + class Meta: + abstract = True + + class CodeFix(CodeChange): """ A code fix is a code change that addresses a vulnerability and is associated: @@ -1844,6 +1912,35 @@ class CodeFix(CodeChange): ) +class CodeFixV2(CodeChangeV2): + """ + A code fix is a code change that addresses a vulnerability and is associated: + - with a specific advisory + - package that has been affected + - optionally with a specific fixing package version when it is known + """ + + advisory = models.ForeignKey( + "AdvisoryV2", + on_delete=models.CASCADE, + related_name="code_fix_v2", + help_text="The affected package version to which this code fix applies.", + ) + + affected_package = models.ForeignKey( + "PackageV2", on_delete=models.CASCADE, related_name="code_fix_v2_affected" + ) + + fixed_package = models.ForeignKey( + "PackageV2", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="code_fix_v2_fixed", + help_text="The fixing package version with this code fix", + ) + + class PipelineRun(models.Model): """The Database representation of a pipeline execution.""" @@ -2525,6 +2622,23 @@ class AdvisoryV2(models.Model): into structured data """ + # This is similar to a type or a namespace + datasource_id = models.CharField( + max_length=100, + blank=False, + null=False, + help_text="Unique ID for the datasource used for this advisory ." "e.g.: nginx_importer_v2", + ) + + avid = models.CharField( + max_length=500, + blank=False, + null=False, + help_text="Unique ID for the datasource used for this advisory ." 
+ "e.g.: pysec_importer_v2/PYSEC-2020-2233", + ) + + # This is similar to a name advisory_id = models.CharField( max_length=50, blank=False, @@ -2534,6 +2648,7 @@ class AdvisoryV2(models.Model): "such as PYSEC-2020-2233", ) + # This is similar to a version unique_content_id = models.CharField( max_length=64, blank=False, @@ -2541,6 +2656,19 @@ class AdvisoryV2(models.Model): unique=True, help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex", ) + url = models.URLField( + blank=False, + null=False, + help_text="Link to the advisory on the upstream website", + ) + + # TODO: Have a mapping that gives datasource class by datasource ID + # Get label from datasource class + # Remove this from model + # In the UI - Use label + # In the API - Use datasource_id + # Have an API endpoint for all info for datasources - show license, label + summary = models.TextField( blank=True, ) @@ -2571,18 +2699,6 @@ class AdvisoryV2(models.Model): date_imported = models.DateTimeField( blank=True, null=True, help_text="UTC Date on which the advisory was imported" ) - # TODO: Rename to datasource ID - datasource_ID = models.CharField( - max_length=100, - help_text="Fully qualified name of the importer prefixed with the" - "module name importing the advisory. Eg:" - "nginx_importer_v2", - ) - url = models.URLField( - blank=False, - null=False, - help_text="Link to the advisory on the upstream website", - ) affecting_packages = models.ManyToManyField( "PackageV2", @@ -2632,7 +2748,8 @@ def risk_score(self): objects = AdvisoryQuerySet.as_manager() class Meta: - ordering = ["date_published", "unique_content_id"] + unique_together = ["datasource_id", "advisory_id", "unique_content_id"] + ordering = ["datasource_id", "advisory_id", "date_published", "unique_content_id"] def save(self, *args, **kwargs): self.full_clean() diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 89d942f21..4d472a650 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -273,6 +273,7 @@ class VulnerableCodeBaseImporterPipelineV2(VulnerableCodePipeline): pipeline_id = None # Unique Pipeline ID, this should be the name of pipeline module. license_url = None + label = None spdx_license_expression = None repo_url = None importer_name = None diff --git a/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py index 5c3296a81..f6d1f0b9a 100644 --- a/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py +++ b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py @@ -140,10 +140,12 @@ def get_weaknesses(cve_data): class ApacheHTTPDImporterPipeline(VulnerableCodeBaseImporterPipelineV2): pipeline_id = "apache_httpd_importer_v2" + label = "Apache-Httpd" spdx_license_expression = "Apache-2.0" license_url = "https://www.apache.org/licenses/LICENSE-2.0" importer_name = "Apache HTTPD Importer" base_url = "https://httpd.apache.org/security/json/" + unfurl_version_ranges = True links = [] diff --git a/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py b/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py new file mode 100644 index 000000000..15f5ea9f1 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py @@ -0,0 +1,120 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. 
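# Illustrative sketch, not part of the patch: how the new AdvisoryV2 identifiers relate,
# based on the model fields above and on insert_advisory_v2() later in this series.
# The pipeline_id, advisory_id and content below are hypothetical, and the exact recipe
# for hashing advisory content into unique_content_id is an assumption.
import hashlib

pipeline_id = "pysec_importer_v2"        # datasource_id, acts like a namespace
advisory_id = "PYSEC-2020-2233"          # the per-datasource advisory name
avid = f"{pipeline_id}/{advisory_id}"    # "pysec_importer_v2/PYSEC-2020-2233"
unique_content_id = hashlib.sha256(b"<canonical advisory content>").hexdigest()  # 64 hex chars
# Rows are unique on (datasource_id, advisory_id, unique_content_id); changed advisory
# content hashes to a new unique_content_id.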
+# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from pathlib import Path +from typing import Iterable + +from dateutil import parser as dateparser +from fetchcode.vcs import fetch_via_vcs +from packageurl import PackageURL +from univers.version_constraint import VersionConstraint +from univers.version_range import HexVersionRange + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.utils import is_cve +from vulnerabilities.utils import load_yaml + + +class ElixirSecurityImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + + pipeline_id = "elixir_security_importer_v2" + label = "Elixir Security" + repo_url = "git+https://github.com/dependabot/elixir-security-advisories" + license_url = "https://github.com/dependabot/elixir-security-advisories/blob/master/LICENSE.txt" + spdx_license_expression = "CC0-1.0" + importer_name = "Elixir Security Importer" + + @classmethod + def steps(cls): + return (cls.collect_and_store_advisories,) + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self) -> int: + base_path = Path(self.vcs_response.dest_dir) + count = len(list((base_path / "packages").glob("**/*.yml"))) + return count + + def collect_advisories(self) -> Iterable[AdvisoryData]: + try: + base_path = Path(self.vcs_response.dest_dir) + vuln = base_path / "packages" + for file in vuln.glob("**/*.yml"): + yield from self.process_file(file, base_path) + finally: + if self.vcs_response: + self.vcs_response.delete() + + def process_file(self, file, base_path) -> Iterable[AdvisoryData]: + relative_path = str(file.relative_to(base_path)).strip("/") + advisory_url = ( + f"https://github.com/dependabot/elixir-security-advisories/blob/master/{relative_path}" + ) + yaml_file = load_yaml(str(file)) + + summary = yaml_file.get("description") or "" + pkg_name = yaml_file.get("package") or "" + + cve_id = "" + cve = yaml_file.get("cve") or "" + if cve and not cve.startswith("CVE-"): + cve_id = f"CVE-{cve}" + elif cve: + cve_id = cve + + if not cve_id or not is_cve(cve_id): + return + + references = [] + link = yaml_file.get("link") or "" + if link: + references.append(Reference(url=link)) + + constraints = [] + vrc = HexVersionRange.version_class + unaffected_versions = yaml_file.get("unaffected_versions") or [] + patched_versions = yaml_file.get("patched_versions") or [] + + for version in unaffected_versions: + constraints.append(VersionConstraint.from_string(version_class=vrc, string=version)) + + for version in patched_versions: + if version.startswith("~>"): + version = version[2:] + constraints.append( + VersionConstraint.from_string(version_class=vrc, string=version).invert() + ) + + affected_packages = [] + if pkg_name: + affected_packages.append( + AffectedPackage( + package=PackageURL(type="hex", name=pkg_name), + affected_version_range=HexVersionRange(constraints=constraints), + ) + ) + + date_published = None + if yaml_file.get("disclosure_date"): + date_published = dateparser.parse(yaml_file.get("disclosure_date")) + + yield AdvisoryData( + advisory_id=cve_id, + aliases=[], + summary=summary, + 
references_v2=references, + affected_packages=affected_packages, + url=advisory_url, + date_published=date_published, + ) diff --git a/vulnerabilities/pipelines/v2_importers/gitlab_importer.py b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py index 0001580cb..c8f005e65 100644 --- a/vulnerabilities/pipelines/v2_importers/gitlab_importer.py +++ b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py @@ -59,9 +59,9 @@ def steps(cls): # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742 # "go": "golang", # "maven": "maven", - "npm": "npm", + # "npm": "npm", # "nuget": "nuget", - # "packagist": "composer", + "packagist": "composer", # "pypi": "pypi", } @@ -88,10 +88,10 @@ def collect_advisories(self) -> Iterable[AdvisoryData]: ) if gitlab_type not in self.purl_type_by_gitlab_scheme: - # self.log( - # f"Unknown package type {gitlab_type!r} in {file_path!r}", - # level=logging.ERROR, - # ) + self.log( + f"Unknown package type {gitlab_type!r} in {file_path!r}", + level=logging.ERROR, + ) continue yield parse_gitlab_advisory( @@ -229,6 +229,8 @@ def parse_gitlab_advisory( urls = gitlab_advisory.get("urls") references = [Reference.from_url(u) for u in urls] + print(references) + cwe_ids = gitlab_advisory.get("cwe_ids") or [] cwe_list = list(map(get_cwe_id, cwe_ids)) diff --git a/vulnerabilities/pipelines/v2_importers/npm_importer.py b/vulnerabilities/pipelines/v2_importers/npm_importer.py index 098895e75..2a22034d2 100644 --- a/vulnerabilities/pipelines/v2_importers/npm_importer.py +++ b/vulnerabilities/pipelines/v2_importers/npm_importer.py @@ -32,12 +32,11 @@ class NpmImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """Collect advisories from nodejs GitHub repository.""" - pipeline_id = "npm_importer_v2" - + pipeline_id = "nodejs_security_wg" spdx_license_expression = "MIT" license_url = "https://github.com/nodejs/security-wg/blob/main/LICENSE.md" repo_url = "git+https://github.com/nodejs/security-wg" - importer_name = "Npm Importer" + importer_name = "npm Importer" unfurl_version_ranges = True @@ -84,6 +83,7 @@ def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]: VulnerabilitySeverity( system=CVSSV3, value=cvss_score, + url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json", ) ) if cvss_vector and cvss_vector.startswith("CVSS:2.0/"): @@ -91,6 +91,7 @@ def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]: VulnerabilitySeverity( system=CVSSV2, value=cvss_score, + url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json", ) ) if not id: @@ -106,7 +107,6 @@ def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]: references.append( Reference( url=ref, - severities=severities, ) ) diff --git a/vulnerabilities/pipelines/v2_importers/postgresql_importer.py b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py new file mode 100644 index 000000000..e41dfdde4 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py @@ -0,0 +1,155 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
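# Illustrative sketch, not part of the patch: how the Elixir importer above combines
# "unaffected_versions" and "patched_versions" into one HexVersionRange. The version
# strings are hypothetical examples.
from univers.version_constraint import VersionConstraint
from univers.version_range import HexVersionRange

vrc = HexVersionRange.version_class
constraints = [
    # unaffected versions are added as-is
    VersionConstraint.from_string(version_class=vrc, string="1.2.0"),
    # patched versions such as "~> 1.3.2" get the "~>" prefix stripped and the
    # resulting constraint inverted
    VersionConstraint.from_string(version_class=vrc, string="1.3.2").invert(),
]
affected_range = HexVersionRange(constraints=constraints)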
+# + +import urllib.parse as urlparse +from typing import Iterable + +import requests +from bs4 import BeautifulSoup +from packageurl import PackageURL +from univers.version_range import GenericVersionRange +from univers.versions import GenericVersion + +from vulnerabilities import severity_systems +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 + + +class PostgreSQLImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + + pipeline_id = "postgresql_importer_v2" + label = "PostgreSQL" + license_url = "https://www.postgresql.org/about/licence/" + spdx_license_expression = "PostgreSQL" + importer_name = "PostgreSQL Importer" + base_url = "https://www.postgresql.org/support/security/" + + links = set() + + @classmethod + def steps(cls): + return (cls.collect_and_store_advisories,) + + def advisories_count(self) -> int: + if not self.links: + self.collect_links() + return len(self.links) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + if not self.links: + self.collect_links() + + for url in self.links: + data = requests.get(url).content + yield from self.to_advisories(data) + + def collect_links(self): + known_urls = {self.base_url} + visited_urls = set() + + while True: + unvisited_urls = known_urls - visited_urls + for url in unvisited_urls: + data = requests.get(url).content + visited_urls.add(url) + known_urls.update(self.find_advisory_urls(data)) + if known_urls == visited_urls: + break + self.links = known_urls + + def to_advisories(self, data): + advisories = [] + soup = BeautifulSoup(data, features="lxml") + table = soup.select("table")[0] + + for row in table.select("tbody tr"): + ref_col, affected_col, fixed_col, severity_score_col, desc_col = row.select("td") + summary = desc_col.text + pkg_qualifiers = {"os": "windows"} if "windows" in summary.lower() else {} + + affected_packages = [] + affected_version_list = [v.strip() for v in affected_col.text.split(",") if v.strip()] + fixed_version_list = [v.strip() for v in fixed_col.text.split(",") if v.strip()] + + if fixed_version_list: + for fixed_version in fixed_version_list: + affected_packages.append( + AffectedPackage( + package=PackageURL( + name="postgresql", type="generic", qualifiers=pkg_qualifiers + ), + affected_version_range=GenericVersionRange.from_versions( + affected_version_list + ) + if affected_version_list + else None, + fixed_version=GenericVersion(fixed_version), + ) + ) + elif affected_version_list: + affected_packages.append( + AffectedPackage( + package=PackageURL( + name="postgresql", type="generic", qualifiers=pkg_qualifiers + ), + affected_version_range=GenericVersionRange.from_versions( + affected_version_list + ), + ) + ) + + cve_id = "" + try: + cve_id = ref_col.select(".nobr")[0].text + except IndexError: + pass + + references = [] + vector_link_tag = severity_score_col.find("a") + for a_tag in ref_col.select("a"): + link = a_tag.attrs["href"] + if link.startswith("/"): + link = urlparse.urljoin("https://www.postgresql.org/", link) + severities = [] + if "support/security/CVE" in link and vector_link_tag: + parsed_link = urlparse.urlparse(vector_link_tag["href"]) + cvss3_vector = urlparse.parse_qs(parsed_link.query).get("vector", [""])[0] + cvss3_base_score = vector_link_tag.text + severities.append( + VulnerabilitySeverity( + 
system=severity_systems.CVSSV3, + value=cvss3_base_score, + scoring_elements=cvss3_vector, + ) + ) + references.append(Reference(url=link, severities=severities)) + + if cve_id: + advisories.append( + AdvisoryData( + advisory_id=cve_id, + aliases=[], + summary=summary, + references_v2=references, + affected_packages=affected_packages, + url=f"https://www.postgresql.org/support/security/{cve_id}", + ) + ) + + return advisories + + def find_advisory_urls(self, page_data): + soup = BeautifulSoup(page_data, features="lxml") + return { + urlparse.urljoin("https://www.postgresql.org/", a_tag.attrs["href"]) + for a_tag in soup.select("h3+ p a") + } diff --git a/vulnerabilities/pipelines/v2_importers/pypa_importer.py b/vulnerabilities/pipelines/v2_importers/pypa_importer.py index 63449926c..018a50d51 100644 --- a/vulnerabilities/pipelines/v2_importers/pypa_importer.py +++ b/vulnerabilities/pipelines/v2_importers/pypa_importer.py @@ -22,7 +22,7 @@ class PyPaImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """Collect advisories from PyPA GitHub repository.""" pipeline_id = "pypa_importer_v2" - + label = "Pypa" spdx_license_expression = "CC-BY-4.0" license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" repo_url = "git+https://github.com/pypa/advisory-database" diff --git a/vulnerabilities/pipelines/v2_importers/pysec_importer.py b/vulnerabilities/pipelines/v2_importers/pysec_importer.py index 5b39af4af..b5fb21a0a 100644 --- a/vulnerabilities/pipelines/v2_importers/pysec_importer.py +++ b/vulnerabilities/pipelines/v2_importers/pysec_importer.py @@ -22,7 +22,7 @@ class PyPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): """Collect advisories from PyPI.""" pipeline_id = "pysec_importer_v2" - + label = "Pypi" license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" url = "https://osv-vulnerabilities.storage.googleapis.com/PyPI/all.zip" spdx_license_expression = "CC-BY-4.0" diff --git a/vulnerabilities/pipelines/v2_importers/ruby_importer.py b/vulnerabilities/pipelines/v2_importers/ruby_importer.py new file mode 100644 index 000000000..7d5d692e4 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/ruby_importer.py @@ -0,0 +1,159 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
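# Illustrative sketch, not part of the patch: how the PostgreSQL importer above extracts
# the CVSSv3 vector from the severity column's link. The href value is a hypothetical
# example of a link carrying a "vector" query parameter.
import urllib.parse as urlparse

href = "https://example.org/cvss-calculator?vector=AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H"
parsed_link = urlparse.urlparse(href)
cvss3_vector = urlparse.parse_qs(parsed_link.query).get("vector", [""])[0]
# cvss3_vector == "AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H"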
+# + +import logging +from pathlib import Path +from typing import Iterable + +from dateutil.parser import parse +from packageurl import PackageURL +from pytz import UTC +from univers.version_range import GemVersionRange + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from fetchcode.vcs import fetch_via_vcs +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.severity_systems import SCORING_SYSTEMS +from vulnerabilities.utils import build_description, get_advisory_url, load_yaml + +logger = logging.getLogger(__name__) + + +class RubyImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + + pipeline_id = "ruby_importer_v2" + label = "Ruby" + repo_url = "git+https://github.com/rubysec/ruby-advisory-db" + license_url = "https://github.com/rubysec/ruby-advisory-db/blob/master/LICENSE.txt" + spdx_license_expression = "LicenseRef-scancode-public-domain-disclaimer" + importer_name = "Ruby Importer" + + @classmethod + def steps(cls): + return (cls.collect_and_store_advisories,) + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self) -> int: + base_path = Path(self.vcs_response.dest_dir) + count = 0 + for subdir in ["rubies", "gems"]: + count += len(list((base_path / subdir).glob("**/*.yml"))) + return count + + def collect_advisories(self) -> Iterable[AdvisoryData]: + try: + base_path = Path(self.vcs_response.dest_dir) + for subdir in ["rubies", "gems"]: + for file_path in (base_path / subdir).glob("**/*.yml"): + if file_path.name.startswith("OSVDB-"): + continue + raw_data = load_yaml(file_path) + advisory_url = get_advisory_url( + file=file_path, + base_path=base_path, + url="https://github.com/rubysec/ruby-advisory-db/blob/master/", + ) + advisory = self.parse_ruby_advisory(raw_data, subdir, advisory_url) + if advisory: + yield advisory + finally: + if self.vcs_response: + self.vcs_response.delete() + + def parse_ruby_advisory(self, record, schema_type, advisory_url) -> AdvisoryData: + if schema_type == "gems": + package_name = record.get("gem") + if not package_name: + logger.error("Invalid gem package name") + return + purl = PackageURL(type="gem", name=package_name) + elif schema_type == "rubies": + engine = record.get("engine") + if not engine: + logger.error("Invalid ruby engine name") + return + purl = PackageURL(type="ruby", name=engine) + else: + return + + return AdvisoryData( + advisory_id=self.get_advisory_id(record), + aliases=self.get_aliases(record), + summary=self.get_summary(record), + affected_packages=self.get_affected_packages(record, purl), + references_v2=self.get_references(record), + date_published=self.get_publish_time(record), + url=advisory_url, + ) + + def get_advisory_id(self, record): + cve = record.get("cve") + if cve: + return f"CVE-{cve}" if not cve.startswith("CVE-") else cve + ghsa = record.get("ghsa") + return f"GHSA-{ghsa}" if ghsa else None + + def get_aliases(self, record) -> list[str]: + aliases = [] + if record.get("cve"): + aliases.append("CVE-{}".format(record.get("cve"))) + if record.get("osvdb"): + aliases.append("OSV-{}".format(record.get("osvdb"))) + if record.get("ghsa"): + aliases.append("GHSA-{}".format(record.get("ghsa"))) + return aliases + + def get_affected_packages(self, record, purl) -> list[AffectedPackage]: + safe_version_ranges = 
record.get("patched_versions", []) or [] + safe_version_ranges += record.get("unaffected_versions", []) or [] + safe_version_ranges = [r for r in safe_version_ranges if r] + + affected_packages = [] + for range_str in safe_version_ranges: + affected_packages.append( + AffectedPackage( + package=purl, + affected_version_range=GemVersionRange.from_native(range_str).invert(), + ) + ) + return affected_packages + + def get_references(self, record) -> list[Reference]: + references = [] + url = record.get("url") + cvss_v3 = record.get("cvss_v3") + if url: + if not cvss_v3: + references.append(Reference(url=url)) + else: + references.append( + Reference( + url=url, + severities=[ + VulnerabilitySeverity(system=SCORING_SYSTEMS["cvssv3"], value=cvss_v3) + ], + ) + ) + return references + + def get_publish_time(self, record): + date = record.get("date") + return parse(date).replace(tzinfo=UTC) if date else None + + def get_summary(self, record): + return build_description( + summary=record.get("title") or "", + description=record.get("description") or "", + ) diff --git a/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py index 1f0f8975c..7aa38f25f 100644 --- a/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py +++ b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py @@ -22,6 +22,7 @@ class VulnrichImporterPipeline(VulnerableCodeBaseImporterPipelineV2): pipeline_id = "vulnrichment_importer_v2" + label = "Vulnrichment" spdx_license_expression = "CC0-1.0" license_url = "https://github.com/cisagov/vulnrichment/blob/develop/LICENSE" repo_url = "git+https://github.com/cisagov/vulnrichment.git" diff --git a/vulnerabilities/pipelines/v2_improvers/collect_commits.py b/vulnerabilities/pipelines/v2_improvers/collect_commits.py new file mode 100644 index 000000000..32fb1ce79 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/collect_commits.py @@ -0,0 +1,252 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import re + +from aboutcode.pipeline import LoopProgress + +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import CodeFixV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +def is_vcs_url_already_processed(commit_id): + """ + Check if a VCS URL exists in a CodeFix entry. + """ + return CodeFixV2.objects.filter(commits__contains=[commit_id]).exists() + + +class CollectFixCommitsPipeline(VulnerableCodePipeline): + """ + Improver pipeline to scout References and create CodeFix entries. 
+ """ + + pipeline_id = "collect_fix_commits_v2" + license_expression = None + + @classmethod + def steps(cls): + return (cls.collect_and_store_fix_commits,) + + def collect_and_store_fix_commits(self): + affected_advisories = ( + AdvisoryV2.objects.filter(affecting_packages__isnull=False) + .prefetch_related("affecting_packages") + .distinct() + ) + + self.log(f"Processing {affected_advisories.count():,d} references to collect fix commits.") + + created_fix_count = 0 + progress = LoopProgress(total_iterations=affected_advisories.count(), logger=self.log) + + for adv in progress.iter(affected_advisories.paginated(per_page=500)): + for reference in adv.references.all(): + if not "/commit/" in reference.url: + continue + if not is_vcs_url(reference.url): + continue + + vcs_url = normalize_vcs_url(repo_url=reference.url) + + if not vcs_url: + continue + + # Skip if already processed + if is_vcs_url_already_processed(commit_id=vcs_url): + self.log( + f"Skipping already processed reference: {reference.url} with VCS URL {vcs_url}" + ) + continue + # check if vcs_url has commit + for package in adv.affecting_packages.all(): + code_fix, created = CodeFixV2.objects.get_or_create( + commits=[vcs_url], + advisory=adv, + affected_package=package, + ) + + if created: + created_fix_count += 1 + self.log( + f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}" + ) + + self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") + + +PLAIN_URLS = ( + "https://", + "http://", +) + +VCS_URLS = ( + "git://", + "git+git://", + "git+https://", + "git+http://", + "hg://", + "hg+http://", + "hg+https://", + "svn://", + "svn+https://", + "svn+http://", +) + + +# TODO: This function was borrowed from scancode-toolkit. We need to create a shared library for that. +def normalize_vcs_url(repo_url, vcs_tool=None): + """ + Return a normalized vcs_url version control URL given some `repo_url` and an + optional `vcs_tool` hint (such as 'git', 'hg', etc.) + + Return None if repo_url is not recognized as a VCS URL. + + Handles shortcuts for GitHub, GitHub gist, Bitbucket, or GitLab repositories + and more using the same approach as npm install: + + See https://docs.npmjs.com/files/package.json#repository + or https://getcomposer.org/doc/05-repositories.md + + This is done here in npm: + https://github.com/npm/npm/blob/d3c858ce4cfb3aee515bb299eb034fe1b5e44344/node_modules/hosted-git-info/git-host-info.js + + These should be resolved: + npm/npm + gist:11081aaa281 + bitbucket:example/repo + gitlab:another/repo + expressjs/serve-static + git://github.com/angular/di.js.git + git://github.com/hapijs/boom + git@github.com:balderdashy/waterline-criteria.git + http://github.com/ariya/esprima.git + http://github.com/isaacs/nopt + https://github.com/chaijs/chai + https://github.com/christkv/kerberos.git + https://gitlab.com/foo/private.git + git@gitlab.com:foo/private.git + """ + if not repo_url or not isinstance(repo_url, str): + return + + repo_url = repo_url.strip() + if not repo_url: + return + + # TODO: If we match http and https, we may should add more check in + # case if the url is not a repo one. For example, check the domain + # name in the url... 
+ if repo_url.startswith(VCS_URLS + PLAIN_URLS): + return repo_url + + if repo_url.startswith("git@"): + tool, _, right = repo_url.partition("@") + if ":" in repo_url: + host, _, repo = right.partition(":") + else: + # git@github.com/Filirom1/npm2aur.git + host, _, repo = right.partition("/") + + if any(r in host for r in ("bitbucket", "gitlab", "github")): + scheme = "https" + else: + scheme = "git" + + return f"{scheme}://{host}/{repo}" + + # FIXME: where these URL schemes come from?? + if repo_url.startswith(("bitbucket:", "gitlab:", "github:", "gist:")): + repo = repo_url.split(":")[1] + hoster_urls = { + "bitbucket": f"https://bitbucket.org/{repo}", + "github": f"https://github.com/{repo}", + "gitlab": f"https://gitlab.com/{repo}", + "gist": f"https://gist.github.com/{repo}", + } + hoster, _, repo = repo_url.partition(":") + return hoster_urls[hoster] % locals() + + if len(repo_url.split("/")) == 2: + # implicit github, but that's only on NPM? + return f"https://github.com/{repo_url}" + return repo_url + + +def is_vcs_url(repo_url): + """ + Check if a given URL or string matches a valid VCS (Version Control System) URL. + + Supports: + - Standard VCS URL protocols (git, http, https, ssh) + - Shortcut syntax (e.g., github:user/repo, gitlab:group/repo) + - GitHub shortcut (e.g., user/repo) + + Args: + repo_url (str): The repository URL or shortcut to validate. + + Returns: + bool: True if the string is a valid VCS URL, False otherwise. + + Examples: + >>> is_vcs_url("git://github.com/angular/di.js.git") + True + >>> is_vcs_url("github:user/repo") + True + >>> is_vcs_url("user/repo") + True + >>> is_vcs_url("https://github.com/user/repo.git") + True + >>> is_vcs_url("git@github.com:user/repo.git") + True + >>> is_vcs_url("http://github.com/isaacs/nopt") + True + >>> is_vcs_url("https://gitlab.com/foo/private.git") + True + >>> is_vcs_url("git@gitlab.com:foo/private.git") + True + >>> is_vcs_url("bitbucket:example/repo") + True + >>> is_vcs_url("gist:11081aaa281") + True + >>> is_vcs_url("ftp://example.com/not-a-repo") + False + >>> is_vcs_url("random-string") + False + >>> is_vcs_url("https://example.com/not-a-repo") + False + """ + if not repo_url or not isinstance(repo_url, str): + return False + + repo_url = repo_url.strip() + if not repo_url: + return False + + # Define valid VCS domains + vcs_domains = r"(github\.com|gitlab\.com|bitbucket\.org|gist\.github\.com)" + + # 1. Match URLs with standard protocols pointing to VCS domains + if re.match(rf"^(git|ssh|http|https)://{vcs_domains}/[\w\-.]+/[\w\-.]+", repo_url): + return True + + # 2. Match SSH URLs (e.g., git@github.com:user/repo.git) + if re.match(rf"^git@{vcs_domains}:[\w\-.]+/[\w\-.]+(\.git)?$", repo_url): + return True + + # 3. Match shortcut syntax (e.g., github:user/repo) + if re.match(r"^(github|gitlab|bitbucket|gist):[\w\-./]+$", repo_url): + return True + + # 4. 
Match implicit GitHub shortcut (e.g., user/repo) + if re.match(r"^[\w\-]+/[\w\-]+$", repo_url): + return True + + return False diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index 9fbe8ce24..ea19cbe87 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -57,18 +57,18 @@ def get_or_create_advisory_aliases(aliases: List[str]) -> List[AdvisoryAlias]: def get_or_create_advisory_references(references: List) -> List[AdvisoryReference]: - reference_ids = [ref.reference_id for ref in references] - existing = AdvisoryReference.objects.filter(reference_id__in=reference_ids) - existing_ids = {r.reference_id for r in existing} + reference_urls = [ref.url for ref in references] + existing = AdvisoryReference.objects.filter(url__in=reference_urls) + existing_urls = {r.url for r in existing} to_create = [ AdvisoryReference(reference_id=ref.reference_id, url=ref.url) for ref in references - if ref.reference_id not in existing_ids + if ref.url not in existing_urls ] AdvisoryReference.objects.bulk_create(to_create, ignore_conflicts=True) - return list(AdvisoryReference.objects.filter(reference_id__in=reference_ids)) + return list(AdvisoryReference.objects.filter(url__in=reference_urls)) def get_or_create_advisory_severities(severities: List) -> QuerySet: @@ -155,9 +155,10 @@ def insert_advisory_v2( default_data = { "summary": advisory.summary, "date_published": advisory.date_published, - "datasource_ID": pipeline_id, + "datasource_id": pipeline_id, "date_collected": datetime.now(timezone.utc), "advisory_id": advisory.advisory_id, + "avid": f"{pipeline_id}/{advisory.advisory_id}", } advisory_obj, _ = AdvisoryV2.objects.get_or_create( diff --git a/vulnerabilities/templates/advisory_detail.html b/vulnerabilities/templates/advisory_detail.html index 5a6fbe2b1..8a386d4ec 100644 --- a/vulnerabilities/templates/advisory_detail.html +++ b/vulnerabilities/templates/advisory_detail.html @@ -18,7 +18,7 @@
Advisory details:
- {{ advisory.advisory_id }}
+ {{advisory.datasource_id}} / {{ advisory.advisory_id }}
@@ -86,17 +86,7 @@
Advisory ID
{{ advisory.advisory_id }}
URL - {{advisory.url}} - {{ advisory.datasource_id }}/{{ advisory.advisory_id }}
Aliases
+ Risk score
+
+ {{package.risk_score}}
+
@@ -139,7 +147,7 @@
- {{advisory.advisory_id }}
+ {{advisory.avid }}
{% if advisory.alias|length != 0 %} @@ -260,7 +268,7 @@ - {{advisory.advisory_id }} + {{advisory.avid }} diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index 55ba4fdd5..245b8e917 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -20,6 +20,7 @@ from vulnerabilities.api import CPEViewSet from vulnerabilities.api import PackageViewSet from vulnerabilities.api import VulnerabilityViewSet +from vulnerabilities.api_v2 import AdvisoriesPackageV2ViewSet from vulnerabilities.api_v2 import CodeFixViewSet from vulnerabilities.api_v2 import PackageV2ViewSet from vulnerabilities.api_v2 import PipelineScheduleV2ViewSet @@ -60,6 +61,9 @@ def __init__(self, *args, **kwargs): api_v2_router = OptionalSlashRouter() api_v2_router.register("packages", PackageV2ViewSet, basename="package-v2") +api_v2_router.register( + "advisories-packages", AdvisoriesPackageV2ViewSet, basename="advisories-package-v2" +) api_v2_router.register("vulnerabilities", VulnerabilityV2ViewSet, basename="vulnerability-v2") api_v2_router.register("codefixes", CodeFixViewSet, basename="codefix") api_v2_router.register("schedule", PipelineScheduleV2ViewSet, basename="schedule") From df7f7227d8c9d1c25bb3b82afffea69a35280b46 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 26 Jun 2025 18:30:40 +0530 Subject: [PATCH 35/44] Add tests for gitlab importer Signed-off-by: Tushar Goel --- .../pipelines/v2_importers/gitlab_importer.py | 23 ++- .../pipelines/v2_importers/ruby_importer.py | 159 ------------------ .../pipelines/test_gitlab_v2_importer.py | 153 +++++++++++++++++ 3 files changed, 169 insertions(+), 166 deletions(-) delete mode 100644 vulnerabilities/pipelines/v2_importers/ruby_importer.py create mode 100644 vulnerabilities/tests/pipelines/test_gitlab_v2_importer.py diff --git a/vulnerabilities/pipelines/v2_importers/gitlab_importer.py b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py index c8f005e65..412733da7 100644 --- a/vulnerabilities/pipelines/v2_importers/gitlab_importer.py +++ b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py @@ -54,15 +54,15 @@ def steps(cls): ) purl_type_by_gitlab_scheme = { - # "conan": "conan", - # "gem": "gem", + "conan": "conan", + "gem": "gem", # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742 # "go": "golang", - # "maven": "maven", - # "npm": "npm", - # "nuget": "nuget", + "maven": "maven", + "npm": "npm", + "nuget": "nuget", "packagist": "composer", - # "pypi": "pypi", + "pypi": "pypi", } gitlab_scheme_by_purl_type = {v: k for k, v in purl_type_by_gitlab_scheme.items()} @@ -94,7 +94,7 @@ def collect_advisories(self) -> Iterable[AdvisoryData]: ) continue - yield parse_gitlab_advisory( + advisory = parse_gitlab_advisory( file=file_path, base_path=base_path, gitlab_scheme_by_purl_type=self.gitlab_scheme_by_purl_type, @@ -102,6 +102,15 @@ def collect_advisories(self) -> Iterable[AdvisoryData]: logger=self.log, ) + if not advisory: + self.log( + f"Failed to parse advisory from {file_path!r}", + level=logging.ERROR, + ) + continue + + yield advisory + def clean_downloads(self): if self.vcs_response: self.log(f"Removing cloned repository") diff --git a/vulnerabilities/pipelines/v2_importers/ruby_importer.py b/vulnerabilities/pipelines/v2_importers/ruby_importer.py deleted file mode 100644 index 7d5d692e4..000000000 --- a/vulnerabilities/pipelines/v2_importers/ruby_importer.py +++ /dev/null @@ -1,159 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# VulnerableCode is a trademark of nexB Inc. 
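# Illustrative sketch, not part of the patch: querying the advisories-packages endpoint
# registered above. The /api/v2/ mount point, the POST verb and the token header are
# assumptions based on the view's use of request.data; adjust them to the deployment.
import requests

response = requests.post(
    "https://vulnerablecode.example.org/api/v2/advisories-packages/",
    json={"purl": "pkg:pypi/django@4.2.0"},  # the serializer requires a 'purl'
    headers={"Authorization": "Token <api-key>"},
)
packages = response.json()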
-# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/aboutcode-org/vulnerablecode for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - -import logging -from pathlib import Path -from typing import Iterable - -from dateutil.parser import parse -from packageurl import PackageURL -from pytz import UTC -from univers.version_range import GemVersionRange - -from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Reference -from fetchcode.vcs import fetch_via_vcs -from vulnerabilities.importer import VulnerabilitySeverity -from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 -from vulnerabilities.severity_systems import SCORING_SYSTEMS -from vulnerabilities.utils import build_description, get_advisory_url, load_yaml - -logger = logging.getLogger(__name__) - - -class RubyImporterPipeline(VulnerableCodeBaseImporterPipelineV2): - - pipeline_id = "ruby_importer_v2" - label = "Ruby" - repo_url = "git+https://github.com/rubysec/ruby-advisory-db" - license_url = "https://github.com/rubysec/ruby-advisory-db/blob/master/LICENSE.txt" - spdx_license_expression = "LicenseRef-scancode-public-domain-disclaimer" - importer_name = "Ruby Importer" - - @classmethod - def steps(cls): - return (cls.collect_and_store_advisories,) - - def clone(self): - self.log(f"Cloning `{self.repo_url}`") - self.vcs_response = fetch_via_vcs(self.repo_url) - - def advisories_count(self) -> int: - base_path = Path(self.vcs_response.dest_dir) - count = 0 - for subdir in ["rubies", "gems"]: - count += len(list((base_path / subdir).glob("**/*.yml"))) - return count - - def collect_advisories(self) -> Iterable[AdvisoryData]: - try: - base_path = Path(self.vcs_response.dest_dir) - for subdir in ["rubies", "gems"]: - for file_path in (base_path / subdir).glob("**/*.yml"): - if file_path.name.startswith("OSVDB-"): - continue - raw_data = load_yaml(file_path) - advisory_url = get_advisory_url( - file=file_path, - base_path=base_path, - url="https://github.com/rubysec/ruby-advisory-db/blob/master/", - ) - advisory = self.parse_ruby_advisory(raw_data, subdir, advisory_url) - if advisory: - yield advisory - finally: - if self.vcs_response: - self.vcs_response.delete() - - def parse_ruby_advisory(self, record, schema_type, advisory_url) -> AdvisoryData: - if schema_type == "gems": - package_name = record.get("gem") - if not package_name: - logger.error("Invalid gem package name") - return - purl = PackageURL(type="gem", name=package_name) - elif schema_type == "rubies": - engine = record.get("engine") - if not engine: - logger.error("Invalid ruby engine name") - return - purl = PackageURL(type="ruby", name=engine) - else: - return - - return AdvisoryData( - advisory_id=self.get_advisory_id(record), - aliases=self.get_aliases(record), - summary=self.get_summary(record), - affected_packages=self.get_affected_packages(record, purl), - references_v2=self.get_references(record), - date_published=self.get_publish_time(record), - url=advisory_url, - ) - - def get_advisory_id(self, record): - cve = record.get("cve") - if cve: - return f"CVE-{cve}" if not cve.startswith("CVE-") else cve - ghsa = record.get("ghsa") - return f"GHSA-{ghsa}" if ghsa else None - - def get_aliases(self, record) -> list[str]: - aliases = [] - if record.get("cve"): - aliases.append("CVE-{}".format(record.get("cve"))) - if 
record.get("osvdb"): - aliases.append("OSV-{}".format(record.get("osvdb"))) - if record.get("ghsa"): - aliases.append("GHSA-{}".format(record.get("ghsa"))) - return aliases - - def get_affected_packages(self, record, purl) -> list[AffectedPackage]: - safe_version_ranges = record.get("patched_versions", []) or [] - safe_version_ranges += record.get("unaffected_versions", []) or [] - safe_version_ranges = [r for r in safe_version_ranges if r] - - affected_packages = [] - for range_str in safe_version_ranges: - affected_packages.append( - AffectedPackage( - package=purl, - affected_version_range=GemVersionRange.from_native(range_str).invert(), - ) - ) - return affected_packages - - def get_references(self, record) -> list[Reference]: - references = [] - url = record.get("url") - cvss_v3 = record.get("cvss_v3") - if url: - if not cvss_v3: - references.append(Reference(url=url)) - else: - references.append( - Reference( - url=url, - severities=[ - VulnerabilitySeverity(system=SCORING_SYSTEMS["cvssv3"], value=cvss_v3) - ], - ) - ) - return references - - def get_publish_time(self, record): - date = record.get("date") - return parse(date).replace(tzinfo=UTC) if date else None - - def get_summary(self, record): - return build_description( - summary=record.get("title") or "", - description=record.get("description") or "", - ) diff --git a/vulnerabilities/tests/pipelines/test_gitlab_v2_importer.py b/vulnerabilities/tests/pipelines/test_gitlab_v2_importer.py new file mode 100644 index 000000000..6e5c8eb15 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_gitlab_v2_importer.py @@ -0,0 +1,153 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. 
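# Illustrative note, not part of the patch: the weaknesses assertion in the GitLab test
# below works because the importer maps CWE strings to integers, e.g. "CWE-79" to 79.
# The import location of get_cwe_id is an assumption based on its use in other importers.
from vulnerabilities.utils import get_cwe_id

assert get_cwe_id("CWE-79") == 79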
+# + +from pathlib import Path +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest + +from vulnerabilities.importer import AdvisoryData + + +@pytest.fixture +def mock_vcs_response(tmp_path): + mock_response = MagicMock() + mock_response.dest_dir = str(tmp_path) + mock_response.delete = MagicMock() + return mock_response + + +@pytest.fixture +def mock_fetch_via_vcs(mock_vcs_response): + with patch("vulnerabilities.pipelines.v2_importers.gitlab_importer.fetch_via_vcs") as mock: + mock.return_value = mock_vcs_response + yield mock + + +@pytest.fixture +def mock_gitlab_yaml(tmp_path): + advisory_dir = tmp_path / "pypi" / "package_name" + advisory_dir.mkdir(parents=True) + + advisory_file = advisory_dir / "CVE-2022-0001.yml" + advisory_file.write_text( + """ + identifier: "CVE-2022-0001" + package_slug: "pypi/package_name" + title: "Example vulnerability" + description: "Example description" + pubdate: "2022-06-15" + affected_range: "<2.0.0" + fixed_versions: + - "2.0.0" + urls: + - "https://example.com/advisory" + cwe_ids: + - "CWE-79" + identifiers: + - "CVE-2022-0001" + """ + ) + return tmp_path + + +def test_clone(mock_fetch_via_vcs, mock_vcs_response): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + pipeline = GitLabImporterPipeline() + pipeline.clone() + + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + assert pipeline.vcs_response == mock_vcs_response + + +def test_advisories_count(mock_gitlab_yaml, mock_vcs_response, mock_fetch_via_vcs): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + mock_vcs_response.dest_dir = str(mock_gitlab_yaml) + + pipeline = GitLabImporterPipeline() + pipeline.clone() + mock_fetch_via_vcs.assert_called_once() + + count = pipeline.advisories_count() + assert count == 1 + + +def test_collect_advisories(mock_gitlab_yaml, mock_vcs_response, mock_fetch_via_vcs): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + mock_vcs_response.dest_dir = str(mock_gitlab_yaml) + + pipeline = GitLabImporterPipeline() + pipeline.clone() + + advisories = list(pipeline.collect_advisories()) + assert len(advisories) == 1 + advisory = advisories[0] + + assert isinstance(advisory, AdvisoryData) + assert advisory.advisory_id == "CVE-2022-0001" + assert advisory.summary == "Example vulnerability\nExample description" + assert advisory.references_v2[0].url == "https://example.com/advisory" + assert advisory.affected_packages[0].package.name == "package-name" + assert advisory.affected_packages[0].fixed_version + assert advisory.weaknesses[0] == 79 + + +def test_clean_downloads(mock_vcs_response): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + pipeline = GitLabImporterPipeline() + pipeline.vcs_response = mock_vcs_response + + pipeline.clean_downloads() + mock_vcs_response.delete.assert_called_once() + + +def test_on_failure(mock_vcs_response): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + pipeline = GitLabImporterPipeline() + pipeline.vcs_response = mock_vcs_response + + with patch.object(pipeline, "clean_downloads") as mock_clean: + pipeline.on_failure() + mock_clean.assert_called_once() + + +def test_collect_advisories_with_invalid_yaml( + mock_gitlab_yaml, mock_vcs_response, mock_fetch_via_vcs +): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + # Add an 
invalid YAML file + invalid_file = Path(mock_gitlab_yaml) / "pypi" / "package_name" / "invalid.yml" + invalid_file.write_text(":::invalid_yaml") + + mock_vcs_response.dest_dir = str(mock_gitlab_yaml) + + pipeline = GitLabImporterPipeline() + pipeline.clone() + + # Should not raise but skip invalid YAML + advisories = list(pipeline.collect_advisories()) + assert len(advisories) == 1 # Only one valid advisory is parsed + + +def test_advisories_count_empty(mock_vcs_response, mock_fetch_via_vcs, tmp_path): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + mock_vcs_response.dest_dir = str(tmp_path) + + pipeline = GitLabImporterPipeline() + pipeline.clone() + mock_fetch_via_vcs.assert_called_once() + + count = pipeline.advisories_count() + assert count == 0 From 7a5ab4ef26a5b6894566c8d66de2a56ebdf7a06a Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 26 Jun 2025 19:00:59 +0530 Subject: [PATCH 36/44] Test postgresql importer Signed-off-by: Tushar Goel --- .../v2_importers/postgresql_importer.py | 7 +- .../pipelines/test_postgresql_v2_importer.py | 154 ++++++++++++++++++ 2 files changed, 160 insertions(+), 1 deletion(-) create mode 100644 vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py diff --git a/vulnerabilities/pipelines/v2_importers/postgresql_importer.py b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py index e41dfdde4..830db934d 100644 --- a/vulnerabilities/pipelines/v2_importers/postgresql_importer.py +++ b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py @@ -69,7 +69,12 @@ def collect_links(self): def to_advisories(self, data): advisories = [] soup = BeautifulSoup(data, features="lxml") - table = soup.select("table")[0] + tables = soup.select("table") + + if not tables: + return advisories + + table = tables[0] for row in table.select("tbody tr"): ref_col, affected_col, fixed_col, severity_score_col, desc_col = row.select("td") diff --git a/vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py b/vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py new file mode 100644 index 000000000..da077f3ed --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py @@ -0,0 +1,154 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest +from univers.versions import SemverVersion + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines.v2_importers.postgresql_importer import PostgreSQLImporterPipeline + +HTML_PAGE_WITH_LINKS = """ + + +

Security Advisory

+

Advisory 1

+

Another Advisory

+

Advisory 2

+ + +""" + +HTML_ADVISORY = """ + + + + + + + + + + + + +
+ CVE-2022-1234
+ Announcement
+
10.0, 10.110.29.8Description of the issue
+ + +""" + + +@pytest.fixture +def importer(): + return PostgreSQLImporterPipeline() + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_collect_links(mock_get, importer): + mock_get.return_value.content = HTML_PAGE_WITH_LINKS.encode("utf-8") + + importer.collect_links() + + assert len(importer.links) == 3 # base + 2 new + assert any("advisory1.html" in link for link in importer.links) + assert any("advisory2.html" in link for link in importer.links) + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_advisories_count(mock_get, importer): + mock_get.return_value.content = HTML_PAGE_WITH_LINKS.encode("utf-8") + + count = importer.advisories_count() + assert count >= 3 + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_collect_advisories(mock_get, importer): + importer.links = { + "https://www.postgresql.org/support/security/advisory1.html", + "https://www.postgresql.org/support/security/advisory2.html", + } + + mock_get.return_value.content = HTML_ADVISORY.encode("utf-8") + + advisories = list(importer.collect_advisories()) + + assert len(advisories) == 2 + advisory = advisories[0] + assert isinstance(advisory, AdvisoryData) + assert advisory.advisory_id == "CVE-2022-1234" + assert "Description of the issue" in advisory.summary + assert len(advisory.references_v2) > 0 + assert advisory.affected_packages[0].package.name == "postgresql" + assert str(advisory.affected_packages[0].fixed_version) == "10.2" + assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("10.0.0")) + assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("10.1.0")) + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_collect_advisories_with_no_fixed_version(mock_get, importer): + no_fix_html = """ + + + + + + + + + + + + +
+ CVE-2023-5678
+ Announcement
+
9.5, 9.6Unpatched issue
+ + + """ + + def side_effect(url, *args, **kwargs): + if "advisory" not in url: + return MagicMock(content=HTML_PAGE_WITH_LINKS.encode("utf-8")) + return MagicMock(content=no_fix_html.encode("utf-8")) + + mock_get.side_effect = side_effect + + advisories = list(importer.collect_advisories()) + + assert len(advisories) == 2 + advisory = advisories[0] + assert advisory.advisory_id == "CVE-2023-5678" + assert advisory.affected_packages[0].fixed_version is None + assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("9.5")) + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_cvss_parsing(mock_get, importer): + mock_get.side_effect = lambda url, *args, **kwargs: MagicMock( + content=HTML_ADVISORY.encode("utf-8") + ) + + importer.links = {"https://www.postgresql.org/support/security/advisory1.html"} + + advisories = list(importer.collect_advisories()) + + assert len(advisories) == 1 + reference = advisories[0].references_v2[0] + + severity = reference.severities[0] + assert severity.system.identifier == "cvssv3" + assert severity.value == "9.8" + assert "AV:N/AC:L/PR:N/UI:N" in severity.scoring_elements From 4b0bedb2f364485fd7b5e650675afac9708ca0a1 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Thu, 26 Jun 2025 19:11:51 +0530 Subject: [PATCH 37/44] Add tests for elixir security importer Signed-off-by: Tushar Goel --- .../test_elixir_security_v2_importer.py | 108 ++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py diff --git a/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py b/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py new file mode 100644 index 000000000..e8998d8be --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py @@ -0,0 +1,108 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import shutil +from pathlib import Path +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines.v2_importers.elixir_security_importer import ( + ElixirSecurityImporterPipeline, +) + + +@pytest.fixture +def mock_vcs_response(tmp_path): + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + packages_dir = repo_dir / "packages" / "some_package" + packages_dir.mkdir(parents=True) + + advisory_file = packages_dir / "CVE-2022-9999.yml" + advisory_file.write_text( + """ + cve: "2022-9999" + package: "plug" + description: "Cross-site scripting vulnerability in plug < 1.11.1" + patched_versions: + - ">= 1.11.1" + unaffected_versions: + - "< 1.0.0" + disclosure_date: "2022-12-01" + link: "https://github.com/plug/plug/security/advisories/GHSA-xxxx-yyyy" + """ + ) + + mock = MagicMock() + mock.dest_dir = str(repo_dir) + mock.delete = MagicMock() + return mock + + +@pytest.fixture +def mock_fetch_via_vcs(mock_vcs_response): + with patch( + "vulnerabilities.pipelines.v2_importers.elixir_security_importer.fetch_via_vcs" + ) as mock: + mock.return_value = mock_vcs_response + yield mock + + +def test_advisories_count(mock_fetch_via_vcs, mock_vcs_response): + importer = ElixirSecurityImporterPipeline() + importer.clone() + count = importer.advisories_count() + assert count == 1 + + +def test_collect_advisories(mock_fetch_via_vcs, mock_vcs_response): + importer = ElixirSecurityImporterPipeline() + importer.clone() + advisories = list(importer.collect_advisories()) + + assert len(advisories) == 1 + + advisory: AdvisoryData = advisories[0] + assert advisory.advisory_id == "CVE-2022-9999" + assert advisory.summary.startswith("Cross-site scripting vulnerability") + assert advisory.affected_packages[0].package.name == "plug" + assert advisory.affected_packages[0].package.type == "hex" + assert ( + advisory.references_v2[0].url + == "https://github.com/plug/plug/security/advisories/GHSA-xxxx-yyyy" + ) + assert advisory.date_published.isoformat().startswith("2022-12-01") + + +def test_collect_advisories_skips_invalid_cve(mock_fetch_via_vcs, tmp_path): + repo_dir = tmp_path / "repo" + packages_dir = repo_dir / "packages" + + if packages_dir.exists(): + shutil.rmtree(packages_dir) + packages_dir.mkdir(parents=True, exist_ok=True) + + advisory_file = packages_dir / "bad_advisory.yml" + advisory_file.write_text("cve: BAD-ID\npackage: x\n") + + mock_response = MagicMock() + mock_response.dest_dir = str(repo_dir) + mock_response.delete = MagicMock() + + with patch( + "vulnerabilities.pipelines.v2_importers.elixir_security_importer.fetch_via_vcs" + ) as mock: + mock.return_value = mock_response + importer = ElixirSecurityImporterPipeline() + importer.clone() + advisories = list(importer.collect_advisories()) + assert len(advisories) == 0 # Confirm it skipped the invalid CVE From 77b77ee7ebfa8fc222a97fdca92a2eedc9ca1fe6 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 1 Jul 2025 16:01:47 +0530 Subject: [PATCH 38/44] Add tests for models Signed-off-by: Tushar Goel --- vulnerabilities/importers/__init__.py | 101 +++++++------- vulnerabilities/improvers/__init__.py | 80 ++++++----- vulnerabilities/models.py | 17 +-- vulnerabilities/pipelines/__init__.py | 10 -- .../v2_importers/apache_httpd_importer.py | 8 +- .../v2_importers/elixir_security_importer.py | 12 +- .../pipelines/v2_importers/github_importer.py | 10 +- .../pipelines/v2_importers/gitlab_importer.py | 9 +- 
.../pipelines/v2_importers/npm_importer.py | 10 +- .../pipelines/v2_importers/nvd_importer.py | 8 +- .../v2_importers/postgresql_importer.py | 7 +- .../pipelines/v2_importers/pypa_importer.py | 8 +- .../pipelines/v2_importers/pysec_importer.py | 8 +- .../v2_importers/vulnrichment_importer.py | 8 +- vulnerabilities/pipes/advisory.py | 6 +- .../pipelines/test_collect_commits_v2.py | 131 ++++++++++++++++++ .../test_elixir_security_v2_importer.py | 2 +- .../pipelines/test_flag_ghost_packages_v2.py | 111 +++++++++++++++ vulnerabilities/tests/pipes/test_advisory.py | 91 ++++++++++++ vulnerabilities/utils.py | 25 ++++ 20 files changed, 512 insertions(+), 150 deletions(-) create mode 100644 vulnerabilities/tests/pipelines/test_collect_commits_v2.py create mode 100644 vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 651d46f1f..3dd914a92 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -33,8 +33,6 @@ from vulnerabilities.importers import ubuntu_usn from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen -from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline -from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 from vulnerabilities.pipelines import alpine_linux_importer from vulnerabilities.pipelines import github_importer from vulnerabilities.pipelines import gitlab_importer @@ -51,56 +49,51 @@ from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2 from vulnerabilities.pipelines.v2_importers import pysec_importer as pysec_importer_v2 from vulnerabilities.pipelines.v2_importers import vulnrichment_importer as vulnrichment_importer_v2 +from vulnerabilities.utils import create_registry -IMPORTERS_REGISTRY = [ - nvd_importer_v2.NVDImporterPipeline, - github_importer_v2.GitHubAPIImporterPipeline, - npm_importer_v2.NpmImporterPipeline, - vulnrichment_importer_v2.VulnrichImporterPipeline, - apache_httpd_v2.ApacheHTTPDImporterPipeline, - pypa_importer_v2.PyPaImporterPipeline, - gitlab_importer_v2.GitLabImporterPipeline, - pysec_importer_v2.PyPIImporterPipeline, - nvd_importer.NVDImporterPipeline, - github_importer.GitHubAPIImporterPipeline, - gitlab_importer.GitLabImporterPipeline, - github_osv.GithubOSVImporter, - pypa_importer.PyPaImporterPipeline, - npm_importer.NpmImporterPipeline, - nginx_importer.NginxImporterPipeline, - pysec_importer.PyPIImporterPipeline, - apache_tomcat.ApacheTomcatImporter, - postgresql.PostgreSQLImporter, - debian.DebianImporter, - curl.CurlImporter, - epss.EPSSImporter, - vulnrichment.VulnrichImporter, - alpine_linux_importer.AlpineLinuxImporterPipeline, - ruby.RubyImporter, - apache_kafka.ApacheKafkaImporter, - openssl.OpensslImporter, - redhat.RedhatImporter, - archlinux.ArchlinuxImporter, - ubuntu.UbuntuImporter, - debian_oval.DebianOvalImporter, - retiredotnet.RetireDotnetImporter, - apache_httpd.ApacheHTTPDImporter, - mozilla.MozillaImporter, - gentoo.GentooImporter, - istio.IstioImporter, - project_kb_msr2019.ProjectKBMSRImporter, - suse_scores.SUSESeverityScoreImporter, - elixir_security.ElixirSecurityImporter, - xen.XenImporter, - ubuntu_usn.UbuntuUSNImporter, - fireeye.FireyeImporter, - oss_fuzz.OSSFuzzImporter, -] - -IMPORTERS_REGISTRY = { - x.pipeline_id - if issubclass(x, VulnerableCodeBaseImporterPipeline) - or issubclass(x, VulnerableCodeBaseImporterPipelineV2) - else x.qualified_name: x - for x in 
IMPORTERS_REGISTRY -} +IMPORTERS_REGISTRY = create_registry( + [ + nvd_importer_v2.NVDImporterPipeline, + github_importer_v2.GitHubAPIImporterPipeline, + npm_importer_v2.NpmImporterPipeline, + vulnrichment_importer_v2.VulnrichImporterPipeline, + apache_httpd_v2.ApacheHTTPDImporterPipeline, + pypa_importer_v2.PyPaImporterPipeline, + gitlab_importer_v2.GitLabImporterPipeline, + pysec_importer_v2.PyPIImporterPipeline, + nvd_importer.NVDImporterPipeline, + github_importer.GitHubAPIImporterPipeline, + gitlab_importer.GitLabImporterPipeline, + github_osv.GithubOSVImporter, + pypa_importer.PyPaImporterPipeline, + npm_importer.NpmImporterPipeline, + nginx_importer.NginxImporterPipeline, + pysec_importer.PyPIImporterPipeline, + apache_tomcat.ApacheTomcatImporter, + postgresql.PostgreSQLImporter, + debian.DebianImporter, + curl.CurlImporter, + epss.EPSSImporter, + vulnrichment.VulnrichImporter, + alpine_linux_importer.AlpineLinuxImporterPipeline, + ruby.RubyImporter, + apache_kafka.ApacheKafkaImporter, + openssl.OpensslImporter, + redhat.RedhatImporter, + archlinux.ArchlinuxImporter, + ubuntu.UbuntuImporter, + debian_oval.DebianOvalImporter, + retiredotnet.RetireDotnetImporter, + apache_httpd.ApacheHTTPDImporter, + mozilla.MozillaImporter, + gentoo.GentooImporter, + istio.IstioImporter, + project_kb_msr2019.ProjectKBMSRImporter, + suse_scores.SUSESeverityScoreImporter, + elixir_security.ElixirSecurityImporter, + xen.XenImporter, + ubuntu_usn.UbuntuUSNImporter, + fireeye.FireyeImporter, + oss_fuzz.OSSFuzzImporter, + ] +) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 11b74ac44..f4fcf3ecc 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -32,45 +32,43 @@ enhance_with_metasploit as enhance_with_metasploit_v2, ) from vulnerabilities.pipelines.v2_improvers import flag_ghost_packages as flag_ghost_packages_v2 +from vulnerabilities.utils import create_registry -IMPROVERS_REGISTRY = [ - valid_versions.GitHubBasicImprover, - valid_versions.GitLabBasicImprover, - valid_versions.NginxBasicImprover, - valid_versions.ApacheHTTPDImprover, - valid_versions.DebianBasicImprover, - valid_versions.NpmImprover, - valid_versions.ElixirImprover, - valid_versions.ApacheTomcatImprover, - valid_versions.ApacheKafkaImprover, - valid_versions.IstioImprover, - valid_versions.DebianOvalImprover, - valid_versions.UbuntuOvalImprover, - valid_versions.OSSFuzzImprover, - valid_versions.RubyImprover, - valid_versions.GithubOSVImprover, - vulnerability_status.VulnerabilityStatusImprover, - valid_versions.CurlImprover, - flag_ghost_packages.FlagGhostPackagePipeline, - enhance_with_kev.VulnerabilityKevPipeline, - enhance_with_metasploit.MetasploitImproverPipeline, - enhance_with_exploitdb.ExploitDBImproverPipeline, - compute_package_risk.ComputePackageRiskPipeline, - compute_package_version_rank.ComputeVersionRankPipeline, - collect_commits.CollectFixCommitsPipeline, - add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline, - remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline, - populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, - exploitdb_v2.ExploitDBImproverPipeline, - enhance_with_kev_v2.VulnerabilityKevPipeline, - flag_ghost_packages_v2.FlagGhostPackagePipeline, - enhance_with_metasploit_v2.MetasploitImproverPipeline, - compute_package_risk_v2.ComputePackageRiskPipeline, - compute_version_rank_v2.ComputeVersionRankPipeline, - collect_commits_v2.CollectFixCommitsPipeline, -] - -IMPROVERS_REGISTRY = { - 
x.pipeline_id if issubclass(x, VulnerableCodePipeline) else x.qualified_name: x - for x in IMPROVERS_REGISTRY -} +IMPROVERS_REGISTRY = create_registry( + [ + valid_versions.GitHubBasicImprover, + valid_versions.GitLabBasicImprover, + valid_versions.NginxBasicImprover, + valid_versions.ApacheHTTPDImprover, + valid_versions.DebianBasicImprover, + valid_versions.NpmImprover, + valid_versions.ElixirImprover, + valid_versions.ApacheTomcatImprover, + valid_versions.ApacheKafkaImprover, + valid_versions.IstioImprover, + valid_versions.DebianOvalImprover, + valid_versions.UbuntuOvalImprover, + valid_versions.OSSFuzzImprover, + valid_versions.RubyImprover, + valid_versions.GithubOSVImprover, + vulnerability_status.VulnerabilityStatusImprover, + valid_versions.CurlImprover, + flag_ghost_packages.FlagGhostPackagePipeline, + enhance_with_kev.VulnerabilityKevPipeline, + enhance_with_metasploit.MetasploitImproverPipeline, + enhance_with_exploitdb.ExploitDBImproverPipeline, + compute_package_risk.ComputePackageRiskPipeline, + compute_package_version_rank.ComputeVersionRankPipeline, + collect_commits.CollectFixCommitsPipeline, + add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline, + remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline, + populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, + exploitdb_v2.ExploitDBImproverPipeline, + enhance_with_kev_v2.VulnerabilityKevPipeline, + flag_ghost_packages_v2.FlagGhostPackagePipeline, + enhance_with_metasploit_v2.MetasploitImproverPipeline, + compute_package_risk_v2.ComputePackageRiskPipeline, + compute_version_rank_v2.ComputeVersionRankPipeline, + collect_commits_v2.CollectFixCommitsPipeline, + ] +) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 35c9f6de4..7d9a591c8 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2630,6 +2630,15 @@ class AdvisoryV2(models.Model): help_text="Unique ID for the datasource used for this advisory ." "e.g.: nginx_importer_v2", ) + advisory_id = models.CharField( + max_length=50, + blank=False, + null=False, + unique=False, + help_text="An advisory is a unique vulnerability identifier in some database, " + "such as PYSEC-2020-2233", + ) + avid = models.CharField( max_length=500, blank=False, @@ -2639,14 +2648,6 @@ class AdvisoryV2(models.Model): ) # This is similar to a name - advisory_id = models.CharField( - max_length=50, - blank=False, - null=False, - unique=False, - help_text="An advisory is a unique vulnerability identifier in some database, " - "such as PYSEC-2020-2233", - ) # This is similar to a version unique_content_id = models.CharField( diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 4d472a650..3d1316cce 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -159,14 +159,6 @@ def on_failure(self): """ pass - @classproperty - def pipeline_id(cls): - """Return unique pipeline_id set in cls.pipeline_id""" - - if cls.pipeline_id is None or cls.pipeline_id == "": - raise NotImplementedError("pipeline_id is not defined or is empty") - return cls.pipeline_id - class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline): """ @@ -273,10 +265,8 @@ class VulnerableCodeBaseImporterPipelineV2(VulnerableCodePipeline): pipeline_id = None # Unique Pipeline ID, this should be the name of pipeline module. 
license_url = None - label = None spdx_license_expression = None repo_url = None - importer_name = None advisory_confidence = MAX_CONFIDENCE ignorable_versions = [] unfurl_version_ranges = False diff --git a/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py index f6d1f0b9a..90ea32b75 100644 --- a/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py +++ b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py @@ -139,11 +139,15 @@ def get_weaknesses(cve_data): class ApacheHTTPDImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + Apache HTTPD Importer Pipeline + + This pipeline imports security advisories from the Apache HTTPD project. + """ + pipeline_id = "apache_httpd_importer_v2" - label = "Apache-Httpd" spdx_license_expression = "Apache-2.0" license_url = "https://www.apache.org/licenses/LICENSE-2.0" - importer_name = "Apache HTTPD Importer" base_url = "https://httpd.apache.org/security/json/" unfurl_version_ranges = True diff --git a/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py b/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py index 15f5ea9f1..902dd5248 100644 --- a/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py +++ b/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py @@ -25,13 +25,17 @@ class ElixirSecurityImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + Elixir Security Advisiories Importer Pipeline + + This pipeline imports security advisories for elixir. + """ pipeline_id = "elixir_security_importer_v2" - label = "Elixir Security" - repo_url = "git+https://github.com/dependabot/elixir-security-advisories" - license_url = "https://github.com/dependabot/elixir-security-advisories/blob/master/LICENSE.txt" spdx_license_expression = "CC0-1.0" - importer_name = "Elixir Security Importer" + license_url = "https://github.com/dependabot/elixir-security-advisories/blob/master/LICENSE.txt" + repo_url = "git+https://github.com/dependabot/elixir-security-advisories" + unfurl_version_ranges = True @classmethod def steps(cls): diff --git a/vulnerabilities/pipelines/v2_importers/github_importer.py b/vulnerabilities/pipelines/v2_importers/github_importer.py index aff1d27ba..9ac360016 100644 --- a/vulnerabilities/pipelines/v2_importers/github_importer.py +++ b/vulnerabilities/pipelines/v2_importers/github_importer.py @@ -33,15 +33,17 @@ class GitHubAPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): - """Collect GitHub advisories.""" + """ + GitHub Importer Pipeline + + This pipeline imports security advisories from GitHub Security Advisories. + """ pipeline_id = "github_importer_v2" - label = "GitHub" spdx_license_expression = "CC-BY-4.0" license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md" - importer_name = "GHSA Importer" - unfurl_version_ranges = True + ignorable_versions = frozenset( [ "0.1-bulbasaur", diff --git a/vulnerabilities/pipelines/v2_importers/gitlab_importer.py b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py index 412733da7..1f175f07f 100644 --- a/vulnerabilities/pipelines/v2_importers/gitlab_importer.py +++ b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py @@ -34,15 +34,16 @@ class GitLabImporterPipeline(VulnerableCodeBaseImporterPipelineV2): - """Collect advisory from GitLab Advisory Database (Open Source Edition).""" + """ + GitLab Importer Pipeline + + Collect advisory from GitLab Advisory Database (Open Source Edition). 
+ """ pipeline_id = "gitlab_importer_v2" - label = "GitLab" spdx_license_expression = "MIT" license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE" - importer_name = "GitLab Importer" repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/" - unfurl_version_ranges = True @classmethod diff --git a/vulnerabilities/pipelines/v2_importers/npm_importer.py b/vulnerabilities/pipelines/v2_importers/npm_importer.py index 2a22034d2..19d21c987 100644 --- a/vulnerabilities/pipelines/v2_importers/npm_importer.py +++ b/vulnerabilities/pipelines/v2_importers/npm_importer.py @@ -30,14 +30,16 @@ class NpmImporterPipeline(VulnerableCodeBaseImporterPipelineV2): - """Collect advisories from nodejs GitHub repository.""" + """ + Node.js Security Working Group importer pipeline + + Import advisories from nodejs security working group including node proper advisories and npm advisories. + """ pipeline_id = "nodejs_security_wg" spdx_license_expression = "MIT" license_url = "https://github.com/nodejs/security-wg/blob/main/LICENSE.md" repo_url = "git+https://github.com/nodejs/security-wg" - importer_name = "npm Importer" - unfurl_version_ranges = True @classmethod @@ -120,7 +122,7 @@ def to_advisory_data(self, file: Path) -> Iterable[AdvisoryData]: advsisory_aliases = data.get("cves") or [] return AdvisoryData( - advisory_id=f"NODESEC-NPM-{id}", + advisory_id=f"npm-{id}", aliases=advsisory_aliases, summary=build_description(summary=summary, description=description), date_published=date_published, diff --git a/vulnerabilities/pipelines/v2_importers/nvd_importer.py b/vulnerabilities/pipelines/v2_importers/nvd_importer.py index c4d0c09c5..1166ac8ef 100644 --- a/vulnerabilities/pipelines/v2_importers/nvd_importer.py +++ b/vulnerabilities/pipelines/v2_importers/nvd_importer.py @@ -28,10 +28,13 @@ class NVDImporterPipeline(VulnerableCodeBaseImporterPipelineV2): - """Collect advisories from NVD.""" + """ + NVD Importer Pipeline + + Collect advisories from NVD. + """ pipeline_id = "nvd_importer_v2" - label = "NVD" # See https://github.com/nexB/vulnerablecode/issues/665 for follow up spdx_license_expression = ( "LicenseRef-scancode-us-govt-public-domain AND LicenseRef-scancode-cve-tou" @@ -66,7 +69,6 @@ class NVDImporterPipeline(VulnerableCodeBaseImporterPipelineV2): INFORMATION THEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. """ - importer_name = "NVD Importer" @classmethod def steps(cls): diff --git a/vulnerabilities/pipelines/v2_importers/postgresql_importer.py b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py index 830db934d..2f5a49439 100644 --- a/vulnerabilities/pipelines/v2_importers/postgresql_importer.py +++ b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py @@ -25,12 +25,15 @@ class PostgreSQLImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + PostgreSQL Importer Pipeline + + This pipeline imports security advisories from the PostgreSQL project. 
+ """ pipeline_id = "postgresql_importer_v2" - label = "PostgreSQL" license_url = "https://www.postgresql.org/about/licence/" spdx_license_expression = "PostgreSQL" - importer_name = "PostgreSQL Importer" base_url = "https://www.postgresql.org/support/security/" links = set() diff --git a/vulnerabilities/pipelines/v2_importers/pypa_importer.py b/vulnerabilities/pipelines/v2_importers/pypa_importer.py index 018a50d51..7463cc4bd 100644 --- a/vulnerabilities/pipelines/v2_importers/pypa_importer.py +++ b/vulnerabilities/pipelines/v2_importers/pypa_importer.py @@ -19,15 +19,15 @@ class PyPaImporterPipeline(VulnerableCodeBaseImporterPipelineV2): - """Collect advisories from PyPA GitHub repository.""" + """ + Pypa Importer Pipeline + + Collect advisories from PyPA GitHub repository.""" pipeline_id = "pypa_importer_v2" - label = "Pypa" spdx_license_expression = "CC-BY-4.0" license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" repo_url = "git+https://github.com/pypa/advisory-database" - importer_name = "Pypa Importer" - unfurl_version_ranges = True @classmethod diff --git a/vulnerabilities/pipelines/v2_importers/pysec_importer.py b/vulnerabilities/pipelines/v2_importers/pysec_importer.py index b5fb21a0a..e67f41a28 100644 --- a/vulnerabilities/pipelines/v2_importers/pysec_importer.py +++ b/vulnerabilities/pipelines/v2_importers/pysec_importer.py @@ -19,15 +19,15 @@ class PyPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): - """Collect advisories from PyPI.""" + """ + PyPI Importer Pipeline + + Collect advisories from PyPI.""" pipeline_id = "pysec_importer_v2" - label = "Pypi" license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" url = "https://osv-vulnerabilities.storage.googleapis.com/PyPI/all.zip" spdx_license_expression = "CC-BY-4.0" - importer_name = "PyPI Importer" - unfurl_version_ranges = True @classmethod diff --git a/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py index 7aa38f25f..b2ddfd3cd 100644 --- a/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py +++ b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py @@ -21,12 +21,16 @@ class VulnrichImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + Vulnrichment Importer Pipeline + + This pipeline imports security advisories from Vulnrichment project. 
+ """ + pipeline_id = "vulnrichment_importer_v2" - label = "Vulnrichment" spdx_license_expression = "CC0-1.0" license_url = "https://github.com/cisagov/vulnrichment/blob/develop/LICENSE" repo_url = "git+https://github.com/cisagov/vulnrichment.git" - importer_name = "Vulnrichment" @classmethod def steps(cls): diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index ea19cbe87..d5d88fbfd 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -153,12 +153,12 @@ def insert_advisory_v2( affecting_packages, fixed_by_packages = get_advisory_packages(advisory_data=advisory) try: default_data = { - "summary": advisory.summary, - "date_published": advisory.date_published, "datasource_id": pipeline_id, - "date_collected": datetime.now(timezone.utc), "advisory_id": advisory.advisory_id, "avid": f"{pipeline_id}/{advisory.advisory_id}", + "summary": advisory.summary, + "date_published": advisory.date_published, + "date_collected": datetime.now(timezone.utc), } advisory_obj, _ = AdvisoryV2.objects.get_or_create( diff --git a/vulnerabilities/tests/pipelines/test_collect_commits_v2.py b/vulnerabilities/tests/pipelines/test_collect_commits_v2.py new file mode 100644 index 000000000..dddec9084 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_collect_commits_v2.py @@ -0,0 +1,131 @@ +from datetime import datetime +from unittest.mock import patch + +import pytest + +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import CodeFixV2 +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines.v2_improvers.collect_commits import CollectFixCommitsPipeline +from vulnerabilities.pipelines.v2_improvers.collect_commits import is_vcs_url +from vulnerabilities.pipelines.v2_improvers.collect_commits import is_vcs_url_already_processed +from vulnerabilities.pipelines.v2_improvers.collect_commits import normalize_vcs_url + + +@pytest.mark.parametrize( + "url,expected", + [ + ("git://github.com/angular/di.js.git", True), + ("github:user/repo", True), + ("user/repo", True), + ("https://github.com/user/repo.git", True), + ("git@github.com:user/repo.git", True), + ("ftp://example.com/not-a-repo", False), + ("random-string", False), + ("https://example.com/not-a-repo", False), + ], +) +def test_is_vcs_url(url, expected): + assert is_vcs_url(url) is expected + + +@pytest.mark.parametrize( + "url,normalized", + [ + ("git@github.com:user/repo.git", "https://github.com/user/repo.git"), + ("github:user/repo", "https://github.com/user/repo"), + ("bitbucket:example/repo", "https://bitbucket.org/example/repo"), + ("user/repo", "https://github.com/user/repo"), + ("https://gitlab.com/foo/bar.git", "https://gitlab.com/foo/bar.git"), + ], +) +def test_normalize_vcs_url(url, normalized): + assert normalize_vcs_url(url) == normalized + + +@pytest.mark.django_db +def test_is_vcs_url_already_processed_true(): + advisory = AdvisoryV2.objects.create( + advisory_id="CVE-2025-9999", + datasource_id="test-ds", + avid="test-ds/CVE-2025-9999", + url="https://example.com/advisory/CVE-2025-9999", + unique_content_id="11111", + date_collected=datetime.now(), + ) + package = PackageV2.objects.create( + type="bar", + name="foo", + version="1.0", + ) + advisory.affecting_packages.add(package) + advisory.save() + CodeFixV2.objects.create( + commits=["https://github.com/user/repo/commit/abc123"], + advisory=advisory, + affected_package=package, + ) + assert 
is_vcs_url_already_processed("https://github.com/user/repo/commit/abc123") is True + + +@pytest.mark.django_db +def test_collect_fix_commits_pipeline_creates_entry(): + advisory = AdvisoryV2.objects.create( + advisory_id="CVE-2025-1000", + datasource_id="test-ds", + avid="test-ds/CVE-2025-1000", + url="https://example.com/advisory/CVE-2025-1000", + unique_content_id="11111", + date_collected=datetime.now(), + ) + package = PackageV2.objects.create( + type="foo", + name="testpkg", + version="1.0", + ) + reference = AdvisoryReference.objects.create( + url="https://github.com/test/testpkg/commit/abc123" + ) + advisory.affecting_packages.add(package) + advisory.references.add(reference) + advisory.save() + + pipeline = CollectFixCommitsPipeline() + pipeline.collect_and_store_fix_commits() + + codefixes = CodeFixV2.objects.all() + assert codefixes.count() == 1 + fix = codefixes.first() + assert "abc123" in fix.commits[0] + assert fix.advisory == advisory + assert fix.affected_package == package + + +@pytest.mark.django_db +def test_collect_fix_commits_pipeline_skips_non_commit_urls(): + advisory = AdvisoryV2.objects.create( + advisory_id="CVE-2025-2000", + datasource_id="test-ds", + avid="test-ds/CVE-2025-2000", + url="https://example.com/advisory/CVE-2025-2000", + unique_content_id="11111", + date_collected=datetime.now(), + ) + package = PackageV2.objects.create( + type="pypi", + name="otherpkg", + version="2.0", + ) + + advisory.affecting_packages.add(package) + + reference = AdvisoryReference.objects.create(url="https://github.com/test/testpkg/issues/12") + + advisory.references.add(reference) + advisory.save() + + pipeline = CollectFixCommitsPipeline() + pipeline.collect_and_store_fix_commits() + + assert CodeFixV2.objects.count() == 0 diff --git a/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py b/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py index e8998d8be..96359ca3c 100644 --- a/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py +++ b/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py @@ -105,4 +105,4 @@ def test_collect_advisories_skips_invalid_cve(mock_fetch_via_vcs, tmp_path): importer = ElixirSecurityImporterPipeline() importer.clone() advisories = list(importer.collect_advisories()) - assert len(advisories) == 0 # Confirm it skipped the invalid CVE + assert len(advisories) == 0 diff --git a/vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py b/vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py new file mode 100644 index 000000000..d082fdc3a --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py @@ -0,0 +1,111 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from unittest.mock import patch + +import pytest +from packageurl import PackageURL + +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines.v2_improvers.flag_ghost_packages import ( + detect_and_flag_ghost_packages, +) +from vulnerabilities.pipelines.v2_improvers.flag_ghost_packages import flag_ghost_packages + + +@pytest.mark.django_db +def test_flag_ghost_package_marked_correctly(): + pkg = PackageV2.objects.create( + type="pypi", + namespace=None, + name="requests", + version="999.999.999", + ) + + with patch( + "vulnerabilities.pipelines.v2_improvers.flag_ghost_packages.get_versions" + ) as mock_get_versions: + mock_get_versions.return_value = {"2.25.1", "2.26.0"} + + base_purl = PackageURL(type="pypi", namespace=None, name="requests") + ghost_count = flag_ghost_packages(base_purl, [pkg]) + + pkg.refresh_from_db() + assert ghost_count == 1 + assert pkg.is_ghost is True + + +@pytest.mark.django_db +def test_flag_non_ghost_package_not_marked(): + pkg = PackageV2.objects.create( + type="pypi", + namespace=None, + name="requests", + version="2.26.0", + ) + + with patch( + "vulnerabilities.pipelines.v2_improvers.flag_ghost_packages.get_versions" + ) as mock_get_versions: + mock_get_versions.return_value = {"2.25.1", "2.26.0"} + + base_purl = PackageURL(type="pypi", namespace=None, name="requests") + ghost_count = flag_ghost_packages(base_purl, [pkg]) + + pkg.refresh_from_db() + assert ghost_count == 0 + assert pkg.is_ghost is False + + +@pytest.mark.django_db +def test_flag_ghost_packages_gracefully_handles_version_fetch_failure(): + pkg = PackageV2.objects.create( + type="pypi", + namespace=None, + name="some-lib", + version="1.0.0", + ) + + with patch( + "vulnerabilities.pipelines.v2_improvers.flag_ghost_packages.get_versions" + ) as mock_get_versions: + mock_get_versions.return_value = None + + base_purl = PackageURL(type="pypi", namespace=None, name="some-lib") + ghost_count = flag_ghost_packages(base_purl, [pkg]) + + pkg.refresh_from_db() + assert ghost_count == 0 + assert pkg.is_ghost is False + + +@pytest.mark.django_db +def test_detect_and_flag_ghost_packages(monkeypatch): + ghost_pkg = PackageV2.objects.create(type="pypi", name="fakepkg", version="9.9.9") + real_pkg = PackageV2.objects.create(type="pypi", name="realpkg", version="1.0.0") + + def fake_versions(purl, logger=None): + if purl.name == "realpkg": + return {"1.0.0"} + if purl.name == "fakepkg": + return {"0.1.0", "0.2.0"} + return set() + + monkeypatch.setattr( + "vulnerabilities.pipelines.v2_improvers.flag_ghost_packages.get_versions", + fake_versions, + ) + + detect_and_flag_ghost_packages() + + ghost_pkg.refresh_from_db() + real_pkg.refresh_from_db() + + assert ghost_pkg.is_ghost is True + assert real_pkg.is_ghost is False diff --git a/vulnerabilities/tests/pipes/test_advisory.py b/vulnerabilities/tests/pipes/test_advisory.py index ee29a4b8d..72c477455 100644 --- a/vulnerabilities/tests/pipes/test_advisory.py +++ b/vulnerabilities/tests/pipes/test_advisory.py @@ -9,6 +9,7 @@ from datetime import datetime +import pytest from django.core.exceptions import ValidationError from django.test import TestCase from django.utils import timezone @@ -19,6 +20,14 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage from vulnerabilities.importer import Reference +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisorySeverity +from vulnerabilities.models import 
AdvisoryWeakness +from vulnerabilities.pipes.advisory import get_or_create_advisory_aliases +from vulnerabilities.pipes.advisory import get_or_create_advisory_references +from vulnerabilities.pipes.advisory import get_or_create_advisory_severities +from vulnerabilities.pipes.advisory import get_or_create_advisory_weaknesses from vulnerabilities.pipes.advisory import get_or_create_aliases from vulnerabilities.pipes.advisory import import_advisory from vulnerabilities.utils import compute_content_id @@ -134,3 +143,85 @@ def test_advisory_insert_no_duplicate_content_id(self): date_collected=date, created_by="test_pipeline", ) + + +@pytest.fixture +def advisory_aliases(): + return ["CVE-2021-12345", "GHSA-xyz"] + + +@pytest.fixture +def advisory_references(): + return [ + Reference(reference_id="REF-1", url="https://example.com/advisory/1"), + Reference(reference_id="REF-2", url="https://example.com/advisory/2"), + Reference(reference_id="", url="https://example.com/advisory/3"), + Reference(url="https://example.com/advisory/4"), + ] + + +@pytest.fixture +def advisory_severities(): + class Severity: + def __init__(self, system, value, scoring_elements, published_at=None, url=None): + self.system = system + self.value = value + self.scoring_elements = scoring_elements + self.published_at = published_at + self.url = url + + class System: + def __init__(self, identifier): + self.identifier = identifier + + return [ + Severity( + System("CVSSv3"), + "7.5", + "AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", + timezone.now(), + "https://cvss.example.com", + ), + ] + + +@pytest.fixture +def advisory_weaknesses(): + return [79, 89] + + +@pytest.mark.django_db +def test_get_or_create_advisory_aliases(advisory_aliases): + aliases = get_or_create_advisory_aliases(advisory_aliases) + assert len(aliases) == len(advisory_aliases) + for alias_obj in aliases: + assert isinstance(alias_obj, AdvisoryAlias) + assert alias_obj.alias in advisory_aliases + + +@pytest.mark.django_db +def test_get_or_create_advisory_references(advisory_references): + refs = get_or_create_advisory_references(advisory_references) + assert len(refs) == len(advisory_references) + for ref in refs: + assert isinstance(ref, AdvisoryReference) + assert ref.url in [r.url for r in advisory_references] + + +@pytest.mark.django_db +def test_get_or_create_advisory_severities(advisory_severities): + sevs = get_or_create_advisory_severities(advisory_severities) + assert len(sevs) == len(advisory_severities) + for sev in sevs: + assert isinstance(sev, AdvisorySeverity) + assert sev.scoring_system == advisory_severities[0].system.identifier + assert sev.value == advisory_severities[0].value + + +@pytest.mark.django_db +def test_get_or_create_advisory_weaknesses(advisory_weaknesses): + weaknesses = get_or_create_advisory_weaknesses(advisory_weaknesses) + assert len(weaknesses) == len(advisory_weaknesses) + for w in weaknesses: + assert isinstance(w, AdvisoryWeakness) + assert w.cwe_id in advisory_weaknesses diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 9d2806bf6..3aec1f56c 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -654,3 +654,28 @@ def compute_content_id(advisory_data): content_id = hashlib.sha256(normalized_json.encode("utf-8")).hexdigest() return content_id + + +def create_registry(pipelines): + """ + Return a mapping of {pipeline ID: pipeline class} for a list of pipelines. 
+ """ + from vulnerabilities.pipelines import VulnerableCodePipeline + + registry = {} + for pipeline in pipelines: + if issubclass(pipeline, VulnerableCodePipeline): + key = pipeline.pipeline_id + else: + # For everything legacy use qualified_name + key = pipeline.qualified_name + + if not key: + raise Exception(f"Pipeline ID can not be empty: {pipeline!r}") + + if key in registry: + raise Exception(f"Duplicate pipeline found: {key}") + + registry[key] = pipeline + + return registry From 4bc2651cec0450db2333aad667931ddc7a2a21f5 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 1 Jul 2025 16:09:09 +0530 Subject: [PATCH 39/44] Merge changes Signed-off-by: Tushar Goel --- vulnerabilities/improvers/__init__.py | 2 +- ...visoryreference_advisoryseverity_and_more.py} | 16 ++++++++-------- vulnerabilities/models.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) rename vulnerabilities/migrations/{0093_advisoryalias_advisoryreference_advisoryseverity_and_more.py => 0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py} (99%) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index f4fcf3ecc..be6f73cb9 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -9,7 +9,6 @@ from vulnerabilities.improvers import valid_versions from vulnerabilities.improvers import vulnerability_status -from vulnerabilities.pipelines import VulnerableCodePipeline from vulnerabilities.pipelines import add_cvss31_to_CVEs from vulnerabilities.pipelines import collect_commits from vulnerabilities.pipelines import compute_advisory_todo @@ -70,5 +69,6 @@ compute_package_risk_v2.ComputePackageRiskPipeline, compute_version_rank_v2.ComputeVersionRankPipeline, collect_commits_v2.CollectFixCommitsPipeline, + compute_advisory_todo.ComputeToDo, ] ) diff --git a/vulnerabilities/migrations/0093_advisoryalias_advisoryreference_advisoryseverity_and_more.py b/vulnerabilities/migrations/0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py similarity index 99% rename from vulnerabilities/migrations/0093_advisoryalias_advisoryreference_advisoryseverity_and_more.py rename to vulnerabilities/migrations/0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py index b83f69040..fd3f74d98 100644 --- a/vulnerabilities/migrations/0093_advisoryalias_advisoryreference_advisoryseverity_and_more.py +++ b/vulnerabilities/migrations/0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.20 on 2025-06-20 07:50 +# Generated by Django 4.2.20 on 2025-07-01 10:38 from django.db import migrations, models import django.db.models.deletion @@ -7,7 +7,7 @@ class Migration(migrations.Migration): dependencies = [ - ("vulnerabilities", "0092_pipelineschedule_pipelinerun"), + ("vulnerabilities", "0093_advisorytodo_todorelatedadvisory_and_more"), ] operations = [ @@ -158,17 +158,17 @@ class Migration(migrations.Migration): ), ), ( - "avid", + "advisory_id", models.CharField( - help_text="Unique ID for the datasource used for this advisory .e.g.: pysec_importer_v2/PYSEC-2020-2233", - max_length=500, + help_text="An advisory is a unique vulnerability identifier in some database, such as PYSEC-2020-2233", + max_length=50, ), ), ( - "advisory_id", + "avid", models.CharField( - help_text="An advisory is a unique vulnerability identifier in some database, such as PYSEC-2020-2233", - max_length=50, + help_text="Unique ID for the datasource used for this advisory .e.g.: 
pysec_importer_v2/PYSEC-2020-2233", + max_length=500, ), ), ( diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 7d9a591c8..2fc5ec575 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2630,6 +2630,7 @@ class AdvisoryV2(models.Model): help_text="Unique ID for the datasource used for this advisory ." "e.g.: nginx_importer_v2", ) + # This is similar to a name advisory_id = models.CharField( max_length=50, blank=False, @@ -2647,7 +2648,6 @@ class AdvisoryV2(models.Model): "e.g.: pysec_importer_v2/PYSEC-2020-2233", ) - # This is similar to a name # This is similar to a version unique_content_id = models.CharField( From e8b4bf52d17b06f757025263d18fa7790bc90f3d Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 1 Jul 2025 16:19:54 +0530 Subject: [PATCH 40/44] Add tests for compute package risk V2 Signed-off-by: Tushar Goel --- vulnerabilities/models.py | 1 - .../pipelines/test_compute_package_risk_v2.py | 69 +++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 2fc5ec575..ab01010d7 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -2648,7 +2648,6 @@ class AdvisoryV2(models.Model): "e.g.: pysec_importer_v2/PYSEC-2020-2233", ) - # This is similar to a version unique_content_id = models.CharField( max_length=64, diff --git a/vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py b/vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py new file mode 100644 index 000000000..4dbfb222a --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py @@ -0,0 +1,69 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# +from datetime import datetime +from decimal import Decimal + +import pytest + +from vulnerabilities.models import AdvisorySeverity +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import AdvisoryWeakness +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines.v2_improvers.compute_package_risk import ComputePackageRiskPipeline +from vulnerabilities.severity_systems import CVSSV3 +from vulnerabilities.severity_systems import GENERIC + + +@pytest.mark.django_db +def test_simple_risk_pipeline(): + pkg = PackageV2.objects.create(type="pypi", name="foo", version="2.3.0") + assert PackageV2.objects.count() == 1 + + adv = AdvisoryV2( + advisory_id="VCID-Existing", + summary="vulnerability description here", + datasource_id="ds", + avid="ds/VCID-Existing", + unique_content_id="ajkef", + url="https://test.com", + date_collected=datetime.now(), + ) + adv.save() + + severity1 = AdvisorySeverity.objects.create( + url="https://nvd.nist.gov/vuln/detail/CVE-xxxx-xxx1", + scoring_system=CVSSV3.identifier, + scoring_elements="CVSS:3.0/AV:P/AC:H/PR:H/UI:R/S:C/C:H/I:H/A:N/E:H/RL:O/RC:R/CR:H/MAC:H/MC:L", + value="6.5", + ) + + severity2 = AdvisorySeverity.objects.create( + url="https://nvd.nist.gov/vuln/detail/CVE-xxxx-xxx1", + scoring_system=GENERIC.identifier, + value="MODERATE", # 6.9 + ) + adv.severities.add(severity1) + adv.severities.add(severity2) + + weaknesses = AdvisoryWeakness.objects.create(cwe_id=119) + adv.weaknesses.add(weaknesses) + + adv.affecting_packages.add(pkg) + adv.save() + + improver = ComputePackageRiskPipeline() + improver.execute() + + assert pkg.risk_score is None + + improver = ComputePackageRiskPipeline() + improver.execute() + + pkg = PackageV2.objects.get(type="pypi", name="foo", version="2.3.0") + assert pkg.risk_score == Decimal("3.1") From dba9493d1f06b70c60d9fa0da879aa057df61831 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 1 Jul 2025 16:24:39 +0530 Subject: [PATCH 41/44] Add tests for compute package rank V2 Signed-off-by: Tushar Goel --- .../pipelines/test_compute_version_rank_v2.py | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py diff --git a/vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py b/vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py new file mode 100644 index 000000000..eb8d3aebd --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py @@ -0,0 +1,70 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from unittest.mock import patch + +import pytest +from univers.versions import Version + +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines.v2_improvers.computer_package_version_rank import ( + ComputeVersionRankPipeline, +) + + +@pytest.mark.django_db +class TestComputeVersionRankPipeline: + @pytest.fixture + def pipeline(self): + return ComputeVersionRankPipeline() + + @pytest.fixture + def packages(self, db): + package_type = "pypi" + namespace = "test_namespace" + name = "test_package" + PackageV2.objects.create(type=package_type, namespace=namespace, name=name, version="1.0.0") + PackageV2.objects.create(type=package_type, namespace=namespace, name=name, version="1.1.0") + PackageV2.objects.create(type=package_type, namespace=namespace, name=name, version="0.9.0") + return PackageV2.objects.filter(type=package_type, namespace=namespace, name=name) + + def test_compute_and_store_version_rank(self, pipeline, packages): + with patch.object(pipeline, "log") as mock_log: + pipeline.compute_and_store_version_rank() + assert mock_log.call_count > 0 + for package in packages: + assert package.version_rank is not None + + def test_update_version_rank_for_group(self, pipeline, packages): + with patch.object(PackageV2.objects, "bulk_update") as mock_bulk_update: + pipeline.update_version_rank_for_group(packages) + mock_bulk_update.assert_called_once() + updated_packages = mock_bulk_update.call_args[0][0] + assert len(updated_packages) == len(packages) + for idx, package in enumerate(sorted(packages, key=lambda p: Version(p.version))): + assert updated_packages[idx].version_rank == idx + + def test_sort_packages_by_version(self, pipeline, packages): + sorted_packages = pipeline.sort_packages_by_version(packages) + versions = [p.version for p in sorted_packages] + assert versions == sorted(versions, key=Version) + + def test_sort_packages_by_version_empty(self, pipeline): + assert pipeline.sort_packages_by_version([]) == [] + + def test_sort_packages_by_version_invalid_scheme(self, pipeline, packages): + for package in packages: + package.type = "invalid" + assert pipeline.sort_packages_by_version(packages) == [] + + def test_compute_and_store_version_rank_invalid_scheme(self, pipeline): + PackageV2.objects.create(type="invalid", namespace="test", name="package", version="1.0.0") + with patch.object(pipeline, "log") as mock_log: + pipeline.compute_and_store_version_rank() + mock_log.assert_any_call("Successfully populated `version_rank` for all packages.") From d1e8c544b68a5f03c2f856559dee7d108d90fc84 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 1 Jul 2025 16:27:29 +0530 Subject: [PATCH 42/44] Fix tests Signed-off-by: Tushar Goel --- .../tests/pipelines/test_npm_importer_pipeline_v2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py index 7063174b6..7941c9b69 100644 --- a/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py +++ b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py @@ -63,7 +63,7 @@ def test_advisories_count_and_collect(tmp_path): # Should yield None for index.json and one AdvisoryData real = [a for a in advisories if isinstance(a, AdvisoryData)] assert len(real) == 1 - assert real[0].advisory_id == "NODESEC-NPM-001" + assert real[0].advisory_id == "npm-001" def test_to_advisory_data_skips_index(tmp_path): @@ -92,7 +92,7 @@ def test_to_advisory_data_full(tmp_path): 
p = NpmImporterPipeline() adv = p.to_advisory_data(file) assert isinstance(adv, AdvisoryData) - assert adv.advisory_id == "NODESEC-NPM-123" + assert adv.advisory_id == "npm-123" assert "ti" in adv.summary and "desc" in adv.summary assert adv.date_published.tzinfo == pytz.UTC assert len(adv.severities) == 1 and adv.severities[0].system == CVSSV3 From c66d400bf0fd42b0a963c606209bb757a2c4984a Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 1 Jul 2025 16:37:49 +0530 Subject: [PATCH 43/44] Add tests for V2 Importer Pipeline Signed-off-by: Tushar Goel --- ...est_vulnerablecode_importer_v2_pipeline.py | 180 ++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py diff --git a/vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py b/vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py new file mode 100644 index 000000000..f995f0c1f --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py @@ -0,0 +1,180 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +from datetime import datetime +from datetime import timedelta +from unittest import mock + +import pytest +from packageurl import PackageURL + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import UnMergeablePackageError +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 + + +class DummyImporter(VulnerableCodeBaseImporterPipelineV2): + pipeline_id = "dummy" + log_messages = [] + + def log(self, message, level=logging.INFO): + self.log_messages.append((level, message)) + + def collect_advisories(self): + yield from self._advisories + + def advisories_count(self): + return len(self._advisories) + + +@pytest.fixture +def dummy_advisory(): + return AdvisoryData( + summary="Test advisory", + aliases=["CVE-2025-0001"], + references_v2=[], + severities=[], + weaknesses=[], + affected_packages=[], + advisory_id="ADV-123", + date_published=datetime.now() - timedelta(days=10), + url="https://example.com/advisory/1", + ) + + +@pytest.fixture +def dummy_importer(dummy_advisory): + importer = DummyImporter() + importer._advisories = [dummy_advisory] + return importer + + +@pytest.mark.django_db +def test_collect_and_store_advisories(dummy_importer): + dummy_importer.collect_and_store_advisories() + assert len(dummy_importer.log_messages) >= 2 + assert "Successfully collected" in dummy_importer.log_messages[-1][1] + assert AdvisoryV2.objects.count() == 1 + + +def test_get_advisory_packages_basic(dummy_importer): + purl = PackageURL("pypi", None, "dummy", "1.0.0") + affected_package = mock.Mock() + affected_package.package = purl + dummy_importer.unfurl_version_ranges = False + + with mock.patch( + "vulnerabilities.improvers.default.get_exact_purls", return_value=([purl], [purl]) + ): + with mock.patch.object( + PackageV2.objects, "get_or_create_from_purl", return_value=(mock.Mock(), True) + ) as mock_get: + dummy_importer.get_advisory_packages( + 
advisory_data=mock.Mock(affected_packages=[affected_package]) + ) + assert mock_get.call_count == 2 # one affected, one fixed + + +def test_get_published_package_versions_filters(dummy_importer): + purl = PackageURL("pypi", None, "example", None) + + dummy_versions = [ + mock.Mock(value="1.0.0", release_date=datetime.now() - timedelta(days=5)), + mock.Mock(value="2.0.0", release_date=datetime.now() + timedelta(days=5)), # future + ] + + with mock.patch( + "vulnerabilities.pipelines.package_versions.versions", return_value=dummy_versions + ): + versions = dummy_importer.get_published_package_versions(purl, until=datetime.now()) + assert "1.0.0" in versions + assert "2.0.0" not in versions + + +def test_get_published_package_versions_failure_logs(dummy_importer): + purl = PackageURL("pypi", None, "example", None) + with mock.patch( + "vulnerabilities.pipelines.package_versions.versions", side_effect=Exception("fail") + ): + versions = dummy_importer.get_published_package_versions(purl) + assert versions == [] + assert any("Failed to fetch versions" in msg for lvl, msg in dummy_importer.log_messages) + + +def test_expand_version_range_to_purls(dummy_importer): + purls = list( + dummy_importer.expand_verion_range_to_purls("npm", "lodash", "lodash", ["1.0.0", "1.1.0"]) + ) + assert all(isinstance(p, PackageURL) for p in purls) + assert purls[0].name == "lodash" + + +def test_resolve_package_versions(dummy_importer): + dummy_importer.ignorable_versions = [] + dummy_importer.expand_verion_range_to_purls = lambda *args, **kwargs: [ + PackageURL("npm", None, "a", "1.0.0") + ] + + with mock.patch( + "vulnerabilities.pipelines.resolve_version_range", return_value=(["1.0.0"], ["1.1.0"]) + ), mock.patch( + "vulnerabilities.pipelines.get_affected_packages_by_patched_package", + return_value={None: [PackageURL("npm", None, "a", "1.0.0")]}, + ), mock.patch( + "vulnerabilities.pipelines.nearest_patched_package", return_value=[] + ): + aff, fix = dummy_importer.resolve_package_versions( + affected_version_range=">=1.0.0", + pkg_type="npm", + pkg_namespace=None, + pkg_name="a", + valid_versions=["1.0.0", "1.1.0"], + ) + assert any(isinstance(p, PackageURL) for p in aff) + + +def test_get_impacted_packages_mergeable(dummy_importer): + ap = mock.Mock() + ap.package = PackageURL("npm", None, "abc", None) + dummy_importer.get_published_package_versions = lambda package_url, until: ["1.0.0", "1.1.0"] + dummy_importer.resolve_package_versions = lambda **kwargs: ( + [PackageURL("npm", None, "abc", "1.0.0")], + [PackageURL("npm", None, "abc", "1.1.0")], + ) + + with mock.patch( + "vulnerabilities.importer.AffectedPackage.merge", + return_value=(ap.package, [">=1.0.0"], ["1.1.0"]), + ): + aff, fix = dummy_importer.get_impacted_packages([ap], datetime.now()) + assert len(aff) == 1 and aff[0].version == "1.0.0" + assert len(fix) == 1 and fix[0].version == "1.1.0" + + +def test_get_impacted_packages_unmergeable(dummy_importer): + ap = mock.Mock() + ap.package = PackageURL("npm", None, "abc", None) + ap.affected_version_range = ">=1.0.0" + ap.fixed_version = None + + dummy_importer.get_published_package_versions = lambda package_url, until: ["1.0.0", "1.1.0"] + dummy_importer.resolve_package_versions = lambda **kwargs: ( + [PackageURL("npm", None, "abc", "1.0.0")], + [PackageURL("npm", None, "abc", "1.1.0")], + ) + + with mock.patch( + "vulnerabilities.importer.AffectedPackage.merge", side_effect=UnMergeablePackageError + ): + aff, fix = dummy_importer.get_impacted_packages([ap], datetime.utcnow()) + assert len(aff) == 1 
+        assert aff[0].version == "1.0.0"

From c0541e52e2c2676d95de4e41486a7cc35392e4ba Mon Sep 17 00:00:00 2001
From: Tushar Goel
Date: Tue, 1 Jul 2025 16:52:07 +0530
Subject: [PATCH 44/44] Add tests for exploits enhancement pipeline

Signed-off-by: Tushar Goel
---
 .../test_enhance_with_exploitdb_v2.py         | 56 ++++++++++++++++++
 .../pipelines/test_enhance_with_kev_v2.py     | 57 +++++++++++++++++++
 .../test_enhance_with_metasploit_v2.py        | 56 ++++++++++++++++++
 3 files changed, 169 insertions(+)
 create mode 100644 vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py
 create mode 100644 vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py
 create mode 100644 vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py

diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py b/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py
new file mode 100644
index 000000000..865356158
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py
@@ -0,0 +1,56 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import os
+from datetime import datetime
+from unittest import mock
+from unittest.mock import Mock
+
+import pytest
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines.v2_improvers.enhance_with_exploitdb import ExploitDBImproverPipeline
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_DATA = os.path.join(BASE_DIR, "../test_data", "exploitdb_improver/files_exploits.csv")
+
+
+@pytest.mark.django_db
+@mock.patch("requests.get")
+def test_exploit_db_improver(mock_get):
+    mock_response = Mock(status_code=200)
+    with open(TEST_DATA, "r") as f:
+        mock_response.text = f.read()
+    mock_get.return_value = mock_response
+
+    improver = ExploitDBImproverPipeline()
+
+    # Run the improver when there are no matching aliases
+    improver.execute()
+
+    assert AdvisoryExploit.objects.count() == 0
+
+    adv1 = AdvisoryV2.objects.create(
+        advisory_id="VCIO-123-2002",
+        datasource_id="ds",
+        avid="ds/VCIO-123-2002",
+        unique_content_id="i3giu",
+        url="https://test.com",
+        date_collected=datetime.now(),
+    )
+
+    alias = AdvisoryAlias.objects.create(alias="CVE-2009-3699")
+
+    adv1.aliases.add(alias)
+
+    # Run the Exploit-DB improver again when there are matching aliases.
+    improver.execute()
+    assert AdvisoryExploit.objects.count() == 1
diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py b/vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py
new file mode 100644
index 000000000..bd58fa5fd
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py
@@ -0,0 +1,57 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import os
+from datetime import datetime
+from unittest import mock
+from unittest.mock import Mock
+
+import pytest
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines.v2_improvers.enhance_with_kev import VulnerabilityKevPipeline
+from vulnerabilities.utils import load_json
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_DATA = os.path.join(BASE_DIR, "../test_data", "kev_data.json")
+
+
+@pytest.mark.django_db
+@mock.patch("requests.get")
+def test_kev_improver(mock_get):
+    mock_response = Mock(status_code=200)
+    mock_response.json.return_value = load_json(TEST_DATA)
+    mock_get.return_value = mock_response
+
+    improver = VulnerabilityKevPipeline()
+
+    # Run the improver when there are no matching aliases
+    improver.execute()
+
+    assert AdvisoryExploit.objects.count() == 0
+
+    adv1 = AdvisoryV2.objects.create(
+        advisory_id="VCIO-123-2002",
+        datasource_id="ds",
+        avid="ds/VCIO-123-2002",
+        unique_content_id="i3giu",
+        url="https://test.com",
+        date_collected=datetime.now(),
+    )
+    adv1.save()
+
+    alias = AdvisoryAlias.objects.create(alias="CVE-2021-38647")
+
+    adv1.aliases.add(alias)
+
+    # Run the KEV improver again when there are matching aliases.
+    improver.execute()
+    assert AdvisoryExploit.objects.count() == 1
diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py b/vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py
new file mode 100644
index 000000000..c20437145
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py
@@ -0,0 +1,56 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import os
+from datetime import datetime
+from unittest import mock
+from unittest.mock import Mock
+
+import pytest
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines.v2_improvers.enhance_with_metasploit import (
+    MetasploitImproverPipeline,
+)
+from vulnerabilities.utils import load_json
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_DATA = os.path.join(BASE_DIR, "../test_data", "metasploit_improver/modules_metadata_base.json")
+
+
+@pytest.mark.django_db
+@mock.patch("requests.get")
+def test_metasploit_improver(mock_get):
+    mock_response = Mock(status_code=200)
+    mock_response.json.return_value = load_json(TEST_DATA)
+    mock_get.return_value = mock_response
+
+    improver = MetasploitImproverPipeline()
+
+    # Run the improver when there are no matching aliases
+    improver.execute()
+    assert AdvisoryExploit.objects.count() == 0
+
+    adv1 = AdvisoryV2.objects.create(
+        advisory_id="VCIO-123-2002",
+        datasource_id="ds",
+        avid="ds/VCIO-123-2002",
+        unique_content_id="i3giu",
+        url="https://test.com",
+        date_collected=datetime.now(),
+    )
+    alias = AdvisoryAlias.objects.create(alias="CVE-2007-4387")
+
+    adv1.aliases.add(alias)
+
+    # Run the Metasploit improver again when there are matching aliases.
+    improver.execute()
+    assert AdvisoryExploit.objects.count() == 1
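Note on the three tests in PATCH 44/44: they all follow the same pattern of mocking requests.get, running the pipeline once with no matching aliases, creating an AdvisoryV2 row linked to a single AdvisoryAlias, and running the pipeline again to expect exactly one AdvisoryExploit. If that duplication grows, the advisory setup could be factored into a shared helper. The sketch below is illustrative only and is not part of this patch; the helper name and its placement in a conftest module are assumptions, and it reuses only the model fields already exercised by the tests above.

    # Hypothetical helper, e.g. in vulnerabilities/tests/pipelines/conftest.py; not part of this patch.
    from datetime import datetime

    from vulnerabilities.models import AdvisoryAlias
    from vulnerabilities.models import AdvisoryV2


    def create_advisory_with_alias(alias_name, advisory_id="VCIO-123-2002"):
        """
        Create a minimal AdvisoryV2 linked to a single alias, mirroring the inline
        setup used by the Exploit-DB, KEV and Metasploit improver tests above.
        """
        advisory = AdvisoryV2.objects.create(
            advisory_id=advisory_id,
            datasource_id="ds",
            avid=f"ds/{advisory_id}",
            unique_content_id="i3giu",
            url="https://test.com",
            date_collected=datetime.now(),
        )
        alias = AdvisoryAlias.objects.create(alias=alias_name)
        advisory.aliases.add(alias)
        return advisory

Each test could then call create_advisory_with_alias("CVE-2009-3699") (or the KEV/Metasploit CVE it targets) between its two improver.execute() calls and keep only the exploit-count assertions inline.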