diff --git a/vulnerabilities/api_v2.py b/vulnerabilities/api_v2.py
index 4c2562216..4915dda63 100644
--- a/vulnerabilities/api_v2.py
+++ b/vulnerabilities/api_v2.py
@@ -24,8 +24,14 @@
from rest_framework.response import Response
from rest_framework.reverse import reverse
+from vulnerabilities.models import AdvisoryReference
+from vulnerabilities.models import AdvisorySeverity
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.models import AdvisoryWeakness
from vulnerabilities.models import CodeFix
+from vulnerabilities.models import CodeFixV2
from vulnerabilities.models import Package
+from vulnerabilities.models import PackageV2
from vulnerabilities.models import PipelineRun
from vulnerabilities.models import PipelineSchedule
from vulnerabilities.models import Vulnerability
@@ -44,6 +50,16 @@ class Meta:
fields = ["cwe_id", "name", "description"]
+class AdvisoryWeaknessSerializer(serializers.ModelSerializer):
+ cwe_id = serializers.CharField()
+ name = serializers.CharField()
+ description = serializers.CharField()
+
+ class Meta:
+ model = AdvisoryWeakness
+ fields = ["cwe_id", "name", "description"]
+
+
class VulnerabilityReferenceV2Serializer(serializers.ModelSerializer):
url = serializers.CharField()
reference_type = serializers.CharField()
@@ -54,6 +70,29 @@ class Meta:
fields = ["url", "reference_type", "reference_id"]
+class AdvisoryReferenceSerializer(serializers.ModelSerializer):
+ url = serializers.CharField()
+ reference_type = serializers.CharField()
+ reference_id = serializers.CharField()
+
+ class Meta:
+ model = AdvisoryReference
+ fields = ["url", "reference_type", "reference_id"]
+
+
+class AdvisorySeveritySerializer(serializers.ModelSerializer):
+ class Meta:
+ model = AdvisorySeverity
+ fields = ["url", "value", "scoring_system", "scoring_elements", "published_at"]
+
+ def to_representation(self, instance):
+ data = super().to_representation(instance)
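+        # Drop "published_at" from the output when the severity has no publication date.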
+ published_at = data.get("published_at", None)
+ if not published_at:
+ data.pop("published_at")
+ return data
+
+
class VulnerabilitySeverityV2Serializer(serializers.ModelSerializer):
class Meta:
model = VulnerabilitySeverity
@@ -94,6 +133,32 @@ def get_aliases(self, obj):
return [alias.alias for alias in obj.aliases.all()]
+class AdvisoryV2Serializer(serializers.ModelSerializer):
+ aliases = serializers.SerializerMethodField()
+ weaknesses = AdvisoryWeaknessSerializer(many=True)
+ references = AdvisoryReferenceSerializer(many=True)
+ severities = AdvisorySeveritySerializer(many=True)
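+    # "avid" is the datasource-qualified advisory id, e.g. pysec_importer_v2/PYSEC-2020-2233.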
+ advisory_id = serializers.CharField(source="avid", read_only=True)
+
+ class Meta:
+ model = AdvisoryV2
+ fields = [
+ "advisory_id",
+ "url",
+ "aliases",
+ "summary",
+ "severities",
+ "weaknesses",
+ "references",
+ "exploitability",
+ "weighted_severity",
+ "risk_score",
+ ]
+
+ def get_aliases(self, obj):
+ return [alias.alias for alias in obj.aliases.all()]
+
+
class VulnerabilityListSerializer(serializers.ModelSerializer):
url = serializers.SerializerMethodField()
@@ -233,6 +298,57 @@ def get_fixing_vulnerabilities(self, obj):
return [vuln.vulnerability_id for vuln in obj.fixing_vulnerabilities.all()]
+class AdvisoryPackageV2Serializer(serializers.ModelSerializer):
+ purl = serializers.CharField(source="package_url")
+ risk_score = serializers.FloatField(read_only=True)
+ affected_by_vulnerabilities = serializers.SerializerMethodField()
+ fixing_vulnerabilities = serializers.SerializerMethodField()
+ next_non_vulnerable_version = serializers.CharField(read_only=True)
+ latest_non_vulnerable_version = serializers.CharField(read_only=True)
+
+ class Meta:
+        model = PackageV2
+ fields = [
+ "purl",
+ "affected_by_vulnerabilities",
+ "fixing_vulnerabilities",
+ "next_non_vulnerable_version",
+ "latest_non_vulnerable_version",
+ "risk_score",
+ ]
+
+ def get_affected_by_vulnerabilities(self, obj):
+ """
+        Return a mapping of advisory avid to advisory details, including
+        the fixed-by package purl and code fix URLs.
+ """
+ result = {}
+ request = self.context.get("request")
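+        # "prefetched_affected_advisories" is populated by the viewset's Prefetch(..., to_attr=...).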
+ for adv in getattr(obj, "prefetched_affected_advisories", []):
+ fixed_by_package = adv.fixed_by_packages.first()
+ purl = None
+ if fixed_by_package:
+ purl = fixed_by_package.package_url
+            # Collect the code fixes recorded for this advisory
+ code_fixes = CodeFixV2.objects.filter(advisory=adv).distinct()
+ code_fix_urls = [
+ reverse("codefix-detail", args=[code_fix.id], request=request)
+ for code_fix in code_fixes
+ ]
+
+ result[adv.avid] = {
+ "advisory_id": adv.avid,
+ "fixed_by_packages": purl,
+ "code_fixes": code_fix_urls,
+ }
+ return result
+
+    def get_fixing_vulnerabilities(self, obj):
+        # A ghost package should not fix any advisory.
+        if obj.is_ghost:
+            return []
+        return [adv.avid for adv in obj.fixing_advisories.all()]
+
+
class PackageurlListSerializer(serializers.Serializer):
purls = serializers.ListField(
child=serializers.CharField(),
@@ -261,6 +377,12 @@ class PackageV2FilterSet(filters.FilterSet):
purl = filters.CharFilter(field_name="package_url")
+class AdvisoryPackageV2FilterSet(filters.FilterSet):
+    affected_by_advisory = filters.CharFilter(field_name="affected_by_advisories__advisory_id")
+    fixing_advisory = filters.CharFilter(field_name="fixing_advisories__advisory_id")
+    purl = filters.CharFilter(field_name="package_url")
+
+
class PackageV2ViewSet(viewsets.ReadOnlyModelViewSet):
queryset = Package.objects.all().prefetch_related(
Prefetch(
@@ -754,3 +876,263 @@ def get_permissions(self):
if self.action not in ["list", "retrieve"]:
return [IsAdminWithSessionAuth()]
return super().get_permissions()
+
+
+class AdvisoriesPackageV2ViewSet(viewsets.ReadOnlyModelViewSet):
+ queryset = PackageV2.objects.all().prefetch_related(
+ Prefetch(
+ "affected_by_advisories",
+ queryset=AdvisoryV2.objects.prefetch_related("fixed_by_packages"),
+ to_attr="prefetched_affected_advisories",
+ )
+ )
+ serializer_class = AdvisoryPackageV2Serializer
+ filter_backends = (filters.DjangoFilterBackend,)
+ filterset_class = AdvisoryPackageV2FilterSet
+
+ def get_queryset(self):
+ queryset = super().get_queryset()
+ package_purls = self.request.query_params.getlist("purl")
+ affected_by_advisory = self.request.query_params.get("affected_by_advisory")
+ fixing_advisory = self.request.query_params.get("fixing_advisory")
+ if package_purls:
+ queryset = queryset.filter(package_url__in=package_purls)
+ if affected_by_advisory:
+ queryset = queryset.filter(affected_by_advisories__advisory_id=affected_by_advisory)
+ if fixing_advisory:
+            queryset = queryset.filter(fixing_advisories__advisory_id=fixing_advisory)
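+        # with_is_vulnerable() annotates each package with its vulnerability status.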
+ return queryset.with_is_vulnerable()
+
+ def list(self, request, *args, **kwargs):
+ queryset = self.get_queryset()
+ # Apply pagination
+ page = self.paginate_queryset(queryset)
+ if page is not None:
+            # Collect only the advisories for packages in the current page
+            advisories = set()
+            for package in page:
+                advisories.update(package.affected_by_advisories.all())
+                advisories.update(package.fixing_advisories.all())
+
+            # Serialize the advisories keyed by their avid
+            advisory_data = {adv.avid: AdvisoryV2Serializer(adv).data for adv in advisories}
+
+ # Serialize the current page of packages
+ serializer = self.get_serializer(page, many=True)
+ data = serializer.data
+ # Use 'self.get_paginated_response' to include pagination data
+ return self.get_paginated_response({"advisories": advisory_data, "packages": data})
+
+        # If pagination is not applied, collect the advisories for all packages
+        advisories = set()
+        for package in queryset:
+            advisories.update(package.affected_by_advisories.all())
+            advisories.update(package.fixing_advisories.all())
+
+        advisory_data = {adv.avid: AdvisoryV2Serializer(adv).data for adv in advisories}
+
+ serializer = self.get_serializer(queryset, many=True)
+ data = serializer.data
+ return Response({"advisories": advisory_data, "packages": data})
+
+ @extend_schema(
+ request=PackageurlListSerializer,
+ responses={200: PackageV2Serializer(many=True)},
+ )
+ @action(
+ detail=False,
+ methods=["post"],
+ serializer_class=PackageurlListSerializer,
+ filter_backends=[],
+ pagination_class=None,
+ )
+ def bulk_lookup(self, request):
+ """
+        Return the matching advisories and packages for the exact PackageURLs requested.
+ """
+ serializer = self.serializer_class(data=request.data)
+ if not serializer.is_valid():
+ return Response(
+ status=status.HTTP_400_BAD_REQUEST,
+ data={
+ "error": serializer.errors,
+ "message": "A non-empty 'purls' list of PURLs is required.",
+ },
+ )
+ validated_data = serializer.validated_data
+ purls = validated_data.get("purls")
+
+ # Fetch packages matching the provided purls
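+        # for_purls() matches the exact purls provided; with_is_vulnerable() annotates vulnerability status.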
+ packages = PackageV2.objects.for_purls(purls).with_is_vulnerable()
+
+        # Collect advisories associated with these packages
+ advisories = set()
+ for package in packages:
+ advisories.update(package.affected_by_advisories.all())
+ advisories.update(package.fixing_advisories.all())
+
+        # Serialize advisories keyed by their avid
+ advisory_data = {adv.avid: AdvisoryV2Serializer(adv).data for adv in advisories}
+
+ # Serialize packages
+ package_data = AdvisoryPackageV2Serializer(
+ packages,
+ many=True,
+ context={"request": request},
+ ).data
+
+ return Response(
+ {
+ "advisories": advisory_data,
+ "packages": package_data,
+ }
+ )
+
+ @extend_schema(
+ request=PackageBulkSearchRequestSerializer,
+ responses={200: PackageV2Serializer(many=True)},
+ )
+ @action(
+ detail=False,
+ methods=["post"],
+ serializer_class=PackageBulkSearchRequestSerializer,
+ filter_backends=[],
+ pagination_class=None,
+ )
+ def bulk_search(self, request):
+ """
+        Look up vulnerable packages using many Package URLs at once.
+ """
+ serializer = self.serializer_class(data=request.data)
+ if not serializer.is_valid():
+ return Response(
+ status=status.HTTP_400_BAD_REQUEST,
+ data={
+ "error": serializer.errors,
+ "message": "A non-empty 'purls' list of PURLs is required.",
+ },
+ )
+ validated_data = serializer.validated_data
+ purls = validated_data.get("purls")
+ purl_only = validated_data.get("purl_only", False)
+ plain_purl = validated_data.get("plain_purl", False)
+
+ if plain_purl:
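+            # Rebuild each purl keeping only type, namespace, name and version, dropping qualifiers and subpath.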
+ purl_objects = [PackageURL.from_string(purl) for purl in purls]
+ plain_purl_objects = [
+ PackageURL(
+ type=purl.type,
+ namespace=purl.namespace,
+ name=purl.name,
+ version=purl.version,
+ )
+ for purl in purl_objects
+ ]
+ plain_purls = [str(purl) for purl in plain_purl_objects]
+
+ query = (
+ PackageV2.objects.filter(plain_package_url__in=plain_purls)
+ .order_by("plain_package_url")
+ .distinct("plain_package_url")
+ .with_is_vulnerable()
+ )
+
+ packages = query
+
+            # Collect advisories associated with these packages
+            advisories = set()
+            for package in packages:
+                advisories.update(package.affected_by_advisories.all())
+                advisories.update(package.fixing_advisories.all())
+
+            advisory_data = {adv.avid: AdvisoryV2Serializer(adv).data for adv in advisories}
+
+ if not purl_only:
+ package_data = AdvisoryPackageV2Serializer(
+ packages, many=True, context={"request": request}
+ ).data
+ return Response(
+ {
+ "advisories": advisory_data,
+ "packages": package_data,
+ }
+ )
+
+        # order_by and distinct are used above because many fully
+        # qualified purls can map to a single plain purl
+ vulnerable_purls = query.vulnerable().only("plain_package_url")
+ vulnerable_purls = [str(package.plain_package_url) for package in vulnerable_purls]
+ return Response(data=vulnerable_purls)
+
+ query = PackageV2.objects.filter(package_url__in=purls).distinct().with_is_vulnerable()
+ packages = query
+
+        # Collect advisories associated with these packages
+        advisories = set()
+        for package in packages:
+            advisories.update(package.affected_by_advisories.all())
+            advisories.update(package.fixing_advisories.all())
+
+        advisory_data = {adv.avid: AdvisoryV2Serializer(adv).data for adv in advisories}
+
+ if not purl_only:
+ package_data = AdvisoryPackageV2Serializer(
+ packages, many=True, context={"request": request}
+ ).data
+ return Response(
+ {
+ "advisories": advisory_data,
+ "packages": package_data,
+ }
+ )
+
+ vulnerable_purls = query.vulnerable().only("package_url")
+ vulnerable_purls = [str(package.package_url) for package in vulnerable_purls]
+ return Response(data=vulnerable_purls)
+
+ @action(detail=False, methods=["get"])
+ def all(self, request):
+ """
+ Return a list of Package URLs of vulnerable packages.
+ """
+ vulnerable_purls = (
+ PackageV2.objects.vulnerable()
+ .only("package_url")
+ .order_by("package_url")
+ .distinct()
+ .values_list("package_url", flat=True)
+ )
+ return Response(vulnerable_purls)
+
+ @extend_schema(
+ request=LookupRequestSerializer,
+ responses={200: PackageV2Serializer(many=True)},
+ )
+ @action(
+ detail=False,
+ methods=["post"],
+ serializer_class=LookupRequestSerializer,
+ filter_backends=[],
+ pagination_class=None,
+ )
+ def lookup(self, request):
+ """
+        Return the matching packages for the exact PackageURL requested.
+ """
+ serializer = self.serializer_class(data=request.data)
+ if not serializer.is_valid():
+ return Response(
+ status=status.HTTP_400_BAD_REQUEST,
+ data={
+ "error": serializer.errors,
+ "message": "A 'purl' is required.",
+ },
+ )
+ validated_data = serializer.validated_data
+ purl = validated_data.get("purl")
+
+ qs = self.get_queryset().for_purls([purl]).with_is_vulnerable()
+ return Response(
+ AdvisoryPackageV2Serializer(qs, many=True, context={"request": request}).data
+ )
diff --git a/vulnerabilities/forms.py b/vulnerabilities/forms.py
index 74a10340c..7d955ac37 100644
--- a/vulnerabilities/forms.py
+++ b/vulnerabilities/forms.py
@@ -36,6 +36,14 @@ class VulnerabilitySearchForm(forms.Form):
)
+class AdvisorySearchForm(forms.Form):
+
+ search = forms.CharField(
+ required=True,
+ widget=forms.TextInput(attrs={"placeholder": "Advisory id or alias such as CVE or GHSA"}),
+ )
+
+
class ApiUserCreationForm(forms.ModelForm):
"""
Support a simplified creation for API-only users directly from the UI.
diff --git a/vulnerabilities/importer.py b/vulnerabilities/importer.py
index 759ec9330..9cef5e0fa 100644
--- a/vulnerabilities/importer.py
+++ b/vulnerabilities/importer.py
@@ -55,6 +55,7 @@ class VulnerabilitySeverity:
value: str
scoring_elements: str = ""
published_at: Optional[datetime.datetime] = None
+ url: Optional[str] = None
def to_dict(self):
data = {
@@ -145,6 +146,54 @@ def from_url(cls, url):
return cls(url=url)
+@dataclasses.dataclass(eq=True)
+@functools.total_ordering
+class ReferenceV2:
+ reference_id: str = ""
+ reference_type: str = ""
+ url: str = ""
+
+ def __post_init__(self):
+ if not self.url:
+ raise TypeError("Reference must have a url")
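+        # Coerce non-string reference ids (e.g. numeric ids from upstream data) to strings.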
+ if self.reference_id and not isinstance(self.reference_id, str):
+ self.reference_id = str(self.reference_id)
+
+ def __lt__(self, other):
+        if not isinstance(other, ReferenceV2):
+ return NotImplemented
+ return self._cmp_key() < other._cmp_key()
+
+ # TODO: Add cache
+ def _cmp_key(self):
+ return (self.reference_id, self.reference_type, self.url)
+
+ def to_dict(self):
+ """Return a normalized dictionary representation"""
+ return {
+ "reference_id": self.reference_id,
+ "reference_type": self.reference_type,
+ "url": self.url,
+ }
+
+ @classmethod
+ def from_dict(cls, ref: dict):
+ return cls(
+ reference_id=str(ref["reference_id"]),
+ reference_type=ref.get("reference_type") or "",
+ url=ref["url"],
+ )
+
+ @classmethod
+ def from_url(cls, url):
+ reference_id = get_reference_id(url)
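+        # Heuristic classification: GHSA and CVE ids are kept as reference_id, other URLs carry no id.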
+ if "GHSA-" in reference_id.upper():
+ return cls(reference_id=reference_id, url=url)
+ if is_cve(reference_id):
+ return cls(url=url, reference_id=reference_id.upper())
+ return cls(url=url)
+
+
class UnMergeablePackageError(Exception):
"""
Raised when a package cannot be merged with another one.
@@ -302,10 +351,81 @@ class AdvisoryData:
date_published must be aware datetime
"""
+ advisory_id: str = ""
aliases: List[str] = dataclasses.field(default_factory=list)
summary: Optional[str] = ""
affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list)
references: List[Reference] = dataclasses.field(default_factory=list)
+ references_v2: List[ReferenceV2] = dataclasses.field(default_factory=list)
+ date_published: Optional[datetime.datetime] = None
+ weaknesses: List[int] = dataclasses.field(default_factory=list)
+ severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list)
+ url: Optional[str] = None
+
+ def __post_init__(self):
+ if self.date_published and not self.date_published.tzinfo:
+ logger.warning(f"AdvisoryData with no tzinfo: {self!r}")
+ if self.summary:
+ self.summary = self.clean_summary(self.summary)
+
+ def clean_summary(self, summary):
+ # https://nvd.nist.gov/vuln/detail/CVE-2013-4314
+ # https://github.com/cms-dev/cms/issues/888#issuecomment-516977572
+ summary = summary.strip()
+ if summary:
+ summary = summary.replace("\x00", "\uFFFD")
+ return summary
+
+ def to_dict(self):
+ return {
+ "aliases": self.aliases,
+ "summary": self.summary,
+ "affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
+ "references": [ref.to_dict() for ref in self.references],
+ "date_published": self.date_published.isoformat() if self.date_published else None,
+ "weaknesses": self.weaknesses,
+ "url": self.url if self.url else "",
+ }
+
+ @classmethod
+ def from_dict(cls, advisory_data):
+ date_published = advisory_data["date_published"]
+ transformed = {
+ "aliases": advisory_data["aliases"],
+ "summary": advisory_data["summary"],
+ "affected_packages": [
+ AffectedPackage.from_dict(pkg)
+ for pkg in advisory_data["affected_packages"]
+ if pkg is not None
+ ],
+ "references": [Reference.from_dict(ref) for ref in advisory_data["references"]],
+ "date_published": datetime.datetime.fromisoformat(date_published)
+ if date_published
+ else None,
+ "weaknesses": advisory_data["weaknesses"],
+ "url": advisory_data.get("url") or None,
+ }
+ return cls(**transformed)
+
+
+@dataclasses.dataclass(order=True)
+class AdvisoryDataV2:
+ """
+ This data class expresses the contract between data sources and the import runner.
+
+    If an advisory_id is present then:
+        summary or affected_packages or references must be present
+    otherwise
+        either affected_packages or references should be present
+
+ date_published must be aware datetime
+ """
+
+ advisory_id: str = ""
+ aliases: List[str] = dataclasses.field(default_factory=list)
+ summary: Optional[str] = ""
+ affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list)
+ references: List[ReferenceV2] = dataclasses.field(default_factory=list)
date_published: Optional[datetime.datetime] = None
weaknesses: List[int] = dataclasses.field(default_factory=list)
url: Optional[str] = None
diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py
index f0d9532ab..3dd914a92 100644
--- a/vulnerabilities/importers/__init__.py
+++ b/vulnerabilities/importers/__init__.py
@@ -33,7 +33,6 @@
from vulnerabilities.importers import ubuntu_usn
from vulnerabilities.importers import vulnrichment
from vulnerabilities.importers import xen
-from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.pipelines import alpine_linux_importer
from vulnerabilities.pipelines import github_importer
from vulnerabilities.pipelines import gitlab_importer
@@ -42,45 +41,59 @@
from vulnerabilities.pipelines import nvd_importer
from vulnerabilities.pipelines import pypa_importer
from vulnerabilities.pipelines import pysec_importer
+from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2
+from vulnerabilities.pipelines.v2_importers import github_importer as github_importer_v2
+from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2
+from vulnerabilities.pipelines.v2_importers import npm_importer as npm_importer_v2
+from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2
+from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2
+from vulnerabilities.pipelines.v2_importers import pysec_importer as pysec_importer_v2
+from vulnerabilities.pipelines.v2_importers import vulnrichment_importer as vulnrichment_importer_v2
+from vulnerabilities.utils import create_registry
-IMPORTERS_REGISTRY = [
- nvd_importer.NVDImporterPipeline,
- github_importer.GitHubAPIImporterPipeline,
- gitlab_importer.GitLabImporterPipeline,
- github_osv.GithubOSVImporter,
- pypa_importer.PyPaImporterPipeline,
- npm_importer.NpmImporterPipeline,
- nginx_importer.NginxImporterPipeline,
- pysec_importer.PyPIImporterPipeline,
- apache_tomcat.ApacheTomcatImporter,
- postgresql.PostgreSQLImporter,
- debian.DebianImporter,
- curl.CurlImporter,
- epss.EPSSImporter,
- vulnrichment.VulnrichImporter,
- alpine_linux_importer.AlpineLinuxImporterPipeline,
- ruby.RubyImporter,
- apache_kafka.ApacheKafkaImporter,
- openssl.OpensslImporter,
- redhat.RedhatImporter,
- archlinux.ArchlinuxImporter,
- ubuntu.UbuntuImporter,
- debian_oval.DebianOvalImporter,
- retiredotnet.RetireDotnetImporter,
- apache_httpd.ApacheHTTPDImporter,
- mozilla.MozillaImporter,
- gentoo.GentooImporter,
- istio.IstioImporter,
- project_kb_msr2019.ProjectKBMSRImporter,
- suse_scores.SUSESeverityScoreImporter,
- elixir_security.ElixirSecurityImporter,
- xen.XenImporter,
- ubuntu_usn.UbuntuUSNImporter,
- fireeye.FireyeImporter,
- oss_fuzz.OSSFuzzImporter,
-]
-
-IMPORTERS_REGISTRY = {
- x.pipeline_id if issubclass(x, VulnerableCodeBaseImporterPipeline) else x.qualified_name: x
- for x in IMPORTERS_REGISTRY
-}
+IMPORTERS_REGISTRY = create_registry(
+ [
+ nvd_importer_v2.NVDImporterPipeline,
+ github_importer_v2.GitHubAPIImporterPipeline,
+ npm_importer_v2.NpmImporterPipeline,
+ vulnrichment_importer_v2.VulnrichImporterPipeline,
+ apache_httpd_v2.ApacheHTTPDImporterPipeline,
+ pypa_importer_v2.PyPaImporterPipeline,
+ gitlab_importer_v2.GitLabImporterPipeline,
+ pysec_importer_v2.PyPIImporterPipeline,
+ nvd_importer.NVDImporterPipeline,
+ github_importer.GitHubAPIImporterPipeline,
+ gitlab_importer.GitLabImporterPipeline,
+ github_osv.GithubOSVImporter,
+ pypa_importer.PyPaImporterPipeline,
+ npm_importer.NpmImporterPipeline,
+ nginx_importer.NginxImporterPipeline,
+ pysec_importer.PyPIImporterPipeline,
+ apache_tomcat.ApacheTomcatImporter,
+ postgresql.PostgreSQLImporter,
+ debian.DebianImporter,
+ curl.CurlImporter,
+ epss.EPSSImporter,
+ vulnrichment.VulnrichImporter,
+ alpine_linux_importer.AlpineLinuxImporterPipeline,
+ ruby.RubyImporter,
+ apache_kafka.ApacheKafkaImporter,
+ openssl.OpensslImporter,
+ redhat.RedhatImporter,
+ archlinux.ArchlinuxImporter,
+ ubuntu.UbuntuImporter,
+ debian_oval.DebianOvalImporter,
+ retiredotnet.RetireDotnetImporter,
+ apache_httpd.ApacheHTTPDImporter,
+ mozilla.MozillaImporter,
+ gentoo.GentooImporter,
+ istio.IstioImporter,
+ project_kb_msr2019.ProjectKBMSRImporter,
+ suse_scores.SUSESeverityScoreImporter,
+ elixir_security.ElixirSecurityImporter,
+ xen.XenImporter,
+ ubuntu_usn.UbuntuUSNImporter,
+ fireeye.FireyeImporter,
+ oss_fuzz.OSSFuzzImporter,
+ ]
+)
diff --git a/vulnerabilities/importers/curl.py b/vulnerabilities/importers/curl.py
index a7f5e86fa..7cbc3208e 100644
--- a/vulnerabilities/importers/curl.py
+++ b/vulnerabilities/importers/curl.py
@@ -97,7 +97,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData:
... ]
... }
>>> parse_advisory_data(raw_data)
- AdvisoryData(aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], url='https://curl.se/docs/CVE-2024-2379.json')
+ AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json')
"""
affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else []
diff --git a/vulnerabilities/importers/osv.py b/vulnerabilities/importers/osv.py
index 19867cda5..01f2d8023 100644
--- a/vulnerabilities/importers/osv.py
+++ b/vulnerabilities/importers/osv.py
@@ -107,6 +107,74 @@ def parse_advisory_data(
)
+def parse_advisory_data_v2(
+ raw_data: dict, supported_ecosystems, advisory_url: str
+) -> Optional[AdvisoryData]:
+ """
+    Return an AdvisoryData built from a ``raw_data`` mapping of an OSV advisory and
+    a ``supported_ecosystems`` sequence, or None when the advisory has no id.
+ """
+ advisory_id = raw_data.get("id") or ""
+ if not advisory_id:
+ logger.error(f"Missing advisory id in OSV data: {raw_data}")
+ return None
+ summary = raw_data.get("summary") or ""
+ details = raw_data.get("details") or ""
+ summary = build_description(summary=summary, description=details)
+ aliases = raw_data.get("aliases") or []
+
+ date_published = get_published_date(raw_data=raw_data)
+ severities = list(get_severities(raw_data=raw_data))
+ references = get_references_v2(raw_data=raw_data)
+
+ affected_packages = []
+
+ for affected_pkg in raw_data.get("affected") or []:
+ purl = get_affected_purl(affected_pkg=affected_pkg, raw_id=advisory_id)
+
+ if not purl or purl.type not in supported_ecosystems:
+ logger.error(f"Unsupported package type: {affected_pkg!r} in OSV: {advisory_id!r}")
+ continue
+
+ affected_version_range = get_affected_version_range(
+ affected_pkg=affected_pkg,
+ raw_id=advisory_id,
+ supported_ecosystem=purl.type,
+ )
+
+ for fixed_range in affected_pkg.get("ranges") or []:
+ fixed_version = get_fixed_versions(
+ fixed_range=fixed_range, raw_id=advisory_id, supported_ecosystem=purl.type
+ )
+
+ for version in fixed_version:
+ affected_packages.append(
+ AffectedPackage(
+ package=purl,
+ affected_version_range=affected_version_range,
+ fixed_version=version,
+ )
+ )
+ database_specific = raw_data.get("database_specific") or {}
+ cwe_ids = database_specific.get("cwe_ids") or []
+ weaknesses = list(map(get_cwe_id, cwe_ids))
+
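+    # An OSV advisory may list its own id among its aliases; drop it so aliases stay distinct.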
+ if advisory_id in aliases:
+ aliases.remove(advisory_id)
+
+ return AdvisoryData(
+ advisory_id=advisory_id,
+ aliases=aliases,
+ summary=summary,
+ references_v2=references,
+ severities=severities,
+ affected_packages=affected_packages,
+ date_published=date_published,
+ weaknesses=weaknesses,
+ url=advisory_url,
+ )
+
+
def extract_fixed_versions(fixed_range) -> Iterable[str]:
"""
Return a list of fixed version strings given a ``fixed_range`` mapping of
@@ -187,6 +255,23 @@ def get_references(raw_data, severities) -> List[Reference]:
return references
+def get_references_v2(raw_data) -> List[Reference]:
+    """
+    Return a list of Reference objects extracted from a mapping of OSV ``raw_data``.
+    """
+    references = []
+    for ref in raw_data.get("references") or []:
+        if not ref:
+            continue
+        url = ref.get("url")
+        if not url:
+            logger.error(f"Reference without URL: {ref!r} for OSV id: {raw_data['id']!r}")
+            continue
+        references.append(Reference(url=url))
+    return references
+
+
def get_affected_purl(affected_pkg, raw_id):
"""
Return an affected PackageURL or None given a mapping of ``affected_pkg``
diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py
index 08cce6ff9..be6f73cb9 100644
--- a/vulnerabilities/improvers/__init__.py
+++ b/vulnerabilities/improvers/__init__.py
@@ -9,7 +9,6 @@
from vulnerabilities.improvers import valid_versions
from vulnerabilities.improvers import vulnerability_status
-from vulnerabilities.pipelines import VulnerableCodePipeline
from vulnerabilities.pipelines import add_cvss31_to_CVEs
from vulnerabilities.pipelines import collect_commits
from vulnerabilities.pipelines import compute_advisory_todo
@@ -21,39 +20,55 @@
from vulnerabilities.pipelines import flag_ghost_packages
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
from vulnerabilities.pipelines import remove_duplicate_advisories
+from vulnerabilities.pipelines.v2_improvers import collect_commits as collect_commits_v2
+from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
+from vulnerabilities.pipelines.v2_improvers import (
+ computer_package_version_rank as compute_version_rank_v2,
+)
+from vulnerabilities.pipelines.v2_improvers import enhance_with_exploitdb as exploitdb_v2
+from vulnerabilities.pipelines.v2_improvers import enhance_with_kev as enhance_with_kev_v2
+from vulnerabilities.pipelines.v2_improvers import (
+ enhance_with_metasploit as enhance_with_metasploit_v2,
+)
+from vulnerabilities.pipelines.v2_improvers import flag_ghost_packages as flag_ghost_packages_v2
+from vulnerabilities.utils import create_registry
-IMPROVERS_REGISTRY = [
- valid_versions.GitHubBasicImprover,
- valid_versions.GitLabBasicImprover,
- valid_versions.NginxBasicImprover,
- valid_versions.ApacheHTTPDImprover,
- valid_versions.DebianBasicImprover,
- valid_versions.NpmImprover,
- valid_versions.ElixirImprover,
- valid_versions.ApacheTomcatImprover,
- valid_versions.ApacheKafkaImprover,
- valid_versions.IstioImprover,
- valid_versions.DebianOvalImprover,
- valid_versions.UbuntuOvalImprover,
- valid_versions.OSSFuzzImprover,
- valid_versions.RubyImprover,
- valid_versions.GithubOSVImprover,
- vulnerability_status.VulnerabilityStatusImprover,
- valid_versions.CurlImprover,
- flag_ghost_packages.FlagGhostPackagePipeline,
- enhance_with_kev.VulnerabilityKevPipeline,
- enhance_with_metasploit.MetasploitImproverPipeline,
- enhance_with_exploitdb.ExploitDBImproverPipeline,
- compute_package_risk.ComputePackageRiskPipeline,
- compute_package_version_rank.ComputeVersionRankPipeline,
- collect_commits.CollectFixCommitsPipeline,
- add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline,
- remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline,
- populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline,
- compute_advisory_todo.ComputeToDo,
-]
-
-IMPROVERS_REGISTRY = {
- x.pipeline_id if issubclass(x, VulnerableCodePipeline) else x.qualified_name: x
- for x in IMPROVERS_REGISTRY
-}
+IMPROVERS_REGISTRY = create_registry(
+ [
+ valid_versions.GitHubBasicImprover,
+ valid_versions.GitLabBasicImprover,
+ valid_versions.NginxBasicImprover,
+ valid_versions.ApacheHTTPDImprover,
+ valid_versions.DebianBasicImprover,
+ valid_versions.NpmImprover,
+ valid_versions.ElixirImprover,
+ valid_versions.ApacheTomcatImprover,
+ valid_versions.ApacheKafkaImprover,
+ valid_versions.IstioImprover,
+ valid_versions.DebianOvalImprover,
+ valid_versions.UbuntuOvalImprover,
+ valid_versions.OSSFuzzImprover,
+ valid_versions.RubyImprover,
+ valid_versions.GithubOSVImprover,
+ vulnerability_status.VulnerabilityStatusImprover,
+ valid_versions.CurlImprover,
+ flag_ghost_packages.FlagGhostPackagePipeline,
+ enhance_with_kev.VulnerabilityKevPipeline,
+ enhance_with_metasploit.MetasploitImproverPipeline,
+ enhance_with_exploitdb.ExploitDBImproverPipeline,
+ compute_package_risk.ComputePackageRiskPipeline,
+ compute_package_version_rank.ComputeVersionRankPipeline,
+ collect_commits.CollectFixCommitsPipeline,
+ add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline,
+ remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline,
+ populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline,
+ exploitdb_v2.ExploitDBImproverPipeline,
+ enhance_with_kev_v2.VulnerabilityKevPipeline,
+ flag_ghost_packages_v2.FlagGhostPackagePipeline,
+ enhance_with_metasploit_v2.MetasploitImproverPipeline,
+ compute_package_risk_v2.ComputePackageRiskPipeline,
+ compute_version_rank_v2.ComputeVersionRankPipeline,
+ collect_commits_v2.CollectFixCommitsPipeline,
+ compute_advisory_todo.ComputeToDo,
+ ]
+)
diff --git a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py
index f4876b11a..78ec8bb0a 100644
--- a/vulnerabilities/management/commands/import.py
+++ b/vulnerabilities/management/commands/import.py
@@ -14,6 +14,7 @@
from vulnerabilities.import_runner import ImportRunner
from vulnerabilities.importers import IMPORTERS_REGISTRY
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
class Command(BaseCommand):
@@ -57,7 +58,9 @@ def import_data(self, importers):
failed_importers = []
for importer in importers:
- if issubclass(importer, VulnerableCodeBaseImporterPipeline):
+        if issubclass(
+            importer, (VulnerableCodeBaseImporterPipeline, VulnerableCodeBaseImporterPipelineV2)
+        ):
self.stdout.write(f"Importing data using {importer.pipeline_id}")
status, error = importer().execute()
if status != 0:
diff --git a/vulnerabilities/migrations/0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py b/vulnerabilities/migrations/0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py
new file mode 100644
index 000000000..fd3f74d98
--- /dev/null
+++ b/vulnerabilities/migrations/0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py
@@ -0,0 +1,635 @@
+# Generated by Django 4.2.20 on 2025-07-01 10:38
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ("vulnerabilities", "0093_advisorytodo_todorelatedadvisory_and_more"),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name="AdvisoryAlias",
+ fields=[
+ (
+ "id",
+ models.AutoField(
+ auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+ ),
+ ),
+ (
+ "alias",
+ models.CharField(
+ help_text="An alias is a unique vulnerability identifier in some database, such as CVE-2020-2233",
+ max_length=50,
+ unique=True,
+ ),
+ ),
+ ],
+ options={
+ "ordering": ["alias"],
+ },
+ ),
+ migrations.CreateModel(
+ name="AdvisoryReference",
+ fields=[
+ (
+ "id",
+ models.AutoField(
+ auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+ ),
+ ),
+ (
+ "url",
+ models.URLField(
+ help_text="URL to the vulnerability reference", max_length=1024, unique=True
+ ),
+ ),
+ (
+ "reference_type",
+ models.CharField(
+ blank=True,
+ choices=[
+ ("advisory", "Advisory"),
+ ("exploit", "Exploit"),
+ ("mailing_list", "Mailing List"),
+ ("bug", "Bug"),
+ ("other", "Other"),
+ ],
+ max_length=20,
+ ),
+ ),
+ (
+ "reference_id",
+ models.CharField(
+ blank=True,
+ db_index=True,
+ help_text="An optional reference ID, such as DSA-4465-1 when available",
+ max_length=500,
+ ),
+ ),
+ ],
+ options={
+ "ordering": ["reference_id", "url", "reference_type"],
+ },
+ ),
+ migrations.CreateModel(
+ name="AdvisorySeverity",
+ fields=[
+ (
+ "id",
+ models.AutoField(
+ auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+ ),
+ ),
+ (
+ "url",
+ models.URLField(
+ db_index=True,
+ help_text="URL to the vulnerability severity",
+ max_length=1024,
+ null=True,
+ ),
+ ),
+ (
+ "scoring_system",
+ models.CharField(
+ choices=[
+ ("cvssv2", "CVSSv2 Base Score"),
+ ("cvssv3", "CVSSv3 Base Score"),
+ ("cvssv3.1", "CVSSv3.1 Base Score"),
+ ("cvssv4", "CVSSv4 Base Score"),
+ ("rhbs", "RedHat Bugzilla severity"),
+ ("rhas", "RedHat Aggregate severity"),
+ ("archlinux", "Archlinux Vulnerability Group Severity"),
+ ("cvssv3.1_qr", "CVSSv3.1 Qualitative Severity Rating"),
+ ("generic_textual", "Generic textual severity rating"),
+ ("apache_httpd", "Apache Httpd Severity"),
+ ("apache_tomcat", "Apache Tomcat Severity"),
+ ("epss", "Exploit Prediction Scoring System"),
+ ("ssvc", "Stakeholder-Specific Vulnerability Categorization"),
+ ],
+ help_text="Identifier for the scoring system used. Available choices are: cvssv2: CVSSv2 Base Score,\ncvssv3: CVSSv3 Base Score,\ncvssv3.1: CVSSv3.1 Base Score,\ncvssv4: CVSSv4 Base Score,\nrhbs: RedHat Bugzilla severity,\nrhas: RedHat Aggregate severity,\narchlinux: Archlinux Vulnerability Group Severity,\ncvssv3.1_qr: CVSSv3.1 Qualitative Severity Rating,\ngeneric_textual: Generic textual severity rating,\napache_httpd: Apache Httpd Severity,\napache_tomcat: Apache Tomcat Severity,\nepss: Exploit Prediction Scoring System,\nssvc: Stakeholder-Specific Vulnerability Categorization ",
+ max_length=50,
+ ),
+ ),
+ (
+ "value",
+ models.CharField(help_text="Example: 9.0, Important, High", max_length=50),
+ ),
+ (
+ "scoring_elements",
+ models.CharField(
+ help_text="Supporting scoring elements used to compute the score values. For example a CVSS vector string as used to compute a CVSS score.",
+ max_length=150,
+ null=True,
+ ),
+ ),
+ (
+ "published_at",
+ models.DateTimeField(
+ blank=True,
+ help_text="UTC Date of publication of the vulnerability severity",
+ null=True,
+ ),
+ ),
+ ],
+ options={
+ "ordering": ["url", "scoring_system", "value"],
+ },
+ ),
+ migrations.CreateModel(
+ name="AdvisoryV2",
+ fields=[
+ (
+ "id",
+ models.AutoField(
+ auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+ ),
+ ),
+ (
+ "datasource_id",
+ models.CharField(
+                        help_text="Unique ID for the datasource used for this advisory, e.g.: nginx_importer_v2",
+ max_length=100,
+ ),
+ ),
+ (
+ "advisory_id",
+ models.CharField(
+ help_text="An advisory is a unique vulnerability identifier in some database, such as PYSEC-2020-2233",
+ max_length=50,
+ ),
+ ),
+ (
+ "avid",
+ models.CharField(
+                        help_text="Unique ID for this advisory combining the datasource id and advisory id, e.g.: pysec_importer_v2/PYSEC-2020-2233",
+ max_length=500,
+ ),
+ ),
+ (
+ "unique_content_id",
+ models.CharField(
+ help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex",
+ max_length=64,
+ unique=True,
+ ),
+ ),
+ ("url", models.URLField(help_text="Link to the advisory on the upstream website")),
+ ("summary", models.TextField(blank=True)),
+ (
+ "date_published",
+ models.DateTimeField(
+ blank=True, help_text="UTC Date of publication of the advisory", null=True
+ ),
+ ),
+ (
+ "date_collected",
+ models.DateTimeField(help_text="UTC Date on which the advisory was collected"),
+ ),
+ (
+ "date_imported",
+ models.DateTimeField(
+ blank=True,
+ help_text="UTC Date on which the advisory was imported",
+ null=True,
+ ),
+ ),
+ (
+ "status",
+ models.IntegerField(
+ choices=[(1, "Published"), (2, "Disputed"), (3, "Invalid")], default=1
+ ),
+ ),
+ (
+ "exploitability",
+ models.DecimalField(
+ blank=True,
+ decimal_places=1,
+ help_text="Exploitability indicates the likelihood that a vulnerability in a software package could be used by malicious actors to compromise systems, applications, or networks. This metric is determined automatically based on the discovery of known exploits.",
+ max_digits=2,
+ null=True,
+ ),
+ ),
+ (
+ "weighted_severity",
+ models.DecimalField(
+ blank=True,
+ decimal_places=1,
+ help_text="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10.",
+ max_digits=3,
+ null=True,
+ ),
+ ),
+ ],
+ options={
+ "ordering": ["datasource_id", "advisory_id", "date_published", "unique_content_id"],
+ },
+ ),
+ migrations.CreateModel(
+ name="AdvisoryWeakness",
+ fields=[
+ (
+ "id",
+ models.AutoField(
+ auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+ ),
+ ),
+ ("cwe_id", models.IntegerField(help_text="CWE id")),
+ ],
+ ),
+ migrations.CreateModel(
+ name="PackageV2",
+ fields=[
+ (
+ "id",
+ models.AutoField(
+ auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+ ),
+ ),
+ (
+ "type",
+ models.CharField(
+ blank=True,
+ help_text="A short code to identify the type of this package. For example: gem for a Rubygem, docker for a container, pypi for a Python Wheel or Egg, maven for a Maven Jar, deb for a Debian package, etc.",
+ max_length=16,
+ ),
+ ),
+ (
+ "namespace",
+ models.CharField(
+ blank=True,
+ help_text="Package name prefix, such as Maven groupid, Docker image owner, GitHub user or organization, etc.",
+ max_length=255,
+ ),
+ ),
+ (
+ "name",
+ models.CharField(blank=True, help_text="Name of the package.", max_length=100),
+ ),
+ (
+ "version",
+ models.CharField(
+ blank=True, help_text="Version of the package.", max_length=100
+ ),
+ ),
+ (
+ "qualifiers",
+ models.CharField(
+ blank=True,
+ help_text="Extra qualifying data for a package such as the name of an OS, architecture, distro, etc.",
+ max_length=1024,
+ ),
+ ),
+ (
+ "subpath",
+ models.CharField(
+ blank=True,
+ help_text="Extra subpath within a package, relative to the package root.",
+ max_length=200,
+ ),
+ ),
+ (
+ "package_url",
+ models.CharField(
+ db_index=True,
+ help_text="The Package URL for this package.",
+ max_length=1000,
+ ),
+ ),
+ (
+ "plain_package_url",
+ models.CharField(
+ db_index=True,
+ help_text="The Package URL for this package without qualifiers and subpath.",
+ max_length=1000,
+ ),
+ ),
+ (
+ "is_ghost",
+ models.BooleanField(
+ db_index=True,
+ default=False,
+ help_text="True if the package does not exist in the upstream package manager or its repository.",
+ ),
+ ),
+ (
+ "risk_score",
+ models.DecimalField(
+ decimal_places=1,
+ help_text="Risk score between 0.00 and 10.00, where higher values indicate greater vulnerability risk for the package.",
+ max_digits=3,
+ null=True,
+ ),
+ ),
+ (
+ "version_rank",
+ models.IntegerField(
+ db_index=True,
+ default=0,
+ help_text="Rank of the version to support ordering by version. Rank zero means the rank has not been defined yet",
+ ),
+ ),
+ ],
+ options={
+ "abstract": False,
+ },
+ ),
+ migrations.CreateModel(
+ name="CodeFixV2",
+ fields=[
+ (
+ "id",
+ models.AutoField(
+ auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+ ),
+ ),
+ (
+ "commits",
+ models.JSONField(
+ blank=True,
+ default=list,
+ help_text="List of commit identifiers using VCS URLs associated with the code change.",
+ ),
+ ),
+ (
+ "pulls",
+ models.JSONField(
+ blank=True,
+ default=list,
+ help_text="List of pull request URLs associated with the code change.",
+ ),
+ ),
+ (
+ "downloads",
+ models.JSONField(
+ blank=True,
+ default=list,
+ help_text="List of download URLs for the patched code.",
+ ),
+ ),
+ (
+ "patch",
+ models.TextField(
+ blank=True,
+ help_text="The code change as a patch in unified diff format.",
+ null=True,
+ ),
+ ),
+ (
+ "notes",
+ models.TextField(
+ blank=True,
+ help_text="Notes or instructions about this code change.",
+ null=True,
+ ),
+ ),
+ (
+ "references",
+ models.JSONField(
+ blank=True,
+ default=list,
+ help_text="URL references related to this code change.",
+ ),
+ ),
+ (
+ "is_reviewed",
+ models.BooleanField(
+ default=False, help_text="Indicates if this code change has been reviewed."
+ ),
+ ),
+ (
+ "created_at",
+ models.DateTimeField(
+ auto_now_add=True,
+ help_text="Timestamp indicating when this code change was created.",
+ ),
+ ),
+ (
+ "updated_at",
+ models.DateTimeField(
+ auto_now=True,
+ help_text="Timestamp indicating when this code change was last updated.",
+ ),
+ ),
+ (
+ "advisory",
+ models.ForeignKey(
+                        help_text="The advisory to which this code fix applies.",
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="code_fix_v2",
+ to="vulnerabilities.advisoryv2",
+ ),
+ ),
+ (
+ "affected_package",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="code_fix_v2_affected",
+ to="vulnerabilities.packagev2",
+ ),
+ ),
+ (
+ "base_package_version",
+ models.ForeignKey(
+ blank=True,
+ help_text="The base package version to which this code change applies.",
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ related_name="codechanges_v2",
+ to="vulnerabilities.packagev2",
+ ),
+ ),
+ (
+ "fixed_package",
+ models.ForeignKey(
+ blank=True,
+ help_text="The fixing package version with this code fix",
+ null=True,
+ on_delete=django.db.models.deletion.SET_NULL,
+ related_name="code_fix_v2_fixed",
+ to="vulnerabilities.packagev2",
+ ),
+ ),
+ ],
+ options={
+ "abstract": False,
+ },
+ ),
+ migrations.AddField(
+ model_name="advisoryv2",
+ name="affecting_packages",
+ field=models.ManyToManyField(
+ help_text="A list of packages that are affected by this advisory.",
+ related_name="affected_by_advisories",
+ to="vulnerabilities.packagev2",
+ ),
+ ),
+ migrations.AddField(
+ model_name="advisoryv2",
+ name="aliases",
+ field=models.ManyToManyField(
+ help_text="A list of serializable Alias objects",
+ related_name="advisories",
+ to="vulnerabilities.advisoryalias",
+ ),
+ ),
+ migrations.AddField(
+ model_name="advisoryv2",
+ name="fixed_by_packages",
+ field=models.ManyToManyField(
+            help_text="A list of packages that are fixed by this advisory.",
+ related_name="fixing_advisories",
+ to="vulnerabilities.packagev2",
+ ),
+ ),
+ migrations.AddField(
+ model_name="advisoryv2",
+ name="references",
+ field=models.ManyToManyField(
+ help_text="A list of serializable Reference objects",
+ related_name="advisories",
+ to="vulnerabilities.advisoryreference",
+ ),
+ ),
+ migrations.AddField(
+ model_name="advisoryv2",
+ name="severities",
+ field=models.ManyToManyField(
+ help_text="A list of vulnerability severities associated with this advisory.",
+ related_name="advisories",
+ to="vulnerabilities.advisoryseverity",
+ ),
+ ),
+ migrations.AddField(
+ model_name="advisoryv2",
+ name="weaknesses",
+ field=models.ManyToManyField(
+ help_text="A list of software weaknesses associated with this advisory.",
+ related_name="advisories",
+ to="vulnerabilities.advisoryweakness",
+ ),
+ ),
+ migrations.CreateModel(
+ name="AdvisoryExploit",
+ fields=[
+ (
+ "id",
+ models.AutoField(
+ auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+ ),
+ ),
+ (
+ "date_added",
+ models.DateField(
+ blank=True,
+ help_text="The date the vulnerability was added to an exploit catalog.",
+ null=True,
+ ),
+ ),
+ (
+ "description",
+ models.TextField(
+ blank=True,
+ help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description",
+ null=True,
+ ),
+ ),
+ (
+ "required_action",
+ models.TextField(
+ blank=True,
+ help_text="The required action to address the vulnerability, typically to apply vendor updates or apply vendor mitigations or to discontinue use.",
+ null=True,
+ ),
+ ),
+ (
+ "due_date",
+ models.DateField(
+ blank=True,
+ help_text="The date the required action is due, which applies to all USA federal civilian executive branch (FCEB) agencies, but all organizations are strongly encouraged to execute the required action",
+ null=True,
+ ),
+ ),
+ (
+ "notes",
+ models.TextField(
+ blank=True,
+ help_text="Additional notes and resources about the vulnerability, often a URL to vendor instructions.",
+ null=True,
+ ),
+ ),
+ (
+ "known_ransomware_campaign_use",
+ models.BooleanField(
+ default=False,
+                        help_text="'Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign; \n or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.",
+ ),
+ ),
+ (
+ "source_date_published",
+ models.DateField(
+ blank=True,
+ help_text="The date that the exploit was published or disclosed.",
+ null=True,
+ ),
+ ),
+ (
+ "exploit_type",
+ models.TextField(
+ blank=True,
+ help_text="The type of the exploit as provided by the original upstream data source.",
+ null=True,
+ ),
+ ),
+ (
+ "platform",
+ models.TextField(
+ blank=True,
+ help_text="The platform associated with the exploit as provided by the original upstream data source.",
+ null=True,
+ ),
+ ),
+ (
+ "source_date_updated",
+ models.DateField(
+ blank=True,
+ help_text="The date the exploit was updated in the original upstream data source.",
+ null=True,
+ ),
+ ),
+ (
+ "data_source",
+ models.TextField(
+ blank=True,
+                        help_text="The source of the exploit information, such as CISA KEV, exploitdb, metasploit, or others.",
+ null=True,
+ ),
+ ),
+ (
+ "source_url",
+ models.URLField(
+ blank=True,
+ help_text="The URL to the exploit as provided in the original upstream data source.",
+ null=True,
+ ),
+ ),
+ (
+ "advisory",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="exploits",
+ to="vulnerabilities.advisoryv2",
+ ),
+ ),
+ ],
+ ),
+ migrations.AlterUniqueTogether(
+ name="advisoryv2",
+ unique_together={("datasource_id", "advisory_id", "unique_content_id")},
+ ),
+ ]
diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py
index c4ccbd1fa..ab01010d7 100644
--- a/vulnerabilities/models.py
+++ b/vulnerabilities/models.py
@@ -172,6 +172,7 @@ def with_package_counts(self):
)
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class VulnerabilitySeverity(models.Model):
url = models.URLField(
max_length=1024,
@@ -211,6 +212,7 @@ class Meta:
ordering = ["url", "scoring_system", "value"]
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class VulnerabilityStatusType(models.IntegerChoices):
"""List of vulnerability statuses."""
@@ -219,6 +221,7 @@ class VulnerabilityStatusType(models.IntegerChoices):
INVALID = 3, "Invalid"
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class Vulnerability(models.Model):
"""
A software vulnerability with a unique identifier and alternate ``aliases``.
@@ -511,6 +514,7 @@ def get_cwes(self):
Database.get_cwes = get_cwes
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class Weakness(models.Model):
"""
A Common Weakness Enumeration model
@@ -557,6 +561,7 @@ def to_dict(self):
return {"cwe_id": self.cwe_id, "name": self.name, "description": self.description}
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class VulnerabilityReferenceQuerySet(BaseQuerySet):
def for_cpe(self):
"""
@@ -565,6 +570,7 @@ def for_cpe(self):
return self.filter(reference_id__startswith="cpe")
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class VulnerabilityReference(models.Model):
"""
A reference to a vulnerability such as a security advisory from a Linux distribution or language
@@ -622,6 +628,7 @@ def is_cpe(self):
return self.reference_id.startswith("cpe")
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class VulnerabilityRelatedReference(models.Model):
"""
A reference related to a vulnerability.
@@ -642,6 +649,7 @@ class Meta:
ordering = ["vulnerability", "reference"]
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class PackageQuerySet(BaseQuerySet, PackageURLQuerySet):
def get_fixed_by_package_versions(self, purl: PackageURL, fix=True):
"""
@@ -808,6 +816,7 @@ def get_purl_query_lookups(purl):
return purl_to_dict(plain_purl, with_empty=False)
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class Package(PackageURLMixin):
"""
A software package with related vulnerabilities.
@@ -1118,7 +1127,6 @@ def fixing_vulnerabilities(self):
"""
Return a queryset of Vulnerabilities that are fixed by this package.
"""
- print("A")
return self.fixed_by_vulnerabilities.all()
@property
@@ -1136,6 +1144,7 @@ def affecting_vulns(self):
)
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class PackageRelatedVulnerabilityBase(models.Model):
"""
Abstract base class for package-vulnerability relations.
@@ -1232,11 +1241,13 @@ def add_package_vulnerability_changelog(self, advisory):
)
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class FixingPackageRelatedVulnerability(PackageRelatedVulnerabilityBase):
class Meta(PackageRelatedVulnerabilityBase.Meta):
verbose_name_plural = "Fixing Package Related Vulnerabilities"
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class AffectedByPackageRelatedVulnerability(PackageRelatedVulnerabilityBase):
severities = models.ManyToManyField(
@@ -1258,6 +1269,7 @@ def for_cve(self):
return self.filter(alias__startswith="CVE")
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class Alias(models.Model):
"""
An alias is a unique vulnerability identifier in some database, such as
@@ -1311,10 +1323,35 @@ def url(self):
return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json"
+class AdvisoryV2QuerySet(BaseQuerySet):
+    def search(self, query):
+        """
+        Return advisories matching the ``query`` string in their advisory id,
+        aliases, summary, or reference URLs.
+        """
+        return self.filter(
+            Q(advisory_id__icontains=query)
+            | Q(aliases__alias__icontains=query)
+            | Q(summary__icontains=query)
+            | Q(references__url__icontains=query)
+        ).distinct()
+
+
class AdvisoryQuerySet(BaseQuerySet):
- pass
+    def search(self, query):
+        """
+        Return advisories matching the ``query`` string in their advisory id,
+        aliases, summary, or reference URLs.
+        """
+        return self.filter(
+            Q(advisory_id__icontains=query)
+            | Q(aliases__alias__icontains=query)
+            | Q(summary__icontains=query)
+            | Q(references__url__icontains=query)
+        ).distinct()
+# FIXME: Remove when migration from Vulnerability to Advisory is completed
class Advisory(models.Model):
"""
An advisory represents data directly obtained from upstream transformed
@@ -1797,6 +1834,60 @@ class Meta:
abstract = True
+class CodeChangeV2(models.Model):
+ """
+ Abstract base model representing a change in code, either introducing or fixing a vulnerability.
+ This includes details about commits, patches, and related metadata.
+
+ We are tracking commits, pulls and downloads as references to the code change. The goal is to
+ keep track and store the actual code patch in the ``patch`` field. When not available the patch
+ will be inferred from these references using improvers.
+ """
+
+ commits = models.JSONField(
+ blank=True,
+ default=list,
+ help_text="List of commit identifiers using VCS URLs associated with the code change.",
+ )
+ pulls = models.JSONField(
+ blank=True,
+ default=list,
+ help_text="List of pull request URLs associated with the code change.",
+ )
+ downloads = models.JSONField(
+ blank=True, default=list, help_text="List of download URLs for the patched code."
+ )
+ patch = models.TextField(
+ blank=True, null=True, help_text="The code change as a patch in unified diff format."
+ )
+ base_package_version = models.ForeignKey(
+ "PackageV2",
+ null=True,
+ blank=True,
+ on_delete=models.SET_NULL,
+ related_name="codechanges_v2",
+ help_text="The base package version to which this code change applies.",
+ )
+ notes = models.TextField(
+ blank=True, null=True, help_text="Notes or instructions about this code change."
+ )
+ references = models.JSONField(
+ blank=True, default=list, help_text="URL references related to this code change."
+ )
+ is_reviewed = models.BooleanField(
+ default=False, help_text="Indicates if this code change has been reviewed."
+ )
+ created_at = models.DateTimeField(
+ auto_now_add=True, help_text="Timestamp indicating when this code change was created."
+ )
+ updated_at = models.DateTimeField(
+ auto_now=True, help_text="Timestamp indicating when this code change was last updated."
+ )
+
+ class Meta:
+ abstract = True
+
+
class CodeFix(CodeChange):
"""
A code fix is a code change that addresses a vulnerability and is associated:
@@ -1821,6 +1912,35 @@ class CodeFix(CodeChange):
)
+class CodeFixV2(CodeChangeV2):
+ """
+ A code fix is a code change that addresses a vulnerability and is associated with:
+ - a specific advisory
+ - the package that has been affected
+ - optionally, a specific fixing package version when it is known
+
+ advisory = models.ForeignKey(
+ "AdvisoryV2",
+ on_delete=models.CASCADE,
+ related_name="code_fix_v2",
+ help_text="The affected package version to which this code fix applies.",
+ )
+
+ affected_package = models.ForeignKey(
+ "PackageV2", on_delete=models.CASCADE, related_name="code_fix_v2_affected"
+ )
+
+ fixed_package = models.ForeignKey(
+ "PackageV2",
+ null=True,
+ blank=True,
+ on_delete=models.SET_NULL,
+ related_name="code_fix_v2_fixed",
+ help_text="The fixing package version with this code fix",
+ )
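+
+ # Illustrative sketch, not part of the module: creating a CodeFixV2 for saved
+ # AdvisoryV2 and PackageV2 rows (all values hypothetical).
+ #
+ # >>> CodeFixV2.objects.create( # doctest: +SKIP
+ # ... advisory=advisory,
+ # ... affected_package=affected_package,
+ # ... fixed_package=fixed_package,
+ # ... commits=["https://github.com/example/project/commit/abc123"],
+ # ... )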
+
+
class PipelineRun(models.Model):
"""The Database representation of a pipeline execution."""
@@ -2333,21 +2453,812 @@ class AdvisoryToDo(models.Model):
class Meta:
unique_together = ("related_advisories_id", "issue_type")
- def save(self, *args, **kwargs):
- self.full_clean()
- return super().save(*args, **kwargs)
+class AdvisorySeverity(models.Model):
+ url = models.URLField(
+ max_length=1024,
+ null=True,
+ help_text="URL to the vulnerability severity",
+ db_index=True,
+ )
-class ToDoRelatedAdvisory(models.Model):
- todo = models.ForeignKey(
- AdvisoryToDo,
- on_delete=models.CASCADE,
+ scoring_system_choices = tuple(
+ (system.identifier, system.name) for system in SCORING_SYSTEMS.values()
)
- advisory = models.ForeignKey(
- Advisory,
- on_delete=models.CASCADE,
+ scoring_system = models.CharField(
+ max_length=50,
+ choices=scoring_system_choices,
+ help_text="Identifier for the scoring system used. Available choices are: {} ".format(
+ ",\n".join(f"{sid}: {sname}" for sid, sname in scoring_system_choices)
+ ),
+ )
+
+ value = models.CharField(max_length=50, help_text="Example: 9.0, Important, High")
+
+ scoring_elements = models.CharField(
+ max_length=150,
+ null=True,
+ help_text="Supporting scoring elements used to compute the score values. "
+ "For example a CVSS vector string as used to compute a CVSS score.",
+ )
+
+ published_at = models.DateTimeField(
+ blank=True, null=True, help_text="UTC Date of publication of the vulnerability severity"
)
+ objects = BaseQuerySet.as_manager()
+
class Meta:
- unique_together = ("todo", "advisory")
+ ordering = ["url", "scoring_system", "value"]
+
+
+class AdvisoryWeakness(models.Model):
+ """
+ A CWE software weakness associated with an advisory.
+ """
+
+ cwe_id = models.IntegerField(help_text="CWE id")
+
+ cwe_by_id = {}
+
+ def get_cwe(self, cwe_id):
+ if not self.cwe_by_id:
+ db = Database()
+ for weakness in db.get_cwes():
+ self.cwe_by_id[str(weakness.cwe_id)] = weakness
+ return self.cwe_by_id[cwe_id]
+
+ @property
+ def cwe(self):
+ return f"CWE-{self.cwe_id}"
+
+ @property
+ def weakness(self):
+ """
+ Return the cwe2 weakness object for this CWE id, or None if not found.
+ """
+ try:
+ weakness = self.get_cwe(str(self.cwe_id))
+ return weakness
+ except Exception as e:
+ logger.warning(f"Could not find CWE {self.cwe_id}: {e}")
+
+ @property
+ def name(self):
+ """Return the weakness's name."""
+ return self.weakness.name if self.weakness else ""
+
+ @property
+ def description(self):
+ """Return the weakness's description."""
+ return self.weakness.description if self.weakness else ""
+
+ def to_dict(self):
+ return {"cwe_id": self.cwe_id, "name": self.name, "description": self.description}
+
+
+class AdvisoryReference(models.Model):
+ url = models.URLField(
+ max_length=1024,
+ help_text="URL to the vulnerability reference",
+ unique=True,
+ )
+
+ ADVISORY = "advisory"
+ EXPLOIT = "exploit"
+ MAILING_LIST = "mailing_list"
+ BUG = "bug"
+ OTHER = "other"
+
+ REFERENCE_TYPES = [
+ (ADVISORY, "Advisory"),
+ (EXPLOIT, "Exploit"),
+ (MAILING_LIST, "Mailing List"),
+ (BUG, "Bug"),
+ (OTHER, "Other"),
+ ]
+
+ reference_type = models.CharField(max_length=20, choices=REFERENCE_TYPES, blank=True)
+
+ reference_id = models.CharField(
+ max_length=500,
+ help_text="An optional reference ID, such as DSA-4465-1 when available",
+ blank=True,
+ db_index=True,
+ )
+
+ class Meta:
+ ordering = ["reference_id", "url", "reference_type"]
+
+ def __str__(self):
+ reference_id = f" {self.reference_id}" if self.reference_id else ""
+ return f"{self.url}{reference_id}"
+
+ @property
+ def is_cpe(self):
+ """
+ Return True if this is a CPE reference.
+ """
+ return self.reference_id.startswith("cpe")
+
+
+class AdvisoryAlias(models.Model):
+ alias = models.CharField(
+ max_length=50,
+ unique=True,
+ blank=False,
+ null=False,
+ help_text="An alias is a unique vulnerability identifier in some database, "
+ "such as CVE-2020-2233",
+ )
+
+ class Meta:
+ ordering = ["alias"]
+
+ def __str__(self):
+ return self.alias
+
+ @cached_property
+ def url(self):
+ """
+ Create a URL for the alias.
+ """
+ alias: str = self.alias
+ if alias.startswith("CVE"):
+ return f"https://nvd.nist.gov/vuln/detail/{alias}"
+
+ if alias.startswith("GHSA"):
+ return f"https://github.com/advisories/{alias}"
+
+ if alias.startswith("NPM-"):
+ id = alias.lstrip("NPM-")
+ return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json"
+
+
+class AdvisoryV2(models.Model):
+ """
+ An advisory represents data directly obtained from upstream transformed
+ into structured data
+ """
+
+ # This is similar to a type or a namespace
+ datasource_id = models.CharField(
+ max_length=100,
+ blank=False,
+ null=False,
+ help_text="Unique ID for the datasource used for this advisory ." "e.g.: nginx_importer_v2",
+ )
+
+ # This is similar to a name
+ advisory_id = models.CharField(
+ max_length=50,
+ blank=False,
+ null=False,
+ unique=False,
+ help_text="An advisory is a unique vulnerability identifier in some database, "
+ "such as PYSEC-2020-2233",
+ )
+
+ avid = models.CharField(
+ max_length=500,
+ blank=False,
+ null=False,
+ help_text="Unique ID for the datasource used for this advisory ."
+ "e.g.: pysec_importer_v2/PYSEC-2020-2233",
+ )
+
+ # This is similar to a version
+ unique_content_id = models.CharField(
+ max_length=64,
+ blank=False,
+ null=False,
+ unique=True,
+ help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex",
+ )
+ url = models.URLField(
+ blank=False,
+ null=False,
+ help_text="Link to the advisory on the upstream website",
+ )
+
+ # TODO: Have a mapping that gives datasource class by datasource ID
+ # Get label from datasource class
+ # Remove this from model
+ # In the UI - Use label
+ # In the API - Use datasource_id
+ # Have an API endpoint for all info for datasources - show license, label
+
+ summary = models.TextField(
+ blank=True,
+ )
+ aliases = models.ManyToManyField(
+ AdvisoryAlias,
+ related_name="advisories",
+ help_text="A list of serializable Alias objects",
+ )
+ references = models.ManyToManyField(
+ AdvisoryReference,
+ related_name="advisories",
+ help_text="A list of serializable Reference objects",
+ )
+ severities = models.ManyToManyField(
+ AdvisorySeverity,
+ related_name="advisories",
+ help_text="A list of vulnerability severities associated with this advisory.",
+ )
+ weaknesses = models.ManyToManyField(
+ AdvisoryWeakness,
+ related_name="advisories",
+ help_text="A list of software weaknesses associated with this advisory.",
+ )
+ date_published = models.DateTimeField(
+ blank=True, null=True, help_text="UTC Date of publication of the advisory"
+ )
+ date_collected = models.DateTimeField(help_text="UTC Date on which the advisory was collected")
+ date_imported = models.DateTimeField(
+ blank=True, null=True, help_text="UTC Date on which the advisory was imported"
+ )
+
+ affecting_packages = models.ManyToManyField(
+ "PackageV2",
+ related_name="affected_by_advisories",
+ help_text="A list of packages that are affected by this advisory.",
+ )
+
+ fixed_by_packages = models.ManyToManyField(
+ "PackageV2",
+ related_name="fixing_advisories",
+ help_text="A list of packages that are reported by this advisory.",
+ )
+
+ status = models.IntegerField(
+ choices=VulnerabilityStatusType.choices, default=VulnerabilityStatusType.PUBLISHED
+ )
+
+ exploitability = models.DecimalField(
+ null=True,
+ blank=True,
+ max_digits=2,
+ decimal_places=1,
+ help_text="Exploitability indicates the likelihood that a vulnerability in a software package could be used by malicious actors to compromise systems, "
+ "applications, or networks. This metric is determined automatically based on the discovery of known exploits.",
+ )
+
+ weighted_severity = models.DecimalField(
+ null=True,
+ blank=True,
+ max_digits=3,
+ decimal_places=1,
+ help_text="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10.",
+ )
+
+ @property
+ def risk_score(self):
+ """
+ Risk expressed as a number ranging from 0 to 10.
+ Risk is calculated from weighted severity and exploitability values.
+ It is the weighted severity multiplied by the exploitability, capped at 10:
+ Risk = min(weighted severity * exploitability, 10)
+ """
+ if self.exploitability and self.weighted_severity:
+ risk_score = min(float(self.exploitability * self.weighted_severity), 10.0)
+ return round(risk_score, 1)
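+
+ # Worked example for risk_score, not part of the module: with exploitability
+ # 2.0 and weighted_severity 6.5, 2.0 * 6.5 = 13.0, which is capped at 10:
+ #
+ # >>> from decimal import Decimal
+ # >>> adv = AdvisoryV2(exploitability=Decimal("2"), weighted_severity=Decimal("6.5"))
+ # >>> adv.risk_score
+ # 10.0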
+
+ objects = AdvisoryV2QuerySet.as_manager()
+
+ class Meta:
+ unique_together = ["datasource_id", "advisory_id", "unique_content_id"]
+ ordering = ["datasource_id", "advisory_id", "date_published", "unique_content_id"]
+
+ def save(self, *args, **kwargs):
+ self.full_clean()
+ return super().save(*args, **kwargs)
+
+ @property
+ def get_status_label(self):
+ label_by_status = {choice[0]: choice[1] for choice in VulnerabilityStatusType.choices}
+ return label_by_status.get(self.status) or VulnerabilityStatusType.PUBLISHED.label
+
+ def get_absolute_url(self):
+ """
+ Return this Advisory details absolute URL.
+ """
+ return reverse("advisory_details", args=[self.id])
+
+ def to_advisory_data(self) -> "AdvisoryDataV2":
+ from vulnerabilities.importer import AdvisoryDataV2
+ from vulnerabilities.importer import AffectedPackage
+ from vulnerabilities.importer import ReferenceV2
+
+ return AdvisoryDataV2(
+ aliases=[item.alias for item in self.aliases.all()],
+ summary=self.summary,
+ affected_packages=[
+ AffectedPackage.from_dict(pkg) for pkg in self.affected_packages if pkg
+ ],
+ references=[ReferenceV2.from_dict(ref) for ref in self.references.all()],
+ date_published=self.date_published,
+ weaknesses=list(self.weaknesses.all()),
+ severities=list(self.severities.all()),
+ url=self.url,
+ )
+
+ @property
+ def get_aliases(self):
+ """
+ Return a queryset of all Aliases for this advisory.
+ """
+ return self.aliases.all()
+
+ def aggregate_fixed_and_affected_packages(self):
+ from vulnerabilities.utils import get_purl_version_class
+
+ sorted_fixed_by_packages = self.fixed_by_packages.filter(is_ghost=False).order_by(
+ "type", "namespace", "name", "qualifiers", "subpath"
+ )
+
+ if sorted_fixed_by_packages:
+ sorted_fixed_by_packages.first().calculate_version_rank
+
+ sorted_affected_packages = self.affecting_packages.all()
+
+ if sorted_affected_packages:
+ sorted_affected_packages.first().calculate_version_rank
+
+ grouped_fixed_by_packages = {
+ key: list(group)
+ for key, group in groupby(
+ sorted_fixed_by_packages,
+ key=attrgetter("type", "namespace", "name", "qualifiers", "subpath"),
+ )
+ }
+
+ all_affected_fixed_by_matches = []
+
+ for sorted_affected_package in sorted_affected_packages:
+ affected_fixed_by_matches = {
+ "affected_package": sorted_affected_package,
+ "matched_fixed_by_packages": [],
+ }
+
+ # Build the key to find matching group
+ key = (
+ sorted_affected_package.type,
+ sorted_affected_package.namespace,
+ sorted_affected_package.name,
+ sorted_affected_package.qualifiers,
+ sorted_affected_package.subpath,
+ )
+
+ # Get matching group from pre-grouped fixed_by_packages
+ matching_fixed_packages = grouped_fixed_by_packages.get(key, [])
+
+ # Get version classes for comparison
+ affected_version_class = get_purl_version_class(sorted_affected_package)
+ affected_version = affected_version_class(sorted_affected_package.version)
+
+ # Compare versions and filter valid matches
+ matched_fixed_by_packages = [
+ fixed_by_package.purl
+ for fixed_by_package in matching_fixed_packages
+ if get_purl_version_class(fixed_by_package)(fixed_by_package.version)
+ > affected_version
+ ]
+
+ affected_fixed_by_matches["matched_fixed_by_packages"] = matched_fixed_by_packages
+ all_affected_fixed_by_matches.append(affected_fixed_by_matches)
+ return sorted_fixed_by_packages, sorted_affected_packages, all_affected_fixed_by_matches
+
+ alias = get_aliases
+
+
+class ToDoRelatedAdvisory(models.Model):
+ todo = models.ForeignKey(
+ AdvisoryToDo,
+ on_delete=models.CASCADE,
+ )
+
+ advisory = models.ForeignKey(
+ Advisory,
+ on_delete=models.CASCADE,
+ )
+
+ class Meta:
+ unique_together = ("todo", "advisory")
+
+
+class PackageQuerySetV2(BaseQuerySet, PackageURLQuerySet):
+ def search(self, query: str = None):
+ """
+ Return a Package queryset searching for the ``query``.
+ Make a best effort to find matching packages, based either
+ on an exact purl, a partial purl, or just the name and namespace.
+ """
+ query = query and query.strip()
+ if not query:
+ return self.none()
+ qs = self
+
+ try:
+ # if it's a valid purl, try to parse it and use it as is
+ purl = str(utils.plain_purl(query))
+ qs = qs.filter(package_url__istartswith=purl)
+ except ValueError:
+ # otherwise use query as a plain string
+ qs = qs.filter(package_url__icontains=query)
+ return qs.order_by("package_url")
+
+ def with_vulnerability_counts(self):
+ return self.annotate(
+ vulnerability_count=Count(
+ "affected_by_advisories",
+ ),
+ patched_vulnerability_count=Count(
+ "fixing_advisories",
+ ),
+ )
+
+ def get_fixed_by_package_versions(self, purl: PackageURL, fix=True):
+ """
+ Return a queryset of all the versions of the ``purl`` package that fix any vulnerability.
+ If `fix` is False, return all package versions whether or not they fix a vulnerability.
+ """
+ filter_dict = {
+ "name": purl.name,
+ "namespace": purl.namespace,
+ "type": purl.type,
+ "qualifiers": purl.qualifiers,
+ "subpath": purl.subpath,
+ }
+
+ if fix:
+ filter_dict["fixing_advisories__isnull"] = False
+
+ # TODO: why do we need distinct
+ return PackageV2.objects.filter(**filter_dict).distinct()
+
+ def get_or_create_from_purl(self, purl: Union[PackageURL, str]):
+ """
+ Return a new or existing Package given a ``purl`` PackageURL object or PURL string.
+ """
+ package, is_created = PackageV2.objects.get_or_create(**purl_to_dict(purl=purl))
+
+ return package, is_created
+
+ def only_vulnerable(self):
+ return self._vulnerable(True)
+
+ def only_non_vulnerable(self):
+ return self._vulnerable(False).filter(is_ghost=False)
+
+ def for_purl(self, purl):
+ """
+ Return a queryset matching the ``purl`` Package URL.
+ """
+ return self.filter(package_url=purl)
+
+ def for_purls(self, purls=()):
+ """
+ Return a queryset of Packages matching a list of PURLs.
+ """
+ return self.filter(package_url__in=purls).distinct()
+
+ def _vulnerable(self, vulnerable=True):
+ """
+ Filter to select only vulnerable or non-vulnerable packages.
+ """
+ return self.with_is_vulnerable().filter(is_vulnerable=vulnerable)
+
+ def with_is_vulnerable(self):
+ """
+ Annotate Package with ``is_vulnerable`` boolean attribute.
+ """
+ return self.annotate(
+ is_vulnerable=Exists(AdvisoryV2.objects.filter(affecting_packages__pk=OuterRef("pk")))
+ )
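+
+ # Illustrative usage, not part of the module:
+ #
+ # >>> PackageV2.objects.only_vulnerable() # packages affected by at least one advisory # doctest: +SKIP
+ # >>> PackageV2.objects.with_is_vulnerable().filter(is_vulnerable=False) # doctest: +SKIP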
+
+
+class PackageV2(PackageURLMixin):
+ """
+ A software package with related vulnerabilities.
+ """
+
+ package_url = models.CharField(
+ max_length=1000,
+ null=False,
+ help_text="The Package URL for this package.",
+ db_index=True,
+ )
+
+ plain_package_url = models.CharField(
+ max_length=1000,
+ null=False,
+ help_text="The Package URL for this package without qualifiers and subpath.",
+ db_index=True,
+ )
+
+ is_ghost = models.BooleanField(
+ default=False,
+ help_text="True if the package does not exist in the upstream package manager or its repository.",
+ db_index=True,
+ )
+
+ risk_score = models.DecimalField(
+ null=True,
+ max_digits=3,
+ decimal_places=1,
+ help_text="Risk score between 0.00 and 10.00, where higher values "
+ "indicate greater vulnerability risk for the package.",
+ )
+
+ version_rank = models.IntegerField(
+ help_text="Rank of the version to support ordering by version. Rank "
+ "zero means the rank has not been defined yet",
+ default=0,
+ db_index=True,
+ )
+
+ def __str__(self):
+ return self.package_url
+
+ @property
+ def purl(self):
+ return self.package_url
+
+ def save(self, *args, **kwargs):
+ """
+ Save, normalizing PURL fields.
+ """
+ purl = PackageURL(
+ type=self.type,
+ namespace=self.namespace,
+ name=self.name,
+ version=self.version,
+ qualifiers=self.qualifiers,
+ subpath=self.subpath,
+ )
+
+ # We re-parse the purl to ensure name and namespace
+ # are set correctly
+ normalized = normalize_purl(purl=purl)
+
+ for name, value in purl_to_dict(normalized).items():
+ setattr(self, name, value)
+
+ self.package_url = str(normalized)
+ plain_purl = utils.plain_purl(normalized)
+ self.plain_package_url = str(plain_purl)
+ super().save(*args, **kwargs)
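+
+ # Illustrative sketch, not part of the module: per the field help texts,
+ # save() fills package_url with the normalized purl and plain_package_url
+ # with the purl stripped of qualifiers and subpath (values hypothetical):
+ #
+ # >>> pkg = PackageV2(type="npm", name="lodash", version="4.17.21", qualifiers={"os": "linux"})
+ # >>> pkg.save() # doctest: +SKIP
+ # >>> pkg.package_url # doctest: +SKIP
+ # 'pkg:npm/lodash@4.17.21?os=linux'
+ # >>> pkg.plain_package_url # doctest: +SKIP
+ # 'pkg:npm/lodash@4.17.21'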
+
+ objects = PackageQuerySetV2.as_manager()
+
+ @property
+ def calculate_version_rank(self):
+ """
+ Calculate and return the `version_rank` for this package.
+ If all packages in this package's group already have a `version_rank`, return it as-is.
+
+ If any package with the same type, namespace and name is missing a rank,
+ re-rank the whole group in version order and persist the new ranks.
+ """
+
+ group_packages = PackageV2.objects.filter(
+ type=self.type,
+ namespace=self.namespace,
+ name=self.name,
+ )
+
+ if any(p.version_rank == 0 for p in group_packages):
+ sorted_packages = sorted(group_packages, key=lambda p: self.version_class(p.version))
+ for rank, package in enumerate(sorted_packages, start=1):
+ package.version_rank = rank
+ PackageV2.objects.bulk_update(sorted_packages, fields=["version_rank"])
+ return self.version_rank
+
+ @property
+ def fixed_package_details(self):
+ """
+ Return a mapping of vulnerabilities that affect this package and the next and
+ latest non-vulnerable versions.
+ """
+ package_details = {}
+ package_details["purl"] = PackageURL.from_string(self.purl)
+
+ next_non_vulnerable, latest_non_vulnerable = self.get_non_vulnerable_versions()
+ package_details["next_non_vulnerable"] = next_non_vulnerable
+ package_details["latest_non_vulnerable"] = latest_non_vulnerable
+
+ package_details["advisories"] = self.get_affecting_vulnerabilities()
+
+ return package_details
+
+ def get_non_vulnerable_versions(self):
+ """
+ Return a tuple of the next and latest non-vulnerable versions as PackageV2 instances.
+ Return a tuple of (None, None) if there is no non-vulnerable version.
+ """
+ if self.version_rank == 0:
+ self.calculate_version_rank
+ non_vulnerable_versions = PackageV2.objects.get_fixed_by_package_versions(
+ self, fix=False
+ ).only_non_vulnerable()
+
+ later_non_vulnerable_versions = non_vulnerable_versions.filter(
+ version_rank__gt=self.version_rank
+ )
+
+ later_non_vulnerable_versions = list(later_non_vulnerable_versions)
+
+ if later_non_vulnerable_versions:
+ sorted_versions = later_non_vulnerable_versions
+ next_non_vulnerable = sorted_versions[0]
+ latest_non_vulnerable = sorted_versions[-1]
+ return next_non_vulnerable, latest_non_vulnerable
+
+ return None, None
+
+ @cached_property
+ def version_class(self):
+ range_class = RANGE_CLASS_BY_SCHEMES.get(self.type)
+ return range_class.version_class if range_class else Version
+
+ def get_absolute_url(self):
+ """
+ Return this Package details absolute URL.
+ """
+ return reverse("package_details_v2", args=[self.purl])
+
+ @cached_property
+ def current_version(self):
+ return self.version_class(self.version)
+
+ def get_affecting_vulnerabilities(self):
+ """
+ Return a list of vulnerabilities that affect this package together with information regarding
+ the versions that fix the vulnerabilities.
+ """
+ if self.version_rank == 0:
+ self.calculate_version_rank
+ package_details_advs = []
+
+ fixed_by_packages = PackageV2.objects.get_fixed_by_package_versions(self, fix=True)
+
+ package_advisories = self.affected_by_advisories.prefetch_related(
+ Prefetch(
+ "fixed_by_packages",
+ queryset=fixed_by_packages,
+ to_attr="fixed_packages",
+ )
+ )
+
+ for adv in package_advisories:
+ package_details_advs.append({"advisory": adv})
+ later_fixed_packages = []
+
+ for fixed_pkg in adv.fixed_by_packages.all():
+ if fixed_pkg not in fixed_by_packages:
+ continue
+ fixed_version = self.version_class(fixed_pkg.version)
+ if fixed_version > self.current_version:
+ later_fixed_packages.append(fixed_pkg)
+
+ next_fixed_package_vulns = []
+
+ sort_fixed_by_packages_by_version = []
+ if later_fixed_packages:
+ sort_fixed_by_packages_by_version = sorted(
+ later_fixed_packages, key=lambda p: p.version_rank
+ )
+
+ fixed_by_pkgs = []
+
+ for vuln_details in package_details_advs:
+ if vuln_details["advisory"] != adv:
+ continue
+ vuln_details["fixed_by_purl"] = []
+ vuln_details["fixed_by_purl_advisories"] = []
+
+ for fixed_by_pkg in sort_fixed_by_packages_by_version:
+ fixed_by_package_details = {}
+ fixed_by_purl = PackageURL.from_string(fixed_by_pkg.purl)
+ next_fixed_package_vulns = list(fixed_by_pkg.affected_by_advisories.all())
+
+ fixed_by_package_details["fixed_by_purl"] = fixed_by_purl
+ fixed_by_package_details["fixed_by_purl_advisories"] = next_fixed_package_vulns
+ fixed_by_pkgs.append(fixed_by_package_details)
+
+ vuln_details["fixed_by_package_details"] = fixed_by_pkgs
+
+ return package_details_advs
+
+
+class AdvisoryExploit(models.Model):
+ """
+ A vulnerability exploit is code used to
+ take advantage of a security flaw for unauthorized access or malicious activity.
+ """
+
+ advisory = models.ForeignKey(
+ AdvisoryV2,
+ related_name="exploits",
+ on_delete=models.CASCADE,
+ )
+
+ date_added = models.DateField(
+ null=True,
+ blank=True,
+ help_text="The date the vulnerability was added to an exploit catalog.",
+ )
+
+ description = models.TextField(
+ null=True,
+ blank=True,
+ help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description",
+ )
+
+ required_action = models.TextField(
+ null=True,
+ blank=True,
+ help_text="The required action to address the vulnerability, typically to "
+ "apply vendor updates or apply vendor mitigations or to discontinue use.",
+ )
+
+ due_date = models.DateField(
+ null=True,
+ blank=True,
+ help_text="The date the required action is due, which applies"
+ " to all USA federal civilian executive branch (FCEB) agencies, "
+ "but all organizations are strongly encouraged to execute the required action",
+ )
+
+ notes = models.TextField(
+ null=True,
+ blank=True,
+ help_text="Additional notes and resources about the vulnerability,"
+ " often a URL to vendor instructions.",
+ )
+
+ known_ransomware_campaign_use = models.BooleanField(
+ default=False,
+ help_text="""Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign;
+ or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.""",
+ )
+
+ source_date_published = models.DateField(
+ null=True, blank=True, help_text="The date that the exploit was published or disclosed."
+ )
+
+ exploit_type = models.TextField(
+ null=True,
+ blank=True,
+ help_text="The type of the exploit as provided by the original upstream data source.",
+ )
+
+ platform = models.TextField(
+ null=True,
+ blank=True,
+ help_text="The platform associated with the exploit as provided by the original upstream data source.",
+ )
+
+ source_date_updated = models.DateField(
+ null=True,
+ blank=True,
+ help_text="The date the exploit was updated in the original upstream data source.",
+ )
+
+ data_source = models.TextField(
+ null=True,
+ blank=True,
+ help_text="The source of the exploit information, such as CISA KEV, exploitdb, metaspoit, or others.",
+ )
+
+ source_url = models.URLField(
+ null=True,
+ blank=True,
+ help_text="The URL to the exploit as provided in the original upstream data source.",
+ )
+
+ @property
+ def get_known_ransomware_campaign_use_type(self):
+ return "Known" if self.known_ransomware_campaign_use else "Unknown"
diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py
index d41b05321..3d1316cce 100644
--- a/vulnerabilities/pipelines/__init__.py
+++ b/vulnerabilities/pipelines/__init__.py
@@ -15,18 +15,29 @@
from traceback import format_exc as traceback_format_exc
from typing import Iterable
from typing import List
+from typing import Optional
from aboutcode.pipeline import LoopProgress
from aboutcode.pipeline import PipelineDefinition
from aboutcode.pipeline import humanize_time
+from fetchcode import package_versions
+from packageurl import PackageURL
from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import UnMergeablePackageError
from vulnerabilities.improver import MAX_CONFIDENCE
from vulnerabilities.models import Advisory
+from vulnerabilities.models import PackageV2
from vulnerabilities.models import PipelineRun
from vulnerabilities.pipes.advisory import import_advisory
from vulnerabilities.pipes.advisory import insert_advisory
+from vulnerabilities.pipes.advisory import insert_advisory_v2
+from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage
from vulnerabilities.utils import classproperty
+from vulnerabilities.utils import get_affected_packages_by_patched_package
+from vulnerabilities.utils import nearest_patched_package
+from vulnerabilities.utils import resolve_version_range
module_logger = logging.getLogger(__name__)
@@ -148,14 +159,6 @@ def on_failure(self):
"""
pass
- @classproperty
- def pipeline_id(cls):
- """Return unique pipeline_id set in cls.pipeline_id"""
-
- if cls.pipeline_id is None or cls.pipeline_id == "":
- raise NotImplementedError("pipeline_id is not defined or is empty")
- return cls.pipeline_id
-
class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline):
"""
@@ -207,12 +210,13 @@ def collect_and_store_advisories(self):
progress = LoopProgress(total_iterations=estimated_advisory_count, logger=self.log)
for advisory in progress.iter(self.collect_advisories()):
- if _obj := insert_advisory(
- advisory=advisory,
- pipeline_id=self.pipeline_id,
- logger=self.log,
- ):
- collected_advisory_count += 1
+ if isinstance(advisory, AdvisoryData):
+ if _obj := insert_advisory(
+ advisory=advisory,
+ pipeline_id=self.pipeline_id,
+ logger=self.log,
+ ):
+ collected_advisory_count += 1
self.log(f"Successfully collected {collected_advisory_count:,d} advisories")
@@ -248,3 +252,256 @@ def import_advisory(self, advisory: Advisory) -> int:
f"Failed to import advisory: {advisory!r} with error {e!r}:\n{traceback_format_exc()}",
level=logging.ERROR,
)
+
+
+class VulnerableCodeBaseImporterPipelineV2(VulnerableCodePipeline):
+ """
+ Base importer pipeline for importing advisories.
+
+ Usage:
+ Subclass this pipeline and implement the ``advisories_count`` and ``collect_advisories``
+ methods. Also override ``steps`` and ``advisory_confidence`` as needed.
+ """
+
+ pipeline_id = None # Unique Pipeline ID, this should be the name of pipeline module.
+ license_url = None
+ spdx_license_expression = None
+ repo_url = None
+ advisory_confidence = MAX_CONFIDENCE
+ ignorable_versions = []
+ unfurl_version_ranges = False
+
+ @classmethod
+ def steps(cls):
+ return (cls.collect_and_store_advisories,)
+
+ def collect_advisories(self) -> Iterable[AdvisoryData]:
+ """
+ Yield AdvisoryData objects for the importer pipeline.
+ """
+ raise NotImplementedError
+
+ def advisories_count(self) -> int:
+ """
+ Return the estimated AdvisoryData to be yielded by ``collect_advisories``.
+
+ Used by ``collect_and_store_advisories`` to log the progress of advisory collection.
+ """
+ raise NotImplementedError
+
+ def collect_and_store_advisories(self):
+ collected_advisory_count = 0
+ estimated_advisory_count = self.advisories_count()
+
+ if estimated_advisory_count > 0:
+ self.log(f"Collecting {estimated_advisory_count:,d} advisories")
+
+ progress = LoopProgress(total_iterations=estimated_advisory_count, logger=self.log)
+ for advisory in progress.iter(self.collect_advisories()):
+ if advisory is None:
+ self.log("Advisory is None, skipping")
+ continue
+ if _obj := insert_advisory_v2(
+ advisory=advisory,
+ pipeline_id=self.pipeline_id,
+ get_advisory_packages=self.get_advisory_packages,
+ logger=self.log,
+ ):
+ collected_advisory_count += 1
+
+ self.log(f"Successfully collected {collected_advisory_count:,d} advisories")
+
+ def get_advisory_packages(self, advisory_data: AdvisoryData) -> tuple:
+ """
+ Return a tuple of (vulnerable_packages, fixed_packages) lists of PackageV2
+ for the given advisory.
+
+ Passed to ``insert_advisory_v2`` to resolve the packages for the advisory.
+ """
+ from vulnerabilities.improvers import default
+
+ affected_purls = []
+ fixed_purls = []
+ for affected_package in advisory_data.affected_packages:
+ package_affected_purls, package_fixed_purls = default.get_exact_purls(
+ affected_package=affected_package
+ )
+ affected_purls.extend(package_affected_purls)
+ fixed_purls.extend(package_fixed_purls)
+
+ if self.unfurl_version_ranges:
+ vulnerable_pvs, fixed_pvs = self.get_impacted_packages(
+ affected_packages=advisory_data.affected_packages,
+ advisory_date_published=advisory_data.date_published,
+ )
+ affected_purls.extend(vulnerable_pvs)
+ fixed_purls.extend(fixed_pvs)
+
+ vulnerable_packages = []
+ fixed_packages = []
+
+ for affected_purl in affected_purls:
+ vulnerable_package, _ = PackageV2.objects.get_or_create_from_purl(purl=affected_purl)
+ vulnerable_packages.append(vulnerable_package)
+
+ for fixed_purl in fixed_purls:
+ fixed_package, _ = PackageV2.objects.get_or_create_from_purl(purl=fixed_purl)
+ fixed_packages.append(fixed_package)
+
+ return vulnerable_packages, fixed_packages
+
+ def get_published_package_versions(
+ self, package_url: PackageURL, until: Optional[datetime] = None
+ ) -> List[str]:
+ """
+ Return a list of versions of the `package_url` package published at or before `until`.
+ """
+ versions_before_until = []
+ try:
+ versions = package_versions.versions(str(package_url))
+ for version in versions or []:
+ if until and version.release_date and version.release_date > until:
+ continue
+ versions_before_until.append(version.value)
+
+ return versions_before_until
+ except Exception as e:
+ self.log(
+ f"Failed to fetch versions for package {str(package_url)} {e!r}",
+ level=logging.ERROR,
+ )
+ return []
+
+ def get_impacted_packages(self, affected_packages, advisory_date_published):
+ """
+ Return a tuple of lists of affected and fixed PackageURLs
+ """
+ if not affected_packages:
+ return [], []
+
+ mergable = True
+
+ # TODO: We should never have the exception in the first place
+ try:
+ purl, affected_version_ranges, fixed_versions = AffectedPackage.merge(affected_packages)
+ except UnMergeablePackageError:
+ self.log(f"Cannot merge with different purls {affected_packages!r}", logging.ERROR)
+ mergable = False
+
+ if not mergable:
+ vulnerable_packages = []
+ fixed_packages = []
+ for affected_package in affected_packages:
+ purl = affected_package.package
+ affected_version_range = affected_package.affected_version_range
+ fixed_version = affected_package.fixed_version
+ pkg_type = purl.type
+ pkg_namespace = purl.namespace
+ pkg_name = purl.name
+ if not affected_version_range and fixed_version:
+ fixed_packages.append(
+ PackageURL(
+ type=pkg_type,
+ namespace=pkg_namespace,
+ name=pkg_name,
+ version=str(fixed_version),
+ )
+ )
+ else:
+ valid_versions = self.get_published_package_versions(
+ package_url=purl, until=advisory_date_published
+ )
+ affected_pvs, fixed_pvs = self.resolve_package_versions(
+ affected_version_range=affected_version_range,
+ pkg_type=pkg_type,
+ pkg_namespace=pkg_namespace,
+ pkg_name=pkg_name,
+ valid_versions=valid_versions,
+ )
+ vulnerable_packages.extend(affected_pvs)
+ fixed_packages.extend(fixed_pvs)
+ return vulnerable_packages, fixed_packages
+ else:
+ pkg_type = purl.type
+ pkg_namespace = purl.namespace
+ pkg_name = purl.name
+ pkg_qualifiers = purl.qualifiers
+ fixed_purls = [
+ PackageURL(
+ type=pkg_type,
+ namespace=pkg_namespace,
+ name=pkg_name,
+ version=str(version),
+ qualifiers=pkg_qualifiers,
+ )
+ for version in fixed_versions
+ ]
+ if not affected_version_ranges:
+ return [], fixed_purls
+ else:
+ valid_versions = self.get_published_package_versions(
+ package_url=purl, until=advisory_date_published
+ )
+ vulnerable_packages = []
+ fixed_packages = []
+ for affected_version_range in affected_version_ranges:
+ vulnerable_pvs, fixed_pvs = self.resolve_package_versions(
+ affected_version_range=affected_version_range,
+ pkg_type=pkg_type,
+ pkg_namespace=pkg_namespace,
+ pkg_name=pkg_name,
+ valid_versions=valid_versions,
+ )
+ vulnerable_packages.extend(vulnerable_pvs)
+ fixed_packages.extend(fixed_pvs)
+ return vulnerable_packages, fixed_packages
+
+ def resolve_package_versions(
+ self,
+ affected_version_range,
+ pkg_type,
+ pkg_namespace,
+ pkg_name,
+ valid_versions,
+ ):
+ """
+ Return a tuple of (affected_packages, fixed_packages) lists of PackageURLs
+ for the given `affected_version_range` and `valid_versions`.
+
+ ``valid_versions`` are the valid versions listed on the package registry for that package.
+ """
+ aff_vers, unaff_vers = resolve_version_range(
+ affected_version_range=affected_version_range,
+ ignorable_versions=self.ignorable_versions,
+ package_versions=valid_versions,
+ )
+
+ affected_purls = list(
+ self.expand_verion_range_to_purls(pkg_type, pkg_namespace, pkg_name, aff_vers)
+ )
+
+ unaffected_purls = list(
+ self.expand_verion_range_to_purls(pkg_type, pkg_namespace, pkg_name, unaff_vers)
+ )
+
+ fixed_packages = []
+ affected_packages = []
+
+ patched_packages = nearest_patched_package(
+ vulnerable_packages=affected_purls, resolved_packages=unaffected_purls
+ )
+
+ for (
+ fixed_package,
+ affected_purls,
+ ) in get_affected_packages_by_patched_package(patched_packages).items():
+ if fixed_package:
+ fixed_packages.append(fixed_package)
+ affected_packages.extend(affected_purls)
+
+ return affected_packages, fixed_packages
+
+ def expand_verion_range_to_purls(self, pkg_type, pkg_namespace, pkg_name, versions):
+ for version in versions:
+ yield PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version)
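+
+
+# Illustrative sketch, not part of the module: a minimal importer built on
+# VulnerableCodeBaseImporterPipelineV2. The pipeline_id, URL and advisory
+# values are hypothetical.
+#
+# class ExampleImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+# pipeline_id = "example_importer_v2"
+# spdx_license_expression = "CC0-1.0"
+#
+# def advisories_count(self) -> int:
+# return 1
+#
+# def collect_advisories(self) -> Iterable[AdvisoryData]:
+# yield AdvisoryData(
+# advisory_id="EXAMPLE-2024-0001",
+# aliases=[],
+# summary="Example advisory.",
+# affected_packages=[],
+# references_v2=[],
+# url="https://example.org/advisories/EXAMPLE-2024-0001",
+# )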
diff --git a/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py
new file mode 100644
index 000000000..90ea32b75
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py
@@ -0,0 +1,341 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import logging
+import re
+import urllib.parse
+from typing import Iterable
+
+import requests
+from bs4 import BeautifulSoup
+from packageurl import PackageURL
+from univers.version_constraint import VersionConstraint
+from univers.version_range import ApacheVersionRange
+from univers.versions import SemverVersion
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import Reference
+from vulnerabilities.importer import VulnerabilitySeverity
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.severity_systems import APACHE_HTTPD
+from vulnerabilities.utils import create_weaknesses_list
+from vulnerabilities.utils import cwe_regex
+from vulnerabilities.utils import get_item
+
+logger = logging.getLogger(__name__)
+
+
+def fetch_links(url):
+ """
+ Return a list of absolute URLs to the JSON advisory files linked from ``url``.
+ """
+ links = []
+ data = requests.get(url).content
+ soup = BeautifulSoup(data, features="lxml")
+ for tag in soup.find_all("a"):
+ link = tag.get("href")
+ # skip anchors without an href or pointing to non-JSON resources
+ if not link or not link.endswith("json"):
+ continue
+ links.append(urllib.parse.urljoin(url, link))
+ return links
+
+
+def get_weaknesses(cve_data):
+ """
+ Extract CWE IDs from CVE data.
+
+ Args:
+ cve_data (dict): The CVE data in a dictionary format.
+
+ Returns:
+ List[int]: A list of unique CWE IDs.
+
+ Examples:
+ >>> mock_cve_data1 = {
+ ... "containers": {
+ ... "cna": {
+ ... "providerMetadata": {
+ ... "orgId": "f0158376-9dc2-43b6-827c-5f631a4d8d09"
+ ... },
+ ... "title": "mod_macro buffer over-read",
+ ... "problemTypes": [
+ ... {
+ ... "descriptions": [
+ ... {
+ ... "description": "CWE-125 Out-of-bounds Read",
+ ... "lang": "en",
+ ... "cweId": "CWE-125",
+ ... "type": "CWE"
+ ... }
+ ... ]
+ ... }
+ ... ]
+ ... }
+ ... }
+ ... }
+ >>> mock_cve_data2 = {
+ ... "data_type": "CVE",
+ ... "data_format": "MITRE",
+ ... "data_version": "4.0",
+ ... "generator": {
+ ... "engine": "Vulnogram 0.0.9"
+ ... },
+ ... "CVE_data_meta": {
+ ... "ID": "CVE-2022-28614",
+ ... "ASSIGNER": "security@apache.org",
+ ... "TITLE": "read beyond bounds via ap_rwrite() ",
+ ... "STATE": "PUBLIC"
+ ... },
+ ... "problemtype": {
+ ... "problemtype_data": [
+ ... {
+ ... "description": [
+ ... {
+ ... "lang": "eng",
+ ... "value": "CWE-190 Integer Overflow or Wraparound"
+ ... }
+ ... ]
+ ... },
+ ... {
+ ... "description": [
+ ... {
+ ... "lang": "eng",
+ ... "value": "CWE-200 Exposure of Sensitive Information to an Unauthorized Actor"
+ ... }
+ ... ]
+ ... }
+ ... ]
+ ... }
+ ... }
+
+ >>> get_weaknesses(mock_cve_data1)
+ [125]
+
+ >>> get_weaknesses(mock_cve_data2)
+ [190, 200]
+ """
+ alias = get_item(cve_data, "CVE_data_meta", "ID")
+ cwe_strings = []
+ if alias:
+ problemtype_data = get_item(cve_data, "problemtype", "problemtype_data") or []
+ for problem in problemtype_data:
+ for desc in problem.get("description", []):
+ value = desc.get("value", "")
+ cwe_id_string_list = re.findall(cwe_regex, value)
+ cwe_strings.extend(cwe_id_string_list)
+ else:
+ problemTypes = cve_data.get("containers", {}).get("cna", {}).get("problemTypes", [])
+ descriptions = problemTypes[0].get("descriptions", []) if len(problemTypes) > 0 else []
+ for description in descriptions:
+ cwe_id_string = description.get("cweId", "")
+ cwe_strings.append(cwe_id_string)
+
+ weaknesses = create_weaknesses_list(cwe_strings)
+ return weaknesses
+
+
+class ApacheHTTPDImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+ """
+ Apache HTTPD Importer Pipeline
+
+ This pipeline imports security advisories from the Apache HTTPD project.
+ """
+
+ pipeline_id = "apache_httpd_importer_v2"
+ spdx_license_expression = "Apache-2.0"
+ license_url = "https://www.apache.org/licenses/LICENSE-2.0"
+ base_url = "https://httpd.apache.org/security/json/"
+ unfurl_version_ranges = True
+
+ links = []
+
+ ignorable_versions = frozenset(
+ [
+ "AGB_BEFORE_AAA_CHANGES",
+ "APACHE_1_2b1",
+ "APACHE_1_2b10",
+ "APACHE_1_2b11",
+ "APACHE_1_2b2",
+ "APACHE_1_2b3",
+ "APACHE_1_2b4",
+ "APACHE_1_2b5",
+ "APACHE_1_2b6",
+ "APACHE_1_2b7",
+ "APACHE_1_2b8",
+ "APACHE_1_2b9",
+ "APACHE_1_3_PRE_NT",
+ "APACHE_1_3a1",
+ "APACHE_1_3b1",
+ "APACHE_1_3b2",
+ "APACHE_1_3b3",
+ "APACHE_1_3b5",
+ "APACHE_1_3b6",
+ "APACHE_1_3b7",
+ "APACHE_2_0_2001_02_09",
+ "APACHE_2_0_52_WROWE_RC1",
+ "APACHE_2_0_ALPHA",
+ "APACHE_2_0_ALPHA_2",
+ "APACHE_2_0_ALPHA_3",
+ "APACHE_2_0_ALPHA_4",
+ "APACHE_2_0_ALPHA_5",
+ "APACHE_2_0_ALPHA_6",
+ "APACHE_2_0_ALPHA_7",
+ "APACHE_2_0_ALPHA_8",
+ "APACHE_2_0_ALPHA_9",
+ "APACHE_2_0_BETA_CANDIDATE_1",
+ "APACHE_BIG_SYMBOL_RENAME_POST",
+ "APACHE_BIG_SYMBOL_RENAME_PRE",
+ "CHANGES",
+ "HTTPD_LDAP_1_0_0",
+ "INITIAL",
+ "MOD_SSL_2_8_3",
+ "PCRE_3_9",
+ "POST_APR_SPLIT",
+ "PRE_APR_CHANGES",
+ "STRIKER_2_0_51_RC1",
+ "STRIKER_2_0_51_RC2",
+ "STRIKER_2_1_0_RC1",
+ "WROWE_2_0_43_PRE1",
+ "apache-1_3-merge-1-post",
+ "apache-1_3-merge-1-pre",
+ "apache-1_3-merge-2-post",
+ "apache-1_3-merge-2-pre",
+ "apache-apr-merge-3",
+ "apache-doc-split-01",
+ "dg_last_1_2_doc_merge",
+ "djg-apache-nspr-07",
+ "djg_nspr_split",
+ "moving_to_httpd_module",
+ "mpm-3",
+ "mpm-merge-1",
+ "mpm-merge-2",
+ "post_ajp_proxy",
+ "pre_ajp_proxy",
+ ]
+ )
+
+ @classmethod
+ def steps(cls):
+ return (cls.collect_and_store_advisories,)
+
+ def collect_advisories(self) -> Iterable[AdvisoryData]:
+ if not self.links:
+ self.links = fetch_links(self.base_url)
+ for link in self.links:
+ data = requests.get(link).json()
+ yield self.to_advisory(data)
+
+ def advisories_count(self) -> int:
+ """Count the number of advisories available in the JSON files."""
+ if not self.links:
+ self.links = fetch_links(self.base_url)
+ return len(self.links)
+
+ def to_advisory(self, data):
+ alias = get_item(data, "CVE_data_meta", "ID")
+ if not alias:
+ alias = get_item(data, "cveMetadata", "cveId")
+ descriptions = get_item(data, "description", "description_data") or []
+ description = None
+ for desc in descriptions:
+ if desc.get("lang") == "eng":
+ description = desc.get("value")
+ break
+
+ severities = []
+ impacts = data.get("impact") or []
+ for impact in impacts:
+ value = impact.get("other")
+ if value:
+ severities.append(
+ VulnerabilitySeverity(
+ system=APACHE_HTTPD,
+ value=value,
+ scoring_elements="",
+ )
+ )
+ break
+ reference = Reference(
+ reference_id=alias,
+ url=urllib.parse.urljoin(self.base_url, f"{alias}.json"),
+ )
+
+ versions_data = []
+ for vendor in get_item(data, "affects", "vendor", "vendor_data") or []:
+ for products in get_item(vendor, "product", "product_data") or []:
+ for version_data in get_item(products, "version", "version_data") or []:
+ versions_data.append(version_data)
+
+ fixed_versions = []
+ for timeline_object in data.get("timeline") or []:
+ timeline_value = timeline_object.get("value")
+ if "release" in timeline_value:
+ split_timeline_value = timeline_value.split(" ")
+ if "never" in timeline_value:
+ continue
+ if "release" in split_timeline_value[-1]:
+ fixed_versions.append(split_timeline_value[0])
+ if "release" in split_timeline_value[0]:
+ fixed_versions.append(split_timeline_value[-1])
+
+ affected_packages = []
+ affected_version_range = self.to_version_ranges(versions_data, fixed_versions)
+ if affected_version_range:
+ affected_packages.append(
+ AffectedPackage(
+ package=PackageURL(
+ type="apache",
+ name="httpd",
+ ),
+ affected_version_range=affected_version_range,
+ )
+ )
+
+ weaknesses = get_weaknesses(data)
+
+ return AdvisoryData(
+ advisory_id=alias,
+ aliases=[],
+ summary=description or "",
+ affected_packages=affected_packages,
+ references_v2=[reference],
+ weaknesses=weaknesses,
+ url=reference.url,
+ severities=severities,
+ )
+
+ def to_version_ranges(self, versions_data, fixed_versions):
+ constraints = []
+ for version_data in versions_data:
+ version_value = version_data["version_value"]
+ range_expression = version_data["version_affected"]
+ if range_expression not in {"<=", ">=", "?=", "!<", "="}:
+ raise ValueError(f"unknown comparator found! {range_expression}")
+ comparator_by_range_expression = {
+ ">=": ">=",
+ "!<": ">=",
+ "<=": "<=",
+ "=": "=",
+ }
+ comparator = comparator_by_range_expression.get(range_expression)
+ if comparator:
+ constraints.append(
+ VersionConstraint(comparator=comparator, version=SemverVersion(version_value))
+ )
+
+ for fixed_version in fixed_versions:
+ # Invert the `=` constraint with VersionConstraint.invert() so that each
+ # fixed version is excluded from the affected version range.
+ constraints.append(
+ VersionConstraint(
+ comparator="=",
+ version=SemverVersion(fixed_version),
+ ).invert()
+ )
+
+ return ApacheVersionRange(constraints=constraints)
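+
+
+# Illustrative sketch, not part of the module (values hypothetical): a single
+# ">=" constraint plus one fixed version combine into a range that includes
+# 2.4.0 and later but excludes the fixed 2.4.54, roughly
+# vers:apache/>=2.4.0|!=2.4.54 (the exact string form comes from univers).
+#
+# >>> pipeline = ApacheHTTPDImporterPipeline() # doctest: +SKIP
+# >>> vr = pipeline.to_version_ranges( # doctest: +SKIP
+# ... versions_data=[{"version_value": "2.4.0", "version_affected": ">="}],
+# ... fixed_versions=["2.4.54"],
+# ... )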
diff --git a/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py b/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py
new file mode 100644
index 000000000..902dd5248
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py
@@ -0,0 +1,124 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+from pathlib import Path
+from typing import Iterable
+
+from dateutil import parser as dateparser
+from fetchcode.vcs import fetch_via_vcs
+from packageurl import PackageURL
+from univers.version_constraint import VersionConstraint
+from univers.version_range import HexVersionRange
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import Reference
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.utils import is_cve
+from vulnerabilities.utils import load_yaml
+
+
+class ElixirSecurityImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+ """
+ Elixir Security Advisories Importer Pipeline
+
+ This pipeline imports security advisories for Elixir packages.
+ """
+
+ pipeline_id = "elixir_security_importer_v2"
+ spdx_license_expression = "CC0-1.0"
+ license_url = "https://github.com/dependabot/elixir-security-advisories/blob/master/LICENSE.txt"
+ repo_url = "git+https://github.com/dependabot/elixir-security-advisories"
+ unfurl_version_ranges = True
+
+ @classmethod
+ def steps(cls):
+ # clone must run first so that advisories_count and collect_advisories
+ # can read from self.vcs_response
+ return (
+ cls.clone,
+ cls.collect_and_store_advisories,
+ )
+
+ def clone(self):
+ self.log(f"Cloning `{self.repo_url}`")
+ self.vcs_response = fetch_via_vcs(self.repo_url)
+
+ def advisories_count(self) -> int:
+ base_path = Path(self.vcs_response.dest_dir)
+ count = len(list((base_path / "packages").glob("**/*.yml")))
+ return count
+
+ def collect_advisories(self) -> Iterable[AdvisoryData]:
+ try:
+ base_path = Path(self.vcs_response.dest_dir)
+ vuln = base_path / "packages"
+ for file in vuln.glob("**/*.yml"):
+ yield from self.process_file(file, base_path)
+ finally:
+ if self.vcs_response:
+ self.vcs_response.delete()
+
+ def process_file(self, file, base_path) -> Iterable[AdvisoryData]:
+ relative_path = str(file.relative_to(base_path)).strip("/")
+ advisory_url = (
+ f"https://github.com/dependabot/elixir-security-advisories/blob/master/{relative_path}"
+ )
+ yaml_file = load_yaml(str(file))
+
+ summary = yaml_file.get("description") or ""
+ pkg_name = yaml_file.get("package") or ""
+
+ cve_id = ""
+ cve = yaml_file.get("cve") or ""
+ if cve and not cve.startswith("CVE-"):
+ cve_id = f"CVE-{cve}"
+ elif cve:
+ cve_id = cve
+
+ if not cve_id or not is_cve(cve_id):
+ return
+
+ references = []
+ link = yaml_file.get("link") or ""
+ if link:
+ references.append(Reference(url=link))
+
+ constraints = []
+ vrc = HexVersionRange.version_class
+ unaffected_versions = yaml_file.get("unaffected_versions") or []
+ patched_versions = yaml_file.get("patched_versions") or []
+
+ for version in unaffected_versions:
+ constraints.append(VersionConstraint.from_string(version_class=vrc, string=version))
+
+ for version in patched_versions:
+ if version.startswith("~>"):
+ version = version[2:]
+ constraints.append(
+ VersionConstraint.from_string(version_class=vrc, string=version).invert()
+ )
+
+ affected_packages = []
+ if pkg_name:
+ affected_packages.append(
+ AffectedPackage(
+ package=PackageURL(type="hex", name=pkg_name),
+ affected_version_range=HexVersionRange(constraints=constraints),
+ )
+ )
+
+ date_published = None
+ if yaml_file.get("disclosure_date"):
+ date_published = dateparser.parse(yaml_file.get("disclosure_date"))
+
+ yield AdvisoryData(
+ advisory_id=cve_id,
+ aliases=[],
+ summary=summary,
+ references_v2=references,
+ affected_packages=affected_packages,
+ url=advisory_url,
+ date_published=date_published,
+ )
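+
+
+# Illustrative sketch, not part of the module: a hypothetical advisory YAML like
+#
+# package: some_hex_package
+# cve: 2020-12345
+# description: Example flaw.
+# patched_versions:
+# - ">= 1.2.3"
+#
+# yields an AdvisoryData with advisory_id "CVE-2020-12345" and a HexVersionRange
+# built from the inverted patched-version constraint (i.e. "< 1.2.3").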
diff --git a/vulnerabilities/pipelines/v2_importers/github_importer.py b/vulnerabilities/pipelines/v2_importers/github_importer.py
new file mode 100644
index 000000000..9ac360016
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/github_importer.py
@@ -0,0 +1,393 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import logging
+from traceback import format_exc as traceback_format_exc
+from typing import Callable
+from typing import Iterable
+from typing import List
+from typing import Optional
+
+from cwe2.database import Database
+from dateutil import parser as dateparser
+from packageurl import PackageURL
+from univers.version_range import RANGE_CLASS_BY_SCHEMES
+from univers.version_range import build_range_from_github_advisory_constraint
+
+from vulnerabilities import severity_systems
+from vulnerabilities import utils
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import Reference
+from vulnerabilities.importer import VulnerabilitySeverity
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.utils import dedupe
+from vulnerabilities.utils import get_cwe_id
+from vulnerabilities.utils import get_item
+
+
+class GitHubAPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+ """
+ GitHub Importer Pipeline
+
+ This pipeline imports security advisories from GitHub Security Advisories.
+ """
+
+ pipeline_id = "github_importer_v2"
+ spdx_license_expression = "CC-BY-4.0"
+ license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md"
+ unfurl_version_ranges = True
+
+ ignorable_versions = frozenset(
+ [
+ "0.1-bulbasaur",
+ "0.1-charmander",
+ "0.3m1",
+ "0.3m2",
+ "0.3m3",
+ "0.3m4",
+ "0.3m5",
+ "0.4m1",
+ "0.4m2",
+ "0.4m3",
+ "0.4m4",
+ "0.4m5",
+ "0.5m1",
+ "0.5m2",
+ "0.5m3",
+ "0.5m4",
+ "0.5m5",
+ "0.6m1",
+ "0.6m2",
+ "0.6m3",
+ "0.6m4",
+ "0.6m5",
+ "0.6m6",
+ "0.7.10p1",
+ "0.7.11p1",
+ "0.7.11p2",
+ "0.7.11p3",
+ "0.8.1p1",
+ "0.8.3p1",
+ "0.8.4p1",
+ "0.8.4p2",
+ "0.8.6p1",
+ "0.8.7p1",
+ "0.9-doduo",
+ "0.9-eevee",
+ "0.9-fearow",
+ "0.9-gyarados",
+ "0.9-horsea",
+ "0.9-ivysaur",
+ "2013-01-21T20:33:09+0100",
+ "2013-01-23T17:11:52+0100",
+ "2013-02-01T20:50:46+0100",
+ "2013-02-02T19:59:03+0100",
+ "2013-02-02T20:23:17+0100",
+ "2013-02-08T17:40:57+0000",
+ "2013-03-27T16:32:26+0100",
+ "2013-05-09T12:47:53+0200",
+ "2013-05-10T17:55:56+0200",
+ "2013-05-14T20:16:05+0200",
+ "2013-06-01T10:32:51+0200",
+ "2013-07-19T09:11:08+0000",
+ "2013-08-12T21:48:56+0200",
+ "2013-09-11T19-27-10",
+ "2013-12-23T17-51-15",
+ "2014-01-12T15-52-10",
+ "2.0.1rc2-git",
+ "3.0.0b3-",
+ "3.0b6dev-r41684",
+ "-class.-jw.util.version.Version-",
+ "vulnerabilities",
+ ]
+ )
+
+ @classmethod
+ def steps(cls):
+ return (cls.collect_and_store_advisories,)
+
+ package_type_by_github_ecosystem = {
+ # "MAVEN": "maven",
+ # "NUGET": "nuget",
+ # "COMPOSER": "composer",
+ # "PIP": "pypi",
+ # "RUBYGEMS": "gem",
+ "NPM": "npm",
+ # "RUST": "cargo",
+ # "GO": "golang",
+ }
+
+ def advisories_count(self):
+ advisory_query = """
+ query{
+ securityVulnerabilities(first: 0, ecosystem: %s) {
+ totalCount
+ }
+ }
+ """
+ advisory_counts = 0
+ for ecosystem in self.package_type_by_github_ecosystem.keys():
+ graphql_query = {"query": advisory_query % (ecosystem)}
+ response = utils.fetch_github_graphql_query(graphql_query)
+ advisory_counts += get_item(response, "data", "securityVulnerabilities", "totalCount")
+ return advisory_counts
+
+ def collect_advisories(self) -> Iterable[AdvisoryData]:
+
+ # TODO: We will try to gather more info from GH API
+ # Check https://github.com/nexB/vulnerablecode/issues/1039#issuecomment-1366458885
+ # Check https://github.com/nexB/vulnerablecode/issues/645
+ # The first '%s' is the ecosystem, one of {'MAVEN', 'COMPOSER', 'NUGET', 'RUBYGEMS', 'PYPI', 'NPM', 'RUST'}.
+ # The second '%s' is the pagination expression: empty for the first request,
+ # then "after: <endCursor>" for each subsequent page.
+ advisory_query = """
+ query{
+ securityVulnerabilities(first: 100, ecosystem: %s, %s) {
+ edges {
+ node {
+ advisory {
+ identifiers {
+ type
+ value
+ }
+ summary
+ references {
+ url
+ }
+ severity
+ cwes(first: 10){
+ nodes {
+ cweId
+ }
+ }
+ publishedAt
+ }
+ firstPatchedVersion{
+ identifier
+ }
+ package {
+ name
+ }
+ vulnerableVersionRange
+ }
+ }
+ pageInfo {
+ hasNextPage
+ endCursor
+ }
+ }
+ }
+ """
+ for ecosystem, package_type in self.package_type_by_github_ecosystem.items():
+ end_cursor_exp = ""
+ while True:
+ graphql_query = {"query": advisory_query % (ecosystem, end_cursor_exp)}
+ response = utils.fetch_github_graphql_query(graphql_query)
+
+ page_info = get_item(response, "data", "securityVulnerabilities", "pageInfo")
+ end_cursor = get_item(page_info, "endCursor")
+ if end_cursor:
+ end_cursor = f'"{end_cursor}"'
+ end_cursor_exp = f"after: {end_cursor}"
+
+ yield from process_response(response, package_type=package_type)
+
+ if not get_item(page_info, "hasNextPage"):
+ break
+
+
+def get_purl(pkg_type: str, github_name: str, logger: Callable = None) -> Optional[PackageURL]:
+ """
+ Return a PackageURL by splitting the `github_name` using the `pkg_type`
+ convention. Return None and log an error if we can not split or it is an
+ unknown package type.
+
+ For example::
+ >>> expected = PackageURL(type='maven', namespace='org.apache.commons', name='commons-lang3')
+ >>> assert get_purl("maven", "org.apache.commons:commons-lang3") == expected
+
+ >>> expected = PackageURL(type="composer", namespace="foo", name="bar")
+ >>> assert get_purl("composer", "foo/bar") == expected
+ """
+ if pkg_type == "maven":
+ if ":" not in github_name:
+ if logger:
+ logger(f"get_purl: Invalid maven package name {github_name}", level=logging.ERROR)
+ return
+ ns, _, name = github_name.partition(":")
+ return PackageURL(type=pkg_type, namespace=ns, name=name)
+
+ if pkg_type in ("composer", "npm"):
+ if "/" not in github_name:
+ return PackageURL(type=pkg_type, name=github_name)
+ vendor, _, name = github_name.partition("/")
+ return PackageURL(type=pkg_type, namespace=vendor, name=name)
+
+ if pkg_type in ("nuget", "pypi", "gem", "golang", "npm", "cargo"):
+ return PackageURL(type=pkg_type, name=github_name)
+
+ if logger:
+ logger(f"get_purl: Unknown package type {pkg_type}", level=logging.ERROR)
+
+
+def process_response(
+ resp: dict, package_type: str, logger: Callable = None
+) -> Iterable[AdvisoryData]:
+ """
+ Yield `AdvisoryData` from the ``resp`` GitHub API response for the given ``package_type``.
+ """
+ vulnerabilities = get_item(resp, "data", "securityVulnerabilities", "edges") or []
+ if not vulnerabilities:
+ if logger:
+ logger(
+ f"No vulnerabilities found for package_type: {package_type!r} in response: {resp!r}",
+ level=logging.ERROR,
+ )
+ return
+
+ for vulnerability in vulnerabilities:
+ aliases = []
+ affected_packages = []
+ github_advisory = get_item(vulnerability, "node")
+ if not github_advisory:
+ if logger:
+ logger(f"No node found in {vulnerability!r}", level=logging.ERROR)
+ continue
+
+ advisory = get_item(github_advisory, "advisory")
+ if not advisory:
+ if logger:
+ logger(f"No advisory found in {github_advisory!r}", level=logging.ERROR)
+ continue
+
+ summary = get_item(advisory, "summary") or ""
+
+ references = get_item(advisory, "references") or []
+ if references:
+ urls = (ref["url"] for ref in references)
+ references = [Reference.from_url(u) for u in urls]
+
+ date_published = get_item(advisory, "publishedAt")
+ if date_published:
+ date_published = dateparser.parse(date_published)
+
+ name = get_item(github_advisory, "package", "name")
+ if name:
+ purl = get_purl(pkg_type=package_type, github_name=name, logger=logger)
+ if purl:
+ affected_range = get_item(github_advisory, "vulnerableVersionRange")
+ fixed_version = get_item(github_advisory, "firstPatchedVersion", "identifier")
+ if affected_range:
+ try:
+ affected_range = build_range_from_github_advisory_constraint(
+ package_type, affected_range
+ )
+ except Exception as e:
+ if logger:
+ logger(
+ f"Could not parse affected range {affected_range!r} {e!r} \n {traceback_format_exc()}",
+ level=logging.ERROR,
+ )
+ affected_range = None
+ if fixed_version:
+ try:
+ fixed_version = RANGE_CLASS_BY_SCHEMES[package_type].version_class(
+ fixed_version
+ )
+ except Exception as e:
+ if logger:
+ logger(
+ f"Invalid fixed version {fixed_version!r} {e!r} \n {traceback_format_exc()}",
+ level=logging.ERROR,
+ )
+ fixed_version = None
+ if affected_range or fixed_version:
+ affected_packages.append(
+ AffectedPackage(
+ package=purl,
+ affected_version_range=affected_range,
+ fixed_version=fixed_version,
+ )
+ )
+ identifiers = get_item(advisory, "identifiers") or []
+ ghsa_id = ""
+ severities = []
+ for identifier in identifiers:
+ value = identifier["value"]
+ identifier_type = identifier["type"]
+ aliases.append(value)
+ # attach the GHSA with severity score
+ if identifier_type == "GHSA":
+ # Each node has at most one GHSA identifier; remember it and
+ # attach the severity score to its matching reference below
+ ghsa_id = value
+ for ref in references:
+ if ref.reference_id == value:
+ severity = get_item(advisory, "severity")
+ if severity:
+ severities = [
+ VulnerabilitySeverity(
+ system=severity_systems.CVSS31_QUALITY,
+ value=severity,
+ url=ref.url,
+ )
+ ]
+
+ elif identifier_type == "CVE":
+ pass
+ else:
+ if logger:
+ logger(
+ f"Unknown identifier type {identifier_type!r} and value {value!r}",
+ level=logging.ERROR,
+ )
+
+ weaknesses = get_cwes_from_github_advisory(advisory, logger)
+
+ aliases = sorted(dedupe(aliases))
+
+ # Prefer the GHSA id as the advisory id and fall back to the first alias
+ advisory_id = ghsa_id or (aliases[0] if aliases else "")
+ if advisory_id in aliases:
+ aliases.remove(advisory_id)
+
+ yield AdvisoryData(
+ advisory_id=advisory_id,
+ aliases=aliases,
+ summary=summary,
+ references_v2=references,
+ severities=severities,
+ affected_packages=affected_packages,
+ date_published=date_published,
+ weaknesses=weaknesses,
+ url=f"https://github.com/advisories/{ghsa_id}",
+ )
+
+
+def get_cwes_from_github_advisory(advisory, logger=None) -> List[int]:
+ """
+ Return a list of CWE ids as integers (e.g. [522]) extracted from the
+ ``advisory`` cwes nodes (e.g. [{'cweId': 'CWE-522'}]) by stripping the
+ CWE- prefix, keeping only ids that exist in the CWE database.
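+
+ For example::
+
+ >>> get_cwes_from_github_advisory({"cwes": {"nodes": [{"cweId": "CWE-522"}]}})
+ [522]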
+ """
+ weaknesses = []
+ db = Database()
+ cwe_list = get_item(advisory, "cwes", "nodes") or []
+ for cwe_item in cwe_list:
+ cwe_string = get_item(cwe_item, "cweId")
+ if cwe_string:
+ cwe_id = get_cwe_id(cwe_string)
+ try:
+ db.get(cwe_id)
+ weaknesses.append(cwe_id)
+ except Exception as e:
+ if logger:
+ logger(f"Invalid CWE id {e!r} \n {traceback_format_exc()}", level=logging.ERROR)
+ return weaknesses
diff --git a/vulnerabilities/pipelines/v2_importers/gitlab_importer.py b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py
new file mode 100644
index 000000000..1f175f07f
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py
@@ -0,0 +1,329 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import logging
+import traceback
+from pathlib import Path
+from typing import Iterable
+from typing import List
+from typing import Tuple
+
+import pytz
+import saneyaml
+from dateutil import parser as dateparser
+from fetchcode.vcs import fetch_via_vcs
+from packageurl import PackageURL
+from univers.version_range import RANGE_CLASS_BY_SCHEMES
+from univers.version_range import VersionRange
+from univers.version_range import from_gitlab_native
+from univers.versions import Version
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import Reference
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.utils import build_description
+from vulnerabilities.utils import get_advisory_url
+from vulnerabilities.utils import get_cwe_id
+
+
+class GitLabImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+ """
+ GitLab Importer Pipeline
+
+ Collect advisories from the GitLab Advisory Database (Open Source Edition).
+ """
+
+ pipeline_id = "gitlab_importer_v2"
+ spdx_license_expression = "MIT"
+ license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE"
+ repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/"
+ unfurl_version_ranges = True
+
+ @classmethod
+ def steps(cls):
+ return (
+ cls.clone,
+ cls.collect_and_store_advisories,
+ cls.clean_downloads,
+ )
+
+ purl_type_by_gitlab_scheme = {
+ "conan": "conan",
+ "gem": "gem",
+ # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742
+ # "go": "golang",
+ "maven": "maven",
+ "npm": "npm",
+ "nuget": "nuget",
+ "packagist": "composer",
+ "pypi": "pypi",
+ }
+
+ gitlab_scheme_by_purl_type = {v: k for k, v in purl_type_by_gitlab_scheme.items()}
+
+ def clone(self):
+ self.log(f"Cloning `{self.repo_url}`")
+ self.vcs_response = fetch_via_vcs(self.repo_url)
+
+ def advisories_count(self):
+ root = Path(self.vcs_response.dest_dir)
+ return sum(1 for _ in root.rglob("*.yml"))
+
+ def collect_advisories(self) -> Iterable[AdvisoryData]:
+ base_path = Path(self.vcs_response.dest_dir)
+
+ for file_path in base_path.rglob("*.yml"):
+ if file_path.parent == base_path:
+ continue
+
+ gitlab_type, _, _ = parse_advisory_path(
+ base_path=base_path,
+ file_path=file_path,
+ )
+
+ if gitlab_type not in self.purl_type_by_gitlab_scheme:
+ self.log(
+ f"Unknown package type {gitlab_type!r} in {file_path!r}",
+ level=logging.ERROR,
+ )
+ continue
+
+ advisory = parse_gitlab_advisory(
+ file=file_path,
+ base_path=base_path,
+ gitlab_scheme_by_purl_type=self.gitlab_scheme_by_purl_type,
+ purl_type_by_gitlab_scheme=self.purl_type_by_gitlab_scheme,
+ logger=self.log,
+ )
+
+ if not advisory:
+ self.log(
+ f"Failed to parse advisory from {file_path!r}",
+ level=logging.ERROR,
+ )
+ continue
+
+ yield advisory
+
+ def clean_downloads(self):
+ if self.vcs_response:
+ self.log(f"Removing cloned repository")
+ self.vcs_response.delete()
+
+ def on_failure(self):
+ self.clean_downloads()
+
+
+def parse_advisory_path(base_path: Path, file_path: Path) -> Tuple[str, str, str]:
+ """
+ Parse a GitLab advisory file path and return a 3-tuple of:
+ (gitlab_type, package_slug, vulnerability_id)
+
+ For example::
+
+ >>> base_path = Path("/tmp/tmpi1klhpmd/checkout")
+ >>> file_path=Path("/tmp/tmpi1klhpmd/checkout/pypi/gradio/CVE-2021-43831.yml")
+ >>> parse_advisory_path(base_path=base_path, file_path=file_path)
+ ('pypi', 'gradio', 'CVE-2021-43831')
+
+ >>> file_path=Path("/tmp/tmpi1klhpmd/checkout/nuget/github.com/beego/beego/v2/nuget/CVE-2021-43831.yml")
+ >>> parse_advisory_path(base_path=base_path, file_path=file_path)
+ ('nuget', 'github.com/beego/beego/v2/nuget', 'CVE-2021-43831')
+
+ >>> file_path = Path("/tmp/tmpi1klhpmd/checkout/npm/@express/beego/beego/v2/CVE-2021-43831.yml")
+ >>> parse_advisory_path(base_path=base_path, file_path=file_path)
+ ('npm', '@express/beego/beego/v2', 'CVE-2021-43831')
+ """
+ relative_path_segments = file_path.relative_to(base_path).parts
+ gitlab_type = relative_path_segments[0]
+ vuln_id = file_path.stem
+ package_slug = "/".join(relative_path_segments[1:-1])
+
+ return gitlab_type, package_slug, vuln_id
+
+
+def get_purl(package_slug, purl_type_by_gitlab_scheme, logger):
+ """
+ Return a PackageURL built from a ``package_slug``, or None if it can not be parsed.
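+
+ For example::
+
+ >>> expected = PackageURL(type="nuget", name="NuGet.Core")
+ >>> assert get_purl("nuget/NuGet.Core", {"nuget": "nuget"}, print) == expected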
+ """
+ parts = [p for p in package_slug.strip("/").split("/") if p]
+ gitlab_scheme = parts[0]
+ purl_type = purl_type_by_gitlab_scheme[gitlab_scheme]
+ if gitlab_scheme == "go":
+ name = "/".join(parts[1:])
+ return PackageURL(type=purl_type, namespace=None, name=name)
+ # if package slug is of the form:
+ # "nuget/NuGet.Core"
+ if len(parts) == 2:
+ name = parts[1]
+ return PackageURL(type=purl_type, name=name)
+ # if package slug is of the form:
+ # "nuget/github.com/beego/beego/v2/nuget"
+ if len(parts) >= 3:
+ name = parts[-1]
+ namespace = "/".join(parts[1:-1])
+ return PackageURL(type=purl_type, namespace=namespace, name=name)
+ logger(f"get_purl: package_slug can not be parsed: {package_slug!r}", level=logging.ERROR)
+ return
+
+
+def extract_affected_packages(
+ affected_version_range: VersionRange,
+ fixed_versions: List[Version],
+ purl: PackageURL,
+) -> Iterable[AffectedPackage]:
+ """
+ Yield AffectedPackage objects, one for each fixed_version
+
+ GitLab advisory data provides a list of fixed_versions and a single
+ affected_version_range. Since we can not determine which fixed version
+ closes which part of the range, we store every fixed_version with the
+ same affected_version_range; the GitLabBasicImprover later infers the
+ details from this advisory data.
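+
+ For example::
+
+ >>> from univers.version_range import PypiVersionRange
+ >>> from univers.versions import PypiVersion
+ >>> purl = PackageURL(type="pypi", name="foo")
+ >>> vr = PypiVersionRange.from_native("<1.0.1")
+ >>> fixed = [PypiVersion("1.0.1"), PypiVersion("1.0.2")]
+ >>> [str(ap.fixed_version) for ap in extract_affected_packages(vr, fixed, purl)]
+ ['1.0.1', '1.0.2']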
+ """
+ for fixed_version in fixed_versions:
+ yield AffectedPackage(
+ package=purl,
+ fixed_version=fixed_version,
+ affected_version_range=affected_version_range,
+ )
+
+
+def parse_gitlab_advisory(
+ file, base_path, gitlab_scheme_by_purl_type, purl_type_by_gitlab_scheme, logger
+):
+ """
+ Parse a GitLab advisory file and return an AdvisoryData or None.
+ These files are YAML. There is a JSON schema documented at
+ https://gitlab.com/gitlab-org/advisories-community/-/blob/main/ci/schema/schema.json
+
+ Sample YAML file:
+ ---
+ identifier: "GMS-2018-26"
+ package_slug: "packagist/amphp/http"
+ title: "Incorrect header injection check"
+ description: "amphp/http isn't properly protected against HTTP header injection."
+ pubdate: "2018-03-15"
+ affected_range: "<1.0.1"
+ fixed_versions:
+ - "v1.0.1"
+ urls:
+ - "https://github.com/amphp/http/pull/4"
+ cwe_ids:
+ - "CWE-1035"
+ - "CWE-937"
+ identifiers:
+ - "GMS-2018-26"
+ """
+ with open(file) as f:
+ gitlab_advisory = saneyaml.load(f)
+ if not isinstance(gitlab_advisory, dict):
+ logger(
+ f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}",
+ level=logging.ERROR,
+ )
+ return
+
+ # refer to schema here https://gitlab.com/gitlab-org/advisories-community/-/blob/main/ci/schema/schema.json
+ aliases = gitlab_advisory.get("identifiers")
+ advisory_id = gitlab_advisory.get("identifier")
+ if advisory_id in aliases:
+ aliases.remove(advisory_id)
+ summary = build_description(gitlab_advisory.get("title"), gitlab_advisory.get("description"))
+ urls = gitlab_advisory.get("urls")
+ references = [Reference.from_url(u) for u in urls]
+
+ cwe_ids = gitlab_advisory.get("cwe_ids") or []
+ cwe_list = list(map(get_cwe_id, cwe_ids))
+
+ date_published = dateparser.parse(gitlab_advisory.get("pubdate"))
+ date_published = date_published.replace(tzinfo=pytz.UTC)
+ package_slug = gitlab_advisory.get("package_slug")
+ advisory_url = get_advisory_url(
+ file=file,
+ base_path=base_path,
+ url="https://gitlab.com/gitlab-org/advisories-community/-/blob/main/",
+ )
+ purl: PackageURL = get_purl(
+ package_slug=package_slug,
+ purl_type_by_gitlab_scheme=purl_type_by_gitlab_scheme,
+ logger=logger,
+ )
+ if not purl:
+ logger(
+ f"parse_yaml_file: purl is not valid: {file!r} {package_slug!r}", level=logging.ERROR
+ )
+ return AdvisoryData(
+ aliases=aliases,
+ summary=summary,
+ references=references,
+ date_published=date_published,
+ url=advisory_url,
+ )
+ affected_version_range = None
+ fixed_versions = gitlab_advisory.get("fixed_versions") or []
+ affected_range = gitlab_advisory.get("affected_range")
+ gitlab_native_schemes = {"pypi", "gem", "npm", "go", "packagist", "conan"}
+ vrc: VersionRange = RANGE_CLASS_BY_SCHEMES[purl.type]
+ gitlab_scheme = gitlab_scheme_by_purl_type[purl.type]
+ try:
+ if affected_range:
+ if gitlab_scheme in gitlab_native_schemes:
+ affected_version_range = from_gitlab_native(
+ gitlab_scheme=gitlab_scheme, string=affected_range
+ )
+ else:
+ affected_version_range = vrc.from_native(affected_range)
+ except Exception as e:
+ logger(
+ f"parse_yaml_file: affected_range is not parsable: {affected_range!r} for: {purl!s} error: {e!r}\n {traceback.format_exc()}",
+ level=logging.ERROR,
+ )
+
+ parsed_fixed_versions = []
+ for fixed_version in fixed_versions:
+ try:
+ fixed_version = vrc.version_class(fixed_version)
+ parsed_fixed_versions.append(fixed_version)
+ except Exception as e:
+ logger(
+ f"parse_yaml_file: fixed_version is not parsable`: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}",
+ level=logging.ERROR,
+ )
+
+ if parsed_fixed_versions:
+ affected_packages = list(
+ extract_affected_packages(
+ affected_version_range=affected_version_range,
+ fixed_versions=parsed_fixed_versions,
+ purl=purl,
+ )
+ )
+ else:
+ if not affected_version_range:
+ affected_packages = []
+ else:
+ affected_packages = [
+ AffectedPackage(
+ package=purl,
+ affected_version_range=affected_version_range,
+ )
+ ]
+ return AdvisoryData(
+ advisory_id=advisory_id,
+ aliases=aliases,
+ summary=summary,
+ references_v2=references,
+ date_published=date_published,
+ affected_packages=affected_packages,
+ weaknesses=cwe_list,
+ url=advisory_url,
+ )
diff --git a/vulnerabilities/pipelines/v2_importers/npm_importer.py b/vulnerabilities/pipelines/v2_importers/npm_importer.py
new file mode 100644
index 000000000..19d21c987
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/npm_importer.py
@@ -0,0 +1,178 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+# Author: Navonil Das (@NavonilDas)
+
+from pathlib import Path
+from typing import Iterable
+from typing import Optional
+
+import pytz
+from dateutil.parser import parse
+from fetchcode.vcs import fetch_via_vcs
+from packageurl import PackageURL
+from univers.version_range import NpmVersionRange
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import Reference
+from vulnerabilities.importer import VulnerabilitySeverity
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.severity_systems import CVSSV2
+from vulnerabilities.severity_systems import CVSSV3
+from vulnerabilities.utils import build_description
+from vulnerabilities.utils import load_json
+
+
+class NpmImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+ """
+ Node.js Security Working Group importer pipeline
+
+ Import advisories for npm packages curated by the Node.js Security Working Group.
+ """
+
+ pipeline_id = "nodejs_security_wg"
+ spdx_license_expression = "MIT"
+ license_url = "https://github.com/nodejs/security-wg/blob/main/LICENSE.md"
+ repo_url = "git+https://github.com/nodejs/security-wg"
+ unfurl_version_ranges = True
+
+ @classmethod
+ def steps(cls):
+ return (
+ cls.clone,
+ cls.collect_and_store_advisories,
+ cls.clean_downloads,
+ )
+
+ def clone(self):
+ self.log(f"Cloning `{self.repo_url}`")
+ self.vcs_response = fetch_via_vcs(self.repo_url)
+
+ def advisories_count(self):
+ vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm"
+ return sum(1 for _ in vuln_directory.glob("*.json"))
+
+ def collect_advisories(self) -> Iterable[AdvisoryData]:
+ vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm"
+
+ for advisory in vuln_directory.glob("*.json"):
+ yield self.to_advisory_data(advisory)
+
+ def to_advisory_data(self, file: Path) -> Optional[AdvisoryData]:
+ if file.name == "index.json":
+ self.log(f"Skipping {file.name} file")
+ return
+ data = load_json(file)
+ id = data.get("id")
+ description = data.get("overview") or ""
+ summary = data.get("title") or ""
+ # TODO: Take care of description
+ date_published = None
+ if isinstance(data.get("created_at"), str):
+ date_published = parse(data.get("created_at")).replace(tzinfo=pytz.UTC)
+ references = []
+ cvss_vector = data.get("cvss_vector")
+ cvss_score = data.get("cvss_score")
+ severities = []
+ if cvss_vector and cvss_vector.startswith("CVSS:3.0/"):
+ severities.append(
+ VulnerabilitySeverity(
+ system=CVSSV3,
+ value=cvss_score,
+ url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json",
+ )
+ )
+ if cvss_vector and cvss_vector.startswith("CVSS:2.0/"):
+ severities.append(
+ VulnerabilitySeverity(
+ system=CVSSV2,
+ value=cvss_score,
+ url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json",
+ )
+ )
+
+ advisory_reference = Reference(
+ url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json",
+ reference_id=id,
+ )
+
+ for ref in data.get("references") or []:
+ references.append(
+ Reference(
+ url=ref,
+ )
+ )
+
+ if advisory_reference not in references:
+ references.append(advisory_reference)
+
+ package_name = data.get("module_name")
+ affected_packages = []
+ if package_name:
+ affected_packages.append(self.get_affected_package(data, package_name))
+ advsisory_aliases = data.get("cves") or []
+
+ return AdvisoryData(
+ advisory_id=f"npm-{id}",
+ aliases=advisory_aliases,
+ summary=build_description(summary=summary, description=description),
+ date_published=date_published,
+ affected_packages=affected_packages,
+ references_v2=references,
+ severities=severities,
+ url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json",
+ )
+
+ def get_affected_package(self, data, package_name):
+ affected_version_range = None
+ unaffected_version_range = None
+ fixed_version = None
+
+ vulnerable_range = data.get("vulnerable_versions") or ""
+ patched_range = data.get("patched_versions") or ""
+
+ # https://github.com/nodejs/security-wg/blob/cfaa51cc5c83f01eea61b69658f7bc76a77c5979/vuln/npm/213.json#L14
+ if vulnerable_range == "<=99.999.99999":
+ vulnerable_range = "*"
+ if vulnerable_range:
+ affected_version_range = NpmVersionRange.from_native(vulnerable_range)
+
+ # https://github.com/nodejs/security-wg/blob/cfaa51cc5c83f01eea61b69658f7bc76a77c5979/vuln/npm/213.json#L15
+ if patched_range == "<0.0.0":
+ patched_range = None
+ if patched_range:
+ unaffected_version_range = NpmVersionRange.from_native(patched_range)
+
+ # We only store single fixed versions and not a range of fixed versions
+ # If there is a single constraint in the unaffected_version_range
+ # having comparator as ">=" then we store that as the fixed version
+ if unaffected_version_range and len(unaffected_version_range.constraints) == 1:
+ constraint = unaffected_version_range.constraints[0]
+ if constraint.comparator == ">=":
+ fixed_version = constraint.version
+
+ return AffectedPackage(
+ package=PackageURL(
+ type="npm",
+ name=package_name,
+ ),
+ affected_version_range=affected_version_range,
+ fixed_version=fixed_version,
+ )
+
+ def clean_downloads(self):
+ if self.vcs_response:
+ self.log(f"Removing cloned repository")
+ self.vcs_response.delete()
+
+ def on_failure(self):
+ self.clean_downloads()
diff --git a/vulnerabilities/pipelines/v2_importers/nvd_importer.py b/vulnerabilities/pipelines/v2_importers/nvd_importer.py
new file mode 100644
index 000000000..1166ac8ef
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/nvd_importer.py
@@ -0,0 +1,338 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import gzip
+import json
+import logging
+from datetime import date
+from traceback import format_exc as traceback_format_exc
+from typing import Iterable
+
+import attr
+import requests
+from dateutil import parser as dateparser
+
+from vulnerabilities import severity_systems
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import Reference
+from vulnerabilities.importer import VulnerabilitySeverity
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.utils import get_cwe_id
+from vulnerabilities.utils import get_item
+
+
+class NVDImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+ """
+ NVD Importer Pipeline
+
+ Collect advisories from NVD.
+ """
+
+ pipeline_id = "nvd_importer_v2"
+ # See https://github.com/nexB/vulnerablecode/issues/665 for follow up
+ spdx_license_expression = (
+ "LicenseRef-scancode-us-govt-public-domain AND LicenseRef-scancode-cve-tou"
+ )
+ license_url = "https://nvd.nist.gov/general/FAQ-Sections/General-FAQs#faqLink7"
+ notice = """
+ See https://nvd.nist.gov/general/FAQ-Sections/General-FAQs#faqLink7
+ All NVD data is freely available from our data feeds
+ (https://nvd.nist.gov/vuln/data-feeds). There are no fees, licensing
+ restrictions, or even a requirement to register. All NIST publications are
+ available in the public domain according to Title 17 of the United States
+ Code. Acknowledgment of the NVD when using our information is appreciated.
+ In addition, please email nvd@nist.gov to let us know how the information is
+ being used
+
+ See also https://cve.mitre.org/about/termsofuse.html
+ Terms of Use
+ LICENSE
+ [...]
+ CVE Usage: MITRE hereby grants you a perpetual, worldwide, non-exclusive, no-
+ charge, royalty-free, irrevocable copyright license to reproduce, prepare
+ derivative works of, publicly display, publicly perform, sublicense, and
+ distribute Common Vulnerabilities and Exposures (CVE®). Any copy you make for
+ such purposes is authorized provided that you reproduce MITRE's copyright
+ designation and this license in any such copy. DISCLAIMERS
+
+ ALL DOCUMENTS AND THE INFORMATION CONTAINED THEREIN PROVIDED BY MITRE ARE
+ PROVIDED ON AN "AS IS" BASIS AND THE CONTRIBUTOR, THE ORGANIZATION HE/SHE
+ REPRESENTS OR IS SPONSORED BY (IF ANY), THE MITRE CORPORATION, ITS BOARD OF
+ TRUSTEES, OFFICERS, AGENTS, AND EMPLOYEES, DISCLAIM ALL WARRANTIES, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
+ INFORMATION THEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
+ MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ """
+
+ @classmethod
+ def steps(cls):
+ return (cls.collect_and_store_advisories,)
+
+ def advisories_count(self):
+ url = "https://services.nvd.nist.gov/rest/json/cves/2.0?resultsPerPage=1"
+
+ advisory_count = 0
+ try:
+ response = requests.get(url)
+ response.raise_for_status()
+ data = response.json()
+ except requests.HTTPError as http_err:
+ self.log(
+ f"HTTP error occurred: {http_err} \n {traceback_format_exc()}",
+ level=logging.ERROR,
+ )
+ return advisory_count
+
+ advisory_count = data.get("totalResults", 0)
+ return advisory_count
+
+ def collect_advisories(self) -> Iterable[AdvisoryData]:
+ for _year, cve_data in fetch_cve_data_1_1(logger=self.log):
+ yield from to_advisories(cve_data=cve_data)
+
+
+# Isolating network calls for simplicity of testing
+def fetch(url, logger=None):
+ if logger:
+ logger(f"Fetching `{url}`")
+ gz_file = requests.get(url)
+ data = gzip.decompress(gz_file.content)
+ try:
+ data = data.decode("utf-8")
+ except UnicodeDecodeError:
+ logger(f"Failed to decode data from {url}")
+ return {}
+ return json.loads(data)
+
+
+def fetch_cve_data_1_1(starting_year=2002, logger=None):
+ """
+ Yield tuples of (year, CVE data mapping) fetched from the NVD JSON 1.1
+ feeds, one for each year from ``starting_year`` (default 2002) through
+ the current year.
+ """
+ current_year = date.today().year
+ # NVD json feeds start from 2002.
+ for year in range(starting_year, current_year + 1):
+ download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz"
+ yield year, fetch(url=download_url, logger=logger)
+
+
+def to_advisories(cve_data):
+ """
+ Yield AdvisoryData objects from a CVE json feed.
+ """
+ for cve_item in CveItem.from_cve_data(cve_data=cve_data):
+ if cve_item.is_related_to_hardware or not cve_item.cve_id:
+ continue
+ yield cve_item.to_advisory()
+
+
+@attr.attributes
+class CveItem:
+ cve_item = attr.attrib(default=attr.Factory(dict), type=dict)
+
+ @classmethod
+ def to_advisories(cls, cve_data, skip_hardware=True):
+ """
+ Yield AdvisoryData objects from ``cve_data`` for a CVE JSON 1.1 feed,
+ skipping hardware-related CVEs when ``skip_hardware`` is True.
+ """
+ for cve_item in CveItem.from_cve_data(cve_data=cve_data, skip_hardware=skip_hardware):
+ yield cve_item.to_advisory()
+
+ @classmethod
+ def from_cve_data(cls, cve_data, skip_hardware=True):
+ """
+ Yield CVE items mapping from a cve_data list of CVE mappings from the NVD.
+ """
+ for cve_item in cve_data.get("CVE_Items") or []:
+ if not cve_item:
+ continue
+ if not isinstance(cve_item, dict):
+ raise ValueError(f"cve_item: {cve_item!r} is not a mapping")
+ cve_item = cls(cve_item=cve_item)
+ if skip_hardware and cve_item.is_related_to_hardware:
+ continue
+ yield cve_item
+
+ @property
+ def cve_id(self):
+ return self.cve_item["cve"]["CVE_data_meta"]["ID"]
+
+ @property
+ def summary(self):
+ """
+ Return a descriptive summary.
+ """
+ # In 99% of cases len(cve_item['cve']['description']['description_data']) == 1 , so
+ # this usually returns cve_item['cve']['description']['description_data'][0]['value']
+ # In the remaining 1% cases this returns the longest summary.
+ # FIXME: we should return the full description WITH the summary as the first line instead
+ summaries = []
+ for desc in get_item(self.cve_item, "cve", "description", "description_data") or []:
+ if desc.get("value"):
+ summaries.append(desc["value"])
+ return max(summaries, key=len) if summaries else None
+
+ @property
+ def cpes(self):
+ """
+ Return a list of unique CPE strings for this CVE.
+ """
+ # FIXME: we completely ignore the configurations here
+ cpes = []
+ for node in get_item(self.cve_item, "configurations", "nodes") or []:
+ for cpe_data in node.get("cpe_match") or []:
+ cpe23_uri = cpe_data.get("cpe23Uri")
+ if cpe23_uri and cpe23_uri not in cpes:
+ cpes.append(cpe23_uri)
+ return cpes
+
+ @property
+ def severities(self):
+ """
+ Return a list of VulnerabilitySeverity for this CVE.
+ """
+ severities = []
+ impact = self.cve_item.get("impact") or {}
+ base_metric_v4 = impact.get("baseMetricV4") or {}
+ if base_metric_v4:
+ cvss_v4 = base_metric_v4.get("cvssV4") or {}
+ vs = VulnerabilitySeverity(
+ system=severity_systems.CVSSV4,
+ value=str(cvss_v4.get("baseScore") or ""),
+ scoring_elements=str(cvss_v4.get("vectorString") or ""),
+ url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}",
+ )
+ severities.append(vs)
+
+ base_metric_v3 = impact.get("baseMetricV3") or {}
+ if base_metric_v3:
+ cvss_v3 = get_item(base_metric_v3, "cvssV3")
+ version = cvss_v3.get("version")
+ system = None
+ if version == "3.1":
+ system = severity_systems.CVSSV31
+ else:
+ system = severity_systems.CVSSV3
+ vs = VulnerabilitySeverity(
+ system=system,
+ value=str(cvss_v3.get("baseScore") or ""),
+ scoring_elements=str(cvss_v3.get("vectorString") or ""),
+ url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}",
+ )
+ severities.append(vs)
+
+ base_metric_v2 = impact.get("baseMetricV2") or {}
+ if base_metric_v2:
+ cvss_v2 = base_metric_v2.get("cvssV2") or {}
+ vs = VulnerabilitySeverity(
+ system=severity_systems.CVSSV2,
+ value=str(cvss_v2.get("baseScore") or ""),
+ scoring_elements=str(cvss_v2.get("vectorString") or ""),
+ url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}",
+ )
+ severities.append(vs)
+
+ return severities
+
+ @property
+ def reference_urls(self):
+ """
+ Return a list of unique reference URLs.
+ """
+ # FIXME: we should also collect additional data from the references such as tags and ids
+
+ urls = []
+ for reference in get_item(self.cve_item, "cve", "references", "reference_data") or []:
+ ref_url = reference.get("url")
+ if ref_url and ref_url.startswith(("http", "ftp")) and ref_url not in urls:
+ urls.append(ref_url)
+ return urls
+
+ @property
+ def references(self):
+ """
+ Return a list of AdvisoryReference.
+ """
+ # FIXME: we should also collect additional data from the references such as tags and ids
+ references = []
+
+ # we track each CPE as a reference for now
+ for cpe in self.cpes:
+ cpe_url = f"https://nvd.nist.gov/vuln/search/results?adv_search=true&isCpeNameSearch=true&query={cpe}"
+ references.append(Reference(reference_id=cpe, url=cpe_url))
+
+ # FIXME: we also add the CVE proper as a reference, but is this correct?
+ references.append(
+ Reference(
+ url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}",
+ reference_id=self.cve_id,
+ )
+ )
+
+ # clean to remove dupes for the CVE id proper
+ ref_urls = [
+ ru
+ for ru in self.reference_urls
+ if ru != f"https://nvd.nist.gov/vuln/detail/{self.cve_id}"
+ ]
+ references.extend([Reference(url=url) for url in ref_urls])
+
+ return references
+
+ @property
+ def is_related_to_hardware(self):
+ """
+ Return True if this CVE item is for hardware (as opposed to software).
+ """
+ return any(is_related_to_hardware(cpe) for cpe in self.cpes)
+
+ @property
+ def weaknesses(self):
+ """
+ Return a list of CWE IDs like: [119, 189]
+ """
+ weaknesses = []
+ for weaknesses_item in (
+ get_item(self.cve_item, "cve", "problemtype", "problemtype_data") or []
+ ):
+ weaknesses_description = weaknesses_item.get("description") or []
+ for weaknesses_value in weaknesses_description:
+ cwe_id = (
+ weaknesses_value.get("value") if weaknesses_value.get("lang") == "en" else None
+ )
+ if cwe_id in ["NVD-CWE-Other", "NVD-CWE-noinfo"] or not cwe_id:
+ continue # Skip Invalid CWE
+ weaknesses.append(get_cwe_id(cwe_id))
+ return weaknesses
+
+ def to_advisory(self):
+ """
+ Return an AdvisoryData object from this CVE item
+ """
+ return AdvisoryData(
+ advisory_id=self.cve_id,
+ aliases=[],
+ summary=self.summary,
+ references_v2=self.references,
+ date_published=dateparser.parse(self.cve_item.get("publishedDate")),
+ weaknesses=self.weaknesses,
+ severities=self.severities,
+ url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}",
+ )
+
+
+def is_related_to_hardware(cpe):
+ """
+ Return True if the ``cpe`` is related to hardware.
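+
+ For example::
+
+ >>> is_related_to_hardware("cpe:2.3:h:cisco:ios:12.2")
+ True
+ >>> is_related_to_hardware("cpe:2.3:a:vendor:product:1.0")
+ False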
+ """
+ cpe_comps = cpe.split(":")
+ # CPEs follow the format cpe:<cpe_version>:<part>:<vendor>:<product> where part "h" means hardware
+ return len(cpe_comps) > 2 and cpe_comps[2] == "h"
diff --git a/vulnerabilities/pipelines/v2_importers/postgresql_importer.py b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py
new file mode 100644
index 000000000..2f5a49439
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py
@@ -0,0 +1,163 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import urllib.parse as urlparse
+from typing import Iterable
+
+import requests
+from bs4 import BeautifulSoup
+from packageurl import PackageURL
+from univers.version_range import GenericVersionRange
+from univers.versions import GenericVersion
+
+from vulnerabilities import severity_systems
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import Reference
+from vulnerabilities.importer import VulnerabilitySeverity
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+
+
+class PostgreSQLImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+ """
+ PostgreSQL Importer Pipeline
+
+ This pipeline imports security advisories from the PostgreSQL project.
+ """
+
+ pipeline_id = "postgresql_importer_v2"
+ license_url = "https://www.postgresql.org/about/licence/"
+ spdx_license_expression = "PostgreSQL"
+ base_url = "https://www.postgresql.org/support/security/"
+
+ links = set()
+
+ @classmethod
+ def steps(cls):
+ return (cls.collect_and_store_advisories,)
+
+ def advisories_count(self) -> int:
+ if not self.links:
+ self.collect_links()
+ return len(self.links)
+
+ def collect_advisories(self) -> Iterable[AdvisoryData]:
+ if not self.links:
+ self.collect_links()
+
+ for url in self.links:
+ data = requests.get(url).content
+ yield from self.to_advisories(data)
+
+ def collect_links(self):
+ known_urls = {self.base_url}
+ visited_urls = set()
+
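+ # Crawl from the security index page, following advisory links until no
+ # new pages are discovered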
+ while True:
+ unvisited_urls = known_urls - visited_urls
+ for url in unvisited_urls:
+ data = requests.get(url).content
+ visited_urls.add(url)
+ known_urls.update(self.find_advisory_urls(data))
+ if known_urls == visited_urls:
+ break
+ self.links = known_urls
+
+ def to_advisories(self, data):
+ advisories = []
+ soup = BeautifulSoup(data, features="lxml")
+ tables = soup.select("table")
+
+ if not tables:
+ return advisories
+
+ table = tables[0]
+
+ for row in table.select("tbody tr"):
+ ref_col, affected_col, fixed_col, severity_score_col, desc_col = row.select("td")
+ summary = desc_col.text
+ pkg_qualifiers = {"os": "windows"} if "windows" in summary.lower() else {}
+
+ affected_packages = []
+ affected_version_list = [v.strip() for v in affected_col.text.split(",") if v.strip()]
+ fixed_version_list = [v.strip() for v in fixed_col.text.split(",") if v.strip()]
+
+ if fixed_version_list:
+ for fixed_version in fixed_version_list:
+ affected_packages.append(
+ AffectedPackage(
+ package=PackageURL(
+ name="postgresql", type="generic", qualifiers=pkg_qualifiers
+ ),
+ affected_version_range=GenericVersionRange.from_versions(
+ affected_version_list
+ )
+ if affected_version_list
+ else None,
+ fixed_version=GenericVersion(fixed_version),
+ )
+ )
+ elif affected_version_list:
+ affected_packages.append(
+ AffectedPackage(
+ package=PackageURL(
+ name="postgresql", type="generic", qualifiers=pkg_qualifiers
+ ),
+ affected_version_range=GenericVersionRange.from_versions(
+ affected_version_list
+ ),
+ )
+ )
+
+ cve_id = ""
+ try:
+ cve_id = ref_col.select(".nobr")[0].text
+ except IndexError:
+ pass
+
+ references = []
+ vector_link_tag = severity_score_col.find("a")
+ for a_tag in ref_col.select("a"):
+ link = a_tag.attrs["href"]
+ if link.startswith("/"):
+ link = urlparse.urljoin("https://www.postgresql.org/", link)
+ severities = []
+ if "support/security/CVE" in link and vector_link_tag:
+ parsed_link = urlparse.urlparse(vector_link_tag["href"])
+ cvss3_vector = urlparse.parse_qs(parsed_link.query).get("vector", [""])[0]
+ cvss3_base_score = vector_link_tag.text
+ severities.append(
+ VulnerabilitySeverity(
+ system=severity_systems.CVSSV3,
+ value=cvss3_base_score,
+ scoring_elements=cvss3_vector,
+ )
+ )
+ references.append(Reference(url=link, severities=severities))
+
+ if cve_id:
+ advisories.append(
+ AdvisoryData(
+ advisory_id=cve_id,
+ aliases=[],
+ summary=summary,
+ references_v2=references,
+ affected_packages=affected_packages,
+ url=f"https://www.postgresql.org/support/security/{cve_id}",
+ )
+ )
+
+ return advisories
+
+ def find_advisory_urls(self, page_data):
+ soup = BeautifulSoup(page_data, features="lxml")
+ return {
+ urlparse.urljoin("https://www.postgresql.org/", a_tag.attrs["href"])
+ for a_tag in soup.select("h3+ p a")
+ }
diff --git a/vulnerabilities/pipelines/v2_importers/pypa_importer.py b/vulnerabilities/pipelines/v2_importers/pypa_importer.py
new file mode 100644
index 000000000..7463cc4bd
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/pypa_importer.py
@@ -0,0 +1,74 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+from pathlib import Path
+from typing import Iterable
+
+import saneyaml
+from fetchcode.vcs import fetch_via_vcs
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.utils import get_advisory_url
+
+
+class PyPaImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+ """
+ PyPA Importer Pipeline
+
+ Collect advisories from the PyPA advisory-database GitHub repository."""
+
+ pipeline_id = "pypa_importer_v2"
+ spdx_license_expression = "CC-BY-4.0"
+ license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE"
+ repo_url = "git+https://github.com/pypa/advisory-database"
+ unfurl_version_ranges = True
+
+ @classmethod
+ def steps(cls):
+ return (
+ cls.clone,
+ cls.collect_and_store_advisories,
+ cls.clean_downloads,
+ )
+
+ def clone(self):
+ self.log(f"Cloning `{self.repo_url}`")
+ self.vcs_response = fetch_via_vcs(self.repo_url)
+
+ def advisories_count(self):
+ vulns_directory = Path(self.vcs_response.dest_dir) / "vulns"
+ return sum(1 for _ in vulns_directory.rglob("*.yaml"))
+
+ def collect_advisories(self) -> Iterable[AdvisoryData]:
+ from vulnerabilities.importers.osv import parse_advisory_data_v2
+
+ base_directory = Path(self.vcs_response.dest_dir)
+ vulns_directory = base_directory / "vulns"
+
+ for advisory in vulns_directory.rglob("*.yaml"):
+ advisory_url = get_advisory_url(
+ file=advisory,
+ base_path=base_directory,
+ url="https://github.com/pypa/advisory-database/blob/main/",
+ )
+ advisory_dict = saneyaml.load(advisory.read_text())
+ yield parse_advisory_data_v2(
+ raw_data=advisory_dict,
+ supported_ecosystems=["pypi"],
+ advisory_url=advisory_url,
+ )
+
+ def clean_downloads(self):
+ if self.vcs_response:
+ self.log(f"Removing cloned repository")
+ self.vcs_response.delete()
+
+ def on_failure(self):
+ self.clean_downloads()
diff --git a/vulnerabilities/pipelines/v2_importers/pysec_importer.py b/vulnerabilities/pipelines/v2_importers/pysec_importer.py
new file mode 100644
index 000000000..e67f41a28
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/pysec_importer.py
@@ -0,0 +1,67 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+import json
+import logging
+from io import BytesIO
+from typing import Iterable
+from zipfile import ZipFile
+
+import requests
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+
+
+class PyPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+ """
+ PySec Importer Pipeline
+
+ Collect PYSEC advisories for PyPI packages from the OSV data dump."""
+
+ pipeline_id = "pysec_importer_v2"
+ license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE"
+ url = "https://osv-vulnerabilities.storage.googleapis.com/PyPI/all.zip"
+ spdx_license_expression = "CC-BY-4.0"
+ unfurl_version_ranges = True
+
+ @classmethod
+ def steps(cls):
+ return (
+ cls.fetch_zip,
+ cls.collect_and_store_advisories,
+ )
+
+ def fetch_zip(self):
+ self.log(f"Fetching `{self.url}`")
+ self.advisory_zip = requests.get(self.url).content
+
+ def advisories_count(self) -> int:
+ with ZipFile(BytesIO(self.advisory_zip)) as zip_file:
+ advisory_count = sum(1 for file in zip_file.namelist() if file.startswith("PYSEC-"))
+ return advisory_count
+
+ def collect_advisories(self) -> Iterable[AdvisoryData]:
+ """Yield AdvisoryData using a zipped data dump of OSV data"""
+ from vulnerabilities.importers.osv import parse_advisory_data_v2
+
+ with ZipFile(BytesIO(self.advisory_zip)) as zip_file:
+ for file_name in zip_file.namelist():
+ if not file_name.startswith("PYSEC-"):
+ self.log(
+ f"Unsupported PyPI advisory data file: {file_name}",
+ level=logging.ERROR,
+ )
+ continue
+ with zip_file.open(file_name) as f:
+ vul_info = json.load(f)
+ yield parse_advisory_data_v2(
+ raw_data=vul_info,
+ supported_ecosystems=["pypi"],
+ advisory_url=self.url,
+ )
diff --git a/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py
new file mode 100644
index 000000000..b2ddfd3cd
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py
@@ -0,0 +1,318 @@
+import json
+import logging
+import re
+from pathlib import Path
+from typing import Iterable
+
+import dateparser
+from fetchcode.vcs import fetch_via_vcs
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import Reference
+from vulnerabilities.importer import VulnerabilitySeverity
+from vulnerabilities.models import VulnerabilityReference
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.severity_systems import SCORING_SYSTEMS
+from vulnerabilities.utils import get_advisory_url
+from vulnerabilities.utils import get_cwe_id
+from vulnerabilities.utils import get_reference_id
+
+logger = logging.getLogger(__name__)
+
+
+class VulnrichImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+ """
+ Vulnrichment Importer Pipeline
+
+ This pipeline imports security advisories from Vulnrichment project.
+ """
+
+ pipeline_id = "vulnrichment_importer_v2"
+ spdx_license_expression = "CC0-1.0"
+ license_url = "https://github.com/cisagov/vulnrichment/blob/develop/LICENSE"
+ repo_url = "git+https://github.com/cisagov/vulnrichment.git"
+
+ @classmethod
+ def steps(cls):
+ return (
+ cls.clone,
+ cls.collect_and_store_advisories,
+ cls.clean_downloads,
+ )
+
+ def clone(self):
+ self.log(f"Cloning `{self.repo_url}`")
+ self.vcs_response = fetch_via_vcs(self.repo_url)
+
+ def advisories_count(self):
+ vuln_directory = Path(self.vcs_response.dest_dir)
+ # count recursively to match collect_advisories, which walks subdirectories
+ return sum(1 for _ in vuln_directory.rglob("CVE-*.json"))
+
+ def collect_advisories(self) -> Iterable[AdvisoryData]:
+ base_path = Path(self.vcs_response.dest_dir)
+ for file_path in base_path.glob("**/**/*.json"):
+ if not file_path.name.startswith("CVE-"):
+ continue
+ with open(file_path) as f:
+ raw_data = json.load(f)
+ advisory_url = get_advisory_url(
+ file=file_path,
+ base_path=base_path,
+ url="https://github.com/cisagov/vulnrichment/blob/develop/",
+ )
+ yield self.parse_cve_advisory(raw_data, advisory_url)
+
+ def parse_cve_advisory(self, raw_data, advisory_url):
+ cve_metadata = raw_data.get("cveMetadata", {})
+ cve_id = cve_metadata.get("cveId")
+ state = cve_metadata.get("state")
+
+ date_published = cve_metadata.get("datePublished")
+ if date_published:
+ date_published = dateparser.parse(date_published)
+
+ # Extract containers
+ containers = raw_data.get("containers", {})
+ cna_data = containers.get("cna", {})
+ adp_data = containers.get("adp", {})
+
+ # Extract descriptions
+ summary = ""
+ description_list = cna_data.get("descriptions", [])
+ for description_dict in description_list:
+ if not description_dict.get("lang") in ["en", "en-US"]:
+ continue
+ summary = description_dict.get("value")
+
+ # Extract metrics
+ severities = []
+ metrics = cna_data.get("metrics", []) + [
+ adp_metrics for data in adp_data for adp_metrics in data.get("metrics", [])
+ ]
+
+ vulnrichment_scoring_system = {
+ "cvssV4_0": SCORING_SYSTEMS["cvssv4"],
+ "cvssV3_1": SCORING_SYSTEMS["cvssv3.1"],
+ "cvssV3_0": SCORING_SYSTEMS["cvssv3"],
+ "cvssV2_0": SCORING_SYSTEMS["cvssv2"],
+ "other": {
+ "ssvc": SCORING_SYSTEMS["ssvc"],
+ }, # ignore kev
+ }
+
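+ # Each metric entry is a one-key mapping, for example:
+ # {"cvssV3_1": {"vectorString": "CVSS:3.1/...", "baseScore": 9.8}}
+ # or {"other": {"type": "ssvc", "content": {...}}}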
+ for metric in metrics:
+ for metric_type, metric_value in metric.items():
+ if metric_type not in vulnrichment_scoring_system:
+ continue
+
+ if metric_type == "other":
+ other_types = metric_value.get("type")
+ if other_types == "ssvc":
+ content = metric_value.get("content", {})
+ vector_string, decision = ssvc_calculator(content)
+ scoring_system = vulnrichment_scoring_system[metric_type][other_types]
+ severity = VulnerabilitySeverity(
+ system=scoring_system, value=decision, scoring_elements=vector_string
+ )
+ severities.append(severity)
+ # ignore kev
+ else:
+ vector_string = metric_value.get("vectorString")
+ base_score = metric_value.get("baseScore")
+ scoring_system = vulnrichment_scoring_system[metric_type]
+ severity = VulnerabilitySeverity(
+ system=scoring_system, value=base_score, scoring_elements=vector_string
+ )
+ severities.append(severity)
+
+ # Extract references cpes and ignore affected products
+ cpes = set()
+ for affected_product in cna_data.get("affected", []):
+ if not isinstance(affected_product, dict):
+ continue
+ cpes.update(affected_product.get("cpes") or [])
+
+ references = []
+ for ref in cna_data.get("references", []):
+ # https://github.com/CVEProject/cve-schema/blob/main/schema/tags/reference-tags.json
+ # We removed all unwanted reference types and set the default reference type to 'OTHER'.
+ ref_type = VulnerabilityReference.OTHER
+ vul_ref_types = {
+ "exploit": VulnerabilityReference.EXPLOIT,
+ "issue-tracking": VulnerabilityReference.BUG,
+ "mailing-list": VulnerabilityReference.MAILING_LIST,
+ "third-party-advisory": VulnerabilityReference.ADVISORY,
+ "vendor-advisory": VulnerabilityReference.ADVISORY,
+ "vdb-entry": VulnerabilityReference.ADVISORY,
+ }
+
+ for tag_type in ref.get("tags", []):
+ if tag_type in vul_ref_types:
+ ref_type = vul_ref_types.get(tag_type)
+
+ url = ref.get("url")
+ reference = Reference(
+ reference_id=get_reference_id(url),
+ url=url,
+ reference_type=ref_type,
+ )
+
+ references.append(reference)
+
+ cpes_ref = [
+ Reference(
+ reference_id=cpe,
+ reference_type=VulnerabilityReference.OTHER,
+ url=f"https://nvd.nist.gov/vuln/search/results?adv_search=true&isCpeNameSearch=true&query={cpe}",
+ )
+ for cpe in sorted(list(cpes))
+ ]
+ references.extend(cpes_ref)
+
+ weaknesses = set()
+ for problem_type in cna_data.get("problemTypes", []):
+ descriptions = problem_type.get("descriptions", [])
+ for description in descriptions:
+ cwe_id = description.get("cweId")
+ if cwe_id:
+ weaknesses.add(get_cwe_id(cwe_id))
+
+ description_text = description.get("description")
+ if description_text:
+ pattern = r"CWE-(\d+)"
+ match = re.search(pattern, description_text)
+ if match:
+ weaknesses.add(int(match.group(1)))
+
+ return AdvisoryData(
+ advisory_id=cve_id,
+ aliases=[],
+ summary=summary,
+ references_v2=references,
+ date_published=date_published,
+ weaknesses=sorted(weaknesses),
+ url=advisory_url,
+ severities=severities,
+ )
+
+ def clean_downloads(self):
+ if self.vcs_response:
+ self.log("Removing cloned repository")
+ self.vcs_response.delete()
+
+ def on_failure(self):
+ self.clean_downloads()
+
+
+def ssvc_calculator(ssvc_data):
+ """
+ Return a tuple of (SSVC vector string, decision value) computed from ``ssvc_data``.
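+
+ For example, with only the three core decision points provided (Mission
+ Prevalence and Public Well-being Impact fall back to their defaults)::
+
+ >>> ssvc_calculator({"options": [
+ ... {"Exploitation": "active"},
+ ... {"Automatable": "yes"},
+ ... {"Technical Impact": "total"},
+ ... ]})
+ ('SSVCv2/E:A/A:Y/T:T/P:M/B:A/M:M/D:C/', 'Act')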
+ """
+ options = ssvc_data.get("options", [])
+ timestamp = ssvc_data.get("timestamp")
+
+ # Extract the options into a dictionary
+ options_dict = {k: v.lower() for option in options for k, v in option.items()}
+
+ # We copied the table value from this link.
+ # https://www.cisa.gov/sites/default/files/publications/cisa-ssvc-guide%20508c.pdf
+
+ # Determining Mission and Well-Being Impact Value
+ mission_well_being_table = {
+ # (Mission Prevalence, Public Well-being Impact) : "Mission & Well-being"
+ ("minimal", "minimal"): "low",
+ ("minimal", "material"): "medium",
+ ("minimal", "irreversible"): "high",
+ ("support", "minimal"): "medium",
+ ("support", "material"): "medium",
+ ("support", "irreversible"): "high",
+ ("essential", "minimal"): "high",
+ ("essential", "material"): "high",
+ ("essential", "irreversible"): "high",
+ }
+
+ if "Mission Prevalence" not in options_dict:
+ options_dict["Mission Prevalence"] = "minimal"
+
+ if "Public Well-being Impact" not in options_dict:
+ options_dict["Public Well-being Impact"] = "material"
+
+ options_dict["Mission & Well-being"] = mission_well_being_table[
+ (options_dict["Mission Prevalence"], options_dict["Public Well-being Impact"])
+ ]
+
+ decision_key = (
+ options_dict.get("Exploitation"),
+ options_dict.get("Automatable"),
+ options_dict.get("Technical Impact"),
+ options_dict.get("Mission & Well-being"),
+ )
+
+ decision_points = {
+ "Exploitation": {"E": {"none": "N", "poc": "P", "active": "A"}},
+ "Automatable": {"A": {"no": "N", "yes": "Y"}},
+ "Technical Impact": {"T": {"partial": "P", "total": "T"}},
+ "Public Well-being Impact": {"B": {"minimal": "M", "material": "A", "irreversible": "I"}},
+ "Mission Prevalence": {"P": {"minimal": "M", "support": "S", "essential": "E"}},
+ "Mission & Well-being": {"M": {"low": "L", "medium": "M", "high": "H"}},
+ }
+
+ # Create the SSVC vector
+ ssvc_vector = "SSVCv2/"
+ for key, value_map in options_dict.items():
+ options_key = decision_points.get(key)
+ for lhs, rhs_map in options_key.items():
+ ssvc_vector += f"{lhs}:{rhs_map.get(value_map)}/"
+
+ # "Decision": {"D": {"Track": "T", "Track*": "R", "Attend": "A", "Act": "C"}},
+ decision_values = {"Track": "T", "Track*": "R", "Attend": "A", "Act": "C"}
+
+ decision_lookup = {
+ ("none", "no", "partial", "low"): "Track",
+ ("none", "no", "partial", "medium"): "Track",
+ ("none", "no", "partial", "high"): "Track",
+ ("none", "no", "total", "low"): "Track",
+ ("none", "no", "total", "medium"): "Track",
+ ("none", "no", "total", "high"): "Track*",
+ ("none", "yes", "partial", "low"): "Track",
+ ("none", "yes", "partial", "medium"): "Track",
+ ("none", "yes", "partial", "high"): "Attend",
+ ("none", "yes", "total", "low"): "Track",
+ ("none", "yes", "total", "medium"): "Track",
+ ("none", "yes", "total", "high"): "Attend",
+ ("poc", "no", "partial", "low"): "Track",
+ ("poc", "no", "partial", "medium"): "Track",
+ ("poc", "no", "partial", "high"): "Track*",
+ ("poc", "no", "total", "low"): "Track",
+ ("poc", "no", "total", "medium"): "Track*",
+ ("poc", "no", "total", "high"): "Attend",
+ ("poc", "yes", "partial", "low"): "Track",
+ ("poc", "yes", "partial", "medium"): "Track",
+ ("poc", "yes", "partial", "high"): "Attend",
+ ("poc", "yes", "total", "low"): "Track",
+ ("poc", "yes", "total", "medium"): "Track*",
+ ("poc", "yes", "total", "high"): "Attend",
+ ("active", "no", "partial", "low"): "Track",
+ ("active", "no", "partial", "medium"): "Track",
+ ("active", "no", "partial", "high"): "Attend",
+ ("active", "no", "total", "low"): "Track",
+ ("active", "no", "total", "medium"): "Attend",
+ ("active", "no", "total", "high"): "Act",
+ ("active", "yes", "partial", "low"): "Attend",
+ ("active", "yes", "partial", "medium"): "Attend",
+ ("active", "yes", "partial", "high"): "Act",
+ ("active", "yes", "total", "low"): "Attend",
+ ("active", "yes", "total", "medium"): "Act",
+ ("active", "yes", "total", "high"): "Act",
+ }
+
+ decision = decision_lookup.get(decision_key, "")
+
+ if decision:
+ ssvc_vector += f"D:{decision_values.get(decision)}/"
+
+ if timestamp:
+ timestamp_formatted = dateparser.parse(timestamp).strftime("%Y-%m-%dT%H:%M:%SZ")
+ ssvc_vector += f"{timestamp_formatted}/"
+ return ssvc_vector, decision
diff --git a/vulnerabilities/pipelines/v2_improvers/collect_commits.py b/vulnerabilities/pipelines/v2_improvers/collect_commits.py
new file mode 100644
index 000000000..32fb1ce79
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_improvers/collect_commits.py
@@ -0,0 +1,252 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import re
+
+from aboutcode.pipeline import LoopProgress
+
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.models import CodeFixV2
+from vulnerabilities.pipelines import VulnerableCodePipeline
+
+
+def is_vcs_url_already_processed(commit_id):
+ """
+ Return True if a CodeFixV2 entry already references this ``commit_id`` URL.
+ """
+ return CodeFixV2.objects.filter(commits__contains=[commit_id]).exists()
+
+
+class CollectFixCommitsPipeline(VulnerableCodePipeline):
+ """
+ Improver pipeline to scout References and create CodeFix entries.
+ """
+
+ pipeline_id = "collect_fix_commits_v2"
+ license_expression = None
+
+ @classmethod
+ def steps(cls):
+ return (cls.collect_and_store_fix_commits,)
+
+ def collect_and_store_fix_commits(self):
+ affected_advisories = (
+ AdvisoryV2.objects.filter(affecting_packages__isnull=False)
+ .prefetch_related("affecting_packages")
+ .distinct()
+ )
+
+ self.log(f"Processing {affected_advisories.count():,d} references to collect fix commits.")
+
+ created_fix_count = 0
+ progress = LoopProgress(total_iterations=affected_advisories.count(), logger=self.log)
+
+ for adv in progress.iter(affected_advisories.paginated(per_page=500)):
+ for reference in adv.references.all():
+ if not "/commit/" in reference.url:
+ continue
+ if not is_vcs_url(reference.url):
+ continue
+
+ vcs_url = normalize_vcs_url(repo_url=reference.url)
+
+ if not vcs_url:
+ continue
+
+ # Skip if already processed
+ if is_vcs_url_already_processed(commit_id=vcs_url):
+ self.log(
+ f"Skipping already processed reference: {reference.url} with VCS URL {vcs_url}"
+ )
+ continue
+ # check if vcs_url has commit
+ for package in adv.affecting_packages.all():
+ code_fix, created = CodeFixV2.objects.get_or_create(
+ commits=[vcs_url],
+ advisory=adv,
+ affected_package=package,
+ )
+
+ if created:
+ created_fix_count += 1
+ self.log(
+ f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}"
+ )
+
+ self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.")
+
+
+PLAIN_URLS = (
+ "https://",
+ "http://",
+)
+
+VCS_URLS = (
+ "git://",
+ "git+git://",
+ "git+https://",
+ "git+http://",
+ "hg://",
+ "hg+http://",
+ "hg+https://",
+ "svn://",
+ "svn+https://",
+ "svn+http://",
+)
+
+
+# TODO: This function was borrowed from scancode-toolkit. We need to create a shared library for that.
+def normalize_vcs_url(repo_url, vcs_tool=None):
+ """
+ Return a normalized vcs_url version control URL given some `repo_url` and an
+ optional `vcs_tool` hint (such as 'git', 'hg', etc.)
+
+ Return None if repo_url is not recognized as a VCS URL.
+
+ Handles shortcuts for GitHub, GitHub gist, Bitbucket, or GitLab repositories
+ and more using the same approach as npm install:
+
+ See https://docs.npmjs.com/files/package.json#repository
+ or https://getcomposer.org/doc/05-repositories.md
+
+ This is done here in npm:
+ https://github.com/npm/npm/blob/d3c858ce4cfb3aee515bb299eb034fe1b5e44344/node_modules/hosted-git-info/git-host-info.js
+
+ These should be resolved:
+ npm/npm
+ gist:11081aaa281
+ bitbucket:example/repo
+ gitlab:another/repo
+ expressjs/serve-static
+ git://github.com/angular/di.js.git
+ git://github.com/hapijs/boom
+ git@github.com:balderdashy/waterline-criteria.git
+ http://github.com/ariya/esprima.git
+ http://github.com/isaacs/nopt
+ https://github.com/chaijs/chai
+ https://github.com/christkv/kerberos.git
+ https://gitlab.com/foo/private.git
+ git@gitlab.com:foo/private.git
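+
+ For example::
+
+ >>> normalize_vcs_url("git@github.com:balderdashy/waterline-criteria.git")
+ 'https://github.com/balderdashy/waterline-criteria.git'
+ >>> normalize_vcs_url("github:user/repo")
+ 'https://github.com/user/repo'
+ >>> normalize_vcs_url("npm/npm")
+ 'https://github.com/npm/npm'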
+ """
+ if not repo_url or not isinstance(repo_url, str):
+ return
+
+ repo_url = repo_url.strip()
+ if not repo_url:
+ return
+
+ # TODO: when we match plain http and https URLs, we should add more
+ # checks to verify that the URL is actually a repository URL, for
+ # example by checking the domain name
+ if repo_url.startswith(VCS_URLS + PLAIN_URLS):
+ return repo_url
+
+ if repo_url.startswith("git@"):
+ tool, _, right = repo_url.partition("@")
+ if ":" in repo_url:
+ host, _, repo = right.partition(":")
+ else:
+ # git@github.com/Filirom1/npm2aur.git
+ host, _, repo = right.partition("/")
+
+ if any(r in host for r in ("bitbucket", "gitlab", "github")):
+ scheme = "https"
+ else:
+ scheme = "git"
+
+ return f"{scheme}://{host}/{repo}"
+
+ # FIXME: where these URL schemes come from??
+ if repo_url.startswith(("bitbucket:", "gitlab:", "github:", "gist:")):
+ repo = repo_url.split(":")[1]
+ hoster_urls = {
+ "bitbucket": f"https://bitbucket.org/{repo}",
+ "github": f"https://github.com/{repo}",
+ "gitlab": f"https://gitlab.com/{repo}",
+ "gist": f"https://gist.github.com/{repo}",
+ }
+ hoster, _, repo = repo_url.partition(":")
+ return hoster_urls[hoster] % locals()
+
+ if len(repo_url.split("/")) == 2:
+ # implicit github, but that's only on NPM?
+ return f"https://github.com/{repo_url}"
+ return repo_url
+
+
+def is_vcs_url(repo_url):
+ """
+ Check if a given URL or string matches a valid VCS (Version Control System) URL.
+
+ Supports:
+ - Standard VCS URL protocols (git, http, https, ssh)
+ - Shortcut syntax (e.g., github:user/repo, gitlab:group/repo)
+ - GitHub shortcut (e.g., user/repo)
+
+ Args:
+ repo_url (str): The repository URL or shortcut to validate.
+
+ Returns:
+ bool: True if the string is a valid VCS URL, False otherwise.
+
+ Examples:
+ >>> is_vcs_url("git://github.com/angular/di.js.git")
+ True
+ >>> is_vcs_url("github:user/repo")
+ True
+ >>> is_vcs_url("user/repo")
+ True
+ >>> is_vcs_url("https://github.com/user/repo.git")
+ True
+ >>> is_vcs_url("git@github.com:user/repo.git")
+ True
+ >>> is_vcs_url("http://github.com/isaacs/nopt")
+ True
+ >>> is_vcs_url("https://gitlab.com/foo/private.git")
+ True
+ >>> is_vcs_url("git@gitlab.com:foo/private.git")
+ True
+ >>> is_vcs_url("bitbucket:example/repo")
+ True
+ >>> is_vcs_url("gist:11081aaa281")
+ True
+ >>> is_vcs_url("ftp://example.com/not-a-repo")
+ False
+ >>> is_vcs_url("random-string")
+ False
+ >>> is_vcs_url("https://example.com/not-a-repo")
+ False
+ """
+ if not repo_url or not isinstance(repo_url, str):
+ return False
+
+ repo_url = repo_url.strip()
+ if not repo_url:
+ return False
+
+ # Define valid VCS domains
+ vcs_domains = r"(github\.com|gitlab\.com|bitbucket\.org|gist\.github\.com)"
+
+ # 1. Match URLs with standard protocols pointing to VCS domains
+ if re.match(rf"^(git|ssh|http|https)://{vcs_domains}/[\w\-.]+/[\w\-.]+", repo_url):
+ return True
+
+ # 2. Match SSH URLs (e.g., git@github.com:user/repo.git)
+ if re.match(rf"^git@{vcs_domains}:[\w\-.]+/[\w\-.]+(\.git)?$", repo_url):
+ return True
+
+ # 3. Match shortcut syntax (e.g., github:user/repo)
+ if re.match(r"^(github|gitlab|bitbucket|gist):[\w\-./]+$", repo_url):
+ return True
+
+ # 4. Match implicit GitHub shortcut (e.g., user/repo)
+ if re.match(r"^[\w\-]+/[\w\-]+$", repo_url):
+ return True
+
+ return False
diff --git a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py
new file mode 100644
index 000000000..55608f0d1
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py
@@ -0,0 +1,143 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+from aboutcode.pipeline import LoopProgress
+
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.models import PackageV2
+from vulnerabilities.pipelines import VulnerableCodePipeline
+from vulnerabilities.risk import compute_package_risk_v2
+from vulnerabilities.risk import compute_vulnerability_risk_factors
+
+
+class ComputePackageRiskPipeline(VulnerableCodePipeline):
+ """
+ Compute risk score for packages.
+
+ See https://github.com/aboutcode-org/vulnerablecode/issues/1543
+ """
+
+ pipeline_id = "compute_package_risk_v2"
+ license_expression = None
+
+ @classmethod
+ def steps(cls):
+ return (
+ cls.compute_and_store_vulnerability_risk_score,
+ cls.compute_and_store_package_risk_score,
+ )
+
+ def compute_and_store_vulnerability_risk_score(self):
+ affected_advisories = (
+ AdvisoryV2.objects.filter(affecting_packages__isnull=False)
+ .prefetch_related(
+ "references",
+ "severities",
+ "exploits",
+ )
+ .distinct()
+ )
+
+ self.log(
+ f"Calculating risk for {affected_advisories.count():,d} vulnerability with a affected packages records"
+ )
+
+ progress = LoopProgress(total_iterations=affected_advisories.count(), logger=self.log)
+
+ updatables = []
+ updated_vulnerability_count = 0
+ batch_size = 5000
+
+ for advisory in progress.iter(affected_advisories.paginated(per_page=batch_size)):
+ severities = advisory.severities.all()
+ references = advisory.references.all()
+ exploits = advisory.exploits.all()
+
+ weighted_severity, exploitability = compute_vulnerability_risk_factors(
+ references=references,
+ severities=severities,
+ exploits=exploits,
+ )
+ advisory.weighted_severity = weighted_severity
+ advisory.exploitability = exploitability
+ self.log(
+ f"Computed risk for {advisory.advisory_id} with weighted_severity={weighted_severity} and exploitability={exploitability}"
+ )
+ updatables.append(advisory)
+
+ if len(updatables) >= batch_size:
+ updated_vulnerability_count += bulk_update(
+ model=AdvisoryV2,
+ items=updatables,
+ fields=["weighted_severity", "exploitability"],
+ logger=self.log,
+ )
+
+ updated_vulnerability_count += bulk_update(
+ model=AdvisoryV2,
+ items=updatables,
+ fields=["weighted_severity", "exploitability"],
+ logger=self.log,
+ )
+
+ self.log(
+ f"Successfully added risk score for {updated_vulnerability_count:,d} vulnerability"
+ )
+
+ def compute_and_store_package_risk_score(self):
+ affected_packages = (
+ PackageV2.objects.filter(affected_by_advisories__isnull=False)
+ ).distinct()
+
+ self.log(f"Calculating risk for {affected_packages.count():,d} affected package records")
+
+ progress = LoopProgress(
+ total_iterations=affected_packages.count(),
+ logger=self.log,
+ progress_step=5,
+ )
+
+ updatables = []
+ updated_package_count = 0
+ batch_size = 10000
+
+ for package in progress.iter(affected_packages.paginated(per_page=batch_size)):
+ risk_score = compute_package_risk_v2(package)
+
+ if not risk_score:
+ continue
+
+ package.risk_score = risk_score
+ updatables.append(package)
+
+ if len(updatables) >= batch_size:
+ updated_package_count += bulk_update(
+ model=PackageV2,
+ items=updatables,
+ fields=["risk_score"],
+ logger=self.log,
+ )
+ updated_package_count += bulk_update(
+ model=PackageV2,
+ items=updatables,
+ fields=["risk_score"],
+ logger=self.log,
+ )
+ self.log(f"Successfully added risk score for {updated_package_count:,d} package")
+
+
+def bulk_update(model, items, fields, logger):
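+ """
+ Bulk-update `fields` on `items` of `model`, clear `items`, and return the
+ number of objects updated. Log any database error via `logger`.
+ """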
+ item_count = 0
+ if items:
+ try:
+ model.objects.bulk_update(objs=items, fields=fields)
+ item_count += len(items)
+ except Exception as e:
+ logger(f"Error updating {model.__name__}: {e}")
+ items.clear()
+ return item_count
diff --git a/vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py b/vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py
new file mode 100644
index 000000000..dd10a1695
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py
@@ -0,0 +1,93 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+from itertools import groupby
+
+from aboutcode.pipeline import LoopProgress
+from django.db import transaction
+from univers.version_range import RANGE_CLASS_BY_SCHEMES
+from univers.versions import Version
+
+from vulnerabilities.models import PackageV2
+from vulnerabilities.pipelines import VulnerableCodePipeline
+
+
+class ComputeVersionRankPipeline(VulnerableCodePipeline):
+ """
+ A pipeline to compute and assign version ranks for all packages.
+ """
+
+ pipeline_id = "compute_version_rank_v2"
+ license_expression = None
+
+ @classmethod
+ def steps(cls):
+ return (cls.compute_and_store_version_rank,)
+
+ def compute_and_store_version_rank(self):
+ """
+ Compute and assign version ranks to all packages.
+ """
+ packages = PackageV2.objects.only("type", "namespace", "name", "version").order_by(
+ "type", "namespace", "name"
+ )
+
+ def key(package):
+ return package.type, package.namespace, package.name
+
+ # Materialize the lazy groupby so the groups can be counted and iterated.
+ groups = [(group, list(group_packages)) for group, group_packages in groupby(packages, key=key)]
+
+ total_groups = len(groups)
+ self.log(f"Calculating `version_rank` for {total_groups:,d} groups of packages.")
+
+ progress = LoopProgress(
+ total_iterations=total_groups,
+ logger=self.log,
+ progress_step=5,
+ )
+
+ for group, packages in progress.iter(groups):
+ type, namespace, name = group
+ if type not in RANGE_CLASS_BY_SCHEMES:
+ continue
+ self.update_version_rank_for_group(packages)
+
+ self.log("Successfully populated `version_rank` for all packages.")
+
+ @transaction.atomic
+ def update_version_rank_for_group(self, packages):
+ """
+ Update the `version_rank` for all packages in a specific group.
+ """
+
+ # Sort the packages by version
+ sorted_packages = self.sort_packages_by_version(packages)
+
+ # Assign version ranks
+ updates = []
+ for rank, package in enumerate(sorted_packages, start=1):
+ package.version_rank = rank
+ updates.append(package)
+
+ # Bulk update to save the ranks
+ PackageV2.objects.bulk_update(updates, fields=["version_rank"])
+
+ def sort_packages_by_version(self, packages):
+ """
+ Sort packages by version using `version_class`.
+ """
+
+ if not packages:
+ return []
+ range_class = RANGE_CLASS_BY_SCHEMES.get(packages[0].type)
+ if not range_class:
+ # Unknown versioning scheme: these packages cannot be sorted reliably.
+ return []
+ version_class = range_class.version_class or Version
+ return sorted(packages, key=lambda p: version_class(p.version))
diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py
new file mode 100644
index 000000000..c306502d8
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py
@@ -0,0 +1,169 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import csv
+import io
+import logging
+from traceback import format_exc as traceback_format_exc
+
+import requests
+from aboutcode.pipeline import LoopProgress
+from dateutil import parser as dateparser
+from django.db import DataError
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryReference
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines import VulnerableCodePipeline
+
+
+class ExploitDBImproverPipeline(VulnerableCodePipeline):
+ """
+ ExploitDB Improver Pipeline: Fetch ExploitDB data, iterate over it to find advisories
+ with matching aliases, and create or update the corresponding references and exploits.
+ """
+
+ pipeline_id = "enhance_with_exploitdb_v2"
+ spdx_license_expression = "GPL-2.0"
+
+ @classmethod
+ def steps(cls):
+ return (
+ cls.fetch_exploits,
+ cls.add_exploit,
+ )
+
+ def fetch_exploits(self):
+ exploit_db_url = (
+ "https://gitlab.com/exploit-database/exploitdb/-/raw/main/files_exploits.csv"
+ )
+ self.log(f"Fetching {exploit_db_url}")
+
+ try:
+ response = requests.get(exploit_db_url)
+ response.raise_for_status()
+ except requests.exceptions.HTTPError as http_err:
+ self.log(
+ f"Failed to fetch the Exploit-DB Exploits: {exploit_db_url} with error {http_err!r}:\n{traceback_format_exc()}",
+ level=logging.ERROR,
+ )
+ raise
+
+ self.exploit_data = io.StringIO(response.text)
+
+ def add_exploit(self):
+
+ csvreader = csv.DictReader(self.exploit_data)
+
+ raw_data = list(csvreader)
+ fetched_exploit_count = len(raw_data)
+
+ vulnerability_exploit_count = 0
+ self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records")
+ progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log)
+
+ for row in progress.iter(raw_data):
+ vulnerability_exploit_count += add_vulnerability_exploit(row, self.log)
+
+ self.log(f"Successfully added {vulnerability_exploit_count:,d} exploit-db advisory exploit")
+
+
+def add_vulnerability_exploit(row, logger):
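+ """
+ Create or update AdvisoryExploit and reference records for all advisories
+ matching the aliases listed in the ExploitDB `row`. Return 1 if matching
+ advisories were found, 0 otherwise.
+ """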
+ advisories = set()
+
+ aliases = row["codes"].split(";") if row["codes"] else []
+
+ if not aliases:
+ return 0
+
+ for raw_alias in aliases:
+ try:
+ # get() raises DoesNotExist rather than returning a falsy value,
+ # so the advisory_id fallback belongs in the except branch.
+ alias = AdvisoryAlias.objects.get(alias=raw_alias)
+ for adv in alias.advisories.all():
+ advisories.add(adv)
+ except AdvisoryAlias.DoesNotExist:
+ for adv in AdvisoryV2.objects.filter(advisory_id=raw_alias):
+ advisories.add(adv)
+
+ if not advisories:
+ logger(f"No advisory found for aliases {aliases}")
+ return 0
+
+ date_added = parse_date(row["date_added"])
+ source_date_published = parse_date(row["date_published"])
+ source_date_updated = parse_date(row["date_updated"])
+
+ for advisory in advisories:
+ add_exploit_references(row["codes"], row["source_url"], row["file"], advisory, logger)
+ try:
+ AdvisoryExploit.objects.update_or_create(
+ advisory=advisory,
+ data_source="Exploit-DB",
+ defaults={
+ "date_added": date_added,
+ "description": row["description"],
+ "known_ransomware_campaign_use": row["verified"],
+ "source_date_published": source_date_published,
+ "exploit_type": row["type"],
+ "platform": row["platform"],
+ "source_date_updated": source_date_updated,
+ "source_url": row["source_url"],
+ },
+ )
+ except DataError as e:
+ logger(
+ f"Failed to Create the Vulnerability Exploit-DB with error {e!r}:\n{traceback_format_exc()}",
+ level=logging.ERROR,
+ )
+ return 1
+
+
+def add_exploit_references(ref_id, direct_url, path, adv, logger):
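+ """
+ Create or update EXPLOIT-type AdvisoryReference records for the ExploitDB
+ file URL and the direct source URL, and link them to the advisory `adv`.
+ """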
+ url_map = {
+ "file_url": f"https://gitlab.com/exploit-database/exploitdb/-/blob/main/{path}",
+ "direct_url": direct_url,
+ }
+
+ for key, url in url_map.items():
+ if url:
+ try:
+ ref, created = AdvisoryReference.objects.update_or_create(
+ url=url,
+ defaults={
+ "reference_id": ref_id,
+ "reference_type": AdvisoryReference.EXPLOIT,
+ },
+ )
+
+ # Link the advisory even when the reference already existed.
+ ref.advisories.add(adv)
+ if created:
+ logger(f"Created {ref} for {adv} with {key}={url}")
+
+ except DataError as e:
+ logger(
+ f"Failed to Create the Vulnerability Reference For Exploit-DB with error {e!r}:\n{traceback_format_exc()}",
+ level=logging.ERROR,
+ )
+
+
+def parse_date(date_string):
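+ """
+ Return `date_string` parsed and reformatted as "YYYY-MM-DD", or None if
+ `date_string` is empty or cannot be parsed.
+ """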
+ if date_string:
+ try:
+ date_obj = dateparser.parse(date_string).date()
+ return date_obj.strftime("%Y-%m-%d")
+ except Exception as e:
+ logging.error(
+ f"Error while parsing ExploitDB date '{date_string}' with error {e!r}:\n{traceback_format_exc()}"
+ )
+ return
diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py
new file mode 100644
index 000000000..486d79232
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py
@@ -0,0 +1,103 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import logging
+from traceback import format_exc as traceback_format_exc
+
+import requests
+from aboutcode.pipeline import LoopProgress
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines import VulnerableCodePipeline
+
+
+class VulnerabilityKevPipeline(VulnerableCodePipeline):
+ """
+ Known Exploited Vulnerabilities Pipeline: Retrieve KEV data, iterate through it to identify vulnerabilities
+ by their associated aliases, and create or update the corresponding Exploit instances.
+ """
+
+ pipeline_id = "enhance_with_kev_v2"
+ license_expression = None
+
+ @classmethod
+ def steps(cls):
+ return (
+ cls.fetch_exploits,
+ cls.add_exploits,
+ )
+
+ def fetch_exploits(self):
+ kev_url = "https://raw.githubusercontent.com/aboutcode-org/aboutcode-mirror-kev/refs/heads/main/known_exploited_vulnerabilities.json"
+ self.log(f"Fetching {kev_url}")
+
+ try:
+ response = requests.get(kev_url)
+ response.raise_for_status()
+ except requests.exceptions.HTTPError as http_err:
+ self.log(
+ f"Failed to fetch the KEV Exploits: {kev_url} with error {http_err!r}:\n{traceback_format_exc()}",
+ level=logging.ERROR,
+ )
+ raise
+ self.kev_data = response.json()
+
+ def add_exploits(self):
+ fetched_exploit_count = self.kev_data.get("count")
+ self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records")
+
+ vulnerability_exploit_count = 0
+ progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log)
+
+ for record in progress.iter(self.kev_data.get("vulnerabilities", [])):
+ vulnerability_exploit_count += add_vulnerability_exploit(
+ kev_vul=record,
+ logger=self.log,
+ )
+
+ self.log(f"Successfully added {vulnerability_exploit_count:,d} kev exploit")
+
+
+def add_vulnerability_exploit(kev_vul, logger):
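+ """
+ Create or update a KEV AdvisoryExploit for each advisory matching the CVE
+ in `kev_vul`. Return 1 if matching advisories were found, 0 otherwise.
+ """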
+ cve_id = kev_vul.get("cveID")
+
+ if not cve_id:
+ return 0
+
+ advisories = set()
+ try:
+ # get() raises DoesNotExist rather than returning a falsy value,
+ # so the advisory_id fallback belongs in the except branch.
+ alias = AdvisoryAlias.objects.get(alias=cve_id)
+ for adv in alias.advisories.all():
+ advisories.add(adv)
+ except AdvisoryAlias.DoesNotExist:
+ for adv in AdvisoryV2.objects.filter(advisory_id=cve_id):
+ advisories.add(adv)
+
+ if not advisories:
+ logger(f"No advisory found for alias {cve_id}")
+ return 0
+
+ for advisory in advisories:
+ AdvisoryExploit.objects.update_or_create(
+ advisory=advisory,
+ data_source="KEV",
+ defaults={
+ "description": kev_vul["shortDescription"],
+ "date_added": kev_vul["dateAdded"],
+ "required_action": kev_vul["requiredAction"],
+ "due_date": kev_vul["dueDate"],
+ "notes": kev_vul["notes"],
+ "known_ransomware_campaign_use": True
+ if kev_vul["knownRansomwareCampaignUse"] == "Known"
+ else False,
+ },
+ )
+ return 1
diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py
new file mode 100644
index 000000000..fbfea5150
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py
@@ -0,0 +1,126 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import logging
+from traceback import format_exc as traceback_format_exc
+
+import requests
+import saneyaml
+from aboutcode.pipeline import LoopProgress
+from dateutil import parser as dateparser
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines import VulnerableCodePipeline
+
+
+class MetasploitImproverPipeline(VulnerableCodePipeline):
+ """
+ Metasploit Exploits Pipeline: Retrieve Metasploit data, iterate through it to identify vulnerabilities
+ by their associated aliases, and create or update the corresponding Exploit instances.
+ """
+
+ pipeline_id = "enhance_with_metasploit_v2"
+ spdx_license_expression = "BSD-3-clause"
+
+ @classmethod
+ def steps(cls):
+ return (
+ cls.fetch_exploits,
+ cls.add_advisory_exploits,
+ )
+
+ def fetch_exploits(self):
+ url = "https://raw.githubusercontent.com/rapid7/metasploit-framework/master/db/modules_metadata_base.json"
+ self.log(f"Fetching {url}")
+ try:
+ response = requests.get(url)
+ response.raise_for_status()
+ except requests.exceptions.HTTPError as http_err:
+ self.log(
+ f"Failed to fetch the Metasploit Exploits: {url} with error {http_err!r}:\n{traceback_format_exc()}",
+ level=logging.ERROR,
+ )
+ raise
+
+ self.metasploit_data = response.json()
+
+ def add_advisory_exploits(self):
+ fetched_exploit_count = len(self.metasploit_data)
+ self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records")
+
+ vulnerability_exploit_count = 0
+ progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log)
+ for _, record in progress.iter(self.metasploit_data.items()):
+ vulnerability_exploit_count += add_advisory_exploit(
+ record=record,
+ logger=self.log,
+ )
+ self.log(f"Successfully added {vulnerability_exploit_count:,d} vulnerability exploit")
+
+
+def add_advisory_exploit(record, logger):
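+ """
+ Create or update a Metasploit AdvisoryExploit for each advisory matching
+ the module references in `record`. Return 1 if matching advisories were
+ found, 0 otherwise.
+ """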
+ advisories = set()
+ references = record.get("references", [])
+
+ interesting_references = [
+ ref for ref in references if not ref.startswith("OSVDB") and not ref.startswith("URL-")
+ ]
+
+ if not interesting_references:
+ return 0
+
+ for ref in interesting_references:
+ try:
+ # get() raises DoesNotExist rather than returning a falsy value,
+ # so the advisory_id fallback belongs in the except branch.
+ alias = AdvisoryAlias.objects.get(alias=ref)
+ for adv in alias.advisories.all():
+ advisories.add(adv)
+ except AdvisoryAlias.DoesNotExist:
+ for adv in AdvisoryV2.objects.filter(advisory_id=ref):
+ advisories.add(adv)
+
+ if not advisories:
+ logger(f"No advisories found for aliases {interesting_references}")
+ return 0
+
+ description = record.get("description", "")
+ notes = record.get("notes", {})
+ platform = record.get("platform")
+
+ source_url = ""
+ if path := record.get("path"):
+ source_url = f"https://github.com/rapid7/metasploit-framework/tree/master{path}"
+ source_date_published = None
+
+ if disclosure_date := record.get("disclosure_date"):
+ try:
+ source_date_published = dateparser.parse(disclosure_date).date()
+ except ValueError as e:
+ logger(
+ f"Error while parsing date {disclosure_date} with error {e!r}:\n{traceback_format_exc()}",
+ level=logging.ERROR,
+ )
+
+ for advisory in advisories:
+ AdvisoryExploit.objects.update_or_create(
+ advisory=advisory,
+ data_source="Metasploit",
+ defaults={
+ "description": description,
+ "notes": saneyaml.dump(notes),
+ "source_date_published": source_date_published,
+ "platform": platform,
+ "source_url": source_url,
+ },
+ )
+ return 1
diff --git a/vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py b/vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py
new file mode 100644
index 000000000..8a4825df4
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py
@@ -0,0 +1,104 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import logging
+from itertools import groupby
+from traceback import format_exc as traceback_format_exc
+
+from aboutcode.pipeline import LoopProgress
+from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS
+from fetchcode.package_versions import versions
+from packageurl import PackageURL
+
+from vulnerabilities.models import PackageV2
+from vulnerabilities.pipelines import VulnerableCodePipeline
+
+
+class FlagGhostPackagePipeline(VulnerableCodePipeline):
+ """Detect and flag packages that do not exist upstream."""
+
+ pipeline_id = "flag_ghost_packages_v2"
+
+ @classmethod
+ def steps(cls):
+ return (cls.flag_ghost_packages,)
+
+ def flag_ghost_packages(self):
+ detect_and_flag_ghost_packages(logger=self.log)
+
+
+def detect_and_flag_ghost_packages(logger=None):
+ """Check if packages are available upstream. If not, mark them as ghost package."""
+ interesting_packages_qs = (
+ PackageV2.objects.order_by("type", "namespace", "name")
+ .filter(type__in=FETCHCODE_SUPPORTED_ECOSYSTEMS)
+ .filter(qualifiers="")
+ .filter(subpath="")
+ )
+
+ distinct_packages_count = (
+ interesting_packages_qs.values("type", "namespace", "name")
+ .distinct("type", "namespace", "name")
+ .count()
+ )
+
+ grouped_packages = groupby(
+ interesting_packages_qs.paginated(),
+ key=lambda pkg: (pkg.type, pkg.namespace, pkg.name),
+ )
+
+ ghost_package_count = 0
+ progress = LoopProgress(total_iterations=distinct_packages_count, logger=logger)
+ for type_namespace_name, packages in progress.iter(grouped_packages):
+ ghost_package_count += flag_ghost_packages(
+ base_purl=PackageURL(*type_namespace_name),
+ packages=packages,
+ logger=logger,
+ )
+
+ if logger:
+ logger(f"Successfully flagged {ghost_package_count:,d} ghost Packages")
+
+
+def flag_ghost_packages(base_purl, packages, logger=None):
+ """
+ Check if `packages` are available upstream.
+ If not, update `is_ghost` to `True`.
+ Return the number of packages flagged as ghost.
+ """
+ known_versions = get_versions(purl=base_purl, logger=logger)
+ # Skip if encounter error while fetching known versions
+ if known_versions is None:
+ return 0
+
+ ghost_packages = 0
+ for pkg in packages:
+ pkg.is_ghost = False
+ if pkg.version.lstrip("vV") not in known_versions:
+ pkg.is_ghost = True
+ ghost_packages += 1
+
+ if logger:
+ logger(f"Flagging ghost package {pkg.purl!s}", level=logging.DEBUG)
+ pkg.save()
+
+ return ghost_packages
+
+
+def get_versions(purl, logger=None):
+ """Return set of known versions for the given purl."""
+ try:
+ return {v.value.lstrip("vV") for v in versions(str(purl))}
+ except Exception as e:
+ if logger:
+ logger(
+ f"Error while fetching known versions for {purl!s}: {e!r} \n {traceback_format_exc()}",
+ level=logging.ERROR,
+ )
+ return
diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py
index 46f8b1ed3..d5d88fbfd 100644
--- a/vulnerabilities/pipes/advisory.py
+++ b/vulnerabilities/pipes/advisory.py
@@ -22,6 +22,11 @@
from vulnerabilities.importer import AdvisoryData
from vulnerabilities.improver import MAX_CONFIDENCE
from vulnerabilities.models import Advisory
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryReference
+from vulnerabilities.models import AdvisorySeverity
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.models import AdvisoryWeakness
from vulnerabilities.models import AffectedByPackageRelatedVulnerability
from vulnerabilities.models import Alias
from vulnerabilities.models import FixingPackageRelatedVulnerability
@@ -38,6 +43,61 @@ def get_or_create_aliases(aliases: List) -> QuerySet:
return Alias.objects.filter(alias__in=aliases)
+
+
+def get_or_create_advisory_aliases(aliases: List[str]) -> List[AdvisoryAlias]:
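+ """
+ Return AdvisoryAlias objects for all `aliases`, creating any that do not
+ yet exist. bulk_create with ignore_conflicts=True tolerates concurrent
+ inserts; the final query returns both existing and newly created rows.
+ """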
+ existing = AdvisoryAlias.objects.filter(alias__in=aliases)
+ existing_aliases = {a.alias for a in existing}
+
+ to_create = [AdvisoryAlias(alias=alias) for alias in aliases if alias not in existing_aliases]
+ AdvisoryAlias.objects.bulk_create(to_create, ignore_conflicts=True)
+
+ return list(AdvisoryAlias.objects.filter(alias__in=aliases))
+
+
+def get_or_create_advisory_references(references: List) -> List[AdvisoryReference]:
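+ """
+ Return AdvisoryReference objects for all `references`, creating any whose
+ URL does not yet exist.
+ """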
+ reference_urls = [ref.url for ref in references]
+ existing = AdvisoryReference.objects.filter(url__in=reference_urls)
+ existing_urls = {r.url for r in existing}
+
+ to_create = [
+ AdvisoryReference(reference_id=ref.reference_id, url=ref.url)
+ for ref in references
+ if ref.url not in existing_urls
+ ]
+ AdvisoryReference.objects.bulk_create(to_create, ignore_conflicts=True)
+
+ return list(AdvisoryReference.objects.filter(url__in=reference_urls))
+
+
+def get_or_create_advisory_severities(severities: List) -> QuerySet:
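+ """
+ Return a QuerySet of AdvisorySeverity objects for `severities`, creating
+ any that do not yet exist for the same scoring system, value, elements,
+ and URL.
+ """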
+ severity_objs = []
+ for severity in severities:
+ published_at = str(severity.published_at) if severity.published_at else None
+ sev, _ = AdvisorySeverity.objects.get_or_create(
+ scoring_system=severity.system.identifier,
+ value=severity.value,
+ scoring_elements=severity.scoring_elements,
+ url=severity.url,
+ defaults={
+ "published_at": published_at,
+ },
+ )
+ severity_objs.append(sev)
+ return AdvisorySeverity.objects.filter(id__in=[severity.id for severity in severity_objs])
+
+
+def get_or_create_advisory_weaknesses(weaknesses: List[str]) -> List[AdvisoryWeakness]:
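+ """
+ Return AdvisoryWeakness objects for all CWE ids in `weaknesses`, creating
+ any that do not yet exist.
+ """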
+ existing = AdvisoryWeakness.objects.filter(cwe_id__in=weaknesses)
+ existing_ids = {w.cwe_id for w in existing}
+
+ to_create = [AdvisoryWeakness(cwe_id=w) for w in weaknesses if w not in existing_ids]
+ AdvisoryWeakness.objects.bulk_create(to_create, ignore_conflicts=True)
+
+ return list(AdvisoryWeakness.objects.filter(cwe_id__in=weaknesses))
+
+
def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = None):
from vulnerabilities.utils import compute_content_id
@@ -76,6 +136,64 @@ def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable =
return advisory_obj
+def insert_advisory_v2(
+ advisory: AdvisoryData,
+ pipeline_id: str,
+ get_advisory_packages: Callable,
+ logger: Callable = None,
+):
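+ """
+ Insert `advisory` as an AdvisoryV2 with its aliases, references, severities,
+ weaknesses, and affected/fixed packages. Return the AdvisoryV2 object, or
+ None on error.
+ """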
+ from vulnerabilities.utils import compute_content_id
+
+ advisory_obj = None
+ aliases = get_or_create_advisory_aliases(aliases=advisory.aliases)
+ references = get_or_create_advisory_references(references=advisory.references_v2)
+ severities = get_or_create_advisory_severities(severities=advisory.severities)
+ weaknesses = get_or_create_advisory_weaknesses(weaknesses=advisory.weaknesses)
+ content_id = compute_content_id(advisory_data=advisory)
+ affecting_packages, fixed_by_packages = get_advisory_packages(advisory_data=advisory)
+ try:
+ default_data = {
+ "datasource_id": pipeline_id,
+ "advisory_id": advisory.advisory_id,
+ "avid": f"{pipeline_id}/{advisory.advisory_id}",
+ "summary": advisory.summary,
+ "date_published": advisory.date_published,
+ "date_collected": datetime.now(timezone.utc),
+ }
+
+ advisory_obj, _ = AdvisoryV2.objects.get_or_create(
+ unique_content_id=content_id,
+ url=advisory.url,
+ defaults=default_data,
+ )
+ related_fields = {
+ "aliases": aliases,
+ "references": references,
+ "severities": severities,
+ "weaknesses": weaknesses,
+ "fixed_by_packages": fixed_by_packages,
+ "affecting_packages": affecting_packages,
+ }
+
+ for field_name, values in related_fields.items():
+ if values:
+ getattr(advisory_obj, field_name).add(*values)
+
+ except AdvisoryV2.MultipleObjectsReturned:
+ if logger:
+ logger(
+ f"Multiple Advisories returned: unique_content_id: {content_id}, url: {advisory.url}, advisory: {advisory!r}",
+ level=logging.ERROR,
+ )
+ raise
+ except Exception as e:
+ if logger:
+ logger(
+ f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}",
+ level=logging.ERROR,
+ )
+
+ return advisory_obj
+
+
@transaction.atomic
def import_advisory(
advisory: Advisory,
diff --git a/vulnerabilities/risk.py b/vulnerabilities/risk.py
index a4508a03f..56f19171e 100644
--- a/vulnerabilities/risk.py
+++ b/vulnerabilities/risk.py
@@ -36,6 +36,8 @@ def get_weighted_severity(severities):
score_list = []
for severity in severities:
+ if not severity.url:
+ continue
parsed_url = urlparse(severity.url)
severity_source = parsed_url.netloc.replace("www.", "", 1)
weight = WEIGHT_CONFIG.get(severity_source, DEFAULT_WEIGHT)
@@ -112,3 +114,19 @@ def compute_package_risk(package):
return
return round(max(result), 1)
+
+
+def compute_package_risk_v2(package):
+ """
+ Calculate the risk for a package by iterating over all advisories that affect
+ this package and determining the associated risk.
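+
+ For example, a package affected by advisories with risk scores 2.1 and 3.14
+ gets round(max(2.1, 3.14), 1) == 3.1 as its package risk score.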
+ """
+ result = []
+ for advisory in package.affected_by_advisories.all():
+ if risk := advisory.risk_score:
+ result.append(float(risk))
+
+ if not result:
+ return
+
+ return round(max(result), 1)
diff --git a/vulnerabilities/templates/advisory_detail.html b/vulnerabilities/templates/advisory_detail.html
new file mode 100644
index 000000000..8a386d4ec
--- /dev/null
+++ b/vulnerabilities/templates/advisory_detail.html
@@ -0,0 +1,614 @@
+{% extends "base.html" %}
+{% load humanize %}
+{% load widget_tweaks %}
+{% load static %}
+{% load show_cvss %}
+{% load url_filters %}
+
+{% block title %}
+VulnerableCode Advisory Details - {{ advisory.advisory_id }}
+{% endblock %}
+
+{% block content %}
+
+{% if advisory %}
+
+
+
+
+
+
+
+
+
+
+
+ | Advisory ID |
+ {{ advisory.datasource_id }}/{{ advisory.advisory_id }} |
+
+
+ | Aliases |
+
+ {% for alias in aliases %}
+ {% if alias.url %}
+ {{ alias }}
+ {% else %}
+ {{ alias }}
+ {% endif %}
+
+ {% endfor %}
+ |
+
+
+ | Summary |
+ {{ advisory.summary }}
+ |
+
+ {% if severity_score_range %}
+
+ | Severity score range |
+ {{ severity_score_range }}
+ |
+ {% endif %}
+
+
+ | Status |
+ {{ status }} |
+
+
+
+ |
+ Exploitability |
+
+ {{ advisory.exploitability }}
+ |
+
+
+
+ | Weighted Severity |
+
+ {{ advisory.weighted_severity }}
+ |
+
+
+
+ | Risk |
+
+ {{ advisory.risk_score }}
+ |
+
+
+ | Affected and Fixed Packages |
+
+
+ Package Details
+
+ |
+
+
+
+
+ Weaknesses ({{ weaknesses|length }})
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ | Reference id |
+ Reference type |
+ URL |
+
+
+ {% for ref in references %}
+
+ {% if ref.reference_id %}
+ | {{ ref.reference_id }} |
+ {% else %}
+ |
+ {% endif %}
+
+ {% if ref.reference_type %}
+ {{ ref.get_reference_type_display }} |
+ {% else %}
+ |
+ {% endif %}
+
+ {{ ref.url }} |
+
+ {% empty %}
+
+ |
+ There are no known references.
+ |
+
+ {% endfor %}
+
+
+
+
+ {% for exploit in advisory.exploits.all %}
+
+ {% empty %}
+
+ |
+ No exploits are available.
+ |
+
+ {% endfor %}
+
+
+
+ {% for severity_vector in severity_vectors %}
+ {% if severity_vector.vector.version == '2.0' %}
+ Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }}
+
+ {% elif severity_vector.vector.version == '3.1' or severity_vector.vector.version == '3.0'%}
+ Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }}
+
+ {% elif severity_vector.vector.version == '4' %}
+ Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }}
+
+ {% elif severity_vector.vector.version == 'ssvc' %}
+
+ Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }}
+
+ {% endif %}
+ {% empty %}
+
+ |
+ There are no known vectors.
+ |
+
+ {% endfor %}
+
+
+
+
+ {% if epss_data %}
+
+ Exploit Prediction Scoring System (EPSS)
+
+
+
+
+ |
+
+ Percentile
+
+ |
+ {{ epss_data.percentile }} |
+
+
+ |
+
+ EPSS Score
+
+ |
+ {{ epss_data.score }} |
+
+ {% if epss_data.published_at %}
+
+ |
+
+ Published At
+
+ |
+ {{ epss_data.published_at }} |
+
+ {% endif %}
+
+
+ {% else %}
+ No EPSS data available for this advisory.
+ {% endif %}
+
+
+
+
+
+
+{% endif %}
+
+
+
+
+
+{% endblock %}
\ No newline at end of file
diff --git a/vulnerabilities/templates/advisory_package_details.html b/vulnerabilities/templates/advisory_package_details.html
new file mode 100644
index 000000000..0f4c71044
--- /dev/null
+++ b/vulnerabilities/templates/advisory_package_details.html
@@ -0,0 +1,88 @@
+{% extends "base.html" %}
+{% load humanize %}
+{% load widget_tweaks %}
+{% load static %}
+{% load show_cvss %}
+{% load url_filters %}
+
+{% block title %}
+VulnerableCode Advisory Package Details - {{ advisory.advisory_id }}
+{% endblock %}
+
+{% block content %}
+
+{% if advisory %}
+
+
+
+
+
+
+
+ | Affected |
+ Fixed by |
+
+
+
+ {% for package in affected_packages %}
+
+ |
+ {{ package.purl }}
+ |
+
+
+ {% for match in all_affected_fixed_by_matches %}
+ {% if match.affected_package == package %}
+ {% if match.matched_fixed_by_packages|length > 0 %}
+ {% for pkg in match.matched_fixed_by_packages %}
+ {{ pkg }}
+
+ {% endfor %}
+ {% else %}
+ There are no reported fixed by versions.
+ {% endif %}
+ {% endif %}
+ {% endfor %}
+
+ |
+
+ {% empty %}
+
+ |
+ This vulnerability is not known to affect any packages.
+ |
+
+ {% endfor %}
+
+
+
+
+
+{% endif %}
+
+
+
+
+
+{% endblock %}
\ No newline at end of file
diff --git a/vulnerabilities/templates/index_v2.html b/vulnerabilities/templates/index_v2.html
new file mode 100644
index 000000000..962b5f79f
--- /dev/null
+++ b/vulnerabilities/templates/index_v2.html
@@ -0,0 +1,33 @@
+{% extends "base.html" %}
+{% load widget_tweaks %}
+
+{% block title %}
+VulnerableCode Home
+{% endblock %}
+
+{% block content %}
+
+
+
+
+ {% include "package_search_box_v2.html" %}
+
+
+
+
+ VulnerableCode aggregates software
+ vulnerabilities from multiple public advisory sources
+ and presents their details along with their affected
+ packages and fixed-by packages identified by
+ Package URLs (PURLs).
+
+
+ What's new in this release:
+
+ Check out the latest updates here!
+
+
+
+
+
+{% endblock %}
\ No newline at end of file
diff --git a/vulnerabilities/templates/package_details_v2.html b/vulnerabilities/templates/package_details_v2.html
new file mode 100644
index 000000000..54cb8ffed
--- /dev/null
+++ b/vulnerabilities/templates/package_details_v2.html
@@ -0,0 +1,365 @@
+{% extends "base.html" %}
+{% load humanize %}
+{% load widget_tweaks %}
+{% load static %}
+{% load url_filters %}
+
+{% block title %}
+VulnerableCode Package Details - {{ package.purl }}
+{% endblock %}
+
+{% block content %}
+
+ {% include "package_search_box_v2.html"%}
+
+
+{% if package %}
+
+
+
+
+
+
+
+
+
+ {% if affected_by_advisories|length != 0 %}
+
+ {% else %}
+
+ {% endif %}
+
+
+
+ |
+
+ purl
+
+ |
+
+ {{ fixed_package_details.purl.to_string }}
+ |
+
+ {% if package.is_ghost %}
+
+ |
+ Tags
+ |
+
+
+ Ghost
+
+ |
+
+ {% endif %}
+
+
+
+ {% if affected_by_advisories|length != 0 %}
+
+
+
+ {% endif %}
+
+
+
+ Vulnerabilities affecting this package ({{ affected_by_advisories|length }})
+
+
+
+
+
+ | Advisory |
+ Source |
+ Date Published |
+ Summary |
+ Fixed in package version |
+
+
+
+
+ {% for advisory in affected_by_advisories %}
+
+
+
+ {{advisory.avid }}
+
+
+ {% if advisory.alias|length != 0 %}
+ Aliases:
+ {% endif %}
+
+ {% for alias in advisory.alias %}
+ {% if alias.url %}
+ {{ alias }}
+
+ {% else %}
+ {{ alias }}
+
+ {% endif %}
+ {% endfor %}
+ |
+
+ {{advisory.url}}
+ |
+
+ {{advisory.date_published}}
+ |
+
+ {{ advisory.summary }}
+ |
+
+ {% if package.purl == fixed_package_details.purl.to_string %}
+ {% for key, value in fixed_package_details.items %}
+ {% if key == "advisories" %}
+ {% for vuln in value %}
+ {% if vuln.advisory.advisory_id == advisory.advisory_id %}
+ {% if vuln.fixed_by_package_details is None %}
+ There are no reported fixed by versions.
+ {% else %}
+ {% for fixed_pkg in vuln.fixed_by_package_details %}
+
+ {% if fixed_pkg.fixed_by_purl_advisories|length == 0 %}
+ {{ fixed_pkg.fixed_by_purl.version }}
+
+ Subject of 0 other advisories.
+ {% else %}
+ {{ fixed_pkg.fixed_by_purl.version }}
+ {% if fixed_pkg.fixed_by_purl_advisories|length != 1 %}
+
+ Subject of {{ fixed_pkg.fixed_by_purl_advisories|length }} other
+ advisories.
+ {% else %}
+
+ Subject of {{ fixed_pkg.fixed_by_purl_advisories|length }} other
+ advisory.
+ {% endif %}
+
+
+ {% endif %}
+
+ {% endfor %}
+ {% endif %}
+ {% endif %}
+ {% endfor %}
+ {% endif %}
+ {% endfor %}
+ {% endif %}
+ |
+
+ {% empty %}
+
+ |
+ This package is not known to be the subject of any advisories.
+ |
+
+ {% endfor %}
+
+
+
+
+
+
+ Vulnerabilities fixed by this package ({{ fixing_advisories|length }})
+
+
+
+
+
+ | Advisory |
+ Source |
+ Date Published |
+ Summary |
+ Aliases |
+
+
+
+ {% for advisory in fixing_advisories %}
+
+ |
+
+ {{advisory.avid }}
+
+ |
+
+ {{advisory.url}}
+ |
+
+ {{advisory.date_published}}
+ |
+
+ {{ advisory.summary }}
+ |
+
+ {% for alias in advisory.alias %}
+ {% if alias.url %}
+ {{ alias }}
+
+ {% else %}
+ {{ alias }}
+
+ {% endif %}
+ {% endfor %}
+ |
+
+ {% empty %}
+
+ |
+ This package is not known to fix any advisories.
+ |
+
+ {% endfor %}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{% endif %}
+{% endblock %}
diff --git a/vulnerabilities/templates/package_search_box_v2.html b/vulnerabilities/templates/package_search_box_v2.html
new file mode 100644
index 000000000..e78d400e6
--- /dev/null
+++ b/vulnerabilities/templates/package_search_box_v2.html
@@ -0,0 +1,48 @@
+{% load widget_tweaks %}
+
+
+ Search for packages
+
+
+
+
diff --git a/vulnerabilities/templates/packages_v2.html b/vulnerabilities/templates/packages_v2.html
new file mode 100644
index 000000000..fe2b05abe
--- /dev/null
+++ b/vulnerabilities/templates/packages_v2.html
@@ -0,0 +1,84 @@
+{% extends "base.html" %}
+{% load humanize %}
+{% load widget_tweaks %}
+
+{% block title %}
+VulnerableCode Package Search
+{% endblock %}
+
+{% block content %}
+
+ {% include "package_search_box_v2.html" %}
+
+
+{% if search %}
+
+
+
+
+ {{ page_obj.paginator.count|intcomma }} results
+
+ {% if is_paginated %}
+ {% include 'includes/pagination.html' with page_obj=page_obj %}
+ {% endif %}
+
+
+
+
+
+
+
+
+
+ |
+
+ Package URL
+
+ |
+
+
+ Affected by vulnerabilities
+
+ |
+
+
+ Fixing vulnerabilities
+
+ |
+
+
+
+ {% for package in page_obj %}
+
+ |
+ {{ package.purl }}
+ |
+ {{ package.vulnerability_count }} |
+ {{ package.patched_vulnerability_count }} |
+
+ {% empty %}
+
+ |
+ No packages found.
+ |
+
+ {% endfor %}
+
+
+
+
+ {% if is_paginated %}
+ {% include 'includes/pagination.html' with page_obj=page_obj %}
+ {% endif %}
+
+
+{% endif %}
+{% endblock %}
diff --git a/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py
new file mode 100644
index 000000000..94454c473
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py
@@ -0,0 +1,161 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import pytest
+import requests
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import ApacheHTTPDImporterPipeline
+from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import fetch_links
+from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import get_weaknesses
+
+
+# Dummy responses
+class DummyResponseContent:
+ def __init__(self, content_bytes):
+ self.content = content_bytes
+
+
+class DummyResponseJSON:
+ def __init__(self, json_data):
+ self._json = json_data
+
+ def json(self):
+ return self._json
+
+
+# Tests for fetch_links
+@pytest.fixture(autouse=True)
+def no_requests(monkeypatch):
+ # Ensure other tests don't hit real HTTP
+ monkeypatch.setattr(
+ requests,
+ "get",
+ lambda url: (_ for _ in ()).throw(AssertionError(f"Unexpected HTTP GET call to {url}")),
+ )
+
+
+def test_fetch_links_filters_and_resolves(monkeypatch):
+ html = """
+
+ A1
+ A2
+ TXT
+
+ """
+ base_url = "https://example.com/base/"
+ # Monkeypatch HTTP GET for HTML
+ def fake_get(url):
+ assert url == base_url
+ return DummyResponseContent(html.encode("utf-8"))
+
+ monkeypatch.setattr(requests, "get", fake_get)
+ links = fetch_links(base_url)
+ assert len(links) == 2
+ assert links == [
+ "https://example.com/base/advisory1.json",
+ "https://example.com/json/advisory2.json",
+ ]
+
+
+# Tests for get_weaknesses
+def test_get_weaknesses_with_cna_structure():
+ mock_data = {
+ "containers": {"cna": {"problemTypes": [{"descriptions": [{"cweId": "CWE-125"}]}]}}
+ }
+ result = get_weaknesses(mock_data)
+ assert result == [125]
+
+
+def test_get_weaknesses_with_data_meta_structure():
+ mock_data = {
+ "CVE_data_meta": {"ID": "CVE-2020-0001"},
+ "problemtype": {
+ "problemtype_data": [
+ {"description": [{"value": "CWE-190 Integer Overflow"}]},
+ {"description": [{"value": "CWE-200 Some Issue"}]},
+ ]
+ },
+ }
+ result = get_weaknesses(mock_data)
+ assert set(result) == {190, 200}
+
+
+# Tests for ApacheHTTPDImporterPipeline
+class DummyPipeline(ApacheHTTPDImporterPipeline):
+ # Plain subclass used to exercise the pipeline in tests
+ pass
+
+
+@pytest.fixture
+def pipeline(monkeypatch):
+ pipe = DummyPipeline()
+ # Prevent real HTTP in fetch_links
+ monkeypatch.setattr(
+ "vulnerabilities.pipelines.v2_importers.apache_httpd_importer.fetch_links",
+ lambda url: ["u1", "u2"],
+ )
+ return pipe
+
+
+def test_advisories_count(monkeypatch, pipeline):
+ # Should use mocked links
+ count = pipeline.advisories_count()
+ assert count == 2
+
+
+def test_collect_advisories_and_to_advisory(monkeypatch, pipeline):
+ # Prepare two dummy JSONs
+ sample1 = {
+ "CVE_data_meta": {"ID": "CVE-1"},
+ "description": {"description_data": [{"lang": "eng", "value": "Test desc"}]},
+ "impact": [{"other": "5.0"}],
+ "affects": {"vendor": {"vendor_data": []}},
+ "timeline": [],
+ }
+ sample2 = {
+ "cveMetadata": {"cveId": "CVE-2"},
+ "description": {"description_data": [{"lang": "eng", "value": "Other desc"}]},
+ "impact": [{"other": "7.5"}],
+ "affects": {"vendor": {"vendor_data": []}},
+ "timeline": [],
+ }
+ # Monkeypatch requests.get to return JSON
+ def fake_get(u):
+ if u == "u1":
+ return DummyResponseJSON(sample1)
+ elif u == "u2":
+ return DummyResponseJSON(sample2)
+ else:
+ raise AssertionError(f"Unexpected URL {u}")
+
+ monkeypatch.setattr(requests, "get", fake_get)
+ advisories = list(pipeline.collect_advisories())
+ assert len(advisories) == 2
+ # Validate first advisory
+ adv1 = advisories[0]
+ assert isinstance(adv1, AdvisoryData)
+ assert adv1.advisory_id == "CVE-1"
+ assert adv1.summary == "Test desc"
+ assert adv1.severities and adv1.severities[0].value == "5.0"
+ assert adv1.url.endswith("CVE-1.json")
+ # Validate second advisory
+ adv2 = advisories[1]
+ assert adv2.advisory_id == "CVE-2"
+ assert adv2.summary == "Other desc"
+ assert adv2.severities[0].value == "7.5"
+
+
+# Test version range conversion error
+def test_to_version_ranges_unknown_comparator(pipeline):
+ # versions_data with a bad comparator
+ versions_data = [{"version_value": "1.0.0", "version_affected": "<>"}]
+ fixed_versions = []
+ with pytest.raises(ValueError):
+ pipeline.to_version_ranges(versions_data, fixed_versions)
diff --git a/vulnerabilities/tests/pipelines/test_collect_commits_v2.py b/vulnerabilities/tests/pipelines/test_collect_commits_v2.py
new file mode 100644
index 000000000..dddec9084
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_collect_commits_v2.py
@@ -0,0 +1,131 @@
+from datetime import datetime
+from unittest.mock import patch
+
+import pytest
+
+from vulnerabilities.models import AdvisoryReference
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.models import CodeFixV2
+from vulnerabilities.models import PackageV2
+from vulnerabilities.pipelines.v2_improvers.collect_commits import CollectFixCommitsPipeline
+from vulnerabilities.pipelines.v2_improvers.collect_commits import is_vcs_url
+from vulnerabilities.pipelines.v2_improvers.collect_commits import is_vcs_url_already_processed
+from vulnerabilities.pipelines.v2_improvers.collect_commits import normalize_vcs_url
+
+
+@pytest.mark.parametrize(
+ "url,expected",
+ [
+ ("git://github.com/angular/di.js.git", True),
+ ("github:user/repo", True),
+ ("user/repo", True),
+ ("https://github.com/user/repo.git", True),
+ ("git@github.com:user/repo.git", True),
+ ("ftp://example.com/not-a-repo", False),
+ ("random-string", False),
+ ("https://example.com/not-a-repo", False),
+ ],
+)
+def test_is_vcs_url(url, expected):
+ assert is_vcs_url(url) is expected
+
+
+@pytest.mark.parametrize(
+ "url,normalized",
+ [
+ ("git@github.com:user/repo.git", "https://github.com/user/repo.git"),
+ ("github:user/repo", "https://github.com/user/repo"),
+ ("bitbucket:example/repo", "https://bitbucket.org/example/repo"),
+ ("user/repo", "https://github.com/user/repo"),
+ ("https://gitlab.com/foo/bar.git", "https://gitlab.com/foo/bar.git"),
+ ],
+)
+def test_normalize_vcs_url(url, normalized):
+ assert normalize_vcs_url(url) == normalized
+
+
+@pytest.mark.django_db
+def test_is_vcs_url_already_processed_true():
+ advisory = AdvisoryV2.objects.create(
+ advisory_id="CVE-2025-9999",
+ datasource_id="test-ds",
+ avid="test-ds/CVE-2025-9999",
+ url="https://example.com/advisory/CVE-2025-9999",
+ unique_content_id="11111",
+ date_collected=datetime.now(),
+ )
+ package = PackageV2.objects.create(
+ type="bar",
+ name="foo",
+ version="1.0",
+ )
+ advisory.affecting_packages.add(package)
+ advisory.save()
+ CodeFixV2.objects.create(
+ commits=["https://github.com/user/repo/commit/abc123"],
+ advisory=advisory,
+ affected_package=package,
+ )
+ assert is_vcs_url_already_processed("https://github.com/user/repo/commit/abc123") is True
+
+
+@pytest.mark.django_db
+def test_collect_fix_commits_pipeline_creates_entry():
+ advisory = AdvisoryV2.objects.create(
+ advisory_id="CVE-2025-1000",
+ datasource_id="test-ds",
+ avid="test-ds/CVE-2025-1000",
+ url="https://example.com/advisory/CVE-2025-1000",
+ unique_content_id="11111",
+ date_collected=datetime.now(),
+ )
+ package = PackageV2.objects.create(
+ type="foo",
+ name="testpkg",
+ version="1.0",
+ )
+ reference = AdvisoryReference.objects.create(
+ url="https://github.com/test/testpkg/commit/abc123"
+ )
+ advisory.affecting_packages.add(package)
+ advisory.references.add(reference)
+ advisory.save()
+
+ pipeline = CollectFixCommitsPipeline()
+ pipeline.collect_and_store_fix_commits()
+
+ codefixes = CodeFixV2.objects.all()
+ assert codefixes.count() == 1
+ fix = codefixes.first()
+ assert "abc123" in fix.commits[0]
+ assert fix.advisory == advisory
+ assert fix.affected_package == package
+
+
+@pytest.mark.django_db
+def test_collect_fix_commits_pipeline_skips_non_commit_urls():
+ advisory = AdvisoryV2.objects.create(
+ advisory_id="CVE-2025-2000",
+ datasource_id="test-ds",
+ avid="test-ds/CVE-2025-2000",
+ url="https://example.com/advisory/CVE-2025-2000",
+ unique_content_id="11111",
+ date_collected=datetime.now(),
+ )
+ package = PackageV2.objects.create(
+ type="pypi",
+ name="otherpkg",
+ version="2.0",
+ )
+
+ advisory.affecting_packages.add(package)
+
+ reference = AdvisoryReference.objects.create(url="https://github.com/test/testpkg/issues/12")
+
+ advisory.references.add(reference)
+ advisory.save()
+
+ pipeline = CollectFixCommitsPipeline()
+ pipeline.collect_and_store_fix_commits()
+
+ assert CodeFixV2.objects.count() == 0
diff --git a/vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py b/vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py
new file mode 100644
index 000000000..4dbfb222a
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py
@@ -0,0 +1,69 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+from datetime import datetime
+from decimal import Decimal
+
+import pytest
+
+from vulnerabilities.models import AdvisorySeverity
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.models import AdvisoryWeakness
+from vulnerabilities.models import PackageV2
+from vulnerabilities.pipelines.v2_improvers.compute_package_risk import ComputePackageRiskPipeline
+from vulnerabilities.severity_systems import CVSSV3
+from vulnerabilities.severity_systems import GENERIC
+
+
+@pytest.mark.django_db
+def test_simple_risk_pipeline():
+ pkg = PackageV2.objects.create(type="pypi", name="foo", version="2.3.0")
+ assert PackageV2.objects.count() == 1
+
+ adv = AdvisoryV2(
+ advisory_id="VCID-Existing",
+ summary="vulnerability description here",
+ datasource_id="ds",
+ avid="ds/VCID-Existing",
+ unique_content_id="ajkef",
+ url="https://test.com",
+ date_collected=datetime.now(),
+ )
+ adv.save()
+
+ severity1 = AdvisorySeverity.objects.create(
+ url="https://nvd.nist.gov/vuln/detail/CVE-xxxx-xxx1",
+ scoring_system=CVSSV3.identifier,
+ scoring_elements="CVSS:3.0/AV:P/AC:H/PR:H/UI:R/S:C/C:H/I:H/A:N/E:H/RL:O/RC:R/CR:H/MAC:H/MC:L",
+ value="6.5",
+ )
+
+ severity2 = AdvisorySeverity.objects.create(
+ url="https://nvd.nist.gov/vuln/detail/CVE-xxxx-xxx1",
+ scoring_system=GENERIC.identifier,
+ value="MODERATE", # 6.9
+ )
+ adv.severities.add(severity1)
+ adv.severities.add(severity2)
+
+ weaknesses = AdvisoryWeakness.objects.create(cwe_id=119)
+ adv.weaknesses.add(weaknesses)
+
+ adv.affecting_packages.add(pkg)
+ adv.save()
+
+ improver = ComputePackageRiskPipeline()
+ improver.execute()
+
+ # The in-memory instance is stale until re-fetched from the database.
+ assert pkg.risk_score is None
+
+ improver = ComputePackageRiskPipeline()
+ improver.execute()
+
+ pkg = PackageV2.objects.get(type="pypi", name="foo", version="2.3.0")
+ assert pkg.risk_score == Decimal("3.1")
diff --git a/vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py b/vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py
new file mode 100644
index 000000000..eb8d3aebd
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py
@@ -0,0 +1,70 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+from unittest.mock import patch
+
+import pytest
+from univers.versions import Version
+
+from vulnerabilities.models import PackageV2
+from vulnerabilities.pipelines.v2_improvers.computer_package_version_rank import (
+ ComputeVersionRankPipeline,
+)
+
+
+@pytest.mark.django_db
+class TestComputeVersionRankPipeline:
+ @pytest.fixture
+ def pipeline(self):
+ return ComputeVersionRankPipeline()
+
+ @pytest.fixture
+ def packages(self, db):
+ package_type = "pypi"
+ namespace = "test_namespace"
+ name = "test_package"
+ PackageV2.objects.create(type=package_type, namespace=namespace, name=name, version="1.0.0")
+ PackageV2.objects.create(type=package_type, namespace=namespace, name=name, version="1.1.0")
+ PackageV2.objects.create(type=package_type, namespace=namespace, name=name, version="0.9.0")
+ return PackageV2.objects.filter(type=package_type, namespace=namespace, name=name)
+
+ def test_compute_and_store_version_rank(self, pipeline, packages):
+ with patch.object(pipeline, "log") as mock_log:
+ pipeline.compute_and_store_version_rank()
+ assert mock_log.call_count > 0
+ for package in packages:
+ assert package.version_rank is not None
+
+ def test_update_version_rank_for_group(self, pipeline, packages):
+ with patch.object(PackageV2.objects, "bulk_update") as mock_bulk_update:
+ pipeline.update_version_rank_for_group(packages)
+ mock_bulk_update.assert_called_once()
+ updated_packages = mock_bulk_update.call_args[0][0]
+ assert len(updated_packages) == len(packages)
+ # The pipeline assigns ranks starting at 1, in version-sorted order.
+ for idx, package in enumerate(sorted(packages, key=lambda p: Version(p.version)), start=1):
+ assert updated_packages[idx - 1].version_rank == idx
+
+ def test_sort_packages_by_version(self, pipeline, packages):
+ sorted_packages = pipeline.sort_packages_by_version(packages)
+ versions = [p.version for p in sorted_packages]
+ assert versions == sorted(versions, key=Version)
+
+ def test_sort_packages_by_version_empty(self, pipeline):
+ assert pipeline.sort_packages_by_version([]) == []
+
+ def test_sort_packages_by_version_invalid_scheme(self, pipeline, packages):
+ for package in packages:
+ package.type = "invalid"
+ assert pipeline.sort_packages_by_version(packages) == []
+
+ def test_compute_and_store_version_rank_invalid_scheme(self, pipeline):
+ PackageV2.objects.create(type="invalid", namespace="test", name="package", version="1.0.0")
+ with patch.object(pipeline, "log") as mock_log:
+ pipeline.compute_and_store_version_rank()
+ mock_log.assert_any_call("Successfully populated `version_rank` for all packages.")
diff --git a/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py b/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py
new file mode 100644
index 000000000..96359ca3c
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py
@@ -0,0 +1,108 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import shutil
+from pathlib import Path
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+import pytest
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.pipelines.v2_importers.elixir_security_importer import (
+ ElixirSecurityImporterPipeline,
+)
+
+
+@pytest.fixture
+def mock_vcs_response(tmp_path):
+ repo_dir = tmp_path / "repo"
+ repo_dir.mkdir()
+ packages_dir = repo_dir / "packages" / "some_package"
+ packages_dir.mkdir(parents=True)
+
+ advisory_file = packages_dir / "CVE-2022-9999.yml"
+ advisory_file.write_text(
+ """
+ cve: "2022-9999"
+ package: "plug"
+ description: "Cross-site scripting vulnerability in plug < 1.11.1"
+ patched_versions:
+ - ">= 1.11.1"
+ unaffected_versions:
+ - "< 1.0.0"
+ disclosure_date: "2022-12-01"
+ link: "https://github.com/plug/plug/security/advisories/GHSA-xxxx-yyyy"
+ """
+ )
+
+ mock = MagicMock()
+ mock.dest_dir = str(repo_dir)
+ mock.delete = MagicMock()
+ return mock
+
+
+@pytest.fixture
+def mock_fetch_via_vcs(mock_vcs_response):
+ with patch(
+ "vulnerabilities.pipelines.v2_importers.elixir_security_importer.fetch_via_vcs"
+ ) as mock:
+ mock.return_value = mock_vcs_response
+ yield mock
+
+
+def test_advisories_count(mock_fetch_via_vcs, mock_vcs_response):
+ importer = ElixirSecurityImporterPipeline()
+ importer.clone()
+ count = importer.advisories_count()
+ assert count == 1
+
+
+def test_collect_advisories(mock_fetch_via_vcs, mock_vcs_response):
+ importer = ElixirSecurityImporterPipeline()
+ importer.clone()
+ advisories = list(importer.collect_advisories())
+
+ assert len(advisories) == 1
+
+ advisory: AdvisoryData = advisories[0]
+ assert advisory.advisory_id == "CVE-2022-9999"
+ assert advisory.summary.startswith("Cross-site scripting vulnerability")
+ assert advisory.affected_packages[0].package.name == "plug"
+ assert advisory.affected_packages[0].package.type == "hex"
+ assert (
+ advisory.references_v2[0].url
+ == "https://github.com/plug/plug/security/advisories/GHSA-xxxx-yyyy"
+ )
+ assert advisory.date_published.isoformat().startswith("2022-12-01")
+
+
+def test_collect_advisories_skips_invalid_cve(mock_fetch_via_vcs, tmp_path):
+ repo_dir = tmp_path / "repo"
+ packages_dir = repo_dir / "packages"
+
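+    # mock_fetch_via_vcs shares this tmp_path, so a valid advisory may already exist
+    # under packages/; remove it so only the malformed advisory remains.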
+ if packages_dir.exists():
+ shutil.rmtree(packages_dir)
+ packages_dir.mkdir(parents=True, exist_ok=True)
+
+ advisory_file = packages_dir / "bad_advisory.yml"
+ advisory_file.write_text("cve: BAD-ID\npackage: x\n")
+
+ mock_response = MagicMock()
+ mock_response.dest_dir = str(repo_dir)
+ mock_response.delete = MagicMock()
+
+ with patch(
+ "vulnerabilities.pipelines.v2_importers.elixir_security_importer.fetch_via_vcs"
+ ) as mock:
+ mock.return_value = mock_response
+ importer = ElixirSecurityImporterPipeline()
+ importer.clone()
+ advisories = list(importer.collect_advisories())
+ assert len(advisories) == 0
diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py b/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py
new file mode 100644
index 000000000..865356158
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py
@@ -0,0 +1,56 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import os
+from datetime import datetime
+from unittest import mock
+from unittest.mock import Mock
+
+import pytest
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines.v2_improvers.enhance_with_exploitdb import ExploitDBImproverPipeline
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_DATA = os.path.join(BASE_DIR, "../test_data", "exploitdb_improver/files_exploits.csv")
+
+
+@pytest.mark.django_db
+@mock.patch("requests.get")
+def test_exploit_db_improver(mock_get):
+ mock_response = Mock(status_code=200)
+ with open(TEST_DATA, "r") as f:
+ mock_response.text = f.read()
+ mock_get.return_value = mock_response
+
+ improver = ExploitDBImproverPipeline()
+
+    # Run the improver when there are no matching aliases.
+ improver.execute()
+
+ assert AdvisoryExploit.objects.count() == 0
+
+ adv1 = AdvisoryV2.objects.create(
+ advisory_id="VCIO-123-2002",
+ datasource_id="ds",
+ avid="ds/VCIO-123-2002",
+ unique_content_id="i3giu",
+ url="https://test.com",
+ date_collected=datetime.now(),
+ )
+
+ alias = AdvisoryAlias.objects.create(alias="CVE-2009-3699")
+
+ adv1.aliases.add(alias)
+
+ # Run Exploit-DB Improver again when there are matching aliases.
+ improver.execute()
+ assert AdvisoryExploit.objects.count() == 1
diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py b/vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py
new file mode 100644
index 000000000..bd58fa5fd
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py
@@ -0,0 +1,57 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import os
+from datetime import datetime
+from unittest import mock
+from unittest.mock import Mock
+
+import pytest
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines.v2_improvers.enhance_with_kev import VulnerabilityKevPipeline
+from vulnerabilities.utils import load_json
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_DATA = os.path.join(BASE_DIR, "../test_data", "kev_data.json")
+
+
+@pytest.mark.django_db
+@mock.patch("requests.get")
+def test_kev_improver(mock_get):
+ mock_response = Mock(status_code=200)
+ mock_response.json.return_value = load_json(TEST_DATA)
+ mock_get.return_value = mock_response
+
+ improver = VulnerabilityKevPipeline()
+
+    # Run the improver when there are no matching aliases.
+ improver.execute()
+
+ assert AdvisoryExploit.objects.count() == 0
+
+ adv1 = AdvisoryV2.objects.create(
+ advisory_id="VCIO-123-2002",
+ datasource_id="ds",
+ avid="ds/VCIO-123-2002",
+ unique_content_id="i3giu",
+ url="https://test.com",
+ date_collected=datetime.now(),
+ )
+ adv1.save()
+
+ alias = AdvisoryAlias.objects.create(alias="CVE-2021-38647")
+
+ adv1.aliases.add(alias)
+
+    # Run the KEV Improver again when there are matching aliases.
+ improver.execute()
+ assert AdvisoryExploit.objects.count() == 1
diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py b/vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py
new file mode 100644
index 000000000..c20437145
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py
@@ -0,0 +1,56 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import os
+from datetime import datetime
+from unittest import mock
+from unittest.mock import Mock
+
+import pytest
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines.v2_improvers.enhance_with_metasploit import (
+ MetasploitImproverPipeline,
+)
+from vulnerabilities.utils import load_json
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_DATA = os.path.join(BASE_DIR, "../test_data", "metasploit_improver/modules_metadata_base.json")
+
+
+@pytest.mark.django_db
+@mock.patch("requests.get")
+def test_metasploit_improver(mock_get):
+ mock_response = Mock(status_code=200)
+ mock_response.json.return_value = load_json(TEST_DATA)
+ mock_get.return_value = mock_response
+
+ improver = MetasploitImproverPipeline()
+
+    # Run the improver when there are no matching aliases.
+ improver.execute()
+ assert AdvisoryExploit.objects.count() == 0
+
+ adv1 = AdvisoryV2.objects.create(
+ advisory_id="VCIO-123-2002",
+ datasource_id="ds",
+ avid="ds/VCIO-123-2002",
+ unique_content_id="i3giu",
+ url="https://test.com",
+ date_collected=datetime.now(),
+ )
+ alias = AdvisoryAlias.objects.create(alias="CVE-2007-4387")
+
+ adv1.aliases.add(alias)
+
+    # Run the Metasploit Improver again when there are matching aliases.
+ improver.execute()
+ assert AdvisoryExploit.objects.count() == 1
diff --git a/vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py b/vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py
new file mode 100644
index 000000000..d082fdc3a
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py
@@ -0,0 +1,111 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+from unittest.mock import patch
+
+import pytest
+from packageurl import PackageURL
+
+from vulnerabilities.models import PackageV2
+from vulnerabilities.pipelines.v2_improvers.flag_ghost_packages import (
+ detect_and_flag_ghost_packages,
+)
+from vulnerabilities.pipelines.v2_improvers.flag_ghost_packages import flag_ghost_packages
+
+
+@pytest.mark.django_db
+def test_flag_ghost_package_marked_correctly():
+ pkg = PackageV2.objects.create(
+ type="pypi",
+ namespace=None,
+ name="requests",
+ version="999.999.999",
+ )
+
+ with patch(
+ "vulnerabilities.pipelines.v2_improvers.flag_ghost_packages.get_versions"
+ ) as mock_get_versions:
+ mock_get_versions.return_value = {"2.25.1", "2.26.0"}
+
+ base_purl = PackageURL(type="pypi", namespace=None, name="requests")
+ ghost_count = flag_ghost_packages(base_purl, [pkg])
+
+ pkg.refresh_from_db()
+ assert ghost_count == 1
+ assert pkg.is_ghost is True
+
+
+@pytest.mark.django_db
+def test_flag_non_ghost_package_not_marked():
+ pkg = PackageV2.objects.create(
+ type="pypi",
+ namespace=None,
+ name="requests",
+ version="2.26.0",
+ )
+
+ with patch(
+ "vulnerabilities.pipelines.v2_improvers.flag_ghost_packages.get_versions"
+ ) as mock_get_versions:
+ mock_get_versions.return_value = {"2.25.1", "2.26.0"}
+
+ base_purl = PackageURL(type="pypi", namespace=None, name="requests")
+ ghost_count = flag_ghost_packages(base_purl, [pkg])
+
+ pkg.refresh_from_db()
+ assert ghost_count == 0
+ assert pkg.is_ghost is False
+
+
+@pytest.mark.django_db
+def test_flag_ghost_packages_gracefully_handles_version_fetch_failure():
+ pkg = PackageV2.objects.create(
+ type="pypi",
+ namespace=None,
+ name="some-lib",
+ version="1.0.0",
+ )
+
+ with patch(
+ "vulnerabilities.pipelines.v2_improvers.flag_ghost_packages.get_versions"
+ ) as mock_get_versions:
+ mock_get_versions.return_value = None
+
+ base_purl = PackageURL(type="pypi", namespace=None, name="some-lib")
+ ghost_count = flag_ghost_packages(base_purl, [pkg])
+
+ pkg.refresh_from_db()
+ assert ghost_count == 0
+ assert pkg.is_ghost is False
+
+
+@pytest.mark.django_db
+def test_detect_and_flag_ghost_packages(monkeypatch):
+ ghost_pkg = PackageV2.objects.create(type="pypi", name="fakepkg", version="9.9.9")
+ real_pkg = PackageV2.objects.create(type="pypi", name="realpkg", version="1.0.0")
+
+ def fake_versions(purl, logger=None):
+ if purl.name == "realpkg":
+ return {"1.0.0"}
+ if purl.name == "fakepkg":
+ return {"0.1.0", "0.2.0"}
+ return set()
+
+ monkeypatch.setattr(
+ "vulnerabilities.pipelines.v2_improvers.flag_ghost_packages.get_versions",
+ fake_versions,
+ )
+
+ detect_and_flag_ghost_packages()
+
+ ghost_pkg.refresh_from_db()
+ real_pkg.refresh_from_db()
+
+ assert ghost_pkg.is_ghost is True
+ assert real_pkg.is_ghost is False
diff --git a/vulnerabilities/tests/pipelines/test_github_importer_v2.py b/vulnerabilities/tests/pipelines/test_github_importer_v2.py
new file mode 100644
index 000000000..ec3ab5a04
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_github_importer_v2.py
@@ -0,0 +1,174 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+from unittest.mock import patch
+
+import pytest
+from packageurl import PackageURL
+
+from vulnerabilities.pipelines.v2_importers.github_importer import GitHubAPIImporterPipeline
+from vulnerabilities.pipelines.v2_importers.github_importer import get_cwes_from_github_advisory
+from vulnerabilities.pipelines.v2_importers.github_importer import get_purl
+from vulnerabilities.utils import get_item
+
+
+@pytest.fixture
+def mock_fetch():
+ with patch(
+ "vulnerabilities.pipelines.v2_importers.github_importer.utils.fetch_github_graphql_query"
+ ) as mock:
+ yield mock
+
+
+def test_advisories_count(mock_fetch):
+ # Mock the GraphQL query response for advisory count
+ mock_fetch.return_value = {"data": {"securityVulnerabilities": {"totalCount": 10}}}
+
+ pipeline = GitHubAPIImporterPipeline()
+
+ count = pipeline.advisories_count()
+
+ # Assert that the count is correct
+ assert count == 10
+
+
+def test_collect_advisories(mock_fetch):
+ # Mock advisory data for GitHub
+ advisory_data = {
+ "data": {
+ "securityVulnerabilities": {
+ "edges": [
+ {
+ "node": {
+ "advisory": {
+ "identifiers": [{"type": "GHSA", "value": "GHSA-1234-ABCD"}],
+ "summary": "Sample advisory description",
+ "references": [
+ {"url": "https://github.com/advisories/GHSA-1234-ABCD"}
+ ],
+ "severity": "HIGH",
+ "cwes": {"nodes": [{"cweId": "CWE-123"}]},
+ "publishedAt": "2023-01-01T00:00:00Z",
+ },
+ "firstPatchedVersion": {"identifier": "1.2.3"},
+ "package": {"name": "example-package"},
+ "vulnerableVersionRange": ">=1.0.0,<=1.2.0",
+ }
+ }
+ ],
+ "pageInfo": {"hasNextPage": False, "endCursor": None},
+ }
+ }
+ }
+
+ # Mock the response from GitHub GraphQL query
+ mock_fetch.return_value = advisory_data
+
+ # Instantiate the pipeline
+ pipeline = GitHubAPIImporterPipeline()
+
+ # Collect advisories
+ advisories = list(pipeline.collect_advisories())
+
+ # Check if advisories were correctly parsed
+ assert len(advisories) == 1
+ advisory = advisories[0]
+
+ # Validate advisory fields
+ assert advisory.advisory_id == "GHSA-1234-ABCD"
+ assert advisory.summary == "Sample advisory description"
+ assert advisory.url == "https://github.com/advisories/GHSA-1234-ABCD"
+ assert len(advisory.references_v2) == 1
+ assert advisory.references_v2[0].reference_id == "GHSA-1234-ABCD"
+ assert advisory.severities[0].value == "HIGH"
+
+ # Validate affected package and version range
+ affected_package = advisory.affected_packages[0]
+ assert isinstance(affected_package.package, PackageURL)
+ assert affected_package.package.name == "example-package"
+
+ # Check CWE extraction
+ assert advisory.weaknesses == [123]
+
+
+def test_get_purl(mock_fetch):
+ # Test for package URL generation
+ result = get_purl("cargo", "example/package-name")
+
+ # Validate that the correct PackageURL is generated
+ assert isinstance(result, PackageURL)
+ assert result.type == "cargo"
+    assert result.namespace is None
+ assert result.name == "example/package-name"
+
+
+def test_process_response(mock_fetch):
+ # Mock advisory data as input for the process_response function
+ advisory_data = {
+ "data": {
+ "securityVulnerabilities": {
+ "edges": [
+ {
+ "node": {
+ "advisory": {
+ "identifiers": [{"type": "GHSA", "value": "GHSA-5678-EFGH"}],
+ "summary": "Another advisory",
+ "references": [
+ {"url": "https://github.com/advisories/GHSA-5678-EFGH"}
+ ],
+ "severity": "MEDIUM",
+ "cwes": {"nodes": [{"cweId": "CWE-200"}]},
+ "publishedAt": "2023-02-01T00:00:00Z",
+ },
+ "firstPatchedVersion": {"identifier": "2.0.0"},
+ "package": {"name": "another-package"},
+ "vulnerableVersionRange": ">=2.0.0,<=3.0.0",
+ }
+ }
+ ],
+ "pageInfo": {"hasNextPage": False, "endCursor": None},
+ }
+ }
+ }
+
+ # Mock the response from GitHub GraphQL query
+ mock_fetch.return_value = advisory_data
+
+ # Process the mock response
+ result = list(GitHubAPIImporterPipeline().collect_advisories())
+
+ # Check the results
+ assert len(result) == 1
+ advisory = result[0]
+
+ # Validate the advisory data
+ assert advisory.advisory_id == "GHSA-5678-EFGH"
+ assert advisory.summary == "Another advisory"
+ assert advisory.url == "https://github.com/advisories/GHSA-5678-EFGH"
+
+ # Check CWE extraction
+ assert advisory.weaknesses == [200]
+
+
+def test_get_cwes_from_github_advisory(mock_fetch):
+ # Mock CWEs extraction from GitHub advisory
+ advisory_data = {"cwes": {"nodes": [{"cweId": "CWE-522"}]}}
+
+ cwes = get_cwes_from_github_advisory(advisory_data)
+
+ # Validate the CWE ID extraction
+ assert cwes == [522]
+
+
+def test_invalid_package_type_in_get_purl(mock_fetch):
+ # Test for invalid package type
+ result = get_purl("invalidpkg", "example/package-name")
+
+ # Assert that None is returned for an invalid package type
+ assert result is None
diff --git a/vulnerabilities/tests/pipelines/test_gitlab_v2_importer.py b/vulnerabilities/tests/pipelines/test_gitlab_v2_importer.py
new file mode 100644
index 000000000..6e5c8eb15
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_gitlab_v2_importer.py
@@ -0,0 +1,153 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+from pathlib import Path
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+import pytest
+
+from vulnerabilities.importer import AdvisoryData
+
+
+@pytest.fixture
+def mock_vcs_response(tmp_path):
+ mock_response = MagicMock()
+ mock_response.dest_dir = str(tmp_path)
+ mock_response.delete = MagicMock()
+ return mock_response
+
+
+@pytest.fixture
+def mock_fetch_via_vcs(mock_vcs_response):
+ with patch("vulnerabilities.pipelines.v2_importers.gitlab_importer.fetch_via_vcs") as mock:
+ mock.return_value = mock_vcs_response
+ yield mock
+
+
+@pytest.fixture
+def mock_gitlab_yaml(tmp_path):
+ advisory_dir = tmp_path / "pypi" / "package_name"
+ advisory_dir.mkdir(parents=True)
+
+ advisory_file = advisory_dir / "CVE-2022-0001.yml"
+ advisory_file.write_text(
+ """
+ identifier: "CVE-2022-0001"
+ package_slug: "pypi/package_name"
+ title: "Example vulnerability"
+ description: "Example description"
+ pubdate: "2022-06-15"
+ affected_range: "<2.0.0"
+ fixed_versions:
+ - "2.0.0"
+ urls:
+ - "https://example.com/advisory"
+ cwe_ids:
+ - "CWE-79"
+ identifiers:
+ - "CVE-2022-0001"
+ """
+ )
+ return tmp_path
+
+
+def test_clone(mock_fetch_via_vcs, mock_vcs_response):
+ from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline
+
+ pipeline = GitLabImporterPipeline()
+ pipeline.clone()
+
+ mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url)
+ assert pipeline.vcs_response == mock_vcs_response
+
+
+def test_advisories_count(mock_gitlab_yaml, mock_vcs_response, mock_fetch_via_vcs):
+ from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline
+
+ mock_vcs_response.dest_dir = str(mock_gitlab_yaml)
+
+ pipeline = GitLabImporterPipeline()
+ pipeline.clone()
+ mock_fetch_via_vcs.assert_called_once()
+
+ count = pipeline.advisories_count()
+ assert count == 1
+
+
+def test_collect_advisories(mock_gitlab_yaml, mock_vcs_response, mock_fetch_via_vcs):
+ from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline
+
+ mock_vcs_response.dest_dir = str(mock_gitlab_yaml)
+
+ pipeline = GitLabImporterPipeline()
+ pipeline.clone()
+
+ advisories = list(pipeline.collect_advisories())
+ assert len(advisories) == 1
+ advisory = advisories[0]
+
+ assert isinstance(advisory, AdvisoryData)
+ assert advisory.advisory_id == "CVE-2022-0001"
+ assert advisory.summary == "Example vulnerability\nExample description"
+ assert advisory.references_v2[0].url == "https://example.com/advisory"
+ assert advisory.affected_packages[0].package.name == "package-name"
+ assert advisory.affected_packages[0].fixed_version
+ assert advisory.weaknesses[0] == 79
+
+
+def test_clean_downloads(mock_vcs_response):
+ from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline
+
+ pipeline = GitLabImporterPipeline()
+ pipeline.vcs_response = mock_vcs_response
+
+ pipeline.clean_downloads()
+ mock_vcs_response.delete.assert_called_once()
+
+
+def test_on_failure(mock_vcs_response):
+ from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline
+
+ pipeline = GitLabImporterPipeline()
+ pipeline.vcs_response = mock_vcs_response
+
+ with patch.object(pipeline, "clean_downloads") as mock_clean:
+ pipeline.on_failure()
+ mock_clean.assert_called_once()
+
+
+def test_collect_advisories_with_invalid_yaml(
+ mock_gitlab_yaml, mock_vcs_response, mock_fetch_via_vcs
+):
+ from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline
+
+ # Add an invalid YAML file
+ invalid_file = Path(mock_gitlab_yaml) / "pypi" / "package_name" / "invalid.yml"
+ invalid_file.write_text(":::invalid_yaml")
+
+ mock_vcs_response.dest_dir = str(mock_gitlab_yaml)
+
+ pipeline = GitLabImporterPipeline()
+ pipeline.clone()
+
+ # Should not raise but skip invalid YAML
+ advisories = list(pipeline.collect_advisories())
+ assert len(advisories) == 1 # Only one valid advisory is parsed
+
+
+def test_advisories_count_empty(mock_vcs_response, mock_fetch_via_vcs, tmp_path):
+ from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline
+
+ mock_vcs_response.dest_dir = str(tmp_path)
+
+ pipeline = GitLabImporterPipeline()
+ pipeline.clone()
+ mock_fetch_via_vcs.assert_called_once()
+
+ count = pipeline.advisories_count()
+ assert count == 0
diff --git a/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py
new file mode 100644
index 000000000..7941c9b69
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py
@@ -0,0 +1,128 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import json
+from types import SimpleNamespace
+
+import pytz
+from packageurl import PackageURL
+from univers.version_range import NpmVersionRange
+from univers.versions import SemverVersion
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.pipelines.v2_importers.npm_importer import NpmImporterPipeline
+from vulnerabilities.severity_systems import CVSSV2
+from vulnerabilities.severity_systems import CVSSV3
+
+
+def test_clone(monkeypatch):
+ import vulnerabilities.pipelines.v2_importers.npm_importer as npm_mod
+
+ dummy = SimpleNamespace(dest_dir="dummy", delete=lambda: None)
+ # Patch the name in the npm_importer module, not fetchcode.vcs
+ monkeypatch.setattr(npm_mod, "fetch_via_vcs", lambda url: dummy)
+
+ p = NpmImporterPipeline()
+ p.clone()
+
+ assert p.vcs_response is dummy
+
+
+def test_clean_downloads_and_on_failure():
+ called = {}
+
+ def delete():
+ called["deleted"] = True
+
+ dummy = SimpleNamespace(dest_dir="dummy", delete=delete)
+ p = NpmImporterPipeline()
+ p.vcs_response = dummy
+ p.clean_downloads()
+ assert called.get("deleted", False)
+ called.clear()
+ p.on_failure()
+ assert called.get("deleted", False)
+
+
+def test_advisories_count_and_collect(tmp_path):
+ base = tmp_path
+ vuln_dir = base / "vuln" / "npm"
+ vuln_dir.mkdir(parents=True)
+ (vuln_dir / "index.json").write_text("{}")
+ (vuln_dir / "001.json").write_text(json.dumps({"id": "001"}))
+ p = NpmImporterPipeline()
+ p.vcs_response = SimpleNamespace(dest_dir=str(base), delete=lambda: None)
+ assert p.advisories_count() == 2
+ advisories = list(p.collect_advisories())
+ # Should yield None for index.json and one AdvisoryData
+ real = [a for a in advisories if isinstance(a, AdvisoryData)]
+ assert len(real) == 1
+ assert real[0].advisory_id == "npm-001"
+
+
+def test_to_advisory_data_skips_index(tmp_path):
+ p = NpmImporterPipeline()
+ file = tmp_path / "index.json"
+ file.write_text("{}")
+ assert p.to_advisory_data(file) is None
+
+
+def test_to_advisory_data_full(tmp_path):
+ data = {
+ "id": "123",
+ "overview": "desc",
+ "title": "ti",
+ "created_at": "2021-01-01T00:00:00Z",
+ "cvss_vector": "CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
+ "cvss_score": "9.8",
+ "references": ["http://ref1"],
+ "module_name": "mypkg",
+ "vulnerable_versions": "<=1.2.3",
+ "patched_versions": ">=1.2.4",
+ "cves": ["CVE-123", "CVE-124"],
+ }
+ file = tmp_path / "123.json"
+ file.write_text(json.dumps(data))
+ p = NpmImporterPipeline()
+ adv = p.to_advisory_data(file)
+ assert isinstance(adv, AdvisoryData)
+ assert adv.advisory_id == "npm-123"
+ assert "ti" in adv.summary and "desc" in adv.summary
+ assert adv.date_published.tzinfo == pytz.UTC
+ assert len(adv.severities) == 1 and adv.severities[0].system == CVSSV3
+ urls = [r.url for r in adv.references_v2]
+ assert "http://ref1" in urls
+ assert f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/123.json" in urls
+ pkg = adv.affected_packages[0]
+ assert pkg.package == PackageURL(type="npm", name="mypkg")
+ assert isinstance(pkg.affected_version_range, NpmVersionRange)
+ assert pkg.fixed_version == SemverVersion("1.2.4")
+ assert set(adv.aliases) == {"CVE-123", "CVE-124"}
+
+
+def test_to_advisory_data_cvss_v2(tmp_path):
+ data = {"id": "124", "cvss_vector": "CVSS:2.0/AV:N/AC:L/Au:N/C:P/I:P/A:P", "cvss_score": "5.5"}
+ file = tmp_path / "124.json"
+ file.write_text(json.dumps(data))
+ p = NpmImporterPipeline()
+ adv = p.to_advisory_data(file)
+ assert len(adv.severities) == 1 and adv.severities[0].system == CVSSV2
+
+
+def test_get_affected_package_special_and_standard():
+ p = NpmImporterPipeline()
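+    # In the nodejs security-wg data, "<=99.999.99999" affected with "<0.0.0" patched
+    # conventionally means "all versions affected, no fix yet", hence fixed_version is None.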
+ pkg = p.get_affected_package(
+ {"vulnerable_versions": "<=99.999.99999", "patched_versions": "<0.0.0"}, "pkg"
+ )
+ assert isinstance(pkg.affected_version_range, NpmVersionRange)
+ assert pkg.fixed_version is None
+ data2 = {"vulnerable_versions": "<=2.0.0", "patched_versions": ">=2.0.1"}
+ pkg2 = p.get_affected_package(data2, "pkg2")
+ assert isinstance(pkg2.affected_version_range, NpmVersionRange)
+ assert pkg2.fixed_version == SemverVersion("2.0.1")
diff --git a/vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py b/vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py
new file mode 100644
index 000000000..da077f3ed
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py
@@ -0,0 +1,154 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+import pytest
+from univers.versions import SemverVersion
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.pipelines.v2_importers.postgresql_importer import PostgreSQLImporterPipeline
+
+HTML_PAGE_WITH_LINKS = """
+
+
+ Security Advisory
+ Advisory 1
+ Another Advisory
+ Advisory 2
+
+
+"""
+
+HTML_ADVISORY = """
+
+
+
+
+
+"""
+
+
+@pytest.fixture
+def importer():
+ return PostgreSQLImporterPipeline()
+
+
+@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get")
+def test_collect_links(mock_get, importer):
+ mock_get.return_value.content = HTML_PAGE_WITH_LINKS.encode("utf-8")
+
+ importer.collect_links()
+
+ assert len(importer.links) == 3 # base + 2 new
+ assert any("advisory1.html" in link for link in importer.links)
+ assert any("advisory2.html" in link for link in importer.links)
+
+
+@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get")
+def test_advisories_count(mock_get, importer):
+ mock_get.return_value.content = HTML_PAGE_WITH_LINKS.encode("utf-8")
+
+ count = importer.advisories_count()
+ assert count >= 3
+
+
+@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get")
+def test_collect_advisories(mock_get, importer):
+ importer.links = {
+ "https://www.postgresql.org/support/security/advisory1.html",
+ "https://www.postgresql.org/support/security/advisory2.html",
+ }
+
+ mock_get.return_value.content = HTML_ADVISORY.encode("utf-8")
+
+ advisories = list(importer.collect_advisories())
+
+ assert len(advisories) == 2
+ advisory = advisories[0]
+ assert isinstance(advisory, AdvisoryData)
+ assert advisory.advisory_id == "CVE-2022-1234"
+ assert "Description of the issue" in advisory.summary
+ assert len(advisory.references_v2) > 0
+ assert advisory.affected_packages[0].package.name == "postgresql"
+ assert str(advisory.affected_packages[0].fixed_version) == "10.2"
+ assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("10.0.0"))
+ assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("10.1.0"))
+
+
+@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get")
+def test_collect_advisories_with_no_fixed_version(mock_get, importer):
+ no_fix_html = """
+
+
+
+
+
+ """
+
+ def side_effect(url, *args, **kwargs):
+ if "advisory" not in url:
+ return MagicMock(content=HTML_PAGE_WITH_LINKS.encode("utf-8"))
+ return MagicMock(content=no_fix_html.encode("utf-8"))
+
+ mock_get.side_effect = side_effect
+
+ advisories = list(importer.collect_advisories())
+
+ assert len(advisories) == 2
+ advisory = advisories[0]
+ assert advisory.advisory_id == "CVE-2023-5678"
+ assert advisory.affected_packages[0].fixed_version is None
+ assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("9.5"))
+
+
+@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get")
+def test_cvss_parsing(mock_get, importer):
+ mock_get.side_effect = lambda url, *args, **kwargs: MagicMock(
+ content=HTML_ADVISORY.encode("utf-8")
+ )
+
+ importer.links = {"https://www.postgresql.org/support/security/advisory1.html"}
+
+ advisories = list(importer.collect_advisories())
+
+ assert len(advisories) == 1
+ reference = advisories[0].references_v2[0]
+
+ severity = reference.severities[0]
+ assert severity.system.identifier == "cvssv3"
+ assert severity.value == "9.8"
+ assert "AV:N/AC:L/PR:N/UI:N" in severity.scoring_elements
diff --git a/vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py
new file mode 100644
index 000000000..20aa63387
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py
@@ -0,0 +1,173 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+import pytest
+import saneyaml
+
+from vulnerabilities.importer import AdvisoryData
+
+
+@pytest.fixture
+def mock_vcs_response():
+ # Mock the vcs_response from fetch_via_vcs
+ mock_response = MagicMock()
+ mock_response.dest_dir = "/mock/repo"
+ mock_response.delete = MagicMock()
+ return mock_response
+
+
+@pytest.fixture
+def mock_fetch_via_vcs(mock_vcs_response):
+ with patch("vulnerabilities.pipelines.v2_importers.pypa_importer.fetch_via_vcs") as mock:
+ mock.return_value = mock_vcs_response
+ yield mock
+
+
+@pytest.fixture
+def mock_pathlib(tmp_path):
+ # Mock the Path structure to simulate the `vulns` directory and advisory files
+ vulns_dir = tmp_path / "vulns"
+ vulns_dir.mkdir()
+
+ advisory_file = vulns_dir / "CVE-2021-1234.yaml"
+ advisory_file.write_text(
+ """
+ id: CVE-2021-1234
+ summary: Sample PyPI vulnerability
+ references:
+ - https://pypi.org/advisory/CVE-2021-1234
+ """
+ )
+ return vulns_dir
+
+
+def test_clone(mock_fetch_via_vcs, mock_vcs_response):
+ # Import inside the test function to avoid circular import
+ from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline
+
+ # Test the `clone` method to ensure it calls `fetch_via_vcs`
+ pipeline = PyPaImporterPipeline()
+ pipeline.clone()
+
+ mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url)
+ assert pipeline.vcs_response == mock_vcs_response
+
+
+def test_advisories_count(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs):
+ # Import inside the test function to avoid circular import
+ from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline
+
+ # Mock `vcs_response.dest_dir` to point to the temporary directory
+ mock_vcs_response.dest_dir = str(mock_pathlib.parent)
+
+ pipeline = PyPaImporterPipeline()
+
+ # Call clone() to set the vcs_response attribute
+ pipeline.clone()
+ mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url)
+
+ count = pipeline.advisories_count()
+
+ # Check that the count matches the number of YAML files in the `vulns` directory
+ assert count == 1
+
+
+def test_collect_advisories(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs):
+ # Import inside the test function to avoid circular import
+ from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline
+
+ # Mock `vcs_response.dest_dir` to point to the temporary directory
+ mock_vcs_response.dest_dir = str(mock_pathlib.parent)
+
+    # Mock `parse_advisory_data_v2` to return an AdvisoryData object
+ with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse:
+ mock_parse.return_value = AdvisoryData(
+ advisory_id="CVE-2021-1234",
+ summary="Sample PyPI vulnerability",
+ references_v2=[{"url": "https://pypi.org/advisory/CVE-2021-1234"}],
+ affected_packages=[],
+ weaknesses=[],
+ url="https://pypi.org/advisory/CVE-2021-1234",
+ )
+
+ pipeline = PyPaImporterPipeline()
+ pipeline.clone()
+ mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url)
+ advisories = list(pipeline.collect_advisories())
+
+ # Ensure that advisories are parsed correctly
+ assert len(advisories) == 1
+ advisory = advisories[0]
+ assert advisory.advisory_id == "CVE-2021-1234"
+ assert advisory.summary == "Sample PyPI vulnerability"
+ assert advisory.url == "https://pypi.org/advisory/CVE-2021-1234"
+
+
+def test_clean_downloads(mock_vcs_response):
+ # Import inside the test function to avoid circular import
+ from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline
+
+ # Test the `clean_downloads` method to ensure the repository is deleted
+ pipeline = PyPaImporterPipeline()
+ pipeline.vcs_response = mock_vcs_response
+
+ pipeline.clean_downloads()
+
+ mock_vcs_response.delete.assert_called_once()
+
+
+def test_on_failure(mock_vcs_response):
+ # Import inside the test function to avoid circular import
+ from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline
+
+ # Test the `on_failure` method to ensure `clean_downloads` is called on failure
+ pipeline = PyPaImporterPipeline()
+ pipeline.vcs_response = mock_vcs_response
+
+ with patch.object(pipeline, "clean_downloads") as mock_clean:
+ pipeline.on_failure()
+
+ mock_clean.assert_called_once()
+
+
+def test_collect_advisories_with_invalid_yaml(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs):
+ # Import inside the test function to avoid circular import
+ from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline
+
+ # Create an invalid YAML file
+ invalid_file = mock_pathlib / "invalid_file.yaml"
+ invalid_file.write_text("invalid_yaml")
+
+ mock_vcs_response.dest_dir = str(mock_pathlib.parent)
+
+ with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse:
+        # Mock parse_advisory_data_v2 to raise an error on invalid YAML
+ mock_parse.side_effect = saneyaml.YAMLError("Invalid YAML")
+
+ pipeline = PyPaImporterPipeline()
+ pipeline.clone()
+ mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url)
+ with pytest.raises(saneyaml.YAMLError):
+ list(pipeline.collect_advisories())
+
+
+def test_advisories_count_empty(mock_vcs_response, mock_fetch_via_vcs):
+ # Import inside the test function to avoid circular import
+ from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline
+
+ # Mock an empty 'vulns' directory
+ mock_vcs_response.dest_dir = "/mock/empty_repo"
+ pipeline = PyPaImporterPipeline()
+ pipeline.clone()
+ # Test that advisories_count returns 0 for an empty directory
+ count = pipeline.advisories_count()
+ assert count == 0
diff --git a/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py b/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py
new file mode 100644
index 000000000..33c716889
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py
@@ -0,0 +1,137 @@
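+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+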
+import json
+from io import BytesIO
+from unittest.mock import patch
+from zipfile import ZipFile
+
+import pytest
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.pipelines.v2_importers.pysec_importer import PyPIImporterPipeline
+
+
+@pytest.fixture
+def mock_zip_data():
+ # Create mock zip data for testing
+ zip_buffer = BytesIO()
+ with ZipFile(zip_buffer, mode="w") as zip_file:
+ # Create a sample advisory file inside the zip
+ advisory_data = {
+ "advisory_id": "PYSEC-1234",
+ "summary": "Sample PyPI advisory",
+ "references": [{"url": "https://pypi.org/advisory/PYSEC-1234"}],
+ "package": {"name": "example-package"},
+ "affected_versions": ">=1.0.0,<=2.0.0",
+ }
+ # Save the sample advisory as a JSON file
+ with zip_file.open("PYSEC-1234.json", "w") as f:
+ f.write(json.dumps(advisory_data).encode("utf-8"))
+ zip_buffer.seek(0)
+ return zip_buffer
+
+
+@pytest.fixture
+def mock_requests_get():
+ with patch("requests.get") as mock:
+ yield mock
+
+
+def test_fetch_zip(mock_requests_get, mock_zip_data):
+ # Mock the `requests.get` to return the mock zip data
+ mock_requests_get.return_value.content = mock_zip_data.read()
+
+ pipeline = PyPIImporterPipeline()
+
+ # Call the `fetch_zip` method
+ pipeline.fetch_zip()
+
+ # Reset the position of mock_zip_data to 0 before comparing
+ mock_zip_data.seek(0)
+
+ # Verify that the zip file content is correctly assigned
+ assert pipeline.advisory_zip == mock_zip_data.read()
+
+
+def test_advisories_count(mock_requests_get, mock_zip_data):
+ # Mock the `requests.get` to return the mock zip data
+ mock_requests_get.return_value.content = mock_zip_data.read()
+
+ pipeline = PyPIImporterPipeline()
+
+ # Fetch the zip data
+ pipeline.fetch_zip()
+
+ # Test advisories count
+ count = pipeline.advisories_count()
+
+ # Verify that it correctly counts the number of advisory files starting with 'PYSEC-'
+ assert count == 1
+
+
+def test_collect_advisories(mock_requests_get, mock_zip_data):
+ # Mock the `requests.get` to return the mock zip data
+ mock_requests_get.return_value.content = mock_zip_data.read()
+
+ pipeline = PyPIImporterPipeline()
+
+ # Fetch the zip data
+ pipeline.fetch_zip()
+
+ # Mock the `parse_advisory_data_v2` function to return a dummy AdvisoryData
+ with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse:
+ mock_parse.return_value = AdvisoryData(
+ advisory_id="PYSEC-1234",
+ summary="Sample PyPI advisory",
+ references_v2=[{"url": "https://pypi.org/advisory/PYSEC-1234"}],
+ affected_packages=[],
+ weaknesses=[],
+ url="https://pypi.org/advisory/PYSEC-1234",
+ )
+
+ # Call the `collect_advisories` method
+ advisories = list(pipeline.collect_advisories())
+
+ # Ensure we have 1 advisory
+ assert len(advisories) == 1
+
+ # Verify advisory data
+ advisory = advisories[0]
+ assert advisory.advisory_id == "PYSEC-1234"
+ assert advisory.summary == "Sample PyPI advisory"
+ assert advisory.url == "https://pypi.org/advisory/PYSEC-1234"
+
+
+def test_collect_advisories_invalid_file(mock_requests_get, mock_zip_data):
+ # Create a mock zip with an invalid file name
+ zip_buffer = BytesIO()
+ with ZipFile(zip_buffer, mode="w") as zip_file:
+ zip_file.writestr("INVALID_FILE.txt", "Invalid content")
+
+ zip_buffer.seek(0)
+ mock_requests_get.return_value.content = zip_buffer.read()
+
+ pipeline = PyPIImporterPipeline()
+
+ # Fetch the zip data
+ pipeline.fetch_zip()
+
+ # Mock the `parse_advisory_data_v2` function
+ with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse:
+ mock_parse.return_value = AdvisoryData(
+ advisory_id="PYSEC-1234",
+ summary="Sample PyPI advisory",
+ references_v2=[{"url": "https://pypi.org/advisory/PYSEC-1234"}],
+ affected_packages=[],
+ weaknesses=[],
+ url="https://pypi.org/advisory/PYSEC-1234",
+ )
+
+ # Call the `collect_advisories` method and check the logging for invalid file
+ with patch(
+ "vulnerabilities.pipelines.VulnerableCodeBaseImporterPipelineV2.log"
+ ) as mock_log:
+ advisories = list(pipeline.collect_advisories())
+
+ # Ensure no advisories were yielded due to the invalid file
+ assert len(advisories) == 0
diff --git a/vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py b/vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py
new file mode 100644
index 000000000..f995f0c1f
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py
@@ -0,0 +1,180 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import logging
+from datetime import datetime
+from datetime import timedelta
+from unittest import mock
+
+import pytest
+from packageurl import PackageURL
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import UnMergeablePackageError
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.models import PackageV2
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+
+
+class DummyImporter(VulnerableCodeBaseImporterPipelineV2):
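+    """Minimal concrete importer that records log messages for assertions."""
+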
+ pipeline_id = "dummy"
+ log_messages = []
+
+ def log(self, message, level=logging.INFO):
+ self.log_messages.append((level, message))
+
+ def collect_advisories(self):
+ yield from self._advisories
+
+ def advisories_count(self):
+ return len(self._advisories)
+
+
+@pytest.fixture
+def dummy_advisory():
+ return AdvisoryData(
+ summary="Test advisory",
+ aliases=["CVE-2025-0001"],
+ references_v2=[],
+ severities=[],
+ weaknesses=[],
+ affected_packages=[],
+ advisory_id="ADV-123",
+ date_published=datetime.now() - timedelta(days=10),
+ url="https://example.com/advisory/1",
+ )
+
+
+@pytest.fixture
+def dummy_importer(dummy_advisory):
+    importer = DummyImporter()
+    importer._advisories = [dummy_advisory]
+    # Shadow the shared class-level list so log messages do not leak across tests.
+    importer.log_messages = []
+ return importer
+
+
+@pytest.mark.django_db
+def test_collect_and_store_advisories(dummy_importer):
+ dummy_importer.collect_and_store_advisories()
+ assert len(dummy_importer.log_messages) >= 2
+ assert "Successfully collected" in dummy_importer.log_messages[-1][1]
+ assert AdvisoryV2.objects.count() == 1
+
+
+def test_get_advisory_packages_basic(dummy_importer):
+ purl = PackageURL("pypi", None, "dummy", "1.0.0")
+ affected_package = mock.Mock()
+ affected_package.package = purl
+ dummy_importer.unfurl_version_ranges = False
+
+ with mock.patch(
+ "vulnerabilities.improvers.default.get_exact_purls", return_value=([purl], [purl])
+ ):
+ with mock.patch.object(
+ PackageV2.objects, "get_or_create_from_purl", return_value=(mock.Mock(), True)
+ ) as mock_get:
+ dummy_importer.get_advisory_packages(
+ advisory_data=mock.Mock(affected_packages=[affected_package])
+ )
+ assert mock_get.call_count == 2 # one affected, one fixed
+
+
+def test_get_published_package_versions_filters(dummy_importer):
+ purl = PackageURL("pypi", None, "example", None)
+
+ dummy_versions = [
+ mock.Mock(value="1.0.0", release_date=datetime.now() - timedelta(days=5)),
+ mock.Mock(value="2.0.0", release_date=datetime.now() + timedelta(days=5)), # future
+ ]
+
+ with mock.patch(
+ "vulnerabilities.pipelines.package_versions.versions", return_value=dummy_versions
+ ):
+ versions = dummy_importer.get_published_package_versions(purl, until=datetime.now())
+ assert "1.0.0" in versions
+ assert "2.0.0" not in versions
+
+
+def test_get_published_package_versions_failure_logs(dummy_importer):
+ purl = PackageURL("pypi", None, "example", None)
+ with mock.patch(
+ "vulnerabilities.pipelines.package_versions.versions", side_effect=Exception("fail")
+ ):
+ versions = dummy_importer.get_published_package_versions(purl)
+ assert versions == []
+ assert any("Failed to fetch versions" in msg for lvl, msg in dummy_importer.log_messages)
+
+
+def test_expand_version_range_to_purls(dummy_importer):
+ purls = list(
+ dummy_importer.expand_verion_range_to_purls("npm", "lodash", "lodash", ["1.0.0", "1.1.0"])
+ )
+ assert all(isinstance(p, PackageURL) for p in purls)
+ assert purls[0].name == "lodash"
+
+
+def test_resolve_package_versions(dummy_importer):
+ dummy_importer.ignorable_versions = []
+ dummy_importer.expand_verion_range_to_purls = lambda *args, **kwargs: [
+ PackageURL("npm", None, "a", "1.0.0")
+ ]
+
+ with mock.patch(
+ "vulnerabilities.pipelines.resolve_version_range", return_value=(["1.0.0"], ["1.1.0"])
+ ), mock.patch(
+ "vulnerabilities.pipelines.get_affected_packages_by_patched_package",
+ return_value={None: [PackageURL("npm", None, "a", "1.0.0")]},
+ ), mock.patch(
+ "vulnerabilities.pipelines.nearest_patched_package", return_value=[]
+ ):
+ aff, fix = dummy_importer.resolve_package_versions(
+ affected_version_range=">=1.0.0",
+ pkg_type="npm",
+ pkg_namespace=None,
+ pkg_name="a",
+ valid_versions=["1.0.0", "1.1.0"],
+ )
+ assert any(isinstance(p, PackageURL) for p in aff)
+
+
+def test_get_impacted_packages_mergeable(dummy_importer):
+ ap = mock.Mock()
+ ap.package = PackageURL("npm", None, "abc", None)
+ dummy_importer.get_published_package_versions = lambda package_url, until: ["1.0.0", "1.1.0"]
+ dummy_importer.resolve_package_versions = lambda **kwargs: (
+ [PackageURL("npm", None, "abc", "1.0.0")],
+ [PackageURL("npm", None, "abc", "1.1.0")],
+ )
+
+ with mock.patch(
+ "vulnerabilities.importer.AffectedPackage.merge",
+ return_value=(ap.package, [">=1.0.0"], ["1.1.0"]),
+ ):
+ aff, fix = dummy_importer.get_impacted_packages([ap], datetime.now())
+ assert len(aff) == 1 and aff[0].version == "1.0.0"
+ assert len(fix) == 1 and fix[0].version == "1.1.0"
+
+
+def test_get_impacted_packages_unmergeable(dummy_importer):
+ ap = mock.Mock()
+ ap.package = PackageURL("npm", None, "abc", None)
+ ap.affected_version_range = ">=1.0.0"
+ ap.fixed_version = None
+
+ dummy_importer.get_published_package_versions = lambda package_url, until: ["1.0.0", "1.1.0"]
+ dummy_importer.resolve_package_versions = lambda **kwargs: (
+ [PackageURL("npm", None, "abc", "1.0.0")],
+ [PackageURL("npm", None, "abc", "1.1.0")],
+ )
+
+ with mock.patch(
+ "vulnerabilities.importer.AffectedPackage.merge", side_effect=UnMergeablePackageError
+ ):
+ aff, fix = dummy_importer.get_impacted_packages([ap], datetime.utcnow())
+ assert len(aff) == 1
+ assert aff[0].version == "1.0.0"
diff --git a/vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py b/vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py
new file mode 100644
index 000000000..f926058c2
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py
@@ -0,0 +1,205 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import json
+from pathlib import Path
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+import pytest
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import VulnerabilitySeverity
+from vulnerabilities.pipelines.v2_importers.vulnrichment_importer import VulnrichImporterPipeline
+
+
+@pytest.fixture
+def mock_vcs_response():
+ # Mock the vcs_response from fetch_via_vcs
+ mock_response = MagicMock()
+ mock_response.dest_dir = "/mock/repo"
+ mock_response.delete = MagicMock()
+ return mock_response
+
+
+@pytest.fixture
+def mock_fetch_via_vcs(mock_vcs_response):
+ with patch(
+ "vulnerabilities.pipelines.v2_importers.vulnrichment_importer.fetch_via_vcs"
+ ) as mock:
+ mock.return_value = mock_vcs_response
+ yield mock
+
+
+@pytest.fixture
+def mock_pathlib(tmp_path):
+ # Create a mock filesystem with a 'vulns' directory and JSON files
+ vulns_dir = tmp_path / "vulns"
+ vulns_dir.mkdir()
+
+ advisory_file = vulns_dir / "CVE-2021-1234.json"
+ advisory_file.write_text(
+ json.dumps(
+ {
+ "cveMetadata": {
+ "cveId": "CVE-2021-1234",
+ "state": "PUBLIC",
+ "datePublished": "2021-01-01",
+ },
+ "containers": {
+ "cna": {
+ "descriptions": [{"lang": "en", "value": "Sample PyPI vulnerability"}],
+ "metrics": [
+ {
+ "cvssV4_0": {
+ "baseScore": 7.5,
+ "vectorString": "AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H",
+ }
+ }
+ ],
+ "affected": [{"cpes": ["cpe:/a:example:package"]}],
+ "references": [{"url": "https://example.com", "tags": ["exploit"]}],
+ }
+ },
+ }
+ )
+ )
+ return vulns_dir
+
+
+def test_clone(mock_fetch_via_vcs, mock_vcs_response):
+ # Test the `clone` method to ensure the repository is cloned correctly
+ pipeline = VulnrichImporterPipeline()
+ pipeline.clone()
+
+ mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url)
+ assert pipeline.vcs_response == mock_vcs_response
+
+
+def test_advisories_count(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs):
+ mock_vcs_response.dest_dir = str(mock_pathlib.parent)
+
+ pipeline = VulnrichImporterPipeline()
+ pipeline.clone()
+ count = pipeline.advisories_count()
+
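+    # Expected to be zero: the sample JSON under "vulns/" is not in the
+    # directory layout this importer scans.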
+ assert count == 0
+
+
+def test_collect_advisories(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs):
+ # Mock `vcs_response.dest_dir` to point to the temporary directory
+ mock_vcs_response.dest_dir = str(mock_pathlib.parent)
+
+ # Mock `parse_cve_advisory` to return an AdvisoryData object
+ with patch(
+ "vulnerabilities.pipelines.v2_importers.vulnrichment_importer.VulnrichImporterPipeline.parse_cve_advisory"
+ ) as mock_parse:
+ mock_parse.return_value = AdvisoryData(
+ advisory_id="CVE-2021-1234",
+ summary="Sample PyPI vulnerability",
+ references_v2=[{"url": "https://example.com"}],
+ affected_packages=[],
+ weaknesses=[],
+ url="https://example.com",
+ severities=[
+ VulnerabilitySeverity(
+ system="cvssv4",
+ value=7.5,
+ scoring_elements="AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H",
+ )
+ ],
+ )
+
+ pipeline = VulnrichImporterPipeline()
+ pipeline.clone()
+ advisories = list(pipeline.collect_advisories())
+
+ # Ensure that advisories are parsed correctly
+ assert len(advisories) == 1
+ advisory = advisories[0]
+ assert advisory.advisory_id == "CVE-2021-1234"
+ assert advisory.summary == "Sample PyPI vulnerability"
+ assert advisory.url == "https://example.com"
+
+
+def test_clean_downloads(mock_vcs_response, mock_fetch_via_vcs):
+ # Test the `clean_downloads` method to ensure the repository is deleted
+ pipeline = VulnrichImporterPipeline()
+ pipeline.clone()
+ pipeline.vcs_response = mock_vcs_response
+
+ pipeline.clean_downloads()
+
+ mock_vcs_response.delete.assert_called_once()
+
+
+def test_on_failure(mock_vcs_response, mock_fetch_via_vcs):
+ pipeline = VulnrichImporterPipeline()
+ pipeline.clone()
+ pipeline.vcs_response = mock_vcs_response
+
+ with patch.object(pipeline, "clean_downloads") as mock_clean:
+ pipeline.on_failure()
+
+ mock_clean.assert_called_once()
+
+
+def test_parse_cve_advisory(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs):
+ from vulnerabilities.pipelines.v2_importers.vulnrichment_importer import (
+ VulnrichImporterPipeline,
+ )
+
+ mock_vcs_response.dest_dir = str(mock_pathlib.parent)
+
+ raw_data = {
+ "cveMetadata": {"cveId": "CVE-2021-1234", "state": "PUBLIC", "datePublished": "2021-01-01"},
+ "containers": {
+ "cna": {
+ "descriptions": [{"lang": "en", "value": "Sample PyPI vulnerability"}],
+ "metrics": [
+ {
+ "cvssV4_0": {
+ "baseScore": 7.5,
+ "vectorString": "AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H",
+ }
+ }
+ ],
+ "affected": [{"cpes": ["cpe:/a:example:package"]}],
+ "references": [{"url": "https://example.com", "tags": ["exploit"]}],
+ }
+ },
+ }
+ advisory_url = "https://github.com/cisagov/vulnrichment/blob/develop/CVE-2021-1234.json"
+
+ pipeline = VulnrichImporterPipeline()
+ pipeline.clone()
+ advisory = pipeline.parse_cve_advisory(raw_data, advisory_url)
+
+ assert advisory.advisory_id == "CVE-2021-1234"
+ assert advisory.summary == "Sample PyPI vulnerability"
+ assert advisory.url == advisory_url
+ assert len(advisory.severities) == 1
+ assert advisory.severities[0].value == 7.5
+
+
+def test_collect_advisories_with_invalid_json(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs):
+ invalid_file = mock_pathlib / "invalid_file.json"
+ invalid_file.write_text("invalid_json")
+
+ mock_vcs_response.dest_dir = str(mock_pathlib.parent)
+
+ with patch(
+ "vulnerabilities.pipelines.v2_importers.vulnrichment_importer.VulnrichImporterPipeline.parse_cve_advisory"
+ ) as mock_parse:
+ mock_parse.side_effect = json.JSONDecodeError("Invalid JSON", "", 0)
+
+ pipeline = VulnrichImporterPipeline()
+ pipeline.clone()
+ with pytest.raises(json.JSONDecodeError):
+ list(pipeline.collect_advisories())
diff --git a/vulnerabilities/tests/pipes/test_advisory.py b/vulnerabilities/tests/pipes/test_advisory.py
index ee29a4b8d..72c477455 100644
--- a/vulnerabilities/tests/pipes/test_advisory.py
+++ b/vulnerabilities/tests/pipes/test_advisory.py
@@ -9,6 +9,7 @@
from datetime import datetime
+import pytest
from django.core.exceptions import ValidationError
from django.test import TestCase
from django.utils import timezone
@@ -19,6 +20,14 @@
from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Reference
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryReference
+from vulnerabilities.models import AdvisorySeverity
+from vulnerabilities.models import AdvisoryWeakness
+from vulnerabilities.pipes.advisory import get_or_create_advisory_aliases
+from vulnerabilities.pipes.advisory import get_or_create_advisory_references
+from vulnerabilities.pipes.advisory import get_or_create_advisory_severities
+from vulnerabilities.pipes.advisory import get_or_create_advisory_weaknesses
from vulnerabilities.pipes.advisory import get_or_create_aliases
from vulnerabilities.pipes.advisory import import_advisory
from vulnerabilities.utils import compute_content_id
@@ -134,3 +143,85 @@ def test_advisory_insert_no_duplicate_content_id(self):
date_collected=date,
created_by="test_pipeline",
)
+
+
+@pytest.fixture
+def advisory_aliases():
+ return ["CVE-2021-12345", "GHSA-xyz"]
+
+
+@pytest.fixture
+def advisory_references():
+ return [
+ Reference(reference_id="REF-1", url="https://example.com/advisory/1"),
+ Reference(reference_id="REF-2", url="https://example.com/advisory/2"),
+ Reference(reference_id="", url="https://example.com/advisory/3"),
+ Reference(url="https://example.com/advisory/4"),
+ ]
+
+
+@pytest.fixture
+def advisory_severities():
+ class Severity:
+ def __init__(self, system, value, scoring_elements, published_at=None, url=None):
+ self.system = system
+ self.value = value
+ self.scoring_elements = scoring_elements
+ self.published_at = published_at
+ self.url = url
+
+ class System:
+ def __init__(self, identifier):
+ self.identifier = identifier
+
+ return [
+ Severity(
+ System("CVSSv3"),
+ "7.5",
+ "AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
+ timezone.now(),
+ "https://cvss.example.com",
+ ),
+ ]
+
+
+@pytest.fixture
+def advisory_weaknesses():
+ return [79, 89]
+
+
+@pytest.mark.django_db
+def test_get_or_create_advisory_aliases(advisory_aliases):
+ aliases = get_or_create_advisory_aliases(advisory_aliases)
+ assert len(aliases) == len(advisory_aliases)
+ for alias_obj in aliases:
+ assert isinstance(alias_obj, AdvisoryAlias)
+ assert alias_obj.alias in advisory_aliases
+
+
+@pytest.mark.django_db
+def test_get_or_create_advisory_references(advisory_references):
+ refs = get_or_create_advisory_references(advisory_references)
+ assert len(refs) == len(advisory_references)
+ for ref in refs:
+ assert isinstance(ref, AdvisoryReference)
+ assert ref.url in [r.url for r in advisory_references]
+
+
+@pytest.mark.django_db
+def test_get_or_create_advisory_severities(advisory_severities):
+ sevs = get_or_create_advisory_severities(advisory_severities)
+ assert len(sevs) == len(advisory_severities)
+ for sev in sevs:
+ assert isinstance(sev, AdvisorySeverity)
+ assert sev.scoring_system == advisory_severities[0].system.identifier
+ assert sev.value == advisory_severities[0].value
+
+
+@pytest.mark.django_db
+def test_get_or_create_advisory_weaknesses(advisory_weaknesses):
+ weaknesses = get_or_create_advisory_weaknesses(advisory_weaknesses)
+ assert len(weaknesses) == len(advisory_weaknesses)
+ for w in weaknesses:
+ assert isinstance(w, AdvisoryWeakness)
+ assert w.cwe_id in advisory_weaknesses
diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py
index 52104b556..3aec1f56c 100644
--- a/vulnerabilities/utils.py
+++ b/vulnerabilities/utils.py
@@ -39,7 +39,7 @@
from univers.version_range import NginxVersionRange
from univers.version_range import VersionRange
-from aboutcode.hashid import build_vcid # NOQA
+from aboutcode.hashid import build_vcid
logger = logging.getLogger(__name__)
@@ -249,6 +249,11 @@ def fetch_github_graphql_query(graphql_query: dict):
response = _get_gh_response(gh_token=gh_token, graphql_query=graphql_query)
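+    # _get_gh_response returns None when the HTTP request itself fails.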
+ if not response:
+ msg = "No response received from GitHub API."
+ logger.error(msg)
+ raise GraphQLError(msg)
+
message = response.get("message")
if message and message == "Bad credentials":
raise GitHubTokenError(f"Invalid GitHub token: {message}")
@@ -266,7 +271,10 @@ def _get_gh_response(gh_token, graphql_query):
"""
endpoint = "https://api.github.com/graphql"
headers = {"Authorization": f"bearer {gh_token}"}
- return requests.post(endpoint, headers=headers, json=graphql_query).json()
+    try:
+        return requests.post(endpoint, headers=headers, json=graphql_query).json()
+    except Exception as e:
+        logger.error(f"Failed to fetch data from GitHub GraphQL API: {e}")
+        # Return None explicitly so the caller can raise a proper GraphQLError.
+        return None
def dedupe(original: List) -> List:
@@ -287,9 +295,10 @@ def get_affected_packages_by_patched_package(
"""
affected_packages_by_patched_package = defaultdict(list)
for package in affected_packages:
- affected_packages_by_patched_package[package.patched_package].append(
- package.vulnerable_package
- )
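+        # Skip entries with no vulnerable package; there is nothing to group.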
+ if package.vulnerable_package:
+ affected_packages_by_patched_package[package.patched_package].append(
+ package.vulnerable_package
+ )
return affected_packages_by_patched_package
@@ -595,6 +604,7 @@ def compute_content_id(advisory_data):
# Normalize fields
from vulnerabilities.importer import AdvisoryData
+ from vulnerabilities.importer import AdvisoryDataV2
from vulnerabilities.models import Advisory
if isinstance(advisory_data, Advisory):
@@ -610,20 +620,62 @@ def compute_content_id(advisory_data):
normalized_data["url"] = advisory_data.url
elif isinstance(advisory_data, AdvisoryData):
- normalized_data = {
- "aliases": normalize_list(advisory_data.aliases),
- "summary": normalize_text(advisory_data.summary),
- "affected_packages": [
- pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg
- ],
- "references": [
- ref.to_dict() for ref in normalize_list(advisory_data.references) if ref
- ],
- "weaknesses": normalize_list(advisory_data.weaknesses),
- }
+ if advisory_data.references_v2:
+ normalized_data = {
+ "aliases": normalize_list(advisory_data.aliases),
+ "summary": normalize_text(advisory_data.summary),
+ "affected_packages": [
+ pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg
+ ],
+ "references": [
+ ref.to_dict() for ref in normalize_list(advisory_data.references_v2) if ref
+ ],
+ "severities": [
+ sev.to_dict() for sev in normalize_list(advisory_data.severities) if sev
+ ],
+ "weaknesses": normalize_list(advisory_data.weaknesses),
+ }
+        else:
+            # Fall back to v1 references; always bind normalized_data so the
+            # "url" assignment below cannot raise a NameError.
+ normalized_data = {
+ "aliases": normalize_list(advisory_data.aliases),
+ "summary": normalize_text(advisory_data.summary),
+ "affected_packages": [
+ pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg
+ ],
+ "references": [
+ ref.to_dict() for ref in normalize_list(advisory_data.references) if ref
+ ],
+ "weaknesses": normalize_list(advisory_data.weaknesses),
+ }
+
normalized_data["url"] = advisory_data.url
normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True)
content_id = hashlib.sha256(normalized_json.encode("utf-8")).hexdigest()
return content_id
+
+
+def create_registry(pipelines):
+ """
+ Return a mapping of {pipeline ID: pipeline class} for a list of pipelines.
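+
+    For example, with a hypothetical ``MyPipeline`` VulnerableCodePipeline
+    subclass whose ``pipeline_id`` is ``"my_pipeline"``::
+
+        create_registry([MyPipeline])  # -> {"my_pipeline": MyPipeline}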
+ """
+ from vulnerabilities.pipelines import VulnerableCodePipeline
+
+ registry = {}
+ for pipeline in pipelines:
+ if issubclass(pipeline, VulnerableCodePipeline):
+ key = pipeline.pipeline_id
+ else:
+            # Use qualified_name for legacy importers and improvers.
+ key = pipeline.qualified_name
+
+ if not key:
+ raise Exception(f"Pipeline ID can not be empty: {pipeline!r}")
+
+ if key in registry:
+ raise Exception(f"Duplicate pipeline found: {key}")
+
+ registry[key] = pipeline
+
+ return registry
diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py
index e6fb95a94..71534f9fb 100644
--- a/vulnerabilities/views.py
+++ b/vulnerabilities/views.py
@@ -29,6 +29,7 @@
from vulnerabilities import models
from vulnerabilities.forms import AdminLoginForm
+from vulnerabilities.forms import AdvisorySearchForm
from vulnerabilities.forms import ApiUserCreationForm
from vulnerabilities.forms import PackageSearchForm
from vulnerabilities.forms import PipelineSchedulePackageForm
@@ -71,6 +72,34 @@ def get_queryset(self, query=None):
)
+class PackageSearchV2(ListView):
+ model = models.PackageV2
+ template_name = "packages_v2.html"
+ ordering = ["type", "namespace", "name", "version"]
+ paginate_by = PAGE_SIZE
+
+ def get_context_data(self, **kwargs):
+ context = super().get_context_data(**kwargs)
+ request_query = self.request.GET
+ context["package_search_form"] = PackageSearchForm(request_query)
+ context["search"] = request_query.get("search")
+ return context
+
+ def get_queryset(self, query=None):
+ """
+ Return a Package queryset for the ``query``.
+ Make a best effort approach to find matching packages either based
+ on exact purl, partial purl or just name and namespace.
+ """
+ query = query or self.request.GET.get("search") or ""
+        return (
+            self.model.objects.search(query)
+            .with_vulnerability_counts()
+            .order_by("package_url")
+        )
+
+
class VulnerabilitySearch(ListView):
model = models.Vulnerability
template_name = "vulnerabilities.html"
@@ -89,6 +118,24 @@ def get_queryset(self, query=None):
return self.model.objects.search(query=query).with_package_counts()
+class AdvisorySearch(ListView):
+ model = models.AdvisoryV2
+ template_name = "vulnerabilities.html"
+ ordering = ["advisory_id"]
+ paginate_by = PAGE_SIZE
+
+ def get_context_data(self, **kwargs):
+ context = super().get_context_data(**kwargs)
+ request_query = self.request.GET
+ context["advisory_search_form"] = VulnerabilitySearchForm(request_query)
+ context["search"] = request_query.get("search")
+ return context
+
+ def get_queryset(self, query=None):
+ query = query or self.request.GET.get("search") or ""
+ return self.model.objects.search(query=query).with_package_counts()
+
+
class PackageDetails(DetailView):
model = models.Package
template_name = "package_details.html"
@@ -130,6 +177,47 @@ def get_object(self, queryset=None):
return package
+class PackageV2Details(DetailView):
+ model = models.PackageV2
+ template_name = "package_details_v2.html"
+ slug_url_kwarg = "purl"
+ slug_field = "purl"
+
+ def get_context_data(self, **kwargs):
+ context = super().get_context_data(**kwargs)
+ package = self.object
+ context["package"] = package
+ context["affected_by_advisories"] = package.affected_by_advisories.order_by("advisory_id")
+        # A ghost package should not fix any vulnerability.
+ context["fixing_advisories"] = (
+ None if package.is_ghost else package.fixing_advisories.order_by("advisory_id")
+ )
+ context["package_search_form"] = PackageSearchForm(self.request.GET)
+ context["fixed_package_details"] = package.fixed_package_details
+
+ # context["history"] = list(package.history)
+ return context
+
+ def get_object(self, queryset=None):
+ if queryset is None:
+ queryset = self.get_queryset()
+
+ purl = self.kwargs.get(self.slug_url_kwarg)
+ if purl:
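+            # for_purl() narrows the queryset to the exact package URL.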
+ queryset = queryset.for_purl(purl)
+ else:
+ cls = self.__class__.__name__
+ raise AttributeError(
+ f"Package details view {cls} must be called with a purl, " f"but got: {purl!r}"
+ )
+
+ try:
+ package = queryset.get()
+ except queryset.model.DoesNotExist:
+ raise Http404(f"No Package found for purl: {purl}")
+ return package
+
+
class VulnerabilityDetails(DetailView):
model = models.Vulnerability
template_name = "vulnerability_details.html"
@@ -193,9 +281,11 @@ def get_context_data(self, **kwargs):
for severity in valid_severities:
try:
- vector_values = SCORING_SYSTEMS[severity.scoring_system].get(
- severity.scoring_elements
- )
+            vector_values_system = SCORING_SYSTEMS.get(severity.scoring_system)
+ if not vector_values_system:
+ logging.error(f"Unknown scoring system: {severity.scoring_system}")
+ continue
+ vector_values = vector_values_system.get(severity.scoring_elements)
if vector_values:
severity_vectors.append({"vector": vector_values, "origin": severity.url})
except (
@@ -232,6 +322,112 @@ def get_context_data(self, **kwargs):
return context
+class AdvisoryDetails(DetailView):
+ model = models.AdvisoryV2
+ template_name = "advisory_detail.html"
+ slug_url_kwarg = "id"
+ slug_field = "id"
+
+ def get_queryset(self):
+ return (
+ super()
+ .get_queryset()
+ .select_related()
+ .prefetch_related(
+ Prefetch(
+ "references",
+ queryset=models.AdvisoryReference.objects.only(
+ "reference_id", "reference_type", "url"
+ ),
+ ),
+ Prefetch(
+ "aliases",
+ queryset=models.AdvisoryAlias.objects.only("alias"),
+ ),
+ Prefetch(
+ "weaknesses",
+ queryset=models.AdvisoryWeakness.objects.only("cwe_id"),
+ ),
+ Prefetch(
+ "severities",
+ queryset=models.AdvisorySeverity.objects.only(
+ "scoring_system", "value", "url", "scoring_elements", "published_at"
+ ),
+ ),
+ Prefetch(
+ "exploits",
+ queryset=models.AdvisoryExploit.objects.only(
+ "data_source", "description", "required_action", "due_date", "notes"
+ ),
+ ),
+ )
+ )
+
+ def get_context_data(self, **kwargs):
+ """
+ Build context with preloaded QuerySets and minimize redundant queries.
+ """
+ context = super().get_context_data(**kwargs)
+ advisory = self.object
+
+ # Pre-fetch and process data in Python instead of the template
+ weaknesses_present_in_db = [
+ weakness_object
+ for weakness_object in advisory.weaknesses.all()
+ if weakness_object.weakness
+ ]
+
+ valid_severities = self.object.severities.exclude(scoring_system=EPSS.identifier).filter(
+ scoring_elements__isnull=False, scoring_system__in=SCORING_SYSTEMS.keys()
+ )
+
+ severity_vectors = []
+
+ for severity in valid_severities:
+ try:
+ vector_values_system = SCORING_SYSTEMS.get(severity.scoring_system)
+ if not vector_values_system:
+ logging.error(f"Unknown scoring system: {severity.scoring_system}")
+ continue
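+                # Skip qualitative ratings such as cvssv3.1_qr; they have no vector to decompose.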
+ if vector_values_system.identifier in ["cvssv3.1_qr"]:
+ continue
+ vector_values = vector_values_system.get(severity.scoring_elements)
+                if vector_values:
+                    severity_vectors.append({"vector": vector_values, "origin": severity.url})
+                else:
+                    logging.error(f"Error processing scoring elements: {severity.scoring_elements}")
+ except (
+ CVSS2MalformedError,
+ CVSS3MalformedError,
+ CVSS4MalformedError,
+ NotImplementedError,
+ ):
+ logging.error(f"CVSSMalformedError for {severity.scoring_elements}")
+
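+        # For EPSS, scoring_elements holds the percentile and value holds the score.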
+        epss_severity = advisory.severities.filter(scoring_system=EPSS.identifier).first()
+ epss_data = None
+ if epss_severity:
+ epss_data = {
+ "percentile": epss_severity.scoring_elements,
+ "score": epss_severity.value,
+ "published_at": epss_severity.published_at,
+ }
+ context.update(
+ {
+ "advisory": advisory,
+ "severities": list(advisory.severities.all()),
+ "severity_vectors": severity_vectors,
+ "references": list(advisory.references.all()),
+ "aliases": list(advisory.aliases.all()),
+ "weaknesses": weaknesses_present_in_db,
+ "status": advisory.get_status_label,
+ # "history": advisory.history,
+ "epss_data": epss_data,
+ }
+ )
+ return context
+
+
class HomePage(View):
template_name = "index.html"
@@ -245,6 +441,19 @@ def get(self, request):
return render(request=request, template_name=self.template_name, context=context)
+class HomePageV2(View):
+ template_name = "index_v2.html"
+
+ def get(self, request):
+ request_query = request.GET
+ context = {
+ "vulnerability_search_form": AdvisorySearchForm(request_query),
+ "package_search_form": PackageSearchForm(request_query),
+ "release_url": f"https://github.com/aboutcode-org/vulnerablecode/releases/tag/v{VULNERABLECODE_VERSION}",
+ }
+ return render(request=request, template_name=self.template_name, context=context)
+
+
email_template = """
Dear VulnerableCode.io user:
@@ -353,6 +562,58 @@ def get_context_data(self, **kwargs):
return context
+class AdvisoryPackagesDetails(DetailView):
+ """
+    View to display all packages affected by or fixing a specific advisory.
+ URL: /advisories/{id}/packages
+ """
+
+ model = models.AdvisoryV2
+ template_name = "advisory_package_details.html"
+ slug_url_kwarg = "id"
+ slug_field = "id"
+
+ def get_queryset(self):
+ """
+ Prefetch and optimize related data to minimize database hits.
+ """
+ return (
+ super()
+ .get_queryset()
+ .prefetch_related(
+ Prefetch(
+ "affecting_packages",
+ queryset=models.PackageV2.objects.only("type", "namespace", "name", "version"),
+ ),
+ Prefetch(
+ "fixed_by_packages",
+ queryset=models.PackageV2.objects.only("type", "namespace", "name", "version"),
+ ),
+ )
+ )
+
+ def get_context_data(self, **kwargs):
+ """
+ Build context with preloaded QuerySets and minimize redundant queries.
+ """
+ context = super().get_context_data(**kwargs)
+ advisory = self.object
+ (
+ sorted_fixed_by_packages,
+ sorted_affected_packages,
+ all_affected_fixed_by_matches,
+ ) = advisory.aggregate_fixed_and_affected_packages()
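+        # The model helper returns pre-sorted package lists plus affected/fixed-by match pairs.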
+ context.update(
+ {
+ "affected_packages": sorted_affected_packages,
+ "fixed_by_packages": sorted_fixed_by_packages,
+ "all_affected_fixed_by_matches": all_affected_fixed_by_matches,
+ "advisory": advisory,
+ }
+ )
+ return context
+
+
class PipelineScheduleListView(ListView, FormMixin):
model = PipelineSchedule
context_object_name = "schedule_list"
diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py
index 45a03a28d..245b8e917 100644
--- a/vulnerablecode/urls.py
+++ b/vulnerablecode/urls.py
@@ -20,15 +20,21 @@
from vulnerabilities.api import CPEViewSet
from vulnerabilities.api import PackageViewSet
from vulnerabilities.api import VulnerabilityViewSet
+from vulnerabilities.api_v2 import AdvisoriesPackageV2ViewSet
from vulnerabilities.api_v2 import CodeFixViewSet
from vulnerabilities.api_v2 import PackageV2ViewSet
from vulnerabilities.api_v2 import PipelineScheduleV2ViewSet
from vulnerabilities.api_v2 import VulnerabilityV2ViewSet
from vulnerabilities.views import AdminLoginView
+from vulnerabilities.views import AdvisoryDetails
+from vulnerabilities.views import AdvisoryPackagesDetails
from vulnerabilities.views import ApiUserCreateView
from vulnerabilities.views import HomePage
+from vulnerabilities.views import HomePageV2
from vulnerabilities.views import PackageDetails
from vulnerabilities.views import PackageSearch
+from vulnerabilities.views import PackageSearchV2
+from vulnerabilities.views import PackageV2Details
from vulnerabilities.views import PipelineRunDetailView
from vulnerabilities.views import PipelineRunListView
from vulnerabilities.views import PipelineScheduleListView
@@ -55,6 +61,9 @@ def __init__(self, *args, **kwargs):
api_v2_router = OptionalSlashRouter()
api_v2_router.register("packages", PackageV2ViewSet, basename="package-v2")
+api_v2_router.register(
+ "advisories-packages", AdvisoriesPackageV2ViewSet, basename="advisories-package-v2"
+)
api_v2_router.register("vulnerabilities", VulnerabilityV2ViewSet, basename="vulnerability-v2")
api_v2_router.register("codefixes", CodeFixViewSet, basename="codefix")
api_v2_router.register("schedule", PipelineScheduleV2ViewSet, basename="schedule")
@@ -87,16 +96,36 @@ def __init__(self, *args, **kwargs):
PipelineRunDetailView.as_view(),
name="run-details",
),
+ path(
+ "v2",
+ HomePageV2.as_view(),
+ name="home",
+ ),
+ path(
+ "advisories/",
+ AdvisoryDetails.as_view(),
+ name="advisory_details",
+ ),
path(
"packages/search/",
PackageSearch.as_view(),
name="package_search",
),
+ path(
+ "packages/v2/search/",
+ PackageSearchV2.as_view(),
+ name="package_search_v2",
+ ),
re_path(
r"^packages/(?Ppkg:.+)$",
PackageDetails.as_view(),
name="package_details",
),
+ re_path(
+ r"^packages/v2/(?Ppkg:.+)$",
+ PackageV2Details.as_view(),
+ name="package_details_v2",
+ ),
path(
"vulnerabilities/search/",
VulnerabilitySearch.as_view(),
@@ -112,6 +141,11 @@ def __init__(self, *args, **kwargs):
VulnerabilityPackagesDetails.as_view(),
name="vulnerability_package_details",
),
+ path(
+ "advisories//packages",
+ AdvisoryPackagesDetails.as_view(),
+ name="advisory_package_details",
+ ),
path(
"api/",
include(api_router.urls),