diff --git a/vulnerabilities/api_v2.py b/vulnerabilities/api_v2.py index 4c2562216..4915dda63 100644 --- a/vulnerabilities/api_v2.py +++ b/vulnerabilities/api_v2.py @@ -24,8 +24,14 @@ from rest_framework.response import Response from rest_framework.reverse import reverse +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisorySeverity +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import AdvisoryWeakness from vulnerabilities.models import CodeFix +from vulnerabilities.models import CodeFixV2 from vulnerabilities.models import Package +from vulnerabilities.models import PackageV2 from vulnerabilities.models import PipelineRun from vulnerabilities.models import PipelineSchedule from vulnerabilities.models import Vulnerability @@ -44,6 +50,16 @@ class Meta: fields = ["cwe_id", "name", "description"] +class AdvisoryWeaknessSerializer(serializers.ModelSerializer): + cwe_id = serializers.CharField() + name = serializers.CharField() + description = serializers.CharField() + + class Meta: + model = AdvisoryWeakness + fields = ["cwe_id", "name", "description"] + + class VulnerabilityReferenceV2Serializer(serializers.ModelSerializer): url = serializers.CharField() reference_type = serializers.CharField() @@ -54,6 +70,29 @@ class Meta: fields = ["url", "reference_type", "reference_id"] +class AdvisoryReferenceSerializer(serializers.ModelSerializer): + url = serializers.CharField() + reference_type = serializers.CharField() + reference_id = serializers.CharField() + + class Meta: + model = AdvisoryReference + fields = ["url", "reference_type", "reference_id"] + + +class AdvisorySeveritySerializer(serializers.ModelSerializer): + class Meta: + model = AdvisorySeverity + fields = ["url", "value", "scoring_system", "scoring_elements", "published_at"] + + def to_representation(self, instance): + data = super().to_representation(instance) + published_at = data.get("published_at", None) + if not published_at: + data.pop("published_at") + return data + + class VulnerabilitySeverityV2Serializer(serializers.ModelSerializer): class Meta: model = VulnerabilitySeverity @@ -94,6 +133,32 @@ def get_aliases(self, obj): return [alias.alias for alias in obj.aliases.all()] +class AdvisoryV2Serializer(serializers.ModelSerializer): + aliases = serializers.SerializerMethodField() + weaknesses = AdvisoryWeaknessSerializer(many=True) + references = AdvisoryReferenceSerializer(many=True) + severities = AdvisorySeveritySerializer(many=True) + advisory_id = serializers.CharField(source="avid", read_only=True) + + class Meta: + model = AdvisoryV2 + fields = [ + "advisory_id", + "url", + "aliases", + "summary", + "severities", + "weaknesses", + "references", + "exploitability", + "weighted_severity", + "risk_score", + ] + + def get_aliases(self, obj): + return [alias.alias for alias in obj.aliases.all()] + + class VulnerabilityListSerializer(serializers.ModelSerializer): url = serializers.SerializerMethodField() @@ -233,6 +298,57 @@ def get_fixing_vulnerabilities(self, obj): return [vuln.vulnerability_id for vuln in obj.fixing_vulnerabilities.all()] +class AdvisoryPackageV2Serializer(serializers.ModelSerializer): + purl = serializers.CharField(source="package_url") + risk_score = serializers.FloatField(read_only=True) + affected_by_vulnerabilities = serializers.SerializerMethodField() + fixing_vulnerabilities = serializers.SerializerMethodField() + next_non_vulnerable_version = serializers.CharField(read_only=True) + latest_non_vulnerable_version = 
serializers.CharField(read_only=True) + + class Meta: + model = PackageV2 + fields = [ + "purl", + "affected_by_vulnerabilities", + "fixing_vulnerabilities", + "next_non_vulnerable_version", + "latest_non_vulnerable_version", + "risk_score", + ] + + def get_affected_by_vulnerabilities(self, obj): + """ + Return a mapping of advisory ids to advisory details, including the fixing package purl and code fix URLs. + """ + result = {} + request = self.context.get("request") + for adv in getattr(obj, "prefetched_affected_advisories", []): + fixed_by_package = adv.fixed_by_packages.first() + purl = None + if fixed_by_package: + purl = fixed_by_package.package_url + # Get code fixes for this advisory + code_fixes = CodeFixV2.objects.filter(advisory=adv).distinct() + code_fix_urls = [ + reverse("codefix-detail", args=[code_fix.id], request=request) + for code_fix in code_fixes + ] + + result[adv.avid] = { + "advisory_id": adv.avid, + "fixed_by_packages": purl, + "code_fixes": code_fix_urls, + } + return result + + def get_fixing_vulnerabilities(self, obj): + # A ghost package cannot fix any advisory. + if obj.is_ghost: + return [] + return [adv.advisory_id for adv in obj.fixing_advisories.all()] + + class PackageurlListSerializer(serializers.Serializer): purls = serializers.ListField( child=serializers.CharField(), @@ -261,6 +377,12 @@ class PackageV2FilterSet(filters.FilterSet): purl = filters.CharFilter(field_name="package_url") +class AdvisoryPackageV2FilterSet(filters.FilterSet): + affected_by_vulnerability = filters.CharFilter(field_name="affected_by_advisories__advisory_id") + fixing_vulnerability = filters.CharFilter(field_name="fixing_advisories__advisory_id") + purl = filters.CharFilter(field_name="package_url") + + class PackageV2ViewSet(viewsets.ReadOnlyModelViewSet): queryset = Package.objects.all().prefetch_related( Prefetch( @@ -754,3 +876,263 @@ def get_permissions(self): if self.action not in ["list", "retrieve"]: return [IsAdminWithSessionAuth()] return super().get_permissions() + + +class AdvisoriesPackageV2ViewSet(viewsets.ReadOnlyModelViewSet): + queryset = PackageV2.objects.all().prefetch_related( + Prefetch( + "affected_by_advisories", + queryset=AdvisoryV2.objects.prefetch_related("fixed_by_packages"), + to_attr="prefetched_affected_advisories", + ) + ) + serializer_class = AdvisoryPackageV2Serializer + filter_backends = (filters.DjangoFilterBackend,) + filterset_class = AdvisoryPackageV2FilterSet + + def get_queryset(self): + queryset = super().get_queryset() + package_purls = self.request.query_params.getlist("purl") + affected_by_advisory = self.request.query_params.get("affected_by_advisory") + fixing_advisory = self.request.query_params.get("fixing_advisory") + if package_purls: + queryset = queryset.filter(package_url__in=package_purls) + if affected_by_advisory: + queryset = queryset.filter(affected_by_advisories__advisory_id=affected_by_advisory) + if fixing_advisory: + queryset = queryset.filter(fixing_advisories__advisory_id=fixing_advisory) + return queryset.with_is_vulnerable() + + def list(self, request, *args, **kwargs): + queryset = self.get_queryset() + # Apply pagination + page = self.paginate_queryset(queryset) + if page is not None: + # Collect only advisories for packages in the current page + advisories = set() + for package in page: + advisories.update(package.affected_by_advisories.all()) + advisories.update(package.fixing_advisories.all()) + + # Serialize the advisories, keyed by their avid + advisory_data = {f"{adv.avid}": 
AdvisoryV2Serializer(adv).data for adv in advisories} + + # Serialize the current page of packages + serializer = self.get_serializer(page, many=True) + data = serializer.data + # Use 'self.get_paginated_response' to include pagination data + return self.get_paginated_response({"advisories": advisory_data, "packages": data}) + + # If pagination is not applied, collect advisories for all packages + advisories = set() + for package in queryset: + advisories.update(package.affected_by_advisories.all()) + advisories.update(package.fixing_advisories.all()) + + advisory_data = {f"{adv.avid}": AdvisoryV2Serializer(adv).data for adv in advisories} + + serializer = self.get_serializer(queryset, many=True) + data = serializer.data + return Response({"advisories": advisory_data, "packages": data}) + + @extend_schema( + request=PackageurlListSerializer, + responses={200: PackageV2Serializer(many=True)}, + ) + @action( + detail=False, + methods=["post"], + serializer_class=PackageurlListSerializer, + filter_backends=[], + pagination_class=None, + ) + def bulk_lookup(self, request): + """ + Return the packages matching the exact PackageURLs requested. + """ + serializer = self.serializer_class(data=request.data) + if not serializer.is_valid(): + return Response( + status=status.HTTP_400_BAD_REQUEST, + data={ + "error": serializer.errors, + "message": "A non-empty 'purls' list of PURLs is required.", + }, + ) + validated_data = serializer.validated_data + purls = validated_data.get("purls") + + # Fetch packages matching the provided purls + packages = PackageV2.objects.for_purls(purls).with_is_vulnerable() + + # Collect advisories associated with these packages + advisories = set() + for package in packages: + advisories.update(package.affected_by_advisories.all()) + advisories.update(package.fixing_advisories.all()) + + # Serialize advisories, keyed by their avid + advisory_data = {adv.avid: AdvisoryV2Serializer(adv).data for adv in advisories} + + # Serialize packages + package_data = AdvisoryPackageV2Serializer( + packages, + many=True, + context={"request": request}, + ).data + + return Response( + { + "advisories": advisory_data, + "packages": package_data, + } + ) + + @extend_schema( + request=PackageBulkSearchRequestSerializer, + responses={200: PackageV2Serializer(many=True)}, + ) + @action( + detail=False, + methods=["post"], + serializer_class=PackageBulkSearchRequestSerializer, + filter_backends=[], + pagination_class=None, + ) + def bulk_search(self, request): + """ + Look up vulnerable packages using many Package URLs at once. 
+ """ + serializer = self.serializer_class(data=request.data) + if not serializer.is_valid(): + return Response( + status=status.HTTP_400_BAD_REQUEST, + data={ + "error": serializer.errors, + "message": "A non-empty 'purls' list of PURLs is required.", + }, + ) + validated_data = serializer.validated_data + purls = validated_data.get("purls") + purl_only = validated_data.get("purl_only", False) + plain_purl = validated_data.get("plain_purl", False) + + if plain_purl: + purl_objects = [PackageURL.from_string(purl) for purl in purls] + plain_purl_objects = [ + PackageURL( + type=purl.type, + namespace=purl.namespace, + name=purl.name, + version=purl.version, + ) + for purl in purl_objects + ] + plain_purls = [str(purl) for purl in plain_purl_objects] + + query = ( + PackageV2.objects.filter(plain_package_url__in=plain_purls) + .order_by("plain_package_url") + .distinct("plain_package_url") + .with_is_vulnerable() + ) + + packages = query + + # Collect vulnerabilities associated with these packages + advisories = set() + for package in packages: + advisories.update(package.affected_by_vulnerabilities.all()) + advisories.update(package.fixing_vulnerabilities.all()) + + advisory_data = {adv.avid: VulnerabilityV2Serializer(adv).data for adv in advisories} + + if not purl_only: + package_data = AdvisoryPackageV2Serializer( + packages, many=True, context={"request": request} + ).data + return Response( + { + "advisories": advisory_data, + "packages": package_data, + } + ) + + # Using order by and distinct because there will be + # many fully qualified purl for a single plain purl + vulnerable_purls = query.vulnerable().only("plain_package_url") + vulnerable_purls = [str(package.plain_package_url) for package in vulnerable_purls] + return Response(data=vulnerable_purls) + + query = PackageV2.objects.filter(package_url__in=purls).distinct().with_is_vulnerable() + packages = query + + # Collect vulnerabilities associated with these packages + advisories = set() + for package in packages: + advisories.update(package.affected_by_vulnerabilities.all()) + advisories.update(package.fixing_vulnerabilities.all()) + + advisory_data = {adv.advisory_id: AdvisoryV2Serializer(adv).data for adv in advisories} + + if not purl_only: + package_data = AdvisoryPackageV2Serializer( + packages, many=True, context={"request": request} + ).data + return Response( + { + "advisories": advisory_data, + "packages": package_data, + } + ) + + vulnerable_purls = query.vulnerable().only("package_url") + vulnerable_purls = [str(package.package_url) for package in vulnerable_purls] + return Response(data=vulnerable_purls) + + @action(detail=False, methods=["get"]) + def all(self, request): + """ + Return a list of Package URLs of vulnerable packages. + """ + vulnerable_purls = ( + PackageV2.objects.vulnerable() + .only("package_url") + .order_by("package_url") + .distinct() + .values_list("package_url", flat=True) + ) + return Response(vulnerable_purls) + + @extend_schema( + request=LookupRequestSerializer, + responses={200: PackageV2Serializer(many=True)}, + ) + @action( + detail=False, + methods=["post"], + serializer_class=LookupRequestSerializer, + filter_backends=[], + pagination_class=None, + ) + def lookup(self, request): + """ + Return the response for exact PackageURL requested for. 
+ """ + serializer = self.serializer_class(data=request.data) + if not serializer.is_valid(): + return Response( + status=status.HTTP_400_BAD_REQUEST, + data={ + "error": serializer.errors, + "message": "A 'purl' is required.", + }, + ) + validated_data = serializer.validated_data + purl = validated_data.get("purl") + + qs = self.get_queryset().for_purls([purl]).with_is_vulnerable() + return Response( + AdvisoryPackageV2Serializer(qs, many=True, context={"request": request}).data + ) diff --git a/vulnerabilities/forms.py b/vulnerabilities/forms.py index 74a10340c..7d955ac37 100644 --- a/vulnerabilities/forms.py +++ b/vulnerabilities/forms.py @@ -36,6 +36,14 @@ class VulnerabilitySearchForm(forms.Form): ) +class AdvisorySearchForm(forms.Form): + + search = forms.CharField( + required=True, + widget=forms.TextInput(attrs={"placeholder": "Advisory id or alias such as CVE or GHSA"}), + ) + + class ApiUserCreationForm(forms.ModelForm): """ Support a simplified creation for API-only users directly from the UI. diff --git a/vulnerabilities/importer.py b/vulnerabilities/importer.py index 759ec9330..9cef5e0fa 100644 --- a/vulnerabilities/importer.py +++ b/vulnerabilities/importer.py @@ -55,6 +55,7 @@ class VulnerabilitySeverity: value: str scoring_elements: str = "" published_at: Optional[datetime.datetime] = None + url: Optional[str] = None def to_dict(self): data = { @@ -145,6 +146,54 @@ def from_url(cls, url): return cls(url=url) +@dataclasses.dataclass(eq=True) +@functools.total_ordering +class ReferenceV2: + reference_id: str = "" + reference_type: str = "" + url: str = "" + + def __post_init__(self): + if not self.url: + raise TypeError("Reference must have a url") + if self.reference_id and not isinstance(self.reference_id, str): + self.reference_id = str(self.reference_id) + + def __lt__(self, other): + if not isinstance(other, Reference): + return NotImplemented + return self._cmp_key() < other._cmp_key() + + # TODO: Add cache + def _cmp_key(self): + return (self.reference_id, self.reference_type, self.url) + + def to_dict(self): + """Return a normalized dictionary representation""" + return { + "reference_id": self.reference_id, + "reference_type": self.reference_type, + "url": self.url, + } + + @classmethod + def from_dict(cls, ref: dict): + return cls( + reference_id=str(ref["reference_id"]), + reference_type=ref.get("reference_type") or "", + url=ref["url"], + ) + + @classmethod + def from_url(cls, url): + reference_id = get_reference_id(url) + if "GHSA-" in reference_id.upper(): + return cls(reference_id=reference_id, url=url) + if is_cve(reference_id): + return cls(url=url, reference_id=reference_id.upper()) + return cls(url=url) + + class UnMergeablePackageError(Exception): """ Raised when a package cannot be merged with another one. 
@@ -302,10 +351,81 @@ class AdvisoryData: date_published must be aware datetime """ + advisory_id: str = "" aliases: List[str] = dataclasses.field(default_factory=list) summary: Optional[str] = "" affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) references: List[Reference] = dataclasses.field(default_factory=list) + references_v2: List[ReferenceV2] = dataclasses.field(default_factory=list) + date_published: Optional[datetime.datetime] = None + weaknesses: List[int] = dataclasses.field(default_factory=list) + severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list) + url: Optional[str] = None + + def __post_init__(self): + if self.date_published and not self.date_published.tzinfo: + logger.warning(f"AdvisoryData with no tzinfo: {self!r}") + if self.summary: + self.summary = self.clean_summary(self.summary) + + def clean_summary(self, summary): + # https://nvd.nist.gov/vuln/detail/CVE-2013-4314 + # https://github.com/cms-dev/cms/issues/888#issuecomment-516977572 + summary = summary.strip() + if summary: + summary = summary.replace("\x00", "\uFFFD") + return summary + + def to_dict(self): + return { + "aliases": self.aliases, + "summary": self.summary, + "affected_packages": [pkg.to_dict() for pkg in self.affected_packages], + "references": [ref.to_dict() for ref in self.references], + "date_published": self.date_published.isoformat() if self.date_published else None, + "weaknesses": self.weaknesses, + "url": self.url if self.url else "", + } + + @classmethod + def from_dict(cls, advisory_data): + date_published = advisory_data["date_published"] + transformed = { + "aliases": advisory_data["aliases"], + "summary": advisory_data["summary"], + "affected_packages": [ + AffectedPackage.from_dict(pkg) + for pkg in advisory_data["affected_packages"] + if pkg is not None + ], + "references": [Reference.from_dict(ref) for ref in advisory_data["references"]], + "date_published": datetime.datetime.fromisoformat(date_published) + if date_published + else None, + "weaknesses": advisory_data["weaknesses"], + "url": advisory_data.get("url") or None, + } + return cls(**transformed) + + +@dataclasses.dataclass(order=True) +class AdvisoryDataV2: + """ + This data class expresses the contract between data sources and the import runner. 
+ + If an advisory_id is present then: + summary or affected_packages or references must be present + otherwise + either affected_packages or references should be present + + date_published must be aware datetime + """ + + advisory_id: str = "" + aliases: List[str] = dataclasses.field(default_factory=list) + summary: Optional[str] = "" + affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list) + references: List[ReferenceV2] = dataclasses.field(default_factory=list) date_published: Optional[datetime.datetime] = None weaknesses: List[int] = dataclasses.field(default_factory=list) url: Optional[str] = None diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index f0d9532ab..3dd914a92 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -33,7 +33,6 @@ from vulnerabilities.importers import ubuntu_usn from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen -from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.pipelines import alpine_linux_importer from vulnerabilities.pipelines import github_importer from vulnerabilities.pipelines import gitlab_importer @@ -42,45 +41,59 @@ from vulnerabilities.pipelines import nvd_importer from vulnerabilities.pipelines import pypa_importer from vulnerabilities.pipelines import pysec_importer +from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2 +from vulnerabilities.pipelines.v2_importers import github_importer as github_importer_v2 +from vulnerabilities.pipelines.v2_importers import gitlab_importer as gitlab_importer_v2 +from vulnerabilities.pipelines.v2_importers import npm_importer as npm_importer_v2 +from vulnerabilities.pipelines.v2_importers import nvd_importer as nvd_importer_v2 +from vulnerabilities.pipelines.v2_importers import pypa_importer as pypa_importer_v2 +from vulnerabilities.pipelines.v2_importers import pysec_importer as pysec_importer_v2 +from vulnerabilities.pipelines.v2_importers import vulnrichment_importer as vulnrichment_importer_v2 +from vulnerabilities.utils import create_registry -IMPORTERS_REGISTRY = [ - nvd_importer.NVDImporterPipeline, - github_importer.GitHubAPIImporterPipeline, - gitlab_importer.GitLabImporterPipeline, - github_osv.GithubOSVImporter, - pypa_importer.PyPaImporterPipeline, - npm_importer.NpmImporterPipeline, - nginx_importer.NginxImporterPipeline, - pysec_importer.PyPIImporterPipeline, - apache_tomcat.ApacheTomcatImporter, - postgresql.PostgreSQLImporter, - debian.DebianImporter, - curl.CurlImporter, - epss.EPSSImporter, - vulnrichment.VulnrichImporter, - alpine_linux_importer.AlpineLinuxImporterPipeline, - ruby.RubyImporter, - apache_kafka.ApacheKafkaImporter, - openssl.OpensslImporter, - redhat.RedhatImporter, - archlinux.ArchlinuxImporter, - ubuntu.UbuntuImporter, - debian_oval.DebianOvalImporter, - retiredotnet.RetireDotnetImporter, - apache_httpd.ApacheHTTPDImporter, - mozilla.MozillaImporter, - gentoo.GentooImporter, - istio.IstioImporter, - project_kb_msr2019.ProjectKBMSRImporter, - suse_scores.SUSESeverityScoreImporter, - elixir_security.ElixirSecurityImporter, - xen.XenImporter, - ubuntu_usn.UbuntuUSNImporter, - fireeye.FireyeImporter, - oss_fuzz.OSSFuzzImporter, -] - -IMPORTERS_REGISTRY = { - x.pipeline_id if issubclass(x, VulnerableCodeBaseImporterPipeline) else x.qualified_name: x - for x in IMPORTERS_REGISTRY -} +IMPORTERS_REGISTRY = create_registry( + [ + 
nvd_importer_v2.NVDImporterPipeline, + github_importer_v2.GitHubAPIImporterPipeline, + npm_importer_v2.NpmImporterPipeline, + vulnrichment_importer_v2.VulnrichImporterPipeline, + apache_httpd_v2.ApacheHTTPDImporterPipeline, + pypa_importer_v2.PyPaImporterPipeline, + gitlab_importer_v2.GitLabImporterPipeline, + pysec_importer_v2.PyPIImporterPipeline, + nvd_importer.NVDImporterPipeline, + github_importer.GitHubAPIImporterPipeline, + gitlab_importer.GitLabImporterPipeline, + github_osv.GithubOSVImporter, + pypa_importer.PyPaImporterPipeline, + npm_importer.NpmImporterPipeline, + nginx_importer.NginxImporterPipeline, + pysec_importer.PyPIImporterPipeline, + apache_tomcat.ApacheTomcatImporter, + postgresql.PostgreSQLImporter, + debian.DebianImporter, + curl.CurlImporter, + epss.EPSSImporter, + vulnrichment.VulnrichImporter, + alpine_linux_importer.AlpineLinuxImporterPipeline, + ruby.RubyImporter, + apache_kafka.ApacheKafkaImporter, + openssl.OpensslImporter, + redhat.RedhatImporter, + archlinux.ArchlinuxImporter, + ubuntu.UbuntuImporter, + debian_oval.DebianOvalImporter, + retiredotnet.RetireDotnetImporter, + apache_httpd.ApacheHTTPDImporter, + mozilla.MozillaImporter, + gentoo.GentooImporter, + istio.IstioImporter, + project_kb_msr2019.ProjectKBMSRImporter, + suse_scores.SUSESeverityScoreImporter, + elixir_security.ElixirSecurityImporter, + xen.XenImporter, + ubuntu_usn.UbuntuUSNImporter, + fireeye.FireyeImporter, + oss_fuzz.OSSFuzzImporter, + ] +) diff --git a/vulnerabilities/importers/curl.py b/vulnerabilities/importers/curl.py index a7f5e86fa..7cbc3208e 100644 --- a/vulnerabilities/importers/curl.py +++ b/vulnerabilities/importers/curl.py @@ -97,7 +97,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData: ... ] ... } >>> parse_advisory_data(raw_data) - AdvisoryData(aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], url='https://curl.se/docs/CVE-2024-2379.json') + AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), 
value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json') """ affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else [] diff --git a/vulnerabilities/importers/osv.py b/vulnerabilities/importers/osv.py index 19867cda5..01f2d8023 100644 --- a/vulnerabilities/importers/osv.py +++ b/vulnerabilities/importers/osv.py @@ -107,6 +107,74 @@ def parse_advisory_data( ) +def parse_advisory_data_v2( + raw_data: dict, supported_ecosystems, advisory_url: str +) -> Optional[AdvisoryData]: + """ + Return an AdvisoryData built from a ``raw_data`` mapping of an OSV advisory and + a ``supported_ecosystems`` list of ecosystem strings, or None if the advisory id is missing. + """ + advisory_id = raw_data.get("id") or "" + if not advisory_id: + logger.error(f"Missing advisory id in OSV data: {raw_data}") + return None + summary = raw_data.get("summary") or "" + details = raw_data.get("details") or "" + summary = build_description(summary=summary, description=details) + aliases = raw_data.get("aliases") or [] + + date_published = get_published_date(raw_data=raw_data) + severities = list(get_severities(raw_data=raw_data)) + references = get_references_v2(raw_data=raw_data) + + affected_packages = [] + + for affected_pkg in raw_data.get("affected") or []: + purl = get_affected_purl(affected_pkg=affected_pkg, raw_id=advisory_id) + + if not purl or purl.type not in supported_ecosystems: + logger.error(f"Unsupported package type: {affected_pkg!r} in OSV: {advisory_id!r}") + continue + + affected_version_range = get_affected_version_range( + affected_pkg=affected_pkg, + raw_id=advisory_id, + supported_ecosystem=purl.type, + ) + + for fixed_range in affected_pkg.get("ranges") or []: + fixed_versions = get_fixed_versions( + fixed_range=fixed_range, raw_id=advisory_id, supported_ecosystem=purl.type + ) + + for version in fixed_versions: + affected_packages.append( + AffectedPackage( + package=purl, + affected_version_range=affected_version_range, + fixed_version=version, + ) + ) + database_specific = raw_data.get("database_specific") or {} + cwe_ids = database_specific.get("cwe_ids") or [] + weaknesses = list(map(get_cwe_id, cwe_ids)) + + if advisory_id in aliases: + aliases.remove(advisory_id) + + return AdvisoryData( + advisory_id=advisory_id, + aliases=aliases, + summary=summary, + references_v2=references, + severities=severities, + affected_packages=affected_packages, + date_published=date_published, + weaknesses=weaknesses, + url=advisory_url, + ) + + def extract_fixed_versions(fixed_range) -> Iterable[str]: """ Return a list of fixed version strings given a ``fixed_range`` mapping of @@ -187,6 +255,23 @@ def get_references(raw_data, severities) -> List[Reference]: return references +def get_references_v2(raw_data) -> List[Reference]: + """ + Return a list of Reference objects extracted from a mapping of OSV ``raw_data``. 
+ """ + references = [] + for ref in raw_data.get("references") or []: + if not ref: + continue + url = ref["url"] + if not url: + logger.error(f"Reference without URL : {ref!r} for OSV id: {raw_data['id']!r}") + continue + references.append(Reference(url=ref["url"])) + return references + + def get_affected_purl(affected_pkg, raw_id): """ Return an affected PackageURL or None given a mapping of ``affected_pkg`` diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 08cce6ff9..be6f73cb9 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -9,7 +9,6 @@ from vulnerabilities.improvers import valid_versions from vulnerabilities.improvers import vulnerability_status -from vulnerabilities.pipelines import VulnerableCodePipeline from vulnerabilities.pipelines import add_cvss31_to_CVEs from vulnerabilities.pipelines import collect_commits from vulnerabilities.pipelines import compute_advisory_todo @@ -21,39 +20,55 @@ from vulnerabilities.pipelines import flag_ghost_packages from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline from vulnerabilities.pipelines import remove_duplicate_advisories +from vulnerabilities.pipelines.v2_improvers import collect_commits as collect_commits_v2 +from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2 +from vulnerabilities.pipelines.v2_improvers import ( + computer_package_version_rank as compute_version_rank_v2, +) +from vulnerabilities.pipelines.v2_improvers import enhance_with_exploitdb as exploitdb_v2 +from vulnerabilities.pipelines.v2_improvers import enhance_with_kev as enhance_with_kev_v2 +from vulnerabilities.pipelines.v2_improvers import ( + enhance_with_metasploit as enhance_with_metasploit_v2, +) +from vulnerabilities.pipelines.v2_improvers import flag_ghost_packages as flag_ghost_packages_v2 +from vulnerabilities.utils import create_registry -IMPROVERS_REGISTRY = [ - valid_versions.GitHubBasicImprover, - valid_versions.GitLabBasicImprover, - valid_versions.NginxBasicImprover, - valid_versions.ApacheHTTPDImprover, - valid_versions.DebianBasicImprover, - valid_versions.NpmImprover, - valid_versions.ElixirImprover, - valid_versions.ApacheTomcatImprover, - valid_versions.ApacheKafkaImprover, - valid_versions.IstioImprover, - valid_versions.DebianOvalImprover, - valid_versions.UbuntuOvalImprover, - valid_versions.OSSFuzzImprover, - valid_versions.RubyImprover, - valid_versions.GithubOSVImprover, - vulnerability_status.VulnerabilityStatusImprover, - valid_versions.CurlImprover, - flag_ghost_packages.FlagGhostPackagePipeline, - enhance_with_kev.VulnerabilityKevPipeline, - enhance_with_metasploit.MetasploitImproverPipeline, - enhance_with_exploitdb.ExploitDBImproverPipeline, - compute_package_risk.ComputePackageRiskPipeline, - compute_package_version_rank.ComputeVersionRankPipeline, - collect_commits.CollectFixCommitsPipeline, - add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline, - remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline, - populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, - compute_advisory_todo.ComputeToDo, -] - -IMPROVERS_REGISTRY = { - x.pipeline_id if issubclass(x, VulnerableCodePipeline) else x.qualified_name: x - for x in IMPROVERS_REGISTRY -} +IMPROVERS_REGISTRY = create_registry( + [ + valid_versions.GitHubBasicImprover, + valid_versions.GitLabBasicImprover, + valid_versions.NginxBasicImprover, + valid_versions.ApacheHTTPDImprover, + 
valid_versions.DebianBasicImprover, + valid_versions.NpmImprover, + valid_versions.ElixirImprover, + valid_versions.ApacheTomcatImprover, + valid_versions.ApacheKafkaImprover, + valid_versions.IstioImprover, + valid_versions.DebianOvalImprover, + valid_versions.UbuntuOvalImprover, + valid_versions.OSSFuzzImprover, + valid_versions.RubyImprover, + valid_versions.GithubOSVImprover, + vulnerability_status.VulnerabilityStatusImprover, + valid_versions.CurlImprover, + flag_ghost_packages.FlagGhostPackagePipeline, + enhance_with_kev.VulnerabilityKevPipeline, + enhance_with_metasploit.MetasploitImproverPipeline, + enhance_with_exploitdb.ExploitDBImproverPipeline, + compute_package_risk.ComputePackageRiskPipeline, + compute_package_version_rank.ComputeVersionRankPipeline, + collect_commits.CollectFixCommitsPipeline, + add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline, + remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline, + populate_vulnerability_summary_pipeline.PopulateVulnerabilitySummariesPipeline, + exploitdb_v2.ExploitDBImproverPipeline, + enhance_with_kev_v2.VulnerabilityKevPipeline, + flag_ghost_packages_v2.FlagGhostPackagePipeline, + enhance_with_metasploit_v2.MetasploitImproverPipeline, + compute_package_risk_v2.ComputePackageRiskPipeline, + compute_version_rank_v2.ComputeVersionRankPipeline, + collect_commits_v2.CollectFixCommitsPipeline, + compute_advisory_todo.ComputeToDo, + ] +) diff --git a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py index f4876b11a..78ec8bb0a 100644 --- a/vulnerabilities/management/commands/import.py +++ b/vulnerabilities/management/commands/import.py @@ -14,6 +14,7 @@ from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importers import IMPORTERS_REGISTRY from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 class Command(BaseCommand): @@ -57,7 +58,9 @@ def import_data(self, importers): failed_importers = [] for importer in importers: - if issubclass(importer, VulnerableCodeBaseImporterPipeline): + if issubclass(importer, VulnerableCodeBaseImporterPipeline) or issubclass( + importer, VulnerableCodeBaseImporterPipelineV2 + ): self.stdout.write(f"Importing data using {importer.pipeline_id}") status, error = importer().execute() if status != 0: diff --git a/vulnerabilities/migrations/0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py b/vulnerabilities/migrations/0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py new file mode 100644 index 000000000..fd3f74d98 --- /dev/null +++ b/vulnerabilities/migrations/0094_advisoryalias_advisoryreference_advisoryseverity_and_more.py @@ -0,0 +1,635 @@ +# Generated by Django 4.2.20 on 2025-07-01 10:38 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0093_advisorytodo_todorelatedadvisory_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="AdvisoryAlias", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "alias", + models.CharField( + help_text="An alias is a unique vulnerability identifier in some database, such as CVE-2020-2233", + max_length=50, + unique=True, + ), + ), + ], + options={ + "ordering": ["alias"], + }, + ), + migrations.CreateModel( + name="AdvisoryReference", + fields=[ + ( + "id", + models.AutoField( + 
auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "url", + models.URLField( + help_text="URL to the vulnerability reference", max_length=1024, unique=True + ), + ), + ( + "reference_type", + models.CharField( + blank=True, + choices=[ + ("advisory", "Advisory"), + ("exploit", "Exploit"), + ("mailing_list", "Mailing List"), + ("bug", "Bug"), + ("other", "Other"), + ], + max_length=20, + ), + ), + ( + "reference_id", + models.CharField( + blank=True, + db_index=True, + help_text="An optional reference ID, such as DSA-4465-1 when available", + max_length=500, + ), + ), + ], + options={ + "ordering": ["reference_id", "url", "reference_type"], + }, + ), + migrations.CreateModel( + name="AdvisorySeverity", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "url", + models.URLField( + db_index=True, + help_text="URL to the vulnerability severity", + max_length=1024, + null=True, + ), + ), + ( + "scoring_system", + models.CharField( + choices=[ + ("cvssv2", "CVSSv2 Base Score"), + ("cvssv3", "CVSSv3 Base Score"), + ("cvssv3.1", "CVSSv3.1 Base Score"), + ("cvssv4", "CVSSv4 Base Score"), + ("rhbs", "RedHat Bugzilla severity"), + ("rhas", "RedHat Aggregate severity"), + ("archlinux", "Archlinux Vulnerability Group Severity"), + ("cvssv3.1_qr", "CVSSv3.1 Qualitative Severity Rating"), + ("generic_textual", "Generic textual severity rating"), + ("apache_httpd", "Apache Httpd Severity"), + ("apache_tomcat", "Apache Tomcat Severity"), + ("epss", "Exploit Prediction Scoring System"), + ("ssvc", "Stakeholder-Specific Vulnerability Categorization"), + ], + help_text="Identifier for the scoring system used. Available choices are: cvssv2: CVSSv2 Base Score,\ncvssv3: CVSSv3 Base Score,\ncvssv3.1: CVSSv3.1 Base Score,\ncvssv4: CVSSv4 Base Score,\nrhbs: RedHat Bugzilla severity,\nrhas: RedHat Aggregate severity,\narchlinux: Archlinux Vulnerability Group Severity,\ncvssv3.1_qr: CVSSv3.1 Qualitative Severity Rating,\ngeneric_textual: Generic textual severity rating,\napache_httpd: Apache Httpd Severity,\napache_tomcat: Apache Tomcat Severity,\nepss: Exploit Prediction Scoring System,\nssvc: Stakeholder-Specific Vulnerability Categorization ", + max_length=50, + ), + ), + ( + "value", + models.CharField(help_text="Example: 9.0, Important, High", max_length=50), + ), + ( + "scoring_elements", + models.CharField( + help_text="Supporting scoring elements used to compute the score values. 
For example a CVSS vector string as used to compute a CVSS score.", + max_length=150, + null=True, + ), + ), + ( + "published_at", + models.DateTimeField( + blank=True, + help_text="UTC Date of publication of the vulnerability severity", + null=True, + ), + ), + ], + options={ + "ordering": ["url", "scoring_system", "value"], + }, + ), + migrations.CreateModel( + name="AdvisoryV2", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "datasource_id", + models.CharField( + help_text="Unique ID for the datasource used for this advisory, e.g.: nginx_importer_v2", + max_length=100, + ), + ), + ( + "advisory_id", + models.CharField( + help_text="An advisory is a unique vulnerability identifier in some database, such as PYSEC-2020-2233", + max_length=50, + ), + ), + ( + "avid", + models.CharField( + help_text="Unique ID for this advisory, e.g.: pysec_importer_v2/PYSEC-2020-2233", + max_length=500, + ), + ), + ( + "unique_content_id", + models.CharField( + help_text="A 64 character unique identifier for the content of the advisory, a sha256 checksum in hex", + max_length=64, + unique=True, + ), + ), + ("url", models.URLField(help_text="Link to the advisory on the upstream website")), + ("summary", models.TextField(blank=True)), + ( + "date_published", + models.DateTimeField( + blank=True, help_text="UTC Date of publication of the advisory", null=True + ), + ), + ( + "date_collected", + models.DateTimeField(help_text="UTC Date on which the advisory was collected"), + ), + ( + "date_imported", + models.DateTimeField( + blank=True, + help_text="UTC Date on which the advisory was imported", + null=True, + ), + ), + ( + "status", + models.IntegerField( + choices=[(1, "Published"), (2, "Disputed"), (3, "Invalid")], default=1 + ), + ), + ( + "exploitability", + models.DecimalField( + blank=True, + decimal_places=1, + help_text="Exploitability indicates the likelihood that a vulnerability in a software package could be used by malicious actors to compromise systems, applications, or networks. This metric is determined automatically based on the discovery of known exploits.", + max_digits=2, + null=True, + ), + ), + ( + "weighted_severity", + models.DecimalField( + blank=True, + decimal_places=1, + help_text="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10.", + max_digits=3, + null=True, + ), + ), + ], + options={ + "ordering": ["datasource_id", "advisory_id", "date_published", "unique_content_id"], + }, + ), + migrations.CreateModel( + name="AdvisoryWeakness", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ("cwe_id", models.IntegerField(help_text="CWE id")), + ], + ), + migrations.CreateModel( + name="PackageV2", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "type", + models.CharField( + blank=True, + help_text="A short code to identify the type of this package. 
For example: gem for a Rubygem, docker for a container, pypi for a Python Wheel or Egg, maven for a Maven Jar, deb for a Debian package, etc.", + max_length=16, + ), + ), + ( + "namespace", + models.CharField( + blank=True, + help_text="Package name prefix, such as Maven groupid, Docker image owner, GitHub user or organization, etc.", + max_length=255, + ), + ), + ( + "name", + models.CharField(blank=True, help_text="Name of the package.", max_length=100), + ), + ( + "version", + models.CharField( + blank=True, help_text="Version of the package.", max_length=100 + ), + ), + ( + "qualifiers", + models.CharField( + blank=True, + help_text="Extra qualifying data for a package such as the name of an OS, architecture, distro, etc.", + max_length=1024, + ), + ), + ( + "subpath", + models.CharField( + blank=True, + help_text="Extra subpath within a package, relative to the package root.", + max_length=200, + ), + ), + ( + "package_url", + models.CharField( + db_index=True, + help_text="The Package URL for this package.", + max_length=1000, + ), + ), + ( + "plain_package_url", + models.CharField( + db_index=True, + help_text="The Package URL for this package without qualifiers and subpath.", + max_length=1000, + ), + ), + ( + "is_ghost", + models.BooleanField( + db_index=True, + default=False, + help_text="True if the package does not exist in the upstream package manager or its repository.", + ), + ), + ( + "risk_score", + models.DecimalField( + decimal_places=1, + help_text="Risk score between 0.00 and 10.00, where higher values indicate greater vulnerability risk for the package.", + max_digits=3, + null=True, + ), + ), + ( + "version_rank", + models.IntegerField( + db_index=True, + default=0, + help_text="Rank of the version to support ordering by version. Rank zero means the rank has not been defined yet", + ), + ), + ], + options={ + "abstract": False, + }, + ), + migrations.CreateModel( + name="CodeFixV2", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "commits", + models.JSONField( + blank=True, + default=list, + help_text="List of commit identifiers using VCS URLs associated with the code change.", + ), + ), + ( + "pulls", + models.JSONField( + blank=True, + default=list, + help_text="List of pull request URLs associated with the code change.", + ), + ), + ( + "downloads", + models.JSONField( + blank=True, + default=list, + help_text="List of download URLs for the patched code.", + ), + ), + ( + "patch", + models.TextField( + blank=True, + help_text="The code change as a patch in unified diff format.", + null=True, + ), + ), + ( + "notes", + models.TextField( + blank=True, + help_text="Notes or instructions about this code change.", + null=True, + ), + ), + ( + "references", + models.JSONField( + blank=True, + default=list, + help_text="URL references related to this code change.", + ), + ), + ( + "is_reviewed", + models.BooleanField( + default=False, help_text="Indicates if this code change has been reviewed." 
+ ), + ( + "created_at", + models.DateTimeField( + auto_now_add=True, + help_text="Timestamp indicating when this code change was created.", + ), + ), + ( + "updated_at", + models.DateTimeField( + auto_now=True, + help_text="Timestamp indicating when this code change was last updated.", + ), + ), + ( + "advisory", + models.ForeignKey( + help_text="The advisory to which this code fix applies.", + on_delete=django.db.models.deletion.CASCADE, + related_name="code_fix_v2", + to="vulnerabilities.advisoryv2", + ), + ), + ( + "affected_package", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="code_fix_v2_affected", + to="vulnerabilities.packagev2", + ), + ), + ( + "base_package_version", + models.ForeignKey( + blank=True, + help_text="The base package version to which this code change applies.", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="codechanges_v2", + to="vulnerabilities.packagev2", + ), + ), + ( + "fixed_package", + models.ForeignKey( + blank=True, + help_text="The fixing package version containing this code fix.", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="code_fix_v2_fixed", + to="vulnerabilities.packagev2", + ), + ), + ], + options={ + "abstract": False, + }, + ), + migrations.AddField( + model_name="advisoryv2", + name="affecting_packages", + field=models.ManyToManyField( + help_text="A list of packages that are affected by this advisory.", + related_name="affected_by_advisories", + to="vulnerabilities.packagev2", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="aliases", + field=models.ManyToManyField( + help_text="A list of serializable Alias objects", + related_name="advisories", + to="vulnerabilities.advisoryalias", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="fixed_by_packages", + field=models.ManyToManyField( + help_text="A list of packages that are reported as fixing this advisory.", + related_name="fixing_advisories", + to="vulnerabilities.packagev2", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="references", + field=models.ManyToManyField( + help_text="A list of serializable Reference objects", + related_name="advisories", + to="vulnerabilities.advisoryreference", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="severities", + field=models.ManyToManyField( + help_text="A list of vulnerability severities associated with this advisory.", + related_name="advisories", + to="vulnerabilities.advisoryseverity", + ), + ), + migrations.AddField( + model_name="advisoryv2", + name="weaknesses", + field=models.ManyToManyField( + help_text="A list of software weaknesses associated with this advisory.", + related_name="advisories", + to="vulnerabilities.advisoryweakness", + ), + ), + migrations.CreateModel( + name="AdvisoryExploit", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "date_added", + models.DateField( + blank=True, + help_text="The date the vulnerability was added to an exploit catalog.", + null=True, + ), + ), + ( + "description", + models.TextField( + blank=True, + help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description", + null=True, + ), + ), + ( + "required_action", + models.TextField( + blank=True, + help_text="The required action to address the vulnerability, typically to apply vendor updates or apply vendor mitigations or to discontinue 
use.", + null=True, + ), + ), + ( + "due_date", + models.DateField( + blank=True, + help_text="The date the required action is due, which applies to all USA federal civilian executive branch (FCEB) agencies, but all organizations are strongly encouraged to execute the required action", + null=True, + ), + ), + ( + "notes", + models.TextField( + blank=True, + help_text="Additional notes and resources about the vulnerability, often a URL to vendor instructions.", + null=True, + ), + ), + ( + "known_ransomware_campaign_use", + models.BooleanField( + default=False, + help_text="Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign; \n or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.", + ), + ), + ( + "source_date_published", + models.DateField( + blank=True, + help_text="The date that the exploit was published or disclosed.", + null=True, + ), + ), + ( + "exploit_type", + models.TextField( + blank=True, + help_text="The type of the exploit as provided by the original upstream data source.", + null=True, + ), + ), + ( + "platform", + models.TextField( + blank=True, + help_text="The platform associated with the exploit as provided by the original upstream data source.", + null=True, + ), + ), + ( + "source_date_updated", + models.DateField( + blank=True, + help_text="The date the exploit was updated in the original upstream data source.", + null=True, + ), + ), + ( + "data_source", + models.TextField( + blank=True, + help_text="The source of the exploit information, such as CISA KEV, exploitdb, metaspoit, or others.", + null=True, + ), + ), + ( + "source_url", + models.URLField( + blank=True, + help_text="The URL to the exploit as provided in the original upstream data source.", + null=True, + ), + ), + ( + "advisory", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="exploits", + to="vulnerabilities.advisoryv2", + ), + ), + ], + ), + migrations.AlterUniqueTogether( + name="advisoryv2", + unique_together={("datasource_id", "advisory_id", "unique_content_id")}, + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index c4ccbd1fa..ab01010d7 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -172,6 +172,7 @@ def with_package_counts(self): ) +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilitySeverity(models.Model): url = models.URLField( max_length=1024, @@ -211,6 +212,7 @@ class Meta: ordering = ["url", "scoring_system", "value"] +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilityStatusType(models.IntegerChoices): """List of vulnerability statuses.""" @@ -219,6 +221,7 @@ class VulnerabilityStatusType(models.IntegerChoices): INVALID = 3, "Invalid" +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Vulnerability(models.Model): """ A software vulnerability with a unique identifier and alternate ``aliases``. 
@@ -511,6 +514,7 @@ def get_cwes(self): Database.get_cwes = get_cwes +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Weakness(models.Model): """ A Common Weakness Enumeration model @@ -557,6 +561,7 @@ def to_dict(self): return {"cwe_id": self.cwe_id, "name": self.name, "description": self.description} +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilityReferenceQuerySet(BaseQuerySet): def for_cpe(self): """ @@ -565,6 +570,7 @@ def for_cpe(self): return self.filter(reference_id__startswith="cpe") +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilityReference(models.Model): """ A reference to a vulnerability such as a security advisory from a Linux distribution or language @@ -622,6 +628,7 @@ def is_cpe(self): return self.reference_id.startswith("cpe") +# FIXME: Remove when migration from Vulnerability to Advisory is completed class VulnerabilityRelatedReference(models.Model): """ A reference related to a vulnerability. @@ -642,6 +649,7 @@ class Meta: ordering = ["vulnerability", "reference"] +# FIXME: Remove when migration from Vulnerability to Advisory is completed class PackageQuerySet(BaseQuerySet, PackageURLQuerySet): def get_fixed_by_package_versions(self, purl: PackageURL, fix=True): """ @@ -808,6 +816,7 @@ def get_purl_query_lookups(purl): return purl_to_dict(plain_purl, with_empty=False) +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Package(PackageURLMixin): """ A software package with related vulnerabilities. @@ -1118,7 +1127,6 @@ def fixing_vulnerabilities(self): """ Return a queryset of Vulnerabilities that are fixed by this package. """ - print("A") return self.fixed_by_vulnerabilities.all() @property @@ -1136,6 +1144,7 @@ def affecting_vulns(self): ) +# FIXME: Remove when migration from Vulnerability to Advisory is completed class PackageRelatedVulnerabilityBase(models.Model): """ Abstract base class for package-vulnerability relations. @@ -1232,11 +1241,13 @@ def add_package_vulnerability_changelog(self, advisory): ) +# FIXME: Remove when migration from Vulnerability to Advisory is completed class FixingPackageRelatedVulnerability(PackageRelatedVulnerabilityBase): class Meta(PackageRelatedVulnerabilityBase.Meta): verbose_name_plural = "Fixing Package Related Vulnerabilities" +# FIXME: Remove when migration from Vulnerability to Advisory is completed class AffectedByPackageRelatedVulnerability(PackageRelatedVulnerabilityBase): severities = models.ManyToManyField( @@ -1258,6 +1269,7 @@ def for_cve(self): return self.filter(alias__startswith="CVE") +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Alias(models.Model): """ An alias is a unique vulnerability identifier in some database, such as @@ -1311,10 +1323,35 @@ def url(self): return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json" +class AdvisoryV2QuerySet(BaseQuerySet): + def search(self, query): + """ + Return advisories matching the ``query`` string, which can be an alias, an advisory ID, or + text from the advisory summary or reference URLs. 
+ """ + return AdvisoryV2.objects.filter( + Q(advisory_id__icontains=query) + | Q(aliases__alias__icontains=query) + | Q(summary__icontains=query) + | Q(references__url__icontains=query) + ).distinct() + + class AdvisoryQuerySet(BaseQuerySet): - pass + def search(query): + """ + This function will take a string as an input, the string could be an alias or an advisory ID or + something in the advisory description. + """ + return Advisory.objects.filter( + Q(advisory_id__icontains=query) + | Q(aliases__alias__icontains=query) + | Q(summary__icontains=query) + | Q(references__url__icontains=query) + ).distinct() +# FIXME: Remove when migration from Vulnerability to Advisory is completed class Advisory(models.Model): """ An advisory represents data directly obtained from upstream transformed @@ -1797,6 +1834,60 @@ class Meta: abstract = True +class CodeChangeV2(models.Model): + """ + Abstract base model representing a change in code, either introducing or fixing a vulnerability. + This includes details about commits, patches, and related metadata. + + We are tracking commits, pulls and downloads as references to the code change. The goal is to + keep track and store the actual code patch in the ``patch`` field. When not available the patch + will be inferred from these references using improvers. + """ + + commits = models.JSONField( + blank=True, + default=list, + help_text="List of commit identifiers using VCS URLs associated with the code change.", + ) + pulls = models.JSONField( + blank=True, + default=list, + help_text="List of pull request URLs associated with the code change.", + ) + downloads = models.JSONField( + blank=True, default=list, help_text="List of download URLs for the patched code." + ) + patch = models.TextField( + blank=True, null=True, help_text="The code change as a patch in unified diff format." + ) + base_package_version = models.ForeignKey( + "PackageV2", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="codechanges_v2", + help_text="The base package version to which this code change applies.", + ) + notes = models.TextField( + blank=True, null=True, help_text="Notes or instructions about this code change." + ) + references = models.JSONField( + blank=True, default=list, help_text="URL references related to this code change." + ) + is_reviewed = models.BooleanField( + default=False, help_text="Indicates if this code change has been reviewed." + ) + created_at = models.DateTimeField( + auto_now_add=True, help_text="Timestamp indicating when this code change was created." + ) + updated_at = models.DateTimeField( + auto_now=True, help_text="Timestamp indicating when this code change was last updated." 
+ ) + + class Meta: + abstract = True + + class CodeFix(CodeChange): """ A code fix is a code change that addresses a vulnerability and is associated: @@ -1821,6 +1912,35 @@ class CodeFix(CodeChange): ) +class CodeFixV2(CodeChangeV2): + """ + A code fix is a code change that addresses a vulnerability and is associated: + - with a specific advisory + - package that has been affected + - optionally with a specific fixing package version when it is known + """ + + advisory = models.ForeignKey( + "AdvisoryV2", + on_delete=models.CASCADE, + related_name="code_fix_v2", + help_text="The advisory to which this code fix applies.", + ) + + affected_package = models.ForeignKey( + "PackageV2", on_delete=models.CASCADE, related_name="code_fix_v2_affected" + ) + + fixed_package = models.ForeignKey( + "PackageV2", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="code_fix_v2_fixed", + help_text="The fixing package version containing this code fix.", + ) + + class PipelineRun(models.Model): """The Database representation of a pipeline execution.""" @@ -2333,21 +2453,812 @@ class AdvisoryToDo(models.Model): class Meta: unique_together = ("related_advisories_id", "issue_type") - def save(self, *args, **kwargs): - self.full_clean() - return super().save(*args, **kwargs) +class AdvisorySeverity(models.Model): + url = models.URLField( + max_length=1024, + null=True, + help_text="URL to the vulnerability severity", + db_index=True, + ) -class ToDoRelatedAdvisory(models.Model): - todo = models.ForeignKey( - AdvisoryToDo, - on_delete=models.CASCADE, + scoring_system_choices = tuple( + (system.identifier, system.name) for system in SCORING_SYSTEMS.values() ) - advisory = models.ForeignKey( - Advisory, - on_delete=models.CASCADE, + scoring_system = models.CharField( + max_length=50, + choices=scoring_system_choices, + help_text="Identifier for the scoring system used. Available choices are: {} ".format( + ",\n".join(f"{sid}: {sname}" for sid, sname in scoring_system_choices) + ), + ) + + value = models.CharField(max_length=50, help_text="Example: 9.0, Important, High") + + scoring_elements = models.CharField( + max_length=150, + null=True, + help_text="Supporting scoring elements used to compute the score values. " + "For example a CVSS vector string as used to compute a CVSS score.", + ) + + published_at = models.DateTimeField( + blank=True, null=True, help_text="UTC Date of publication of the vulnerability severity" ) + objects = BaseQuerySet.as_manager() + class Meta: - unique_together = ("todo", "advisory") + ordering = ["url", "scoring_system", "value"] + + +class AdvisoryWeakness(models.Model): + """ + A weakness is a software weakness that is associated with a vulnerability. + """ + + cwe_id = models.IntegerField(help_text="CWE id") + + cwe_by_id = {} + + def get_cwe(self, cwe_id): + if not self.cwe_by_id: + db = Database() + for weakness in db.get_cwes(): + self.cwe_by_id[str(weakness.cwe_id)] = weakness + return self.cwe_by_id[cwe_id] + + @property + def cwe(self): + return f"CWE-{self.cwe_id}" + + @property + def weakness(self): + """ + Return the Weakness object for this CWE id. 
+ """ + try: + weakness = self.get_cwe(str(self.cwe_id)) + return weakness + except Exception as e: + logger.warning(f"Could not find CWE {self.cwe_id}: {e}") + + @property + def name(self): + """Return the weakness's name.""" + return self.weakness.name if self.weakness else "" + + @property + def description(self): + """Return the weakness's description.""" + return self.weakness.description if self.weakness else "" + + def to_dict(self): + return {"cwe_id": self.cwe_id, "name": self.name, "description": self.description} + + +class AdvisoryReference(models.Model): + url = models.URLField( + max_length=1024, + help_text="URL to the vulnerability reference", + unique=True, + ) + + ADVISORY = "advisory" + EXPLOIT = "exploit" + MAILING_LIST = "mailing_list" + BUG = "bug" + OTHER = "other" + + REFERENCE_TYPES = [ + (ADVISORY, "Advisory"), + (EXPLOIT, "Exploit"), + (MAILING_LIST, "Mailing List"), + (BUG, "Bug"), + (OTHER, "Other"), + ] + + reference_type = models.CharField(max_length=20, choices=REFERENCE_TYPES, blank=True) + + reference_id = models.CharField( + max_length=500, + help_text="An optional reference ID, such as DSA-4465-1 when available", + blank=True, + db_index=True, + ) + + class Meta: + ordering = ["reference_id", "url", "reference_type"] + + def __str__(self): + reference_id = f" {self.reference_id}" if self.reference_id else "" + return f"{self.url}{reference_id}" + + @property + def is_cpe(self): + """ + Return True if this is a CPE reference. + """ + return self.reference_id.startswith("cpe") + + +class AdvisoryAlias(models.Model): + alias = models.CharField( + max_length=50, + unique=True, + blank=False, + null=False, + help_text="An alias is a unique vulnerability identifier in some database, " + "such as CVE-2020-2233", + ) + + class Meta: + ordering = ["alias"] + + def __str__(self): + return self.alias + + @cached_property + def url(self): + """ + Create a URL for the alias. + """ + alias: str = self.alias + if alias.startswith("CVE"): + return f"https://nvd.nist.gov/vuln/detail/{alias}" + + if alias.startswith("GHSA"): + return f"https://github.com/advisories/{alias}" + + if alias.startswith("NPM-"): + id = alias.lstrip("NPM-") + return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json" + + +class AdvisoryV2(models.Model): + """ + An advisory represents data directly obtained from upstream transformed + into structured data + """ + + # This is similar to a type or a namespace + datasource_id = models.CharField( + max_length=100, + blank=False, + null=False, + help_text="Unique ID for the datasource used for this advisory ." "e.g.: nginx_importer_v2", + ) + + # This is similar to a name + advisory_id = models.CharField( + max_length=50, + blank=False, + null=False, + unique=False, + help_text="An advisory is a unique vulnerability identifier in some database, " + "such as PYSEC-2020-2233", + ) + + avid = models.CharField( + max_length=500, + blank=False, + null=False, + help_text="Unique ID for the datasource used for this advisory ." 
+ "e.g.: pysec_importer_v2/PYSEC-2020-2233", + ) + + # This is similar to a version + unique_content_id = models.CharField( + max_length=64, + blank=False, + null=False, + unique=True, + help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex", + ) + url = models.URLField( + blank=False, + null=False, + help_text="Link to the advisory on the upstream website", + ) + + # TODO: Have a mapping that gives datasource class by datasource ID + # Get label from datasource class + # Remove this from model + # In the UI - Use label + # In the API - Use datasource_id + # Have an API endpoint for all info for datasources - show license, label + + summary = models.TextField( + blank=True, + ) + aliases = models.ManyToManyField( + AdvisoryAlias, + related_name="advisories", + help_text="A list of serializable Alias objects", + ) + references = models.ManyToManyField( + AdvisoryReference, + related_name="advisories", + help_text="A list of serializable Reference objects", + ) + severities = models.ManyToManyField( + AdvisorySeverity, + related_name="advisories", + help_text="A list of vulnerability severities associated with this advisory.", + ) + weaknesses = models.ManyToManyField( + AdvisoryWeakness, + related_name="advisories", + help_text="A list of software weaknesses associated with this advisory.", + ) + date_published = models.DateTimeField( + blank=True, null=True, help_text="UTC Date of publication of the advisory" + ) + date_collected = models.DateTimeField(help_text="UTC Date on which the advisory was collected") + date_imported = models.DateTimeField( + blank=True, null=True, help_text="UTC Date on which the advisory was imported" + ) + + affecting_packages = models.ManyToManyField( + "PackageV2", + related_name="affected_by_advisories", + help_text="A list of packages that are affected by this advisory.", + ) + + fixed_by_packages = models.ManyToManyField( + "PackageV2", + related_name="fixing_advisories", + help_text="A list of packages that are reported by this advisory.", + ) + + status = models.IntegerField( + choices=VulnerabilityStatusType.choices, default=VulnerabilityStatusType.PUBLISHED + ) + + exploitability = models.DecimalField( + null=True, + blank=True, + max_digits=2, + decimal_places=1, + help_text="Exploitability indicates the likelihood that a vulnerability in a software package could be used by malicious actors to compromise systems, " + "applications, or networks. This metric is determined automatically based on the discovery of known exploits.", + ) + + weighted_severity = models.DecimalField( + null=True, + blank=True, + max_digits=3, + decimal_places=1, + help_text="Weighted severity is the highest value calculated by multiplying each severity by its corresponding weight, divided by 10.", + ) + + @property + def risk_score(self): + """ + Risk expressed as a number ranging from 0 to 10. + Risk is calculated from weighted severity and exploitability values. 
+        """
+        if self.exploitability and self.weighted_severity:
+            risk_score = min(float(self.exploitability * self.weighted_severity), 10.0)
+            return round(risk_score, 1)
+
+    objects = AdvisoryQuerySet.as_manager()
+
+    class Meta:
+        unique_together = ["datasource_id", "advisory_id", "unique_content_id"]
+        ordering = ["datasource_id", "advisory_id", "date_published", "unique_content_id"]
+
+    def save(self, *args, **kwargs):
+        self.full_clean()
+        return super().save(*args, **kwargs)
+
+    @property
+    def get_status_label(self):
+        label_by_status = {choice[0]: choice[1] for choice in VulnerabilityStatusType.choices}
+        return label_by_status.get(self.status) or VulnerabilityStatusType.PUBLISHED.label
+
+    def get_absolute_url(self):
+        """
+        Return the absolute URL for this advisory's details.
+        """
+        return reverse("advisory_details", args=[self.id])
+
+    def to_advisory_data(self) -> "AdvisoryDataV2":
+        from vulnerabilities.importer import AdvisoryDataV2
+        from vulnerabilities.importer import AffectedPackage
+        from vulnerabilities.importer import ReferenceV2
+
+        return AdvisoryDataV2(
+            aliases=[item.alias for item in self.aliases.all()],
+            summary=self.summary,
+            affected_packages=[
+                AffectedPackage.from_dict(pkg) for pkg in self.affecting_packages.all() if pkg
+            ],
+            references=[ReferenceV2.from_dict(ref) for ref in self.references.all()],
+            date_published=self.date_published,
+            weaknesses=self.weaknesses.all(),
+            severities=self.severities.all(),
+            url=self.url,
+        )
+
+    @property
+    def get_aliases(self):
+        """
+        Return a queryset of all Aliases for this advisory.
+        """
+        return self.aliases.all()
+
+    def aggregate_fixed_and_affected_packages(self):
+        from vulnerabilities.utils import get_purl_version_class
+
+        sorted_fixed_by_packages = self.fixed_by_packages.filter(is_ghost=False).order_by(
+            "type", "namespace", "name", "qualifiers", "subpath"
+        )
+
+        if sorted_fixed_by_packages:
+            sorted_fixed_by_packages.first().calculate_version_rank
+
+        sorted_affected_packages = self.affecting_packages.all()
+
+        if sorted_affected_packages:
+            sorted_affected_packages.first().calculate_version_rank
+
+        grouped_fixed_by_packages = {
+            key: list(group)
+            for key, group in groupby(
+                sorted_fixed_by_packages,
+                key=attrgetter("type", "namespace", "name", "qualifiers", "subpath"),
+            )
+        }
+
+        all_affected_fixed_by_matches = []
+
+        for sorted_affected_package in sorted_affected_packages:
+            affected_fixed_by_matches = {
+                "affected_package": sorted_affected_package,
+                "matched_fixed_by_packages": [],
+            }
+
+            # Build the key to find the matching group
+            key = (
+                sorted_affected_package.type,
+                sorted_affected_package.namespace,
+                sorted_affected_package.name,
+                sorted_affected_package.qualifiers,
+                sorted_affected_package.subpath,
+            )
+
+            # Get the matching group from the pre-grouped fixed_by_packages
+            matching_fixed_packages = grouped_fixed_by_packages.get(key, [])
+
+            # Get version classes for comparison
+            affected_version_class = get_purl_version_class(sorted_affected_package)
+            affected_version = affected_version_class(sorted_affected_package.version)
+
+            # Compare versions and filter valid matches
+            matched_fixed_by_packages = [
+                fixed_by_package.purl
+                for fixed_by_package in matching_fixed_packages
+                if get_purl_version_class(fixed_by_package)(fixed_by_package.version)
+                > affected_version
+            ]
+
+            affected_fixed_by_matches["matched_fixed_by_packages"] = matched_fixed_by_packages
+            all_affected_fixed_by_matches.append(affected_fixed_by_matches)
+        return sorted_fixed_by_packages, sorted_affected_packages, all_affected_fixed_by_matches
+
+    alias = get_aliases
+
+
+class ToDoRelatedAdvisory(models.Model):
+    todo = models.ForeignKey(
+        AdvisoryToDo,
+        on_delete=models.CASCADE,
+    )
+
+    advisory = models.ForeignKey(
+        Advisory,
+        on_delete=models.CASCADE,
+    )
+
+    class Meta:
+        unique_together = ("todo", "advisory")
+
+
+class PackageQuerySetV2(BaseQuerySet, PackageURLQuerySet):
+    def search(self, query: str = None):
+        """
+        Return a Package queryset searching for the ``query``.
+        Make a best effort approach to find matching packages either based
+        on exact purl, partial purl or just name and namespace.
+        """
+        query = query and query.strip()
+        if not query:
+            return self.none()
+        qs = self
+
+        try:
+            # if it's a valid purl, try to parse it and use it as-is
+            purl = str(utils.plain_purl(query))
+            qs = qs.filter(package_url__istartswith=purl)
+        except ValueError:
+            # otherwise use the query as a plain string
+            qs = qs.filter(package_url__icontains=query)
+        return qs.order_by("package_url")
+
+    def with_vulnerability_counts(self):
+        # Count distinct advisories: joining two many-to-many relations in the
+        # same query would otherwise inflate both counts.
+        return self.annotate(
+            vulnerability_count=Count(
+                "affected_by_advisories",
+                distinct=True,
+            ),
+            patched_vulnerability_count=Count(
+                "fixing_advisories",
+                distinct=True,
+            ),
+        )
+
+    def get_fixed_by_package_versions(self, purl: PackageURL, fix=True):
+        """
+        Return a queryset of all the package versions of this `package` that fix any vulnerability.
+        If `fix` is False, return all package versions whether or not they fix a vulnerability.
+        """
+        filter_dict = {
+            "name": purl.name,
+            "namespace": purl.namespace,
+            "type": purl.type,
+            "qualifiers": purl.qualifiers,
+            "subpath": purl.subpath,
+        }
+
+        if fix:
+            filter_dict["fixing_advisories__isnull"] = False
+
+        # TODO: why do we need distinct
+        return PackageV2.objects.filter(**filter_dict).distinct()
+
+    def get_or_create_from_purl(self, purl: Union[PackageURL, str]):
+        """
+        Return a new or existing Package given a ``purl`` PackageURL object or PURL string.
+        """
+        package, is_created = PackageV2.objects.get_or_create(**purl_to_dict(purl=purl))
+
+        return package, is_created
+
+    def only_vulnerable(self):
+        return self._vulnerable(True)
+
+    def only_non_vulnerable(self):
+        return self._vulnerable(False).filter(is_ghost=False)
+
+    def for_purl(self, purl):
+        """
+        Return a queryset matching the ``purl`` Package URL.
+        """
+        return self.filter(package_url=purl)
+
+    def for_purls(self, purls=()):
+        """
+        Return a queryset of Packages matching a list of PURLs.
+        """
+        return self.filter(package_url__in=purls).distinct()
+
+    def _vulnerable(self, vulnerable=True):
+        """
+        Filter to select only vulnerable or non-vulnerable packages.
+        """
+        return self.with_is_vulnerable().filter(is_vulnerable=vulnerable)
+
+    def with_is_vulnerable(self):
+        """
+        Annotate Package with an ``is_vulnerable`` boolean attribute.
+        """
+        return self.annotate(
+            is_vulnerable=Exists(AdvisoryV2.objects.filter(affecting_packages__pk=OuterRef("pk")))
+        )
+
+
+class PackageV2(PackageURLMixin):
+    """
+    A software package with related vulnerabilities.
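+    Each package is identified by its Package URL (purl), for example
+    pkg:pypi/django@4.0.7.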
+ """ + + package_url = models.CharField( + max_length=1000, + null=False, + help_text="The Package URL for this package.", + db_index=True, + ) + + plain_package_url = models.CharField( + max_length=1000, + null=False, + help_text="The Package URL for this package without qualifiers and subpath.", + db_index=True, + ) + + is_ghost = models.BooleanField( + default=False, + help_text="True if the package does not exist in the upstream package manager or its repository.", + db_index=True, + ) + + risk_score = models.DecimalField( + null=True, + max_digits=3, + decimal_places=1, + help_text="Risk score between 0.00 and 10.00, where higher values " + "indicate greater vulnerability risk for the package.", + ) + + version_rank = models.IntegerField( + help_text="Rank of the version to support ordering by version. Rank " + "zero means the rank has not been defined yet", + default=0, + db_index=True, + ) + + def __str__(self): + return self.package_url + + @property + def purl(self): + return self.package_url + + def save(self, *args, **kwargs): + """ + Save, normalizing PURL fields. + """ + purl = PackageURL( + type=self.type, + namespace=self.namespace, + name=self.name, + version=self.version, + qualifiers=self.qualifiers, + subpath=self.subpath, + ) + + # We re-parse the purl to ensure name and namespace + # are set correctly + normalized = normalize_purl(purl=purl) + + for name, value in purl_to_dict(normalized).items(): + setattr(self, name, value) + + self.package_url = str(normalized) + plain_purl = utils.plain_purl(normalized) + self.plain_package_url = str(plain_purl) + super().save(*args, **kwargs) + + objects = PackageQuerySetV2.as_manager() + + @property + def calculate_version_rank(self): + """ + Calculate and return the `version_rank` for a package that does not have one. + If this package already has a `version_rank`, return it. + + The calculated rank will be interpolated between two packages that have + `version_rank` values and are closest to this package in terms of version order. + """ + + group_packages = PackageV2.objects.filter( + type=self.type, + namespace=self.namespace, + name=self.name, + ) + + if any(p.version_rank == 0 for p in group_packages): + sorted_packages = sorted(group_packages, key=lambda p: self.version_class(p.version)) + for rank, package in enumerate(sorted_packages, start=1): + package.version_rank = rank + PackageV2.objects.bulk_update(sorted_packages, fields=["version_rank"]) + return self.version_rank + + @property + def fixed_package_details(self): + """ + Return a mapping of vulnerabilities that affect this package and the next and + latest non-vulnerable versions. + """ + package_details = {} + package_details["purl"] = PackageURL.from_string(self.purl) + + next_non_vulnerable, latest_non_vulnerable = self.get_non_vulnerable_versions() + package_details["next_non_vulnerable"] = next_non_vulnerable + package_details["latest_non_vulnerable"] = latest_non_vulnerable + + package_details["advisories"] = self.get_affecting_vulnerabilities() + + return package_details + + def get_non_vulnerable_versions(self): + """ + Return a tuple of the next and latest non-vulnerable versions as Package instance. + Return a tuple of (None, None) if there is no non-vulnerable version. 
+ """ + if self.version_rank == 0: + self.calculate_version_rank + non_vulnerable_versions = PackageV2.objects.get_fixed_by_package_versions( + self, fix=False + ).only_non_vulnerable() + + later_non_vulnerable_versions = non_vulnerable_versions.filter( + version_rank__gt=self.version_rank + ) + + later_non_vulnerable_versions = list(later_non_vulnerable_versions) + + if later_non_vulnerable_versions: + sorted_versions = later_non_vulnerable_versions + next_non_vulnerable = sorted_versions[0] + latest_non_vulnerable = sorted_versions[-1] + return next_non_vulnerable, latest_non_vulnerable + + return None, None + + @cached_property + def version_class(self): + range_class = RANGE_CLASS_BY_SCHEMES.get(self.type) + return range_class.version_class if range_class else Version + + def get_absolute_url(self): + """ + Return this Vulnerability details absolute URL. + """ + return reverse("package_details_v2", args=[self.purl]) + + @cached_property + def current_version(self): + return self.version_class(self.version) + + def get_affecting_vulnerabilities(self): + """ + Return a list of vulnerabilities that affect this package together with information regarding + the versions that fix the vulnerabilities. + """ + if self.version_rank == 0: + self.calculate_version_rank + package_details_advs = [] + + fixed_by_packages = PackageV2.objects.get_fixed_by_package_versions(self, fix=True) + + package_advisories = self.affected_by_advisories.prefetch_related( + Prefetch( + "fixed_by_packages", + queryset=fixed_by_packages, + to_attr="fixed_packages", + ) + ) + + for adv in package_advisories: + package_details_advs.append({"advisory": adv}) + later_fixed_packages = [] + + for fixed_pkg in adv.fixed_by_packages.all(): + if fixed_pkg not in fixed_by_packages: + continue + fixed_version = self.version_class(fixed_pkg.version) + if fixed_version > self.current_version: + later_fixed_packages.append(fixed_pkg) + + next_fixed_package_vulns = [] + + sort_fixed_by_packages_by_version = [] + if later_fixed_packages: + sort_fixed_by_packages_by_version = sorted( + later_fixed_packages, key=lambda p: p.version_rank + ) + + fixed_by_pkgs = [] + + for vuln_details in package_details_advs: + if vuln_details["advisory"] != adv: + continue + vuln_details["fixed_by_purl"] = [] + vuln_details["fixed_by_purl_advisories"] = [] + + for fixed_by_pkg in sort_fixed_by_packages_by_version: + fixed_by_package_details = {} + fixed_by_purl = PackageURL.from_string(fixed_by_pkg.purl) + next_fixed_package_vulns = list(fixed_by_pkg.affected_by_advisories.all()) + + fixed_by_package_details["fixed_by_purl"] = fixed_by_purl + fixed_by_package_details["fixed_by_purl_advisories"] = next_fixed_package_vulns + fixed_by_pkgs.append(fixed_by_package_details) + + vuln_details["fixed_by_package_details"] = fixed_by_pkgs + + return package_details_advs + + +class AdvisoryExploit(models.Model): + """ + A vulnerability exploit is code used to + take advantage of a security flaw for unauthorized access or malicious activity. 
+ """ + + advisory = models.ForeignKey( + AdvisoryV2, + related_name="exploits", + on_delete=models.CASCADE, + ) + + date_added = models.DateField( + null=True, + blank=True, + help_text="The date the vulnerability was added to an exploit catalog.", + ) + + description = models.TextField( + null=True, + blank=True, + help_text="Description of the vulnerability in an exploit catalog, often a refinement of the original CVE description", + ) + + required_action = models.TextField( + null=True, + blank=True, + help_text="The required action to address the vulnerability, typically to " + "apply vendor updates or apply vendor mitigations or to discontinue use.", + ) + + due_date = models.DateField( + null=True, + blank=True, + help_text="The date the required action is due, which applies" + " to all USA federal civilian executive branch (FCEB) agencies, " + "but all organizations are strongly encouraged to execute the required action", + ) + + notes = models.TextField( + null=True, + blank=True, + help_text="Additional notes and resources about the vulnerability," + " often a URL to vendor instructions.", + ) + + known_ransomware_campaign_use = models.BooleanField( + default=False, + help_text="""Known' if this vulnerability is known to have been leveraged as part of a ransomware campaign; + or 'Unknown' if there is no confirmation that the vulnerability has been utilized for ransomware.""", + ) + + source_date_published = models.DateField( + null=True, blank=True, help_text="The date that the exploit was published or disclosed." + ) + + exploit_type = models.TextField( + null=True, + blank=True, + help_text="The type of the exploit as provided by the original upstream data source.", + ) + + platform = models.TextField( + null=True, + blank=True, + help_text="The platform associated with the exploit as provided by the original upstream data source.", + ) + + source_date_updated = models.DateField( + null=True, + blank=True, + help_text="The date the exploit was updated in the original upstream data source.", + ) + + data_source = models.TextField( + null=True, + blank=True, + help_text="The source of the exploit information, such as CISA KEV, exploitdb, metaspoit, or others.", + ) + + source_url = models.URLField( + null=True, + blank=True, + help_text="The URL to the exploit as provided in the original upstream data source.", + ) + + @property + def get_known_ransomware_campaign_use_type(self): + return "Known" if self.known_ransomware_campaign_use else "Unknown" diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index d41b05321..3d1316cce 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -15,18 +15,29 @@ from traceback import format_exc as traceback_format_exc from typing import Iterable from typing import List +from typing import Optional from aboutcode.pipeline import LoopProgress from aboutcode.pipeline import PipelineDefinition from aboutcode.pipeline import humanize_time +from fetchcode import package_versions +from packageurl import PackageURL from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import UnMergeablePackageError from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.models import Advisory +from vulnerabilities.models import PackageV2 from vulnerabilities.models import PipelineRun from vulnerabilities.pipes.advisory import import_advisory from vulnerabilities.pipes.advisory import 
+from vulnerabilities.pipes.advisory import insert_advisory_v2
+from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage
 from vulnerabilities.utils import classproperty
+from vulnerabilities.utils import get_affected_packages_by_patched_package
+from vulnerabilities.utils import nearest_patched_package
+from vulnerabilities.utils import resolve_version_range
 
 module_logger = logging.getLogger(__name__)
 
@@ -148,14 +159,6 @@ def on_failure(self):
         """
         pass
 
-    @classproperty
-    def pipeline_id(cls):
-        """Return unique pipeline_id set in cls.pipeline_id"""
-
-        if cls.pipeline_id is None or cls.pipeline_id == "":
-            raise NotImplementedError("pipeline_id is not defined or is empty")
-        return cls.pipeline_id
-
 
 class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline):
     """
@@ -207,12 +210,13 @@ def collect_and_store_advisories(self):
 
         progress = LoopProgress(total_iterations=estimated_advisory_count, logger=self.log)
         for advisory in progress.iter(self.collect_advisories()):
-            if _obj := insert_advisory(
-                advisory=advisory,
-                pipeline_id=self.pipeline_id,
-                logger=self.log,
-            ):
-                collected_advisory_count += 1
+            if isinstance(advisory, AdvisoryData):
+                if _obj := insert_advisory(
+                    advisory=advisory,
+                    pipeline_id=self.pipeline_id,
+                    logger=self.log,
+                ):
+                    collected_advisory_count += 1
 
         self.log(f"Successfully collected {collected_advisory_count:,d} advisories")
 
@@ -248,3 +252,256 @@ def import_advisory(self, advisory: Advisory) -> int:
                 f"Failed to import advisory: {advisory!r} with error {e!r}:\n{traceback_format_exc()}",
                 level=logging.ERROR,
             )
+
+
+class VulnerableCodeBaseImporterPipelineV2(VulnerableCodePipeline):
+    """
+    Base importer pipeline for importing advisories.
+
+    Usage: subclass this pipeline and implement the ``advisories_count`` and
+    ``collect_advisories`` methods. Also override ``steps`` and
+    ``advisory_confidence`` as needed.
+    """
+
+    pipeline_id = None  # Unique Pipeline ID, this should be the name of the pipeline module.
+    license_url = None
+    spdx_license_expression = None
+    repo_url = None
+    advisory_confidence = MAX_CONFIDENCE
+    ignorable_versions = []
+    unfurl_version_ranges = False
+
+    @classmethod
+    def steps(cls):
+        return (cls.collect_and_store_advisories,)
+
+    def collect_advisories(self) -> Iterable[AdvisoryData]:
+        """
+        Yield AdvisoryData objects for this importer pipeline.
+        """
+        raise NotImplementedError
+
+    def advisories_count(self) -> int:
+        """
+        Return the estimated number of AdvisoryData objects to be yielded by
+        ``collect_advisories``.
+
+        Used by ``collect_and_store_advisories`` to log the progress of advisory collection.
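+        Subclasses typically return a cheap estimate, such as the number of
+        advisory files in a checkout or an API-provided total count; the value
+        is only used for progress logging.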
+ """ + raise NotImplementedError + + def collect_and_store_advisories(self): + collected_advisory_count = 0 + estimated_advisory_count = self.advisories_count() + + if estimated_advisory_count > 0: + self.log(f"Collecting {estimated_advisory_count:,d} advisories") + + progress = LoopProgress(total_iterations=estimated_advisory_count, logger=self.log) + for advisory in progress.iter(self.collect_advisories()): + if advisory is None: + self.log("Advisory is None, skipping") + continue + if _obj := insert_advisory_v2( + advisory=advisory, + pipeline_id=self.pipeline_id, + get_advisory_packages=self.get_advisory_packages, + logger=self.log, + ): + collected_advisory_count += 1 + + self.log(f"Successfully collected {collected_advisory_count:,d} advisories") + + def get_advisory_packages(self, advisory_data: AdvisoryData) -> list: + """ + Return the list of packages for the given advisory. + + Used by ``import_advisory`` to get the list of packages for the advisory. + """ + from vulnerabilities.improvers import default + + affected_purls = [] + fixed_purls = [] + for affected_package in advisory_data.affected_packages: + package_affected_purls, package_fixed_purls = default.get_exact_purls( + affected_package=affected_package + ) + affected_purls.extend(package_affected_purls) + fixed_purls.extend(package_fixed_purls) + + if self.unfurl_version_ranges: + vulnerable_pvs, fixed_pvs = self.get_impacted_packages( + affected_packages=advisory_data.affected_packages, + advisory_date_published=advisory_data.date_published, + ) + affected_purls.extend(vulnerable_pvs) + fixed_purls.extend(fixed_pvs) + + vulnerable_packages = [] + fixed_packages = [] + + for affected_purl in affected_purls: + vulnerable_package, _ = PackageV2.objects.get_or_create_from_purl(purl=affected_purl) + vulnerable_packages.append(vulnerable_package) + + for fixed_purl in fixed_purls: + fixed_package, _ = PackageV2.objects.get_or_create_from_purl(purl=fixed_purl) + fixed_packages.append(fixed_package) + + return vulnerable_packages, fixed_packages + + def get_published_package_versions( + self, package_url: PackageURL, until: Optional[datetime] = None + ) -> List[str]: + """ + Return a list of versions published before `until` for the `package_url` + """ + versions_before_until = [] + try: + versions = package_versions.versions(str(package_url)) + for version in versions or []: + if until and version.release_date and version.release_date > until: + continue + versions_before_until.append(version.value) + + return versions_before_until + except Exception as e: + self.log( + f"Failed to fetch versions for package {str(package_url)} {e!r}", + level=logging.ERROR, + ) + return [] + + def get_impacted_packages(self, affected_packages, advisory_date_published): + """ + Return a tuple of lists of affected and fixed PackageURLs + """ + if not affected_packages: + return [], [] + + mergable = True + + # TODO: We should never had the exception in first place + try: + purl, affected_version_ranges, fixed_versions = AffectedPackage.merge(affected_packages) + except UnMergeablePackageError: + self.log(f"Cannot merge with different purls {affected_packages!r}", logging.ERROR) + mergable = False + + if not mergable: + vulnerable_packages = [] + fixed_packages = [] + for affected_package in affected_packages: + purl = affected_package.package + affected_version_range = affected_package.affected_version_range + fixed_version = affected_package.fixed_version + pkg_type = purl.type + pkg_namespace = purl.namespace + pkg_name = purl.name + if not 
+                if not affected_version_range and fixed_version:
+                    fixed_packages.append(
+                        PackageURL(
+                            type=pkg_type,
+                            namespace=pkg_namespace,
+                            name=pkg_name,
+                            version=str(fixed_version),
+                        )
+                    )
+                else:
+                    valid_versions = self.get_published_package_versions(
+                        package_url=purl, until=advisory_date_published
+                    )
+                    affected_pvs, fixed_pvs = self.resolve_package_versions(
+                        affected_version_range=affected_version_range,
+                        pkg_type=pkg_type,
+                        pkg_namespace=pkg_namespace,
+                        pkg_name=pkg_name,
+                        valid_versions=valid_versions,
+                    )
+                    vulnerable_packages.extend(affected_pvs)
+                    fixed_packages.extend(fixed_pvs)
+            return vulnerable_packages, fixed_packages
+        else:
+            pkg_type = purl.type
+            pkg_namespace = purl.namespace
+            pkg_name = purl.name
+            pkg_qualifiers = purl.qualifiers
+            fixed_purls = [
+                PackageURL(
+                    type=pkg_type,
+                    namespace=pkg_namespace,
+                    name=pkg_name,
+                    version=str(version),
+                    qualifiers=pkg_qualifiers,
+                )
+                for version in fixed_versions
+            ]
+            if not affected_version_ranges:
+                return [], fixed_purls
+            else:
+                valid_versions = self.get_published_package_versions(
+                    package_url=purl, until=advisory_date_published
+                )
+                vulnerable_packages = []
+                fixed_packages = []
+                for affected_version_range in affected_version_ranges:
+                    vulnerable_pvs, fixed_pvs = self.resolve_package_versions(
+                        affected_version_range=affected_version_range,
+                        pkg_type=pkg_type,
+                        pkg_namespace=pkg_namespace,
+                        pkg_name=pkg_name,
+                        valid_versions=valid_versions,
+                    )
+                    vulnerable_packages.extend(vulnerable_pvs)
+                    fixed_packages.extend(fixed_pvs)
+                return vulnerable_packages, fixed_packages
+
+    def resolve_package_versions(
+        self,
+        affected_version_range,
+        pkg_type,
+        pkg_namespace,
+        pkg_name,
+        valid_versions,
+    ):
+        """
+        Return a tuple of lists of ``affected_packages`` and ``fixed_packages`` PackageURLs
+        for the given `affected_version_range` and `valid_versions`.
+
+        ``valid_versions`` are the versions listed on the package registry for that package.
+        """
+        aff_vers, unaff_vers = resolve_version_range(
+            affected_version_range=affected_version_range,
+            ignorable_versions=self.ignorable_versions,
+            package_versions=valid_versions,
+        )
+
+        affected_purls = list(
+            self.expand_version_range_to_purls(pkg_type, pkg_namespace, pkg_name, aff_vers)
+        )
+
+        unaffected_purls = list(
+            self.expand_version_range_to_purls(pkg_type, pkg_namespace, pkg_name, unaff_vers)
+        )
+
+        fixed_packages = []
+        affected_packages = []
+
+        patched_packages = nearest_patched_package(
+            vulnerable_packages=affected_purls, resolved_packages=unaffected_purls
+        )
+
+        for (
+            fixed_package,
+            matched_affected_purls,
+        ) in get_affected_packages_by_patched_package(patched_packages).items():
+            if fixed_package:
+                fixed_packages.append(fixed_package)
+            affected_packages.extend(matched_affected_purls)
+
+        return affected_packages, fixed_packages
+
+    def expand_version_range_to_purls(self, pkg_type, pkg_namespace, pkg_name, versions):
+        for version in versions:
+            yield PackageURL(type=pkg_type, namespace=pkg_namespace, name=pkg_name, version=version)
diff --git a/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py
new file mode 100644
index 000000000..90ea32b75
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py
@@ -0,0 +1,341 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import logging
+import re
+import urllib.parse
+from typing import Iterable
+
+import requests
+from bs4 import BeautifulSoup
+from packageurl import PackageURL
+from univers.version_constraint import VersionConstraint
+from univers.version_range import ApacheVersionRange
+from univers.versions import SemverVersion
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import Reference
+from vulnerabilities.importer import VulnerabilitySeverity
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.severity_systems import APACHE_HTTPD
+from vulnerabilities.utils import create_weaknesses_list
+from vulnerabilities.utils import cwe_regex
+from vulnerabilities.utils import get_item
+
+logger = logging.getLogger(__name__)
+
+
+def fetch_links(url):
+    links = []
+    data = requests.get(url).content
+    soup = BeautifulSoup(data, features="lxml")
+    for tag in soup.find_all("a"):
+        link = tag.get("href")
+        # Skip anchors without an href and non-JSON links
+        if not link or not link.endswith("json"):
+            continue
+        links.append(urllib.parse.urljoin(url, link))
+    return links
+
+
+def get_weaknesses(cve_data):
+    """
+    Extract CWE IDs from CVE data.
+
+    Args:
+        cve_data (dict): The CVE data in a dictionary format.
+
+    Returns:
+        List[int]: A list of unique CWE IDs.
+
+    Examples:
+    >>> mock_cve_data1 = {
+    ...     "containers": {
+    ...         "cna": {
+    ...             "providerMetadata": {
+    ...                 "orgId": "f0158376-9dc2-43b6-827c-5f631a4d8d09"
+    ...             },
+    ...             "title": "mod_macro buffer over-read",
+    ...             "problemTypes": [
+    ...                 {
+    ...                     "descriptions": [
+    ...                         {
+    ...                             "description": "CWE-125 Out-of-bounds Read",
+    ...                             "lang": "en",
+    ...                             "cweId": "CWE-125",
+    ...                             "type": "CWE"
+    ...                         }
+    ...                     ]
+    ...                 }
+    ...             ]
+    ...         }
+    ...     }
+    ... }
+    >>> mock_cve_data2 = {
+    ...     "data_type": "CVE",
+    ...     "data_format": "MITRE",
+    ...     "data_version": "4.0",
+    ...     "generator": {
+    ...         "engine": "Vulnogram 0.0.9"
+    ...     },
+    ...     "CVE_data_meta": {
+    ...         "ID": "CVE-2022-28614",
+    ...         "ASSIGNER": "security@apache.org",
+    ...         "TITLE": "read beyond bounds via ap_rwrite() ",
+    ...         "STATE": "PUBLIC"
+    ...     },
+    ...     "problemtype": {
+    ...         "problemtype_data": [
+    ...             {
+    ...                 "description": [
+    ...                     {
+    ...                         "lang": "eng",
+    ...                         "value": "CWE-190 Integer Overflow or Wraparound"
+    ...                     }
+    ...                 ]
+    ...             },
+    ...             {
+    ...                 "description": [
+    ...                     {
+    ...                         "lang": "eng",
+    ...                         "value": "CWE-200 Exposure of Sensitive Information to an Unauthorized Actor"
+    ...                     }
+    ...                 ]
+    ...             }
+    ...         ]
+    ...     }
+    ... }
+
+    >>> get_weaknesses(mock_cve_data1)
+    [125]
+
+    >>> get_weaknesses(mock_cve_data2)
+    [190, 200]
+    """
+    alias = get_item(cve_data, "CVE_data_meta", "ID")
+    cwe_strings = []
+    if alias:
+        problemtype_data = get_item(cve_data, "problemtype", "problemtype_data") or []
+        for problem in problemtype_data:
+            for desc in problem.get("description", []):
+                value = desc.get("value", "")
+                cwe_id_string_list = re.findall(cwe_regex, value)
+                cwe_strings.extend(cwe_id_string_list)
+    else:
+        problemTypes = cve_data.get("containers", {}).get("cna", {}).get("problemTypes", [])
+        descriptions = problemTypes[0].get("descriptions", []) if len(problemTypes) > 0 else []
+        for description in descriptions:
+            cwe_id_string = description.get("cweId", "")
+            cwe_strings.append(cwe_id_string)
+
+    weaknesses = create_weaknesses_list(cwe_strings)
+    return weaknesses
+
+
+class ApacheHTTPDImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+    """
+    Apache HTTPD Importer Pipeline
+
+    This pipeline imports security advisories from the Apache HTTPD project.
+    """
+
+    pipeline_id = "apache_httpd_importer_v2"
+    spdx_license_expression = "Apache-2.0"
+    license_url = "https://www.apache.org/licenses/LICENSE-2.0"
+    base_url = "https://httpd.apache.org/security/json/"
+    unfurl_version_ranges = True
+
+    links = []
+
+    ignorable_versions = frozenset(
+        [
+            "AGB_BEFORE_AAA_CHANGES",
+            "APACHE_1_2b1",
+            "APACHE_1_2b10",
+            "APACHE_1_2b11",
+            "APACHE_1_2b2",
+            "APACHE_1_2b3",
+            "APACHE_1_2b4",
+            "APACHE_1_2b5",
+            "APACHE_1_2b6",
+            "APACHE_1_2b7",
+            "APACHE_1_2b8",
+            "APACHE_1_2b9",
+            "APACHE_1_3_PRE_NT",
+            "APACHE_1_3a1",
+            "APACHE_1_3b1",
+            "APACHE_1_3b2",
+            "APACHE_1_3b3",
+            "APACHE_1_3b5",
+            "APACHE_1_3b6",
+            "APACHE_1_3b7",
+            "APACHE_2_0_2001_02_09",
+            "APACHE_2_0_52_WROWE_RC1",
+            "APACHE_2_0_ALPHA",
+            "APACHE_2_0_ALPHA_2",
+            "APACHE_2_0_ALPHA_3",
+            "APACHE_2_0_ALPHA_4",
+            "APACHE_2_0_ALPHA_5",
+            "APACHE_2_0_ALPHA_6",
+            "APACHE_2_0_ALPHA_7",
+            "APACHE_2_0_ALPHA_8",
+            "APACHE_2_0_ALPHA_9",
+            "APACHE_2_0_BETA_CANDIDATE_1",
+            "APACHE_BIG_SYMBOL_RENAME_POST",
+            "APACHE_BIG_SYMBOL_RENAME_PRE",
+            "CHANGES",
+            "HTTPD_LDAP_1_0_0",
+            "INITIAL",
+            "MOD_SSL_2_8_3",
+            "PCRE_3_9",
+            "POST_APR_SPLIT",
+            "PRE_APR_CHANGES",
+            "STRIKER_2_0_51_RC1",
+            "STRIKER_2_0_51_RC2",
+            "STRIKER_2_1_0_RC1",
+            "WROWE_2_0_43_PRE1",
+            "apache-1_3-merge-1-post",
+            "apache-1_3-merge-1-pre",
+            "apache-1_3-merge-2-post",
+            "apache-1_3-merge-2-pre",
+            "apache-apr-merge-3",
+            "apache-doc-split-01",
+            "dg_last_1_2_doc_merge",
+            "djg-apache-nspr-07",
+            "djg_nspr_split",
+            "moving_to_httpd_module",
+            "mpm-3",
+            "mpm-merge-1",
+            "mpm-merge-2",
+            "post_ajp_proxy",
+            "pre_ajp_proxy",
+        ]
+    )
+
+    @classmethod
+    def steps(cls):
+        return (cls.collect_and_store_advisories,)
+
+    def collect_advisories(self) -> Iterable[AdvisoryData]:
+        if not self.links:
+            self.links = fetch_links(self.base_url)
+        for link in self.links:
+            data = requests.get(link).json()
+            yield self.to_advisory(data)
+
+    def advisories_count(self) -> int:
+        """Count the number of advisories available in the JSON files."""
+        if not self.links:
+            self.links = fetch_links(self.base_url)
+        return len(self.links)
+
+    def to_advisory(self, data):
+        alias = get_item(data, "CVE_data_meta", "ID")
+        if not alias:
+            alias = get_item(data, "cveMetadata", "cveId")
+        descriptions = get_item(data, "description", "description_data") or []
+        description = None
+        for desc in descriptions:
+            if desc.get("lang") == "eng":
+                description = desc.get("value")
+                break
+
+        severities = []
= data.get("impact") or [] + for impact in impacts: + value = impact.get("other") + if value: + severities.append( + VulnerabilitySeverity( + system=APACHE_HTTPD, + value=value, + scoring_elements="", + ) + ) + break + reference = Reference( + reference_id=alias, + url=urllib.parse.urljoin(self.base_url, f"{alias}.json"), + ) + + versions_data = [] + for vendor in get_item(data, "affects", "vendor", "vendor_data") or []: + for products in get_item(vendor, "product", "product_data") or []: + for version_data in get_item(products, "version", "version_data") or []: + versions_data.append(version_data) + + fixed_versions = [] + for timeline_object in data.get("timeline") or []: + timeline_value = timeline_object.get("value") + if "release" in timeline_value: + split_timeline_value = timeline_value.split(" ") + if "never" in timeline_value: + continue + if "release" in split_timeline_value[-1]: + fixed_versions.append(split_timeline_value[0]) + if "release" in split_timeline_value[0]: + fixed_versions.append(split_timeline_value[-1]) + + affected_packages = [] + affected_version_range = self.to_version_ranges(versions_data, fixed_versions) + if affected_version_range: + affected_packages.append( + AffectedPackage( + package=PackageURL( + type="apache", + name="httpd", + ), + affected_version_range=affected_version_range, + ) + ) + + weaknesses = get_weaknesses(data) + + return AdvisoryData( + advisory_id=alias, + aliases=[], + summary=description or "", + affected_packages=affected_packages, + references_v2=[reference], + weaknesses=weaknesses, + url=reference.url, + severities=severities, + ) + + def to_version_ranges(self, versions_data, fixed_versions): + constraints = [] + for version_data in versions_data: + version_value = version_data["version_value"] + range_expression = version_data["version_affected"] + if range_expression not in {"<=", ">=", "?=", "!<", "="}: + raise ValueError(f"unknown comparator found! {range_expression}") + comparator_by_range_expression = { + ">=": ">=", + "!<": ">=", + "<=": "<=", + "=": "=", + } + comparator = comparator_by_range_expression.get(range_expression) + if comparator: + constraints.append( + VersionConstraint(comparator=comparator, version=SemverVersion(version_value)) + ) + + for fixed_version in fixed_versions: + # The VersionConstraint method `invert()` inverts the fixed_version's comparator, + # enabling inclusion of multiple fixed versions with the `affected_version_range` values. + constraints.append( + VersionConstraint( + comparator="=", + version=SemverVersion(fixed_version), + ).invert() + ) + + return ApacheVersionRange(constraints=constraints) diff --git a/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py b/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py new file mode 100644 index 000000000..902dd5248 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/elixir_security_importer.py @@ -0,0 +1,124 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+#
+
+from pathlib import Path
+from typing import Iterable
+
+from dateutil import parser as dateparser
+from fetchcode.vcs import fetch_via_vcs
+from packageurl import PackageURL
+from univers.version_constraint import VersionConstraint
+from univers.version_range import HexVersionRange
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import Reference
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.utils import is_cve
+from vulnerabilities.utils import load_yaml
+
+
+class ElixirSecurityImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+    """
+    Elixir Security Advisories Importer Pipeline
+
+    This pipeline imports security advisories for Elixir packages.
+    """
+
+    pipeline_id = "elixir_security_importer_v2"
+    spdx_license_expression = "CC0-1.0"
+    license_url = "https://github.com/dependabot/elixir-security-advisories/blob/master/LICENSE.txt"
+    repo_url = "git+https://github.com/dependabot/elixir-security-advisories"
+    unfurl_version_ranges = True
+
+    @classmethod
+    def steps(cls):
+        # The repository must be cloned before advisories can be counted or collected.
+        return (
+            cls.clone,
+            cls.collect_and_store_advisories,
+        )
+
+    def clone(self):
+        self.log(f"Cloning `{self.repo_url}`")
+        self.vcs_response = fetch_via_vcs(self.repo_url)
+
+    def advisories_count(self) -> int:
+        base_path = Path(self.vcs_response.dest_dir)
+        count = len(list((base_path / "packages").glob("**/*.yml")))
+        return count
+
+    def collect_advisories(self) -> Iterable[AdvisoryData]:
+        try:
+            base_path = Path(self.vcs_response.dest_dir)
+            vuln = base_path / "packages"
+            for file in vuln.glob("**/*.yml"):
+                yield from self.process_file(file, base_path)
+        finally:
+            if self.vcs_response:
+                self.vcs_response.delete()
+
+    def process_file(self, file, base_path) -> Iterable[AdvisoryData]:
+        relative_path = str(file.relative_to(base_path)).strip("/")
+        advisory_url = (
+            f"https://github.com/dependabot/elixir-security-advisories/blob/master/{relative_path}"
+        )
+        yaml_file = load_yaml(str(file))
+
+        summary = yaml_file.get("description") or ""
+        pkg_name = yaml_file.get("package") or ""
+
+        cve_id = ""
+        cve = yaml_file.get("cve") or ""
+        if cve and not cve.startswith("CVE-"):
+            cve_id = f"CVE-{cve}"
+        elif cve:
+            cve_id = cve
+
+        if not cve_id or not is_cve(cve_id):
+            return
+
+        references = []
+        link = yaml_file.get("link") or ""
+        if link:
+            references.append(Reference(url=link))
+
+        constraints = []
+        vrc = HexVersionRange.version_class
+        unaffected_versions = yaml_file.get("unaffected_versions") or []
+        patched_versions = yaml_file.get("patched_versions") or []
+
+        for version in unaffected_versions:
+            constraints.append(VersionConstraint.from_string(version_class=vrc, string=version))
+
+        for version in patched_versions:
+            if version.startswith("~>"):
+                version = version[2:]
+            constraints.append(
+                VersionConstraint.from_string(version_class=vrc, string=version).invert()
+            )
+
+        affected_packages = []
+        if pkg_name:
+            affected_packages.append(
+                AffectedPackage(
+                    package=PackageURL(type="hex", name=pkg_name),
+                    affected_version_range=HexVersionRange(constraints=constraints),
+                )
+            )
+
+        date_published = None
+        if yaml_file.get("disclosure_date"):
+            date_published = dateparser.parse(yaml_file.get("disclosure_date"))
+
+        yield AdvisoryData(
+            advisory_id=cve_id,
+            aliases=[],
+            summary=summary,
+            references_v2=references,
+            affected_packages=affected_packages,
+            url=advisory_url,
+            date_published=date_published,
+        )
diff --git a/vulnerabilities/pipelines/v2_importers/github_importer.py b/vulnerabilities/pipelines/v2_importers/github_importer.py
new file mode 100644
index 000000000..9ac360016
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/github_importer.py
@@ -0,0 +1,393 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import logging
+from traceback import format_exc as traceback_format_exc
+from typing import Callable
+from typing import Iterable
+from typing import List
+from typing import Optional
+
+from cwe2.database import Database
+from dateutil import parser as dateparser
+from packageurl import PackageURL
+from univers.version_range import RANGE_CLASS_BY_SCHEMES
+from univers.version_range import build_range_from_github_advisory_constraint
+
+from vulnerabilities import severity_systems
+from vulnerabilities import utils
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import Reference
+from vulnerabilities.importer import VulnerabilitySeverity
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.utils import dedupe
+from vulnerabilities.utils import get_cwe_id
+from vulnerabilities.utils import get_item
+
+
+class GitHubAPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+    """
+    GitHub Importer Pipeline
+
+    This pipeline imports security advisories from GitHub Security Advisories.
+    """
+
+    pipeline_id = "github_importer_v2"
+    spdx_license_expression = "CC-BY-4.0"
+    license_url = "https://github.com/github/advisory-database/blob/main/LICENSE.md"
+    unfurl_version_ranges = True
+
+    ignorable_versions = frozenset(
+        [
+            "0.1-bulbasaur",
+            "0.1-charmander",
+            "0.3m1",
+            "0.3m2",
+            "0.3m3",
+            "0.3m4",
+            "0.3m5",
+            "0.4m1",
+            "0.4m2",
+            "0.4m3",
+            "0.4m4",
+            "0.4m5",
+            "0.5m1",
+            "0.5m2",
+            "0.5m3",
+            "0.5m4",
+            "0.5m5",
+            "0.6m1",
+            "0.6m2",
+            "0.6m3",
+            "0.6m4",
+            "0.6m5",
+            "0.6m6",
+            "0.7.10p1",
+            "0.7.11p1",
+            "0.7.11p2",
+            "0.7.11p3",
+            "0.8.1p1",
+            "0.8.3p1",
+            "0.8.4p1",
+            "0.8.4p2",
+            "0.8.6p1",
+            "0.8.7p1",
+            "0.9-doduo",
+            "0.9-eevee",
+            "0.9-fearow",
+            "0.9-gyarados",
+            "0.9-horsea",
+            "0.9-ivysaur",
+            "2013-01-21T20:33:09+0100",
+            "2013-01-23T17:11:52+0100",
+            "2013-02-01T20:50:46+0100",
+            "2013-02-02T19:59:03+0100",
+            "2013-02-02T20:23:17+0100",
+            "2013-02-08T17:40:57+0000",
+            "2013-03-27T16:32:26+0100",
+            "2013-05-09T12:47:53+0200",
+            "2013-05-10T17:55:56+0200",
+            "2013-05-14T20:16:05+0200",
+            "2013-06-01T10:32:51+0200",
+            "2013-07-19T09:11:08+0000",
+            "2013-08-12T21:48:56+0200",
+            "2013-09-11T19-27-10",
+            "2013-12-23T17-51-15",
+            "2014-01-12T15-52-10",
+            "2.0.1rc2-git",
+            "3.0.0b3-",
+            "3.0b6dev-r41684",
+            "-class.-jw.util.version.Version-",
+            "vulnerabilities",
+        ]
+    )
+
+    @classmethod
+    def steps(cls):
+        return (cls.collect_and_store_advisories,)
+
+    package_type_by_github_ecosystem = {
+        # "MAVEN": "maven",
+        # "NUGET": "nuget",
+        # "COMPOSER": "composer",
+        # "PIP": "pypi",
+        # "RUBYGEMS": "gem",
+        "NPM": "npm",
+        # "RUST": "cargo",
+        # "GO": "golang",
+    }
+
+    def advisories_count(self):
+        advisory_query = """
+        query{
+            securityVulnerabilities(first: 0, ecosystem: %s) {
+                totalCount
+            }
+        }
+        """
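+        # %s is filled with the ecosystem name, e.g. NPM; with first: 0 the
+        # query returns only totalCount and no edges.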
+        advisory_counts = 0
+        for ecosystem in self.package_type_by_github_ecosystem.keys():
+            graphql_query = {"query": advisory_query % (ecosystem)}
+            response = utils.fetch_github_graphql_query(graphql_query)
+            advisory_counts += get_item(response, "data", "securityVulnerabilities", "totalCount")
+        return advisory_counts
+
+    def collect_advisories(self) -> Iterable[AdvisoryData]:
+
+        # TODO: We will try to gather more info from GH API
+        # Check https://github.com/nexB/vulnerablecode/issues/1039#issuecomment-1366458885
+        # Check https://github.com/nexB/vulnerablecode/issues/645
+        # set of all possible values of first '%s' = {'MAVEN','COMPOSER', 'NUGET', 'RUBYGEMS', 'PYPI', 'NPM', 'RUST'}
+        # the second '%s' is the pagination cursor: it is empty for the first request
+        # and set to the after: <endCursor> expression for subsequent pages
+        advisory_query = """
+        query{
+            securityVulnerabilities(first: 100, ecosystem: %s, %s) {
+                edges {
+                    node {
+                        advisory {
+                            identifiers {
+                                type
+                                value
+                            }
+                            summary
+                            references {
+                                url
+                            }
+                            severity
+                            cwes(first: 10){
+                                nodes {
+                                    cweId
+                                }
+                            }
+                            publishedAt
+                        }
+                        firstPatchedVersion{
+                            identifier
+                        }
+                        package {
+                            name
+                        }
+                        vulnerableVersionRange
+                    }
+                }
+                pageInfo {
+                    hasNextPage
+                    endCursor
+                }
+            }
+        }
+        """
+        for ecosystem, package_type in self.package_type_by_github_ecosystem.items():
+            end_cursor_exp = ""
+            while True:
+                graphql_query = {"query": advisory_query % (ecosystem, end_cursor_exp)}
+                response = utils.fetch_github_graphql_query(graphql_query)
+
+                page_info = get_item(response, "data", "securityVulnerabilities", "pageInfo")
+                end_cursor = get_item(page_info, "endCursor")
+                if end_cursor:
+                    end_cursor = f'"{end_cursor}"'
+                    end_cursor_exp = f"after: {end_cursor}"
+
+                yield from process_response(response, package_type=package_type)
+
+                if not get_item(page_info, "hasNextPage"):
+                    break
+
+
+def get_purl(pkg_type: str, github_name: str, logger: Callable = None) -> Optional[PackageURL]:
+    """
+    Return a PackageURL by splitting the `github_name` using the `pkg_type`
+    convention. Return None and log an error if we cannot split or it is an
+    unknown package type.
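+
+    GitHub reports package names in per-ecosystem notation, so the name must be
+    split into namespace and name depending on ``pkg_type``.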
+
+    For example::
+    >>> expected = PackageURL(type='maven', namespace='org.apache.commons', name='commons-lang3')
+    >>> assert get_purl("maven", "org.apache.commons:commons-lang3") == expected
+
+    >>> expected = PackageURL(type="composer", namespace="foo", name="bar")
+    >>> assert get_purl("composer", "foo/bar") == expected
+    """
+    if pkg_type == "maven":
+        if ":" not in github_name:
+            if logger:
+                logger(f"get_purl: Invalid maven package name {github_name}", level=logging.ERROR)
+            return
+        ns, _, name = github_name.partition(":")
+        return PackageURL(type=pkg_type, namespace=ns, name=name)
+
+    if pkg_type in ("composer", "npm"):
+        if "/" not in github_name:
+            return PackageURL(type=pkg_type, name=github_name)
+        vendor, _, name = github_name.partition("/")
+        return PackageURL(type=pkg_type, namespace=vendor, name=name)
+
+    if pkg_type in ("nuget", "pypi", "gem", "golang", "cargo"):
+        return PackageURL(type=pkg_type, name=github_name)
+
+    if logger:
+        logger(f"get_purl: Unknown package type {pkg_type}", level=logging.ERROR)
+
+
+def process_response(
+    resp: dict, package_type: str, logger: Callable = None
+) -> Iterable[AdvisoryData]:
+    """
+    Yield `AdvisoryData` by taking `resp` and `ecosystem` as input
+    """
+    vulnerabilities = get_item(resp, "data", "securityVulnerabilities", "edges") or []
+    if not vulnerabilities:
+        if logger:
+            logger(
+                f"No vulnerabilities found for package_type: {package_type!r} in response: {resp!r}",
+                level=logging.ERROR,
+            )
+        return
+
+    for vulnerability in vulnerabilities:
+        aliases = []
+        affected_packages = []
+        github_advisory = get_item(vulnerability, "node")
+        if not github_advisory:
+            if logger:
+                logger(f"No node found in {vulnerability!r}", level=logging.ERROR)
+            continue
+
+        advisory = get_item(github_advisory, "advisory")
+        if not advisory:
+            if logger:
+                logger(f"No advisory found in {github_advisory!r}", level=logging.ERROR)
+            continue
+
+        summary = get_item(advisory, "summary") or ""
+
+        references = get_item(advisory, "references") or []
+        if references:
+            urls = (ref["url"] for ref in references)
+            references = [Reference.from_url(u) for u in urls]
+
+        date_published = get_item(advisory, "publishedAt")
+        if date_published:
+            date_published = dateparser.parse(date_published)
+
+        name = get_item(github_advisory, "package", "name")
+        if name:
+            purl = get_purl(pkg_type=package_type, github_name=name, logger=logger)
+            if purl:
+                affected_range = get_item(github_advisory, "vulnerableVersionRange")
+                fixed_version = get_item(github_advisory, "firstPatchedVersion", "identifier")
+                if affected_range:
+                    try:
+                        affected_range = build_range_from_github_advisory_constraint(
+                            package_type, affected_range
+                        )
+                    except Exception as e:
+                        if logger:
+                            logger(
+                                f"Could not parse affected range {affected_range!r} {e!r} \n {traceback_format_exc()}",
+                                level=logging.ERROR,
+                            )
+                        affected_range = None
+                if fixed_version:
+                    try:
+                        fixed_version = RANGE_CLASS_BY_SCHEMES[package_type].version_class(
+                            fixed_version
+                        )
+                    except Exception as e:
+                        if logger:
+                            logger(
+                                f"Invalid fixed version {fixed_version!r} {e!r} \n {traceback_format_exc()}",
+                                level=logging.ERROR,
+                            )
+                        fixed_version = None
+                if affected_range or fixed_version:
+                    affected_packages.append(
+                        AffectedPackage(
+                            package=purl,
+                            affected_version_range=affected_range,
+                            fixed_version=fixed_version,
+                        )
+                    )
+        identifiers = get_item(advisory, "identifiers") or []
+        ghsa_id = ""
+        severities = []
+        for identifier in identifiers:
+            value = identifier["value"]
+            identifier_type = identifier["type"]
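+            # identifiers look like {"type": "GHSA", "value": "GHSA-xxxx-..."}
+            # or {"type": "CVE", "value": "CVE-2022-1234"}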
+            aliases.append(value)
+            if identifier_type == "GHSA":
+                # Each node has a single GHSA identifier; attach the severity
+                # score to this GHSA reference.
+                ghsa_id = value
+                for ref in references:
+                    if ref.reference_id == value:
+                        severity = get_item(advisory, "severity")
+                        if severity:
+                            severities = [
+                                VulnerabilitySeverity(
+                                    system=severity_systems.CVSS31_QUALITY,
+                                    value=severity,
+                                    url=ref.url,
+                                )
+                            ]
+
+            elif identifier_type == "CVE":
+                pass
+            else:
+                if logger:
+                    logger(
+                        f"Unknown identifier type {identifier_type!r} and value {value!r}",
+                        level=logging.ERROR,
+                    )
+
+        weaknesses = get_cwes_from_github_advisory(advisory, logger)
+
+        aliases = sorted(dedupe(aliases))
+
+        advisory_id = ghsa_id or (aliases[0] if aliases else None)
+
+        if advisory_id in aliases:
+            aliases.remove(advisory_id)
+
+        yield AdvisoryData(
+            advisory_id=advisory_id,
+            aliases=aliases,
+            summary=summary,
+            references_v2=references,
+            severities=severities,
+            affected_packages=affected_packages,
+            date_published=date_published,
+            weaknesses=weaknesses,
+            url=f"https://github.com/advisories/{ghsa_id}",
+        )
+
+
+def get_cwes_from_github_advisory(advisory, logger=None) -> List[int]:
+    """
+    Return the list of CWE ids, e.g. [522], from the ``advisory`` data that
+    holds CWE strings, e.g. [{'cweId': 'CWE-522'}]: strip the 'CWE-' prefix,
+    convert the rest to an integer, and keep only ids found in the CWE database.
+    """
+    weaknesses = []
+    db = Database()
+    cwe_list = get_item(advisory, "cwes", "nodes") or []
+    for cwe_item in cwe_list:
+        cwe_string = get_item(cwe_item, "cweId")
+        if cwe_string:
+            cwe_id = get_cwe_id(cwe_string)
+            try:
+                db.get(cwe_id)
+                weaknesses.append(cwe_id)
+            except Exception as e:
+                if logger:
+                    logger(f"Invalid CWE id {e!r} \n {traceback_format_exc()}", level=logging.ERROR)
+    return weaknesses
diff --git a/vulnerabilities/pipelines/v2_importers/gitlab_importer.py b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py
new file mode 100644
index 000000000..1f175f07f
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/gitlab_importer.py
@@ -0,0 +1,329 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import logging
+import traceback
+from pathlib import Path
+from typing import Iterable
+from typing import List
+from typing import Tuple
+
+import pytz
+import saneyaml
+from dateutil import parser as dateparser
+from fetchcode.vcs import fetch_via_vcs
+from packageurl import PackageURL
+from univers.version_range import RANGE_CLASS_BY_SCHEMES
+from univers.version_range import VersionRange
+from univers.version_range import from_gitlab_native
+from univers.versions import Version
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import Reference
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.utils import build_description
+from vulnerabilities.utils import get_advisory_url
+from vulnerabilities.utils import get_cwe_id
+
+
+class GitLabImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+    """
+    GitLab Importer Pipeline
+
+    Collect advisories from the GitLab Advisory Database (Open Source Edition).
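+
+    Advisories are stored in the repository as YAML files laid out as
+    ``<gitlab-scheme>/<package-slug>/<vulnerability-id>.yml``, for example
+    ``pypi/gradio/CVE-2021-43831.yml``.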
+ """ + + pipeline_id = "gitlab_importer_v2" + spdx_license_expression = "MIT" + license_url = "https://gitlab.com/gitlab-org/advisories-community/-/blob/main/LICENSE" + repo_url = "git+https://gitlab.com/gitlab-org/advisories-community/" + unfurl_version_ranges = True + + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.clean_downloads, + ) + + purl_type_by_gitlab_scheme = { + "conan": "conan", + "gem": "gem", + # Entering issue to parse go package names https://github.com/nexB/vulnerablecode/issues/742 + # "go": "golang", + "maven": "maven", + "npm": "npm", + "nuget": "nuget", + "packagist": "composer", + "pypi": "pypi", + } + + gitlab_scheme_by_purl_type = {v: k for k, v in purl_type_by_gitlab_scheme.items()} + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self): + root = Path(self.vcs_response.dest_dir) + return sum(1 for _ in root.rglob("*.yml")) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + base_path = Path(self.vcs_response.dest_dir) + + for file_path in base_path.rglob("*.yml"): + if file_path.parent == base_path: + continue + + gitlab_type, _, _ = parse_advisory_path( + base_path=base_path, + file_path=file_path, + ) + + if gitlab_type not in self.purl_type_by_gitlab_scheme: + self.log( + f"Unknown package type {gitlab_type!r} in {file_path!r}", + level=logging.ERROR, + ) + continue + + advisory = parse_gitlab_advisory( + file=file_path, + base_path=base_path, + gitlab_scheme_by_purl_type=self.gitlab_scheme_by_purl_type, + purl_type_by_gitlab_scheme=self.purl_type_by_gitlab_scheme, + logger=self.log, + ) + + if not advisory: + self.log( + f"Failed to parse advisory from {file_path!r}", + level=logging.ERROR, + ) + continue + + yield advisory + + def clean_downloads(self): + if self.vcs_response: + self.log(f"Removing cloned repository") + self.vcs_response.delete() + + def on_failure(self): + self.clean_downloads() + + +def parse_advisory_path(base_path: Path, file_path: Path) -> Tuple[str, str, str]: + """ + Parse a gitlab advisory file and return a 3-tuple of: + (gitlab_type, package_slug, vulnerability_id) + + For example:: + + >>> base_path = Path("/tmp/tmpi1klhpmd/checkout") + >>> file_path=Path("/tmp/tmpi1klhpmd/checkout/pypi/gradio/CVE-2021-43831.yml") + >>> parse_advisory_path(base_path=base_path, file_path=file_path) + ('pypi', 'gradio', 'CVE-2021-43831') + + >>> file_path=Path("/tmp/tmpi1klhpmd/checkout/nuget/github.com/beego/beego/v2/nuget/CVE-2021-43831.yml") + >>> parse_advisory_path(base_path=base_path, file_path=file_path) + ('nuget', 'github.com/beego/beego/v2/nuget', 'CVE-2021-43831') + + >>> file_path = Path("/tmp/tmpi1klhpmd/checkout/npm/@express/beego/beego/v2/CVE-2021-43831.yml") + >>> parse_advisory_path(base_path=base_path, file_path=file_path) + ('npm', '@express/beego/beego/v2', 'CVE-2021-43831') + """ + relative_path_segments = file_path.relative_to(base_path).parts + gitlab_type = relative_path_segments[0] + vuln_id = file_path.stem + package_slug = "/".join(relative_path_segments[1:-1]) + + return gitlab_type, package_slug, vuln_id + + +def get_purl(package_slug, purl_type_by_gitlab_scheme, logger): + """ + Return a PackageURL object from a package slug + """ + parts = [p for p in package_slug.strip("/").split("/") if p] + gitlab_scheme = parts[0] + purl_type = purl_type_by_gitlab_scheme[gitlab_scheme] + if gitlab_scheme == "go": + name = "/".join(parts[1:]) + return PackageURL(type=purl_type, 
namespace=None, name=name)
+    # if package slug is of the form:
+    # "nuget/NuGet.Core"
+    if len(parts) == 2:
+        name = parts[1]
+        return PackageURL(type=purl_type, name=name)
+    # if package slug is of the form:
+    # "nuget/github.com/beego/beego/v2/nuget"
+    if len(parts) >= 3:
+        name = parts[-1]
+        namespace = "/".join(parts[1:-1])
+        return PackageURL(type=purl_type, namespace=namespace, name=name)
+    logger(f"get_purl: package_slug cannot be parsed: {package_slug!r}", level=logging.ERROR)
+    return
+
+
+def extract_affected_packages(
+    affected_version_range: VersionRange,
+    fixed_versions: List[Version],
+    purl: PackageURL,
+) -> Iterable[AffectedPackage]:
+    """
+    Yield AffectedPackage objects, one for each fixed_version.
+
+    GitLab advisory data provides a list of fixed_versions and a single
+    affected_version_range, and we cannot determine which fixed version fixes
+    which part of the range. We therefore store all the fixed_versions with
+    the same affected_version_range in the advisory; the advisory data is
+    later inferred in the GitLabBasicImprover.
+    """
+    for fixed_version in fixed_versions:
+        yield AffectedPackage(
+            package=purl,
+            fixed_version=fixed_version,
+            affected_version_range=affected_version_range,
+        )
+
+
+def parse_gitlab_advisory(
+    file, base_path, gitlab_scheme_by_purl_type, purl_type_by_gitlab_scheme, logger
+):
+    """
+    Parse a GitLab advisory file and return an AdvisoryData or None.
+    These files are YAML. There is a JSON schema documented at
+    https://gitlab.com/gitlab-org/advisories-community/-/blob/main/ci/schema/schema.json
+
+    Sample YAML file:
+    ---
+    identifier: "GMS-2018-26"
+    package_slug: "packagist/amphp/http"
+    title: "Incorrect header injection check"
+    description: "amphp/http isn't properly protected against HTTP header injection."
+    pubdate: "2018-03-15"
+    affected_range: "<1.0.1"
+    fixed_versions:
+    - "v1.0.1"
+    urls:
+    - "https://github.com/amphp/http/pull/4"
+    cwe_ids:
+    - "CWE-1035"
+    - "CWE-937"
+    identifiers:
+    - "GMS-2018-26"
+    """
+    with open(file) as f:
+        gitlab_advisory = saneyaml.load(f)
+    if not isinstance(gitlab_advisory, dict):
+        logger(
+            f"parse_gitlab_advisory: unknown gitlab advisory format in {file!r} with data: {gitlab_advisory!r}",
+            level=logging.ERROR,
+        )
+        return
+
+    # refer to schema here https://gitlab.com/gitlab-org/advisories-community/-/blob/main/ci/schema/schema.json
+    aliases = gitlab_advisory.get("identifiers") or []
+    advisory_id = gitlab_advisory.get("identifier")
+    if advisory_id in aliases:
+        aliases.remove(advisory_id)
+    summary = build_description(gitlab_advisory.get("title"), gitlab_advisory.get("description"))
+    urls = gitlab_advisory.get("urls") or []
+    references = [Reference.from_url(u) for u in urls]
+
+    cwe_ids = gitlab_advisory.get("cwe_ids") or []
+    cwe_list = list(map(get_cwe_id, cwe_ids))
+
+    date_published = dateparser.parse(gitlab_advisory.get("pubdate"))
+    date_published = date_published.replace(tzinfo=pytz.UTC)
+    package_slug = gitlab_advisory.get("package_slug")
+    advisory_url = get_advisory_url(
+        file=file,
+        base_path=base_path,
+        url="https://gitlab.com/gitlab-org/advisories-community/-/blob/main/",
+    )
+    purl: PackageURL = get_purl(
+        package_slug=package_slug,
+        purl_type_by_gitlab_scheme=purl_type_by_gitlab_scheme,
+        logger=logger,
+    )
+    if not purl:
+        logger(
+            f"parse_gitlab_advisory: purl is not valid: {file!r} {package_slug!r}",
+            level=logging.ERROR,
+        )
+        return AdvisoryData(
+            advisory_id=advisory_id,
+            aliases=aliases,
+            summary=summary,
+            references_v2=references,
+            date_published=date_published,
+            url=advisory_url,
+        )
+    affected_version_range = None
+    fixed_versions = gitlab_advisory.get("fixed_versions") or []
+    affected_range = gitlab_advisory.get("affected_range")
+    gitlab_native_schemes = {"pypi", "gem", "npm", "go", "packagist", "conan"}
+    vrc: VersionRange = RANGE_CLASS_BY_SCHEMES[purl.type]
+    gitlab_scheme = gitlab_scheme_by_purl_type[purl.type]
+    try:
+        if affected_range:
+            if gitlab_scheme in gitlab_native_schemes:
+                affected_version_range = from_gitlab_native(
+                    gitlab_scheme=gitlab_scheme, string=affected_range
+                )
+            else:
+                affected_version_range = vrc.from_native(affected_range)
+    except Exception as e:
+        logger(
+            f"parse_gitlab_advisory: affected_range is not parsable: {affected_range!r} for: {purl!s} error: {e!r}\n {traceback.format_exc()}",
+            level=logging.ERROR,
+        )
+
+    parsed_fixed_versions = []
+    for fixed_version in fixed_versions:
+        try:
+            fixed_version = vrc.version_class(fixed_version)
+            parsed_fixed_versions.append(fixed_version)
+        except Exception as e:
+            logger(
+                f"parse_gitlab_advisory: fixed_version is not parsable: {fixed_version!r} error: {e!r}\n {traceback.format_exc()}",
+                level=logging.ERROR,
+            )
+
+    if parsed_fixed_versions:
+        affected_packages = list(
+            extract_affected_packages(
+                affected_version_range=affected_version_range,
+                fixed_versions=parsed_fixed_versions,
+                purl=purl,
+            )
+        )
+    else:
+        if not affected_version_range:
+            affected_packages = []
+        else:
+            affected_packages = [
+                AffectedPackage(
+                    package=purl,
+                    affected_version_range=affected_version_range,
+                )
+            ]
+    return AdvisoryData(
+        advisory_id=advisory_id,
+        aliases=aliases,
+        summary=summary,
+        references_v2=references,
+        date_published=date_published,
+        affected_packages=affected_packages,
+        weaknesses=cwe_list,
+        url=advisory_url,
+    )
diff --git
a/vulnerabilities/pipelines/v2_importers/npm_importer.py b/vulnerabilities/pipelines/v2_importers/npm_importer.py
new file mode 100644
index 000000000..19d21c987
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/npm_importer.py
@@ -0,0 +1,178 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+# Author: Navonil Das (@NavonilDas)
+
+from pathlib import Path
+from typing import Iterable
+from typing import Optional
+
+import pytz
+from dateutil.parser import parse
+from fetchcode.vcs import fetch_via_vcs
+from packageurl import PackageURL
+from univers.version_range import NpmVersionRange
+
+from vulnerabilities.importer import AdvisoryData
+from vulnerabilities.importer import AffectedPackage
+from vulnerabilities.importer import Reference
+from vulnerabilities.importer import VulnerabilitySeverity
+from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
+from vulnerabilities.severity_systems import CVSSV2
+from vulnerabilities.severity_systems import CVSSV3
+from vulnerabilities.utils import build_description
+from vulnerabilities.utils import load_json
+
+
+class NpmImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
+    """
+    Node.js Security Working Group importer pipeline
+
+    Import advisories from the Node.js Security Working Group, covering both
+    Node.js itself and npm packages.
+    """
+
+    pipeline_id = "nodejs_security_wg"
+    spdx_license_expression = "MIT"
+    license_url = "https://github.com/nodejs/security-wg/blob/main/LICENSE.md"
+    repo_url = "git+https://github.com/nodejs/security-wg"
+    unfurl_version_ranges = True
+
+    @classmethod
+    def steps(cls):
+        return (
+            cls.clone,
+            cls.collect_and_store_advisories,
+            cls.clean_downloads,
+        )
+
+    def clone(self):
+        self.log(f"Cloning `{self.repo_url}`")
+        self.vcs_response = fetch_via_vcs(self.repo_url)
+
+    def advisories_count(self):
+        vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm"
+        return sum(1 for _ in vuln_directory.glob("*.json"))
+
+    def collect_advisories(self) -> Iterable[AdvisoryData]:
+        vuln_directory = Path(self.vcs_response.dest_dir) / "vuln" / "npm"
+
+        for advisory in vuln_directory.glob("*.json"):
+            # to_advisory_data returns None for index.json and for records
+            # without an id; do not yield these.
+            if advisory_data := self.to_advisory_data(advisory):
+                yield advisory_data
+
+    def to_advisory_data(self, file: Path) -> Optional[AdvisoryData]:
+        if file.name == "index.json":
+            self.log(f"Skipping {file.name} file")
+            return
+        data = load_json(file)
+        id = data.get("id")
+        if not id:
+            self.log(f"Advisory ID not found in {file}")
+            return
+        description = data.get("overview") or ""
+        summary = data.get("title") or ""
+        # TODO: Take care of description
+        date_published = None
+        if isinstance(data.get("created_at"), str):
+            date_published = parse(data.get("created_at")).replace(tzinfo=pytz.UTC)
+        references = []
+        cvss_vector = data.get("cvss_vector")
+        cvss_score = data.get("cvss_score")
+        severities = []
+        if cvss_vector and cvss_vector.startswith("CVSS:3.0/"):
+            severities.append(
+                VulnerabilitySeverity(
+                    system=CVSSV3,
+                    value=cvss_score,
+                    url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json",
+                )
+            )
+        if cvss_vector and cvss_vector.startswith("CVSS:2.0/"):
+            severities.append(
+                VulnerabilitySeverity(
+                    system=CVSSV2,
+                    value=cvss_score,
+                    url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json",
+                )
+            )
+
+        advisory_reference = Reference(
+            url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json",
+            reference_id=id,
+        )
+
+        for ref in data.get("references") or []:
+            references.append(
+                Reference(
+                    url=ref,
+                )
+            )
+
+        if advisory_reference not in references:
+            references.append(advisory_reference)
+
+        package_name = data.get("module_name")
+        affected_packages = []
+        if package_name:
+            affected_packages.append(self.get_affected_package(data, package_name))
+        advisory_aliases = data.get("cves") or []
+
+        return AdvisoryData(
+            advisory_id=f"npm-{id}",
+            aliases=advisory_aliases,
+            summary=build_description(summary=summary, description=description),
+            date_published=date_published,
+            affected_packages=affected_packages,
+            references_v2=references,
+            severities=severities,
+            url=f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json",
+        )
+
+    def get_affected_package(self, data, package_name):
+        affected_version_range = None
+        unaffected_version_range = None
+        fixed_version = None
+
+        vulnerable_range = data.get("vulnerable_versions") or ""
+        patched_range = data.get("patched_versions") or ""
+
+        # https://github.com/nodejs/security-wg/blob/cfaa51cc5c83f01eea61b69658f7bc76a77c5979/vuln/npm/213.json#L14
+        if vulnerable_range == "<=99.999.99999":
+            vulnerable_range = "*"
+        if vulnerable_range:
+            affected_version_range = NpmVersionRange.from_native(vulnerable_range)
+
+        # https://github.com/nodejs/security-wg/blob/cfaa51cc5c83f01eea61b69658f7bc76a77c5979/vuln/npm/213.json#L15
+        if patched_range == "<0.0.0":
+            patched_range = None
+        if patched_range:
+            unaffected_version_range = NpmVersionRange.from_native(patched_range)
+
+        # We only store single fixed versions and not a range of fixed versions
+        # If there is a single constraint in the unaffected_version_range
+        # having comparator as ">=" then we store that as the fixed version
+        if unaffected_version_range and len(unaffected_version_range.constraints) == 1:
+            constraint = unaffected_version_range.constraints[0]
+            if constraint.comparator == ">=":
+                fixed_version = constraint.version
+
+        return AffectedPackage(
+            package=PackageURL(
+                type="npm",
+                name=package_name,
+            ),
+            affected_version_range=affected_version_range,
+            fixed_version=fixed_version,
+        )
+
+    def clean_downloads(self):
+        if self.vcs_response:
+            self.log("Removing cloned repository")
+            self.vcs_response.delete()
+
+    def on_failure(self):
+        self.clean_downloads()
diff --git a/vulnerabilities/pipelines/v2_importers/nvd_importer.py b/vulnerabilities/pipelines/v2_importers/nvd_importer.py
new file mode 100644
index 000000000..1166ac8ef
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_importers/nvd_importer.py
@@ -0,0 +1,338 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+# + +import gzip +import json +import logging +from datetime import date +from traceback import format_exc as traceback_format_exc +from typing import Iterable + +import attr +import requests +from dateutil import parser as dateparser + +from vulnerabilities import severity_systems +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.utils import get_cwe_id +from vulnerabilities.utils import get_item + + +class NVDImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + NVD Importer Pipeline + + Collect advisories from NVD. + """ + + pipeline_id = "nvd_importer_v2" + # See https://github.com/nexB/vulnerablecode/issues/665 for follow up + spdx_license_expression = ( + "LicenseRef-scancode-us-govt-public-domain AND LicenseRef-scancode-cve-tou" + ) + license_url = "https://nvd.nist.gov/general/FAQ-Sections/General-FAQs#faqLink7" + notice = """ + See https://nvd.nist.gov/general/FAQ-Sections/General-FAQs#faqLink7 + All NVD data is freely available from our data feeds + (https://nvd.nist.gov/vuln/data-feeds). There are no fees, licensing + restrictions, or even a requirement to register. All NIST publications are + available in the public domain according to Title 17 of the United States + Code. Acknowledgment of the NVD when using our information is appreciated. + In addition, please email nvd@nist.gov to let us know how the information is + being used + + See also https://cve.mitre.org/about/termsofuse.html + Terms of Use + LICENSE + [...] + CVE Usage: MITRE hereby grants you a perpetual, worldwide, non-exclusive, no- + charge, royalty-free, irrevocable copyright license to reproduce, prepare + derivative works of, publicly display, publicly perform, sublicense, and + distribute Common Vulnerabilities and Exposures (CVE®). Any copy you make for + such purposes is authorized provided that you reproduce MITRE's copyright + designation and this license in any such copy. DISCLAIMERS + + ALL DOCUMENTS AND THE INFORMATION CONTAINED THEREIN PROVIDED BY MITRE ARE + PROVIDED ON AN "AS IS" BASIS AND THE CONTRIBUTOR, THE ORGANIZATION HE/SHE + REPRESENTS OR IS SPONSORED BY (IF ANY), THE MITRE CORPORATION, ITS BOARD OF + TRUSTEES, OFFICERS, AGENTS, AND EMPLOYEES, DISCLAIM ALL WARRANTIES, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE + INFORMATION THEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 
+ """ + + @classmethod + def steps(cls): + return (cls.collect_and_store_advisories,) + + def advisories_count(self): + url = "https://services.nvd.nist.gov/rest/json/cves/2.0?resultsPerPage=1" + + advisory_count = 0 + try: + response = requests.get(url) + response.raise_for_status() + data = response.json() + except requests.HTTPError as http_err: + self.log( + f"HTTP error occurred: {http_err} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + return advisory_count + + advisory_count = data.get("totalResults", 0) + return advisory_count + + def collect_advisories(self) -> Iterable[AdvisoryData]: + for _year, cve_data in fetch_cve_data_1_1(logger=self.log): + yield from to_advisories(cve_data=cve_data) + + +# Isolating network calls for simplicity of testing +def fetch(url, logger=None): + if logger: + logger(f"Fetching `{url}`") + gz_file = requests.get(url) + data = gzip.decompress(gz_file.content) + try: + data = data.decode("utf-8") + except UnicodeDecodeError: + logger(f"Failed to decode data from {url}") + return {} + return json.loads(data) + + +def fetch_cve_data_1_1(starting_year=2002, logger=None): + """ + Yield tuples of (year, lists of CVE mappings) from the NVD, one for each + year since ``starting_year`` defaulting to 2002. + """ + current_year = date.today().year + # NVD json feeds start from 2002. + for year in range(starting_year, current_year + 1): + download_url = f"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz" + yield year, fetch(url=download_url, logger=logger) + + +def to_advisories(cve_data): + """ + Yield AdvisoryData objects from a CVE json feed. + """ + for cve_item in CveItem.from_cve_data(cve_data=cve_data): + if cve_item.is_related_to_hardware or not cve_item.cve_id: + continue + yield cve_item.to_advisory() + + +@attr.attributes +class CveItem: + cve_item = attr.attrib(default=attr.Factory(dict), type=dict) + + @classmethod + def to_advisories(cls, cve_data, skip_hardware=True): + """ + Yield AdvisoryData objects from ``cve_data`` data for CVE JSON 1.1feed. + Skip hardware + """ + for cve_item in CveItem.from_cve_data(cve_data=cve_data, skip_hardware=skip_hardware): + yield cve_item.to_advisory() + + @classmethod + def from_cve_data(cls, cve_data, skip_hardware=True): + """ + Yield CVE items mapping from a cve_data list of CVE mappings from the NVD. + """ + for cve_item in cve_data.get("CVE_Items") or []: + if not cve_item: + continue + if not isinstance(cve_item, dict): + raise ValueError(f"cve_item: {cve_item!r} is not a mapping") + cve_item = cls(cve_item=cve_item) + if skip_hardware and cve_item.is_related_to_hardware: + continue + yield cve_item + + @property + def cve_id(self): + return self.cve_item["cve"]["CVE_data_meta"]["ID"] + + @property + def summary(self): + """ + Return a descriptive summary. + """ + # In 99% of cases len(cve_item['cve']['description']['description_data']) == 1 , so + # this usually returns cve_item['cve']['description']['description_data'][0]['value'] + # In the remaining 1% cases this returns the longest summary. + # FIXME: we should retun the full description WITH the summry as the first line instead + summaries = [] + for desc in get_item(self.cve_item, "cve", "description", "description_data") or []: + if desc.get("value"): + summaries.append(desc["value"]) + return max(summaries, key=len) if summaries else None + + @property + def cpes(self): + """ + Return a list of unique CPE strings for this CVE. 
+ """ + # FIXME: we completely ignore the configurations here + cpes = [] + for node in get_item(self.cve_item, "configurations", "nodes") or []: + for cpe_data in node.get("cpe_match") or []: + cpe23_uri = cpe_data.get("cpe23Uri") + if cpe23_uri and cpe23_uri not in cpes: + cpes.append(cpe23_uri) + return cpes + + @property + def severities(self): + """ + Return a list of VulnerabilitySeverity for this CVE. + """ + severities = [] + impact = self.cve_item.get("impact") or {} + base_metric_v4 = impact.get("baseMetricV4") or {} + if base_metric_v4: + cvss_v4 = base_metric_v4.get("cvssV4") or {} + vs = VulnerabilitySeverity( + system=severity_systems.CVSSV4, + value=str(cvss_v4.get("baseScore") or ""), + scoring_elements=str(cvss_v4.get("vectorString") or ""), + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + ) + severities.append(vs) + + base_metric_v3 = impact.get("baseMetricV3") or {} + if base_metric_v3: + cvss_v3 = get_item(base_metric_v3, "cvssV3") + version = cvss_v3.get("version") + system = None + if version == "3.1": + system = severity_systems.CVSSV31 + else: + system = severity_systems.CVSSV3 + vs = VulnerabilitySeverity( + system=system, + value=str(cvss_v3.get("baseScore") or ""), + scoring_elements=str(cvss_v3.get("vectorString") or ""), + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + ) + severities.append(vs) + + base_metric_v2 = impact.get("baseMetricV2") or {} + if base_metric_v2: + cvss_v2 = base_metric_v2.get("cvssV2") or {} + vs = VulnerabilitySeverity( + system=severity_systems.CVSSV2, + value=str(cvss_v2.get("baseScore") or ""), + scoring_elements=str(cvss_v2.get("vectorString") or ""), + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + ) + severities.append(vs) + + return severities + + @property + def reference_urls(self): + """ + Return a list unique of reference URLs. + """ + # FIXME: we should also collect additional data from the references such as tags and ids + + urls = [] + for reference in get_item(self.cve_item, "cve", "references", "reference_data") or []: + ref_url = reference.get("url") + if ref_url and ref_url.startswith(("http", "ftp")) and ref_url not in urls: + urls.append(ref_url) + return urls + + @property + def references(self): + """ + Return a list of AdvisoryReference. + """ + # FIXME: we should also collect additional data from the references such as tags and ids + references = [] + + # we track each CPE as a reference for now + for cpe in self.cpes: + cpe_url = f"https://nvd.nist.gov/vuln/search/results?adv_search=true&isCpeNameSearch=true&query={cpe}" + references.append(Reference(reference_id=cpe, url=cpe_url)) + + # FIXME: we also add the CVE proper as a reference, but is this correct? + references.append( + Reference( + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + reference_id=self.cve_id, + ) + ) + + # clean to remove dupes for the CVE id proper + ref_urls = [ + ru + for ru in self.reference_urls + if ru != f"https://nvd.nist.gov/vuln/detail/{self.cve_id}" + ] + references.extend([Reference(url=url) for url in ref_urls]) + + return references + + @property + def is_related_to_hardware(self): + """ + Return True if this CVE item is for hardware (as opposed to software). 
+ """ + return any(is_related_to_hardware(cpe) for cpe in self.cpes) + + @property + def weaknesses(self): + """ + Return a list of CWE IDs like: [119, 189] + """ + weaknesses = [] + for weaknesses_item in ( + get_item(self.cve_item, "cve", "problemtype", "problemtype_data") or [] + ): + weaknesses_description = weaknesses_item.get("description") or [] + for weaknesses_value in weaknesses_description: + cwe_id = ( + weaknesses_value.get("value") if weaknesses_value.get("lang") == "en" else None + ) + if cwe_id in ["NVD-CWE-Other", "NVD-CWE-noinfo"] or not cwe_id: + continue # Skip Invalid CWE + weaknesses.append(get_cwe_id(cwe_id)) + return weaknesses + + def to_advisory(self): + """ + Return an AdvisoryData object from this CVE item + """ + return AdvisoryData( + advisory_id=self.cve_id, + aliases=[], + summary=self.summary, + references_v2=self.references, + date_published=dateparser.parse(self.cve_item.get("publishedDate")), + weaknesses=self.weaknesses, + severities=self.severities, + url=f"https://nvd.nist.gov/vuln/detail/{self.cve_id}", + ) + + +def is_related_to_hardware(cpe): + """ + Return True if the ``cpe`` is related to hardware. + """ + cpe_comps = cpe.split(":") + # CPE follow the format cpe:cpe_version:product_type:vendor:product + return len(cpe_comps) > 2 and cpe_comps[2] == "h" diff --git a/vulnerabilities/pipelines/v2_importers/postgresql_importer.py b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py new file mode 100644 index 000000000..2f5a49439 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/postgresql_importer.py @@ -0,0 +1,163 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import urllib.parse as urlparse +from typing import Iterable + +import requests +from bs4 import BeautifulSoup +from packageurl import PackageURL +from univers.version_range import GenericVersionRange +from univers.versions import GenericVersion + +from vulnerabilities import severity_systems +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 + + +class PostgreSQLImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + PostgreSQL Importer Pipeline + + This pipeline imports security advisories from the PostgreSQL project. 
+ """ + + pipeline_id = "postgresql_importer_v2" + license_url = "https://www.postgresql.org/about/licence/" + spdx_license_expression = "PostgreSQL" + base_url = "https://www.postgresql.org/support/security/" + + links = set() + + @classmethod + def steps(cls): + return (cls.collect_and_store_advisories,) + + def advisories_count(self) -> int: + if not self.links: + self.collect_links() + return len(self.links) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + if not self.links: + self.collect_links() + + for url in self.links: + data = requests.get(url).content + yield from self.to_advisories(data) + + def collect_links(self): + known_urls = {self.base_url} + visited_urls = set() + + while True: + unvisited_urls = known_urls - visited_urls + for url in unvisited_urls: + data = requests.get(url).content + visited_urls.add(url) + known_urls.update(self.find_advisory_urls(data)) + if known_urls == visited_urls: + break + self.links = known_urls + + def to_advisories(self, data): + advisories = [] + soup = BeautifulSoup(data, features="lxml") + tables = soup.select("table") + + if not tables: + return advisories + + table = tables[0] + + for row in table.select("tbody tr"): + ref_col, affected_col, fixed_col, severity_score_col, desc_col = row.select("td") + summary = desc_col.text + pkg_qualifiers = {"os": "windows"} if "windows" in summary.lower() else {} + + affected_packages = [] + affected_version_list = [v.strip() for v in affected_col.text.split(",") if v.strip()] + fixed_version_list = [v.strip() for v in fixed_col.text.split(",") if v.strip()] + + if fixed_version_list: + for fixed_version in fixed_version_list: + affected_packages.append( + AffectedPackage( + package=PackageURL( + name="postgresql", type="generic", qualifiers=pkg_qualifiers + ), + affected_version_range=GenericVersionRange.from_versions( + affected_version_list + ) + if affected_version_list + else None, + fixed_version=GenericVersion(fixed_version), + ) + ) + elif affected_version_list: + affected_packages.append( + AffectedPackage( + package=PackageURL( + name="postgresql", type="generic", qualifiers=pkg_qualifiers + ), + affected_version_range=GenericVersionRange.from_versions( + affected_version_list + ), + ) + ) + + cve_id = "" + try: + cve_id = ref_col.select(".nobr")[0].text + except IndexError: + pass + + references = [] + vector_link_tag = severity_score_col.find("a") + for a_tag in ref_col.select("a"): + link = a_tag.attrs["href"] + if link.startswith("/"): + link = urlparse.urljoin("https://www.postgresql.org/", link) + severities = [] + if "support/security/CVE" in link and vector_link_tag: + parsed_link = urlparse.urlparse(vector_link_tag["href"]) + cvss3_vector = urlparse.parse_qs(parsed_link.query).get("vector", [""])[0] + cvss3_base_score = vector_link_tag.text + severities.append( + VulnerabilitySeverity( + system=severity_systems.CVSSV3, + value=cvss3_base_score, + scoring_elements=cvss3_vector, + ) + ) + references.append(Reference(url=link, severities=severities)) + + if cve_id: + advisories.append( + AdvisoryData( + advisory_id=cve_id, + aliases=[], + summary=summary, + references_v2=references, + affected_packages=affected_packages, + url=f"https://www.postgresql.org/support/security/{cve_id}", + ) + ) + + return advisories + + def find_advisory_urls(self, page_data): + soup = BeautifulSoup(page_data, features="lxml") + return { + urlparse.urljoin("https://www.postgresql.org/", a_tag.attrs["href"]) + for a_tag in soup.select("h3+ p a") + } diff --git 
a/vulnerabilities/pipelines/v2_importers/pypa_importer.py b/vulnerabilities/pipelines/v2_importers/pypa_importer.py new file mode 100644 index 000000000..7463cc4bd --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/pypa_importer.py @@ -0,0 +1,74 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from pathlib import Path +from typing import Iterable + +import saneyaml +from fetchcode.vcs import fetch_via_vcs + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.utils import get_advisory_url + + +class PyPaImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + Pypa Importer Pipeline + + Collect advisories from PyPA GitHub repository.""" + + pipeline_id = "pypa_importer_v2" + spdx_license_expression = "CC-BY-4.0" + license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" + repo_url = "git+https://github.com/pypa/advisory-database" + unfurl_version_ranges = True + + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.clean_downloads, + ) + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self): + vulns_directory = Path(self.vcs_response.dest_dir) / "vulns" + return sum(1 for _ in vulns_directory.rglob("*.yaml")) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + from vulnerabilities.importers.osv import parse_advisory_data_v2 + + base_directory = Path(self.vcs_response.dest_dir) + vulns_directory = base_directory / "vulns" + + for advisory in vulns_directory.rglob("*.yaml"): + advisory_url = get_advisory_url( + file=advisory, + base_path=base_directory, + url="https://github.com/pypa/advisory-database/blob/main/", + ) + advisory_dict = saneyaml.load(advisory.read_text()) + yield parse_advisory_data_v2( + raw_data=advisory_dict, + supported_ecosystems=["pypi"], + advisory_url=advisory_url, + ) + + def clean_downloads(self): + if self.vcs_response: + self.log(f"Removing cloned repository") + self.vcs_response.delete() + + def on_failure(self): + self.clean_downloads() diff --git a/vulnerabilities/pipelines/v2_importers/pysec_importer.py b/vulnerabilities/pipelines/v2_importers/pysec_importer.py new file mode 100644 index 000000000..e67f41a28 --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/pysec_importer.py @@ -0,0 +1,67 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# +import json +import logging +from io import BytesIO +from typing import Iterable +from zipfile import ZipFile + +import requests + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 + + +class PyPIImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + PyPI Importer Pipeline + + Collect advisories from PyPI.""" + + pipeline_id = "pysec_importer_v2" + license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" + url = "https://osv-vulnerabilities.storage.googleapis.com/PyPI/all.zip" + spdx_license_expression = "CC-BY-4.0" + unfurl_version_ranges = True + + @classmethod + def steps(cls): + return ( + cls.fetch_zip, + cls.collect_and_store_advisories, + ) + + def fetch_zip(self): + self.log(f"Fetching `{self.url}`") + self.advisory_zip = requests.get(self.url).content + + def advisories_count(self) -> int: + with ZipFile(BytesIO(self.advisory_zip)) as zip: + advisory_count = sum(1 for file in zip.namelist() if file.startswith("PYSEC-")) + return advisory_count + + def collect_advisories(self) -> Iterable[AdvisoryData]: + """Yield AdvisoryData using a zipped data dump of OSV data""" + from vulnerabilities.importers.osv import parse_advisory_data_v2 + + with ZipFile(BytesIO(self.advisory_zip)) as zip_file: + for file_name in zip_file.namelist(): + if not file_name.startswith("PYSEC-"): + self.log( + f"Unsupported PyPI advisory data file: {file_name}", + level=logging.ERROR, + ) + continue + with zip_file.open(file_name) as f: + vul_info = json.load(f) + yield parse_advisory_data_v2( + raw_data=vul_info, + supported_ecosystems=["pypi"], + advisory_url=self.url, + ) diff --git a/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py new file mode 100644 index 000000000..b2ddfd3cd --- /dev/null +++ b/vulnerabilities/pipelines/v2_importers/vulnrichment_importer.py @@ -0,0 +1,318 @@ +import json +import logging +import re +from pathlib import Path +from typing import Iterable + +import dateparser +from fetchcode.vcs import fetch_via_vcs + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import Reference +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.models import VulnerabilityReference +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 +from vulnerabilities.severity_systems import SCORING_SYSTEMS +from vulnerabilities.utils import get_advisory_url +from vulnerabilities.utils import get_cwe_id +from vulnerabilities.utils import get_reference_id + +logger = logging.getLogger(__name__) + + +class VulnrichImporterPipeline(VulnerableCodeBaseImporterPipelineV2): + """ + Vulnrichment Importer Pipeline + + This pipeline imports security advisories from Vulnrichment project. 
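+
+    Advisories are read from the per-CVE JSON files of the cloned
+    https://github.com/cisagov/vulnrichment repository, using the "cna" and
+    "adp" containers of each CVE JSON record for metrics, references and
+    weaknesses.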
+ """ + + pipeline_id = "vulnrichment_importer_v2" + spdx_license_expression = "CC0-1.0" + license_url = "https://github.com/cisagov/vulnrichment/blob/develop/LICENSE" + repo_url = "git+https://github.com/cisagov/vulnrichment.git" + + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.clean_downloads, + ) + + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) + + def advisories_count(self): + vuln_directory = Path(self.vcs_response.dest_dir) + return sum(1 for _ in vuln_directory.glob("*.json")) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + base_path = Path(self.vcs_response.dest_dir) + for file_path in base_path.glob("**/**/*.json"): + if not file_path.name.startswith("CVE-"): + continue + with open(file_path) as f: + raw_data = json.load(f) + advisory_url = get_advisory_url( + file=file_path, + base_path=base_path, + url="https://github.com/cisagov/vulnrichment/blob/develop/", + ) + yield self.parse_cve_advisory(raw_data, advisory_url) + + def parse_cve_advisory(self, raw_data, advisory_url): + cve_metadata = raw_data.get("cveMetadata", {}) + cve_id = cve_metadata.get("cveId") + state = cve_metadata.get("state") + + date_published = cve_metadata.get("datePublished") + if date_published: + date_published = dateparser.parse(date_published) + + # Extract containers + containers = raw_data.get("containers", {}) + cna_data = containers.get("cna", {}) + adp_data = containers.get("adp", {}) + + # Extract descriptions + summary = "" + description_list = cna_data.get("descriptions", []) + for description_dict in description_list: + if not description_dict.get("lang") in ["en", "en-US"]: + continue + summary = description_dict.get("value") + + # Extract metrics + severities = [] + metrics = cna_data.get("metrics", []) + [ + adp_metrics for data in adp_data for adp_metrics in data.get("metrics", []) + ] + + vulnrichment_scoring_system = { + "cvssV4_0": SCORING_SYSTEMS["cvssv4"], + "cvssV3_1": SCORING_SYSTEMS["cvssv3.1"], + "cvssV3_0": SCORING_SYSTEMS["cvssv3"], + "cvssV2_0": SCORING_SYSTEMS["cvssv2"], + "other": { + "ssvc": SCORING_SYSTEMS["ssvc"], + }, # ignore kev + } + + for metric in metrics: + for metric_type, metric_value in metric.items(): + if metric_type not in vulnrichment_scoring_system: + continue + + if metric_type == "other": + other_types = metric_value.get("type") + if other_types == "ssvc": + content = metric_value.get("content", {}) + vector_string, decision = ssvc_calculator(content) + scoring_system = vulnrichment_scoring_system[metric_type][other_types] + severity = VulnerabilitySeverity( + system=scoring_system, value=decision, scoring_elements=vector_string + ) + severities.append(severity) + # ignore kev + else: + vector_string = metric_value.get("vectorString") + base_score = metric_value.get("baseScore") + scoring_system = vulnrichment_scoring_system[metric_type] + severity = VulnerabilitySeverity( + system=scoring_system, value=base_score, scoring_elements=vector_string + ) + severities.append(severity) + + # Extract references cpes and ignore affected products + cpes = set() + for affected_product in cna_data.get("affected", []): + if type(affected_product) != dict: + continue + cpes.update(affected_product.get("cpes") or []) + + references = [] + for ref in cna_data.get("references", []): + # https://github.com/CVEProject/cve-schema/blob/main/schema/tags/reference-tags.json + # We removed all unwanted reference types and set the default reference type 
to 'OTHER'. + ref_type = VulnerabilityReference.OTHER + vul_ref_types = { + "exploit": VulnerabilityReference.EXPLOIT, + "issue-tracking": VulnerabilityReference.BUG, + "mailing-list": VulnerabilityReference.MAILING_LIST, + "third-party-advisory": VulnerabilityReference.ADVISORY, + "vendor-advisory": VulnerabilityReference.ADVISORY, + "vdb-entry": VulnerabilityReference.ADVISORY, + } + + for tag_type in ref.get("tags", []): + if tag_type in vul_ref_types: + ref_type = vul_ref_types.get(tag_type) + + url = ref.get("url") + reference = Reference( + reference_id=get_reference_id(url), + url=url, + reference_type=ref_type, + ) + + references.append(reference) + + cpes_ref = [ + Reference( + reference_id=cpe, + reference_type=VulnerabilityReference.OTHER, + url=f"https://nvd.nist.gov/vuln/search/results?adv_search=true&isCpeNameSearch=true&query={cpe}", + ) + for cpe in sorted(list(cpes)) + ] + references.extend(cpes_ref) + + weaknesses = set() + for problem_type in cna_data.get("problemTypes", []): + descriptions = problem_type.get("descriptions", []) + for description in descriptions: + cwe_id = description.get("cweId") + if cwe_id: + weaknesses.add(get_cwe_id(cwe_id)) + + description_text = description.get("description") + if description_text: + pattern = r"CWE-(\d+)" + match = re.search(pattern, description_text) + if match: + weaknesses.add(int(match.group(1))) + + return AdvisoryData( + advisory_id=cve_id, + aliases=[], + summary=summary, + references_v2=references, + date_published=date_published, + weaknesses=sorted(weaknesses), + url=advisory_url, + severities=severities, + ) + + def clean_downloads(self): + if self.vcs_response: + self.log("Removing cloned repository") + self.vcs_response.delete() + + def on_failure(self): + self.clean_downloads() + + +def ssvc_calculator(ssvc_data): + """ + Return the ssvc vector and the decision value + """ + options = ssvc_data.get("options", []) + timestamp = ssvc_data.get("timestamp") + + # Extract the options into a dictionary + options_dict = {k: v.lower() for option in options for k, v in option.items()} + + # We copied the table value from this link. 
+ # https://www.cisa.gov/sites/default/files/publications/cisa-ssvc-guide%20508c.pdf + + # Determining Mission and Well-Being Impact Value + mission_well_being_table = { + # (Mission Prevalence, Public Well-being Impact) : "Mission & Well-being" + ("minimal", "minimal"): "low", + ("minimal", "material"): "medium", + ("minimal", "irreversible"): "high", + ("support", "minimal"): "medium", + ("support", "material"): "medium", + ("support", "irreversible"): "high", + ("essential", "minimal"): "high", + ("essential", "material"): "high", + ("essential", "irreversible"): "high", + } + + if "Mission Prevalence" not in options_dict: + options_dict["Mission Prevalence"] = "minimal" + + if "Public Well-being Impact" not in options_dict: + options_dict["Public Well-being Impact"] = "material" + + options_dict["Mission & Well-being"] = mission_well_being_table[ + (options_dict["Mission Prevalence"], options_dict["Public Well-being Impact"]) + ] + + decision_key = ( + options_dict.get("Exploitation"), + options_dict.get("Automatable"), + options_dict.get("Technical Impact"), + options_dict.get("Mission & Well-being"), + ) + + decision_points = { + "Exploitation": {"E": {"none": "N", "poc": "P", "active": "A"}}, + "Automatable": {"A": {"no": "N", "yes": "Y"}}, + "Technical Impact": {"T": {"partial": "P", "total": "T"}}, + "Public Well-being Impact": {"B": {"minimal": "M", "material": "A", "irreversible": "I"}}, + "Mission Prevalence": {"P": {"minimal": "M", "support": "S", "essential": "E"}}, + "Mission & Well-being": {"M": {"low": "L", "medium": "M", "high": "H"}}, + } + + # Create the SSVC vector + ssvc_vector = "SSVCv2/" + for key, value_map in options_dict.items(): + options_key = decision_points.get(key) + for lhs, rhs_map in options_key.items(): + ssvc_vector += f"{lhs}:{rhs_map.get(value_map)}/" + + # "Decision": {"D": {"Track": "T", "Track*": "R", "Attend": "A", "Act": "C"}}, + decision_values = {"Track": "T", "Track*": "R", "Attend": "A", "Act": "C"} + + decision_lookup = { + ("none", "no", "partial", "low"): "Track", + ("none", "no", "partial", "medium"): "Track", + ("none", "no", "partial", "high"): "Track", + ("none", "no", "total", "low"): "Track", + ("none", "no", "total", "medium"): "Track", + ("none", "no", "total", "high"): "Track*", + ("none", "yes", "partial", "low"): "Track", + ("none", "yes", "partial", "medium"): "Track", + ("none", "yes", "partial", "high"): "Attend", + ("none", "yes", "total", "low"): "Track", + ("none", "yes", "total", "medium"): "Track", + ("none", "yes", "total", "high"): "Attend", + ("poc", "no", "partial", "low"): "Track", + ("poc", "no", "partial", "medium"): "Track", + ("poc", "no", "partial", "high"): "Track*", + ("poc", "no", "total", "low"): "Track", + ("poc", "no", "total", "medium"): "Track*", + ("poc", "no", "total", "high"): "Attend", + ("poc", "yes", "partial", "low"): "Track", + ("poc", "yes", "partial", "medium"): "Track", + ("poc", "yes", "partial", "high"): "Attend", + ("poc", "yes", "total", "low"): "Track", + ("poc", "yes", "total", "medium"): "Track*", + ("poc", "yes", "total", "high"): "Attend", + ("active", "no", "partial", "low"): "Track", + ("active", "no", "partial", "medium"): "Track", + ("active", "no", "partial", "high"): "Attend", + ("active", "no", "total", "low"): "Track", + ("active", "no", "total", "medium"): "Attend", + ("active", "no", "total", "high"): "Act", + ("active", "yes", "partial", "low"): "Attend", + ("active", "yes", "partial", "medium"): "Attend", + ("active", "yes", "partial", "high"): "Act", + ("active", "yes", 
"total", "low"): "Attend", + ("active", "yes", "total", "medium"): "Act", + ("active", "yes", "total", "high"): "Act", + } + + decision = decision_lookup.get(decision_key, "") + + if decision: + ssvc_vector += f"D:{decision_values.get(decision)}/" + + if timestamp: + timestamp_formatted = dateparser.parse(timestamp).strftime("%Y-%m-%dT%H:%M:%SZ") + + ssvc_vector += f"{timestamp_formatted}/" + return ssvc_vector, decision diff --git a/vulnerabilities/pipelines/v2_improvers/collect_commits.py b/vulnerabilities/pipelines/v2_improvers/collect_commits.py new file mode 100644 index 000000000..32fb1ce79 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/collect_commits.py @@ -0,0 +1,252 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import re + +from aboutcode.pipeline import LoopProgress + +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import CodeFixV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +def is_vcs_url_already_processed(commit_id): + """ + Check if a VCS URL exists in a CodeFix entry. + """ + return CodeFixV2.objects.filter(commits__contains=[commit_id]).exists() + + +class CollectFixCommitsPipeline(VulnerableCodePipeline): + """ + Improver pipeline to scout References and create CodeFix entries. + """ + + pipeline_id = "collect_fix_commits_v2" + license_expression = None + + @classmethod + def steps(cls): + return (cls.collect_and_store_fix_commits,) + + def collect_and_store_fix_commits(self): + affected_advisories = ( + AdvisoryV2.objects.filter(affecting_packages__isnull=False) + .prefetch_related("affecting_packages") + .distinct() + ) + + self.log(f"Processing {affected_advisories.count():,d} references to collect fix commits.") + + created_fix_count = 0 + progress = LoopProgress(total_iterations=affected_advisories.count(), logger=self.log) + + for adv in progress.iter(affected_advisories.paginated(per_page=500)): + for reference in adv.references.all(): + if not "/commit/" in reference.url: + continue + if not is_vcs_url(reference.url): + continue + + vcs_url = normalize_vcs_url(repo_url=reference.url) + + if not vcs_url: + continue + + # Skip if already processed + if is_vcs_url_already_processed(commit_id=vcs_url): + self.log( + f"Skipping already processed reference: {reference.url} with VCS URL {vcs_url}" + ) + continue + # check if vcs_url has commit + for package in adv.affecting_packages.all(): + code_fix, created = CodeFixV2.objects.get_or_create( + commits=[vcs_url], + advisory=adv, + affected_package=package, + ) + + if created: + created_fix_count += 1 + self.log( + f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}" + ) + + self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") + + +PLAIN_URLS = ( + "https://", + "http://", +) + +VCS_URLS = ( + "git://", + "git+git://", + "git+https://", + "git+http://", + "hg://", + "hg+http://", + "hg+https://", + "svn://", + "svn+https://", + "svn+http://", +) + + +# TODO: This function was borrowed from scancode-toolkit. We need to create a shared library for that. 
+def normalize_vcs_url(repo_url, vcs_tool=None):
+    """
+    Return a normalized version control URL given some `repo_url` and an
+    optional `vcs_tool` hint (such as 'git', 'hg', etc.)
+
+    Return None if repo_url is not recognized as a VCS URL.
+
+    Handles shortcuts for GitHub, GitHub gist, Bitbucket, or GitLab repositories
+    and more using the same approach as npm install:
+
+    See https://docs.npmjs.com/files/package.json#repository
+    or https://getcomposer.org/doc/05-repositories.md
+
+    This is done here in npm:
+    https://github.com/npm/npm/blob/d3c858ce4cfb3aee515bb299eb034fe1b5e44344/node_modules/hosted-git-info/git-host-info.js
+
+    These should be resolved:
+        npm/npm
+        gist:11081aaa281
+        bitbucket:example/repo
+        gitlab:another/repo
+        expressjs/serve-static
+        git://github.com/angular/di.js.git
+        git://github.com/hapijs/boom
+        git@github.com:balderdashy/waterline-criteria.git
+        http://github.com/ariya/esprima.git
+        http://github.com/isaacs/nopt
+        https://github.com/chaijs/chai
+        https://github.com/christkv/kerberos.git
+        https://gitlab.com/foo/private.git
+        git@gitlab.com:foo/private.git
+    """
+    if not repo_url or not isinstance(repo_url, str):
+        return
+
+    repo_url = repo_url.strip()
+    if not repo_url:
+        return
+
+    # TODO: If we match http and https, we may want to add more checks in
+    # case the URL is not a repo URL. For example, check the domain
+    # name in the URL...
+    if repo_url.startswith(VCS_URLS + PLAIN_URLS):
+        return repo_url
+
+    if repo_url.startswith("git@"):
+        tool, _, right = repo_url.partition("@")
+        if ":" in repo_url:
+            host, _, repo = right.partition(":")
+        else:
+            # git@github.com/Filirom1/npm2aur.git
+            host, _, repo = right.partition("/")
+
+        if any(r in host for r in ("bitbucket", "gitlab", "github")):
+            scheme = "https"
+        else:
+            scheme = "git"
+
+        return f"{scheme}://{host}/{repo}"
+
+    # FIXME: where these URL schemes come from??
+    if repo_url.startswith(("bitbucket:", "gitlab:", "github:", "gist:")):
+        hoster, _, repo = repo_url.partition(":")
+        hoster_urls = {
+            "bitbucket": f"https://bitbucket.org/{repo}",
+            "github": f"https://github.com/{repo}",
+            "gitlab": f"https://gitlab.com/{repo}",
+            "gist": f"https://gist.github.com/{repo}",
+        }
+        return hoster_urls[hoster]
+
+    if len(repo_url.split("/")) == 2:
+        # implicit github, but that's only on NPM?
+        return f"https://github.com/{repo_url}"
+    return repo_url
+
+
+def is_vcs_url(repo_url):
+    """
+    Check if a given URL or string matches a valid VCS (Version Control System) URL.
+
+    Supports:
+    - Standard VCS URL protocols (git, http, https, ssh)
+    - Shortcut syntax (e.g., github:user/repo, gitlab:group/repo)
+    - GitHub shortcut (e.g., user/repo)
+
+    Args:
+        repo_url (str): The repository URL or shortcut to validate.
+
+    Returns:
+        bool: True if the string is a valid VCS URL, False otherwise.
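+
+    Note: full URLs are only recognized on github.com, gitlab.com,
+    bitbucket.org and gist.github.com; URLs on other hosts return False.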
+ + Examples: + >>> is_vcs_url("git://github.com/angular/di.js.git") + True + >>> is_vcs_url("github:user/repo") + True + >>> is_vcs_url("user/repo") + True + >>> is_vcs_url("https://github.com/user/repo.git") + True + >>> is_vcs_url("git@github.com:user/repo.git") + True + >>> is_vcs_url("http://github.com/isaacs/nopt") + True + >>> is_vcs_url("https://gitlab.com/foo/private.git") + True + >>> is_vcs_url("git@gitlab.com:foo/private.git") + True + >>> is_vcs_url("bitbucket:example/repo") + True + >>> is_vcs_url("gist:11081aaa281") + True + >>> is_vcs_url("ftp://example.com/not-a-repo") + False + >>> is_vcs_url("random-string") + False + >>> is_vcs_url("https://example.com/not-a-repo") + False + """ + if not repo_url or not isinstance(repo_url, str): + return False + + repo_url = repo_url.strip() + if not repo_url: + return False + + # Define valid VCS domains + vcs_domains = r"(github\.com|gitlab\.com|bitbucket\.org|gist\.github\.com)" + + # 1. Match URLs with standard protocols pointing to VCS domains + if re.match(rf"^(git|ssh|http|https)://{vcs_domains}/[\w\-.]+/[\w\-.]+", repo_url): + return True + + # 2. Match SSH URLs (e.g., git@github.com:user/repo.git) + if re.match(rf"^git@{vcs_domains}:[\w\-.]+/[\w\-.]+(\.git)?$", repo_url): + return True + + # 3. Match shortcut syntax (e.g., github:user/repo) + if re.match(r"^(github|gitlab|bitbucket|gist):[\w\-./]+$", repo_url): + return True + + # 4. Match implicit GitHub shortcut (e.g., user/repo) + if re.match(r"^[\w\-]+/[\w\-]+$", repo_url): + return True + + return False diff --git a/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py new file mode 100644 index 000000000..55608f0d1 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/compute_package_risk.py @@ -0,0 +1,143 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +from aboutcode.pipeline import LoopProgress + +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.risk import compute_package_risk_v2 +from vulnerabilities.risk import compute_vulnerability_risk_factors + + +class ComputePackageRiskPipeline(VulnerableCodePipeline): + """ + Compute risk score for packages. 
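+    Exploitability and weighted severity are computed per advisory with
+    compute_vulnerability_risk_factors() and rolled up per package with
+    compute_package_risk_v2(), both from vulnerabilities.risk.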
+
+    See https://github.com/aboutcode-org/vulnerablecode/issues/1543
+    """
+
+    pipeline_id = "compute_package_risk_v2"
+    license_expression = None
+
+    @classmethod
+    def steps(cls):
+        return (
+            cls.compute_and_store_vulnerability_risk_score,
+            cls.compute_and_store_package_risk_score,
+        )
+
+    def compute_and_store_vulnerability_risk_score(self):
+        affected_advisories = (
+            AdvisoryV2.objects.filter(affecting_packages__isnull=False)
+            .prefetch_related(
+                "references",
+                "severities",
+                "exploits",
+            )
+            .distinct()
+        )
+
+        self.log(
+            f"Calculating risk for {affected_advisories.count():,d} advisories with affected packages"
+        )
+
+        progress = LoopProgress(total_iterations=affected_advisories.count(), logger=self.log)
+
+        updatables = []
+        updated_vulnerability_count = 0
+        batch_size = 5000
+
+        for advisory in progress.iter(affected_advisories.paginated(per_page=batch_size)):
+            severities = advisory.severities.all()
+            references = advisory.references.all()
+            exploits = advisory.exploits.all()
+
+            weighted_severity, exploitability = compute_vulnerability_risk_factors(
+                references=references,
+                severities=severities,
+                exploits=exploits,
+            )
+            advisory.weighted_severity = weighted_severity
+            advisory.exploitability = exploitability
+            updatables.append(advisory)
+
+            if len(updatables) >= batch_size:
+                updated_vulnerability_count += bulk_update(
+                    model=AdvisoryV2,
+                    items=updatables,
+                    fields=["weighted_severity", "exploitability"],
+                    logger=self.log,
+                )
+
+        updated_vulnerability_count += bulk_update(
+            model=AdvisoryV2,
+            items=updatables,
+            fields=["weighted_severity", "exploitability"],
+            logger=self.log,
+        )
+
+        self.log(
+            f"Successfully added risk score for {updated_vulnerability_count:,d} advisories"
+        )
+
+    def compute_and_store_package_risk_score(self):
+        affected_packages = (
+            PackageV2.objects.filter(affected_by_advisories__isnull=False)
+        ).distinct()
+
+        self.log(f"Calculating risk for {affected_packages.count():,d} affected package records")
+
+        progress = LoopProgress(
+            total_iterations=affected_packages.count(),
+            logger=self.log,
+            progress_step=5,
+        )
+
+        updatables = []
+        updated_package_count = 0
+        batch_size = 10000
+
+        for package in progress.iter(affected_packages.paginated(per_page=batch_size)):
+            risk_score = compute_package_risk_v2(package)
+
+            if not risk_score:
+                continue
+
+            package.risk_score = risk_score
+            updatables.append(package)
+
+            if len(updatables) >= batch_size:
+                updated_package_count += bulk_update(
+                    model=PackageV2,
+                    items=updatables,
+                    fields=["risk_score"],
+                    logger=self.log,
+                )
+        updated_package_count += bulk_update(
+            model=PackageV2,
+            items=updatables,
+            fields=["risk_score"],
+            logger=self.log,
+        )
+        self.log(f"Successfully added risk score for {updated_package_count:,d} packages")
+
+
+def bulk_update(model, items, fields, logger):
+    item_count = 0
+    if items:
+        try:
+            model.objects.bulk_update(objs=items, fields=fields)
+            item_count += len(items)
+        except Exception as e:
+            logger(f"Error updating {model.__name__}: {e}")
+        items.clear()
+    return item_count
diff --git a/vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py b/vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py
new file mode 100644
index 000000000..dd10a1695
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_improvers/computer_package_version_rank.py
@@ -0,0 +1,93 @@
+#
+# Copyright (c)
nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+from itertools import groupby
+
+from aboutcode.pipeline import LoopProgress
+from django.db import transaction
+from univers.version_range import RANGE_CLASS_BY_SCHEMES
+from univers.versions import Version
+
+from vulnerabilities.models import PackageV2
+from vulnerabilities.pipelines import VulnerableCodePipeline
+
+
+class ComputeVersionRankPipeline(VulnerableCodePipeline):
+    """
+    A pipeline to compute and assign version ranks for all packages.
+    """
+
+    pipeline_id = "compute_version_rank_v2"
+    license_expression = None
+
+    @classmethod
+    def steps(cls):
+        return (cls.compute_and_store_version_rank,)
+
+    def compute_and_store_version_rank(self):
+        """
+        Compute and assign version ranks to all packages.
+        """
+        package_qs = PackageV2.objects.only("type", "namespace", "name").order_by(
+            "type", "namespace", "name"
+        )
+
+        def key(package):
+            return package.type, package.namespace, package.name
+
+        # Materialize the groups: groupby yields lazy iterators that are
+        # invalidated once the outer iteration advances.
+        groups = [(group_key, list(group)) for group_key, group in groupby(package_qs, key=key)]
+
+        total_groups = len(groups)
+        self.log(f"Calculating `version_rank` for {total_groups:,d} groups of packages.")
+
+        progress = LoopProgress(
+            total_iterations=total_groups,
+            logger=self.log,
+            progress_step=5,
+        )
+
+        for group, packages in progress.iter(groups):
+            package_type, namespace, name = group
+            if package_type not in RANGE_CLASS_BY_SCHEMES:
+                continue
+            self.update_version_rank_for_group(packages)
+
+        self.log("Successfully populated `version_rank` for all packages.")
+
+    @transaction.atomic
+    def update_version_rank_for_group(self, packages):
+        """
+        Update the `version_rank` for all packages in a specific group.
+        """
+
+        # Sort the packages by version
+        sorted_packages = self.sort_packages_by_version(packages)
+
+        # Assign version ranks, starting at 1 for the oldest version
+        updates = []
+        for rank, package in enumerate(sorted_packages, start=1):
+            package.version_rank = rank
+            updates.append(package)
+
+        # Bulk update to save the ranks
+        PackageV2.objects.bulk_update(updates, fields=["version_rank"])
+
+    def sort_packages_by_version(self, packages):
+        """
+        Sort packages by version using the `version_class` for their ecosystem.
+        Return an empty list when the ecosystem has no known versioning scheme.
+        """
+
+        if not packages:
+            return []
+        range_class = RANGE_CLASS_BY_SCHEMES.get(packages[0].type)
+        if not range_class:
+            # Unknown versioning scheme: these versions cannot be ordered reliably.
+            return []
+        version_class = range_class.version_class or Version
+        return sorted(packages, key=lambda p: version_class(p.version))
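Editor's note: the ranking leans on univers to compare versions with ecosystem-aware semantics instead of lexicographically. A small standalone illustration of why that matters, assuming only what the pipeline above already uses (`RANGE_CLASS_BY_SCHEMES` maps a package type to a range class exposing `version_class`):

    from univers.version_range import RANGE_CLASS_BY_SCHEMES

    version_class = RANGE_CLASS_BY_SCHEMES["pypi"].version_class
    versions = ["1.10.0", "1.2.0", "1.9.0"]
    print(sorted(versions))                     # ['1.10.0', '1.2.0', '1.9.0'] -- string order
    print(sorted(versions, key=version_class))  # ['1.2.0', '1.9.0', '1.10.0'] -- version order

diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py
new file mode 100644
index 000000000..c306502d8
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_exploitdb.py
@@ -0,0 +1,169 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.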
+#
+
+import csv
+import io
+import logging
+from traceback import format_exc as traceback_format_exc
+
+import requests
+from aboutcode.pipeline import LoopProgress
+from dateutil import parser as dateparser
+from django.db import DataError
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryReference
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines import VulnerableCodePipeline
+
+
+class ExploitDBImproverPipeline(VulnerableCodePipeline):
+    """
+    ExploitDB Improver Pipeline: Fetch ExploitDB data, iterate over it to find the advisories
+    matching each exploit's aliases, and create or update the corresponding exploit and
+    reference records.
+    """
+
+    pipeline_id = "enhance_with_exploitdb_v2"
+    spdx_license_expression = "GPL-2.0"
+
+    @classmethod
+    def steps(cls):
+        return (
+            cls.fetch_exploits,
+            cls.add_exploit,
+        )
+
+    def fetch_exploits(self):
+        exploit_db_url = (
+            "https://gitlab.com/exploit-database/exploitdb/-/raw/main/files_exploits.csv"
+        )
+        self.log(f"Fetching {exploit_db_url}")
+
+        try:
+            response = requests.get(exploit_db_url)
+            response.raise_for_status()
+        except requests.exceptions.HTTPError as http_err:
+            self.log(
+                f"Failed to fetch the Exploit-DB Exploits: {exploit_db_url} with error {http_err!r}:\n{traceback_format_exc()}",
+                level=logging.ERROR,
+            )
+            raise
+
+        self.exploit_data = io.StringIO(response.text)
+
+    def add_exploit(self):
+        csvreader = csv.DictReader(self.exploit_data)
+
+        raw_data = list(csvreader)
+        fetched_exploit_count = len(raw_data)
+
+        advisory_exploit_count = 0
+        self.log(f"Enriching advisories with {fetched_exploit_count:,d} exploit records")
+        progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log)
+
+        for row in progress.iter(raw_data):
+            advisory_exploit_count += add_vulnerability_exploit(row, self.log)
+
+        self.log(f"Successfully added {advisory_exploit_count:,d} Exploit-DB advisory exploits")
+
+
+def add_vulnerability_exploit(row, logger):
+    advisories = set()
+
+    aliases = row["codes"].split(";") if row["codes"] else []
+
+    if not aliases:
+        return 0
+
+    for raw_alias in aliases:
+        # Look up the alias first; fall back to a direct advisory id match.
+        # filter().first() returns None instead of raising DoesNotExist, so
+        # the fallback branch is actually reachable.
+        alias = AdvisoryAlias.objects.filter(alias=raw_alias).first()
+        if alias:
+            advisories.update(alias.advisories.all())
+        else:
+            advisories.update(AdvisoryV2.objects.filter(advisory_id=raw_alias))
+
+    if not advisories:
+        logger(f"No advisory found for aliases {aliases}")
+        return 0
+
+    date_added = parse_date(row["date_added"])
+    source_date_published = parse_date(row["date_published"])
+    source_date_updated = parse_date(row["date_updated"])
+
+    for advisory in advisories:
+        add_exploit_references(row["codes"], row["source_url"], row["file"], advisory, logger)
+        try:
+            AdvisoryExploit.objects.update_or_create(
+                advisory=advisory,
+                data_source="Exploit-DB",
+                defaults={
+                    "date_added": date_added,
+                    "description": row["description"],
+                    "known_ransomware_campaign_use": row["verified"],
+                    "source_date_published": source_date_published,
+                    "exploit_type": row["type"],
+                    "platform": row["platform"],
+                    "source_date_updated": source_date_updated,
+                    "source_url": row["source_url"],
+                },
+            )
+        except DataError as e:
+            logger(
+                f"Failed to create the Exploit-DB advisory exploit with error {e!r}:\n{traceback_format_exc()}",
+                level=logging.ERROR,
+            )
+    return 1
+
+
+def add_exploit_references(ref_id, direct_url, path, adv, logger):
+    url_map = {
+        "file_url": f"https://gitlab.com/exploit-database/exploitdb/-/blob/main/{path}",
+        "direct_url": direct_url,
+    }
+
+    for key, url in url_map.items():
+        if url:
+            try:
+                ref, created = AdvisoryReference.objects.update_or_create(
+                    url=url,
+                    defaults={
+                        "reference_id": ref_id,
+                        "reference_type": AdvisoryReference.EXPLOIT,
+                    },
+                )
+
+                # Link the advisory even when the reference already existed;
+                # add() persists the relation, so no extra save() is needed.
+                ref.advisories.add(adv)
+                if created:
+                    logger(f"Created {ref} for {adv} with {key}={url}")
+
+            except DataError as e:
+                logger(
+                    f"Failed to create the Exploit-DB advisory reference with error {e!r}:\n{traceback_format_exc()}",
+                    level=logging.ERROR,
+                )
+
+
+def parse_date(date_string):
+    """Return `date_string` normalized to YYYY-MM-DD, or None when it cannot be parsed."""
+    if date_string:
+        try:
+            return dateparser.parse(date_string).date().strftime("%Y-%m-%d")
+        except Exception as e:
+            logging.error(
+                f"Error while parsing ExploitDB date '{date_string}' with error {e!r}:\n{traceback_format_exc()}"
+            )
+    return
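Editor's note: all three exploit improvers in this patch resolve external identifiers with the same two-step lookup, rewritten above so the fallback is reachable (`QuerySet.get()` raises `DoesNotExist` rather than returning None, so the original `if alias := ...get(...)` could never take its `else` branch). A condensed sketch of the corrected lookup; the CVE id is an example value:

    # Resolve an identifier to advisories: alias table first, then direct advisory ids.
    alias = AdvisoryAlias.objects.filter(alias="CVE-2009-3699").first()
    if alias:
        advisories = set(alias.advisories.all())
    else:
        advisories = set(AdvisoryV2.objects.filter(advisory_id="CVE-2009-3699"))

diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py
new file mode 100644
index 000000000..486d79232
--- /dev/null
+++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_kev.py
@@ -0,0 +1,103 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import logging
+from traceback import format_exc as traceback_format_exc
+
+import requests
+from aboutcode.pipeline import LoopProgress
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines import VulnerableCodePipeline
+
+
+class VulnerabilityKevPipeline(VulnerableCodePipeline):
+    """
+    Known Exploited Vulnerabilities Pipeline: Retrieve KEV data, iterate through it to identify
+    advisories by their associated aliases, and create or update the corresponding Exploit instances.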
+ """ + + pipeline_id = "enhance_with_kev_v2" + license_expression = None + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_exploits, + ) + + def fetch_exploits(self): + kev_url = "https://raw.githubusercontent.com/aboutcode-org/aboutcode-mirror-kev/refs/heads/main/known_exploited_vulnerabilities.json" + self.log(f"Fetching {kev_url}") + + try: + response = requests.get(kev_url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the KEV Exploits: {kev_url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + self.kev_data = response.json() + + def add_exploits(self): + fetched_exploit_count = self.kev_data.get("count") + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + + vulnerability_exploit_count = 0 + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + + for record in progress.iter(self.kev_data.get("vulnerabilities", [])): + vulnerability_exploit_count += add_vulnerability_exploit( + kev_vul=record, + logger=self.log, + ) + + self.log(f"Successfully added {vulnerability_exploit_count:,d} kev exploit") + + +def add_vulnerability_exploit(kev_vul, logger): + cve_id = kev_vul.get("cveID") + + if not cve_id: + return 0 + + advisories = set() + try: + if alias := AdvisoryAlias.objects.get(alias=cve_id): + for adv in alias.advisories.all(): + advisories.add(adv) + else: + advs = AdvisoryV2.objects.filter(advisory_id=cve_id) + for adv in advs: + advisories.add(adv) + except AdvisoryAlias.DoesNotExist: + logger(f"No advisory found for aliases {cve_id}") + return 0 + + for advisory in advisories: + AdvisoryExploit.objects.update_or_create( + advisory=advisory, + data_source="KEV", + defaults={ + "description": kev_vul["shortDescription"], + "date_added": kev_vul["dateAdded"], + "required_action": kev_vul["requiredAction"], + "due_date": kev_vul["dueDate"], + "notes": kev_vul["notes"], + "known_ransomware_campaign_use": True + if kev_vul["knownRansomwareCampaignUse"] == "Known" + else False, + }, + ) + return 1 diff --git a/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py b/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py new file mode 100644 index 000000000..fbfea5150 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/enhance_with_metasploit.py @@ -0,0 +1,126 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +from traceback import format_exc as traceback_format_exc + +import requests +import saneyaml +from aboutcode.pipeline import LoopProgress +from dateutil import parser as dateparser + +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryExploit +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class MetasploitImproverPipeline(VulnerableCodePipeline): + """ + Metasploit Exploits Pipeline: Retrieve Metasploit data, iterate through it to identify vulnerabilities + by their associated aliases, and create or update the corresponding Exploit instances. 
+ """ + + pipeline_id = "enhance_with_metasploit_v2" + spdx_license_expression = "BSD-3-clause" + + @classmethod + def steps(cls): + return ( + cls.fetch_exploits, + cls.add_advisory_exploits, + ) + + def fetch_exploits(self): + url = "https://raw.githubusercontent.com/rapid7/metasploit-framework/master/db/modules_metadata_base.json" + self.log(f"Fetching {url}") + try: + response = requests.get(url) + response.raise_for_status() + except requests.exceptions.HTTPError as http_err: + self.log( + f"Failed to fetch the Metasploit Exploits: {url} with error {http_err!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + raise + + self.metasploit_data = response.json() + + def add_advisory_exploits(self): + fetched_exploit_count = len(self.metasploit_data) + self.log(f"Enhancing the vulnerability with {fetched_exploit_count:,d} exploit records") + + vulnerability_exploit_count = 0 + progress = LoopProgress(total_iterations=fetched_exploit_count, logger=self.log) + for _, record in progress.iter(self.metasploit_data.items()): + vulnerability_exploit_count += add_advisory_exploit( + record=record, + logger=self.log, + ) + self.log(f"Successfully added {vulnerability_exploit_count:,d} vulnerability exploit") + + +def add_advisory_exploit(record, logger): + advisories = set() + references = record.get("references", []) + + interesting_references = [ + ref for ref in references if not ref.startswith("OSVDB") and not ref.startswith("URL-") + ] + + if not interesting_references: + return 0 + + for ref in interesting_references: + try: + if alias := AdvisoryAlias.objects.get(alias=ref): + for adv in alias.advisories.all(): + advisories.add(adv) + else: + advs = AdvisoryV2.objects.filter(advisory_id=ref) + for adv in advs: + advisories.add(adv) + except AdvisoryAlias.DoesNotExist: + continue + + if not advisories: + logger(f"No advisories found for aliases {interesting_references}") + return 0 + + description = record.get("description", "") + notes = record.get("notes", {}) + platform = record.get("platform") + + source_url = "" + if path := record.get("path"): + source_url = f"https://github.com/rapid7/metasploit-framework/tree/master{path}" + source_date_published = None + + if disclosure_date := record.get("disclosure_date"): + try: + source_date_published = dateparser.parse(disclosure_date).date() + except ValueError as e: + logger( + f"Error while parsing date {disclosure_date} with error {e!r}:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + + for advisory in advisories: + AdvisoryExploit.objects.update_or_create( + advisory=advisory, + data_source="Metasploit", + defaults={ + "description": description, + "notes": saneyaml.dump(notes), + "source_date_published": source_date_published, + "platform": platform, + "source_url": source_url, + }, + ) + return 1 diff --git a/vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py b/vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py new file mode 100644 index 000000000..8a4825df4 --- /dev/null +++ b/vulnerabilities/pipelines/v2_improvers/flag_ghost_packages.py @@ -0,0 +1,104 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+#
+
+import logging
+from itertools import groupby
+from traceback import format_exc as traceback_format_exc
+
+from aboutcode.pipeline import LoopProgress
+from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS as FETCHCODE_SUPPORTED_ECOSYSTEMS
+from fetchcode.package_versions import versions
+from packageurl import PackageURL
+
+from vulnerabilities.models import PackageV2
+from vulnerabilities.pipelines import VulnerableCodePipeline
+
+
+class FlagGhostPackagePipeline(VulnerableCodePipeline):
+    """Detect and flag packages that do not exist upstream."""
+
+    pipeline_id = "flag_ghost_packages_v2"
+
+    @classmethod
+    def steps(cls):
+        return (cls.flag_ghost_packages,)
+
+    def flag_ghost_packages(self):
+        detect_and_flag_ghost_packages(logger=self.log)
+
+
+def detect_and_flag_ghost_packages(logger=None):
+    """Check if packages are available upstream. If not, mark them as ghost packages."""
+    interesting_packages_qs = (
+        PackageV2.objects.order_by("type", "namespace", "name")
+        .filter(type__in=FETCHCODE_SUPPORTED_ECOSYSTEMS)
+        .filter(qualifiers="")
+        .filter(subpath="")
+    )
+
+    distinct_packages_count = (
+        interesting_packages_qs.values("type", "namespace", "name")
+        .distinct("type", "namespace", "name")
+        .count()
+    )
+
+    # The queryset is ordered by the grouping key; see the illustration below.
+    grouped_packages = groupby(
+        interesting_packages_qs.paginated(),
+        key=lambda pkg: (pkg.type, pkg.namespace, pkg.name),
+    )
+
+    ghost_package_count = 0
+    progress = LoopProgress(total_iterations=distinct_packages_count, logger=logger)
+    for type_namespace_name, packages in progress.iter(grouped_packages):
+        ghost_package_count += flag_ghost_packages(
+            base_purl=PackageURL(*type_namespace_name),
+            packages=packages,
+            logger=logger,
+        )
+
+    if logger:
+        logger(f"Successfully flagged {ghost_package_count:,d} ghost packages")
+
+
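Editor's note: `itertools.groupby` only merges adjacent items, which is why `detect_and_flag_ghost_packages` orders the queryset by the exact key it later groups on. A standalone illustration with made-up rows:

    from itertools import groupby

    # Hypothetical (type, namespace, name, version) rows, pre-sorted by key.
    rows = [
        ("pypi", "", "django", "3.2"),
        ("pypi", "", "django", "4.0"),
        ("pypi", "", "flask", "2.0"),
    ]
    for key, group in groupby(rows, key=lambda r: r[:3]):
        print(key, [r[3] for r in group])
    # ('pypi', '', 'django') ['3.2', '4.0']
    # ('pypi', '', 'flask') ['2.0']

+def flag_ghost_packages(base_purl, packages, logger=None):
+    """
+    Check if `packages` are available upstream.
+    If not, update `is_ghost` to `True`.
+    Return the number of packages flagged as ghost.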
+ """ + known_versions = get_versions(purl=base_purl, logger=logger) + # Skip if encounter error while fetching known versions + if known_versions is None: + return 0 + + ghost_packages = 0 + for pkg in packages: + pkg.is_ghost = False + if pkg.version.lstrip("vV") not in known_versions: + pkg.is_ghost = True + ghost_packages += 1 + + if logger: + logger(f"Flagging ghost package {pkg.purl!s}", level=logging.DEBUG) + pkg.save() + + return ghost_packages + + +def get_versions(purl, logger=None): + """Return set of known versions for the given purl.""" + try: + return {v.value.lstrip("vV") for v in versions(str(purl))} + except Exception as e: + if logger: + logger( + f"Error while fetching known versions for {purl!s}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + return diff --git a/vulnerabilities/pipes/advisory.py b/vulnerabilities/pipes/advisory.py index 46f8b1ed3..d5d88fbfd 100644 --- a/vulnerabilities/pipes/advisory.py +++ b/vulnerabilities/pipes/advisory.py @@ -22,6 +22,11 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.models import Advisory +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisorySeverity +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import AdvisoryWeakness from vulnerabilities.models import AffectedByPackageRelatedVulnerability from vulnerabilities.models import Alias from vulnerabilities.models import FixingPackageRelatedVulnerability @@ -38,6 +43,61 @@ def get_or_create_aliases(aliases: List) -> QuerySet: return Alias.objects.filter(alias__in=aliases) +from django.db.models import Q + + +def get_or_create_advisory_aliases(aliases: List[str]) -> List[AdvisoryAlias]: + existing = AdvisoryAlias.objects.filter(alias__in=aliases) + existing_aliases = {a.alias for a in existing} + + to_create = [AdvisoryAlias(alias=alias) for alias in aliases if alias not in existing_aliases] + AdvisoryAlias.objects.bulk_create(to_create, ignore_conflicts=True) + + return list(AdvisoryAlias.objects.filter(alias__in=aliases)) + + +def get_or_create_advisory_references(references: List) -> List[AdvisoryReference]: + reference_urls = [ref.url for ref in references] + existing = AdvisoryReference.objects.filter(url__in=reference_urls) + existing_urls = {r.url for r in existing} + + to_create = [ + AdvisoryReference(reference_id=ref.reference_id, url=ref.url) + for ref in references + if ref.url not in existing_urls + ] + AdvisoryReference.objects.bulk_create(to_create, ignore_conflicts=True) + + return list(AdvisoryReference.objects.filter(url__in=reference_urls)) + + +def get_or_create_advisory_severities(severities: List) -> QuerySet: + severity_objs = [] + for severity in severities: + published_at = str(severity.published_at) if severity.published_at else None + sev, _ = AdvisorySeverity.objects.get_or_create( + scoring_system=severity.system.identifier, + value=severity.value, + scoring_elements=severity.scoring_elements, + defaults={ + "published_at": published_at, + }, + url=severity.url, + ) + severity_objs.append(sev) + return AdvisorySeverity.objects.filter(id__in=[severity.id for severity in severity_objs]) + + +def get_or_create_advisory_weaknesses(weaknesses: List[str]) -> List[AdvisoryWeakness]: + existing = AdvisoryWeakness.objects.filter(cwe_id__in=weaknesses) + existing_ids = {w.cwe_id for w in existing} + + to_create = [AdvisoryWeakness(cwe_id=w) for w in 
weaknesses if w not in existing_ids] + AdvisoryWeakness.objects.bulk_create(to_create, ignore_conflicts=True) + + return list(AdvisoryWeakness.objects.filter(cwe_id__in=weaknesses)) + + def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = None): from vulnerabilities.utils import compute_content_id @@ -76,6 +136,64 @@ def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = return advisory_obj +def insert_advisory_v2( + advisory: AdvisoryData, + pipeline_id: str, + get_advisory_packages: Callable, + logger: Callable = None, +): + from vulnerabilities.utils import compute_content_id + + advisory_obj = None + aliases = get_or_create_advisory_aliases(aliases=advisory.aliases) + references = get_or_create_advisory_references(references=advisory.references_v2) + severities = get_or_create_advisory_severities(severities=advisory.severities) + weaknesses = get_or_create_advisory_weaknesses(weaknesses=advisory.weaknesses) + content_id = compute_content_id(advisory_data=advisory) + affecting_packages, fixed_by_packages = get_advisory_packages(advisory_data=advisory) + try: + default_data = { + "datasource_id": pipeline_id, + "advisory_id": advisory.advisory_id, + "avid": f"{pipeline_id}/{advisory.advisory_id}", + "summary": advisory.summary, + "date_published": advisory.date_published, + "date_collected": datetime.now(timezone.utc), + } + + advisory_obj, _ = AdvisoryV2.objects.get_or_create( + unique_content_id=content_id, + url=advisory.url, + defaults=default_data, + ) + related_fields = { + "aliases": aliases, + "references": references, + "severities": severities, + "weaknesses": weaknesses, + "fixed_by_packages": fixed_by_packages, + "affecting_packages": affecting_packages, + } + + for field_name, values in related_fields.items(): + if values: + getattr(advisory_obj, field_name).add(*values) + + except Advisory.MultipleObjectsReturned: + logger.error( + f"Multiple Advisories returned: unique_content_id: {content_id}, url: {advisory.url}, advisory: {advisory!r}" + ) + raise + except Exception as e: + if logger: + logger( + f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + + return advisory_obj + + @transaction.atomic def import_advisory( advisory: Advisory, diff --git a/vulnerabilities/risk.py b/vulnerabilities/risk.py index a4508a03f..56f19171e 100644 --- a/vulnerabilities/risk.py +++ b/vulnerabilities/risk.py @@ -36,6 +36,8 @@ def get_weighted_severity(severities): score_list = [] for severity in severities: + if not severity.url: + continue parsed_url = urlparse(severity.url) severity_source = parsed_url.netloc.replace("www.", "", 1) weight = WEIGHT_CONFIG.get(severity_source, DEFAULT_WEIGHT) @@ -112,3 +114,19 @@ def compute_package_risk(package): return return round(max(result), 1) + + +def compute_package_risk_v2(package): + """ + Calculate the risk for a package by iterating over all vulnerabilities that affects this package + and determining the associated risk. 
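Editor's note: for the package-level score below, v2 keeps the v1 rule: a package inherits the highest risk score among its affecting advisories, rounded to one decimal place. A tiny worked example with made-up scores:

    advisory_risks = [2.4, 3.08, 1.0]       # hypothetical advisory risk scores
    package_risk = round(max(advisory_risks), 1)
    print(package_risk)                      # 3.1

+def compute_package_risk_v2(package):
+    """
+    Calculate the risk for a package by iterating over all advisories that affect this package
+    and determining the associated risk.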
+ """ + result = [] + for advisory in package.affected_by_advisories.all(): + if risk := advisory.risk_score: + result.append(float(risk)) + + if not result: + return + + return round(max(result), 1) diff --git a/vulnerabilities/templates/advisory_detail.html b/vulnerabilities/templates/advisory_detail.html new file mode 100644 index 000000000..8a386d4ec --- /dev/null +++ b/vulnerabilities/templates/advisory_detail.html @@ -0,0 +1,614 @@ +{% extends "base.html" %} +{% load humanize %} +{% load widget_tweaks %} +{% load static %} +{% load show_cvss %} +{% load url_filters %} + +{% block title %} +VulnerableCode Advisory Details - {{ advisory.advisory_id }} +{% endblock %} + +{% block content %} + +{% if advisory %} +
+
+
+
+ Advisory details: + + {{advisory.datasource_id}} / {{ advisory.advisory_id }} + +
+
+ + +
+
+
+ + + + + + + + + + + + + + + {% if severity_score_range %} + + + + {% endif %} + + + + + + + + + + + + + + + + + + + + + + + + + +
Advisory ID {{ advisory.datasource_id }}/{{ advisory.advisory_id }}
Aliases + {% for alias in aliases %} + {% if alias.url %} + {{ alias }} + {% else %} + {{ alias }} + {% endif %} +
+ {% endfor %} +
Summary{{ advisory.summary }} +
Severity score range{{ severity_score_range }} +
Status{{ status }}
+ Exploitability + {{ advisory.exploitability }} +
Weighted Severity + {{ advisory.weighted_severity }} +
Risk + {{ advisory.risk_score }} +
Affected and Fixed Packages + + Package Details + +
+
+ Weaknesses ({{ weaknesses|length }}) +
+
+ + {% for weakness in weaknesses %} + + + + + + {% empty %} + + + + {% endfor %} +
CWE-{{ weakness.cwe_id }} + + {{ weakness.name }} + +
+ There are no known CWEs. +
+
+
+
+ + +
+ + + + + + + {% for severity in severities %} + + + + + + {% empty %} + + + + {% endfor %} +
System Score Found at
{{ severity.scoring_system }}{{ severity.value }} + {{ severity.url }} +
+ There are no known severity scores. +
+
+ +
+ + + + + + + + + {% for ref in references %} + + {% if ref.reference_id %} + + {% else %} + + {% endif %} + + {% if ref.reference_type %} + + {% else %} + + {% endif %} + + + + {% empty %} + + + + {% endfor %} +
Reference id Reference type URL
{{ ref.reference_id }}{{ ref.get_reference_type_display }}{{ ref.url }}
+ There are no known references. +
+
+ +
+ {% for exploit in advisory.exploits.all %} + + + + + + + + {% if exploit.date_added %} + + + + + {% endif %} + {% if exploit.description %} + + + + + {% endif %} + {% if exploit.required_action %} + + + + + {% endif %} + {% if exploit.due_date %} + + + + + {% endif %} + {% if exploit.notes %} + + + + + {% endif %} + {% if exploit.known_ransomware_campaign_use is not None %} + + + + + {% endif %} + {% if exploit.source_date_published %} + + + + + {% endif %} + {% if exploit.exploit_type %} + + + + + {% endif %} + {% if exploit.platform %} + + + + + {% endif %} + {% if exploit.source_date_updated %} + + + + + {% endif %} + + {% if exploit.source_url %} + + + + + {% endif %} + +
Data source {{ exploit.data_source }}
+ + Date added + + {{ exploit.date_added }}
+ + Description + + {{ exploit.description }}
+ + Required action + + {{ exploit.required_action }}
+ + Due date + + {{ exploit.due_date }}
+ + Note + +
{{ exploit.notes }}
+ + Ransomware campaign use + + {{ exploit.known_ransomware_campaign_use|yesno:"Known,Unknown" }}
+ + Source publication date + + {{ exploit.source_date_published }}
+ + Exploit type + + {{ exploit.exploit_type }}
+ + Platform + + {{ exploit.platform }}
+ + Source update date + + {{ exploit.source_date_updated }}
+ + Source URL + + {{ exploit.source_url }}
+ {% empty %} + + + No exploits are available. + + + {% endfor %} +
+ +
+ {% for severity_vector in severity_vectors %} + {% if severity_vector.vector.version == '2.0' %} + Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }} + + + + + + + + + + + + + + + + + + + +
Exploitability (E)Access Vector (AV)Access Complexity (AC)Authentication (Au)Confidentiality Impact (C)Integrity Impact (I)Availability Impact (A)
{{ severity_vector.vector.exploitability|cvss_printer:"high,functional,unproven,proof_of_concept,not_defined" }}{{ severity_vector.vector.accessVector|cvss_printer:"local,adjacent_network,network" }}{{ severity_vector.vector.accessComplexity|cvss_printer:"high,medium,low" }}{{ severity_vector.vector.authentication|cvss_printer:"multiple,single,none" }}{{ severity_vector.vector.confidentialityImpact|cvss_printer:"none,partial,complete" }}{{ severity_vector.vector.integrityImpact|cvss_printer:"none,partial,complete" }}{{ severity_vector.vector.availabilityImpact|cvss_printer:"none,partial,complete" }}
+ {% elif severity_vector.vector.version == '3.1' or severity_vector.vector.version == '3.0'%} + Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }} + + + + + + + + + + + + + + + + + + + + + +
Attack Vector (AV)Attack Complexity (AC)Privileges Required (PR)User Interaction (UI)Scope (S)Confidentiality Impact (C)Integrity Impact (I)Availability Impact (A)
{{ severity_vector.vector.attackVector|cvss_printer:"network,adjacent_network,local,physical"}}{{ severity_vector.vector.attackComplexity|cvss_printer:"low,high" }}{{ severity_vector.vector.privilegesRequired|cvss_printer:"none,low,high" }}{{ severity_vector.vector.userInteraction|cvss_printer:"none,required"}}{{ severity_vector.vector.scope|cvss_printer:"unchanged,changed" }}{{ severity_vector.vector.confidentialityImpact|cvss_printer:"high,low,none" }}{{ severity_vector.vector.integrityImpact|cvss_printer:"high,low,none" }}{{ severity_vector.vector.availabilityImpact|cvss_printer:"high,low,none" }}
+ {% elif severity_vector.vector.version == '4' %} + Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Attack Vector (AV)Attack Complexity (AC)Attack Requirements (AT)Privileges Required (PR)User Interaction (UI)Vulnerable System Impact Confidentiality (VC)Vulnerable System Impact Integrity (VI)Vulnerable System Impact Availability (VA)Subsequent System Impact Confidentiality (SC)Subsequent System Impact Integrity (SI)Subsequent System Impact Availability (SA)
{{ severity_vector.vector.attackVector|cvss_printer:"network,adjacent,local,physical"}}{{ severity_vector.vector.attackComplexity|cvss_printer:"low,high" }}{{ severity_vector.vector.attackRequirement|cvss_printer:"none,present" }}{{ severity_vector.vector.privilegesRequired|cvss_printer:"none,low,high" }}{{ severity_vector.vector.userInteraction|cvss_printer:"none,passive,active"}}{{ severity_vector.vector.vulnerableSystemImpactConfidentiality|cvss_printer:"high,low,none" }}{{ severity_vector.vector.vulnerableSystemImpactIntegrity|cvss_printer:"high,low,none" }}{{ severity_vector.vector.vulnerableSystemImpactAvailability|cvss_printer:"high,low,none" }}{{ severity_vector.vector.subsequentSystemImpactConfidentiality|cvss_printer:"high,low,none" }}{{ severity_vector.vector.subsequentSystemImpactIntegrity|cvss_printer:"high,low,none" }}{{ severity_vector.vector.subsequentSystemImpactAvailability|cvss_printer:"high,low,none" }}
+ {% elif severity_vector.vector.version == 'ssvc' %} +
+ Vector: {{ severity_vector.vector.vectorString }} Found at {{ severity_vector.origin }} +
+ {% endif %} + {% empty %} + + + There are no known vectors. + + + {% endfor %} +
+ + +
+ {% if epss_data %} +
+ Exploit Prediction Scoring System (EPSS) +
+ + + + + + + + + + + {% if epss_data.published_at %} + + + + + {% endif %} + +
+ + Percentile + + {{ epss_data.percentile }}
+ + EPSS Score + + {{ epss_data.score }}
+ + Published At + + {{ epss_data.published_at }}
+ {% else %} +

No EPSS data available for this advisory.

+ {% endif %} +
+ + +
+
+
+{% endif %} + + + + + +{% endblock %} \ No newline at end of file diff --git a/vulnerabilities/templates/advisory_package_details.html b/vulnerabilities/templates/advisory_package_details.html new file mode 100644 index 000000000..0f4c71044 --- /dev/null +++ b/vulnerabilities/templates/advisory_package_details.html @@ -0,0 +1,88 @@ +{% extends "base.html" %} +{% load humanize %} +{% load widget_tweaks %} +{% load static %} +{% load show_cvss %} +{% load url_filters %} + +{% block title %} +VulnerableCode Advisory Package Details - {{ advisory.advisory_id }} +{% endblock %} + +{% block content %} + +{% if advisory %} +
+
+
+
+ Vulnerable and Fixing Package details for Advisory: + + {{ advisory.advisory_id }} + +
+
+
+ + + + + + + + + {% for package in affected_packages %} + + + + + {% empty %} + + + + {% endfor %} + +
AffectedFixed by
+ {{ package.purl }} + + + {% for match in all_affected_fixed_by_matches %} + {% if match.affected_package == package %} + {% if match.matched_fixed_by_packages|length > 0 %} + {% for pkg in match.matched_fixed_by_packages %} + {{ pkg }} +
+ {% endfor %} + {% else %} + There are no reported fixed-by versions. + {% endif %} + {% endif %} + {% endfor %} + +
+ This advisory is not known to affect any packages. +
+
+
+
+{% endif %} + + + + + +{% endblock %} \ No newline at end of file diff --git a/vulnerabilities/templates/index_v2.html b/vulnerabilities/templates/index_v2.html new file mode 100644 index 000000000..962b5f79f --- /dev/null +++ b/vulnerabilities/templates/index_v2.html @@ -0,0 +1,33 @@ +{% extends "base.html" %} +{% load widget_tweaks %} + +{% block title %} +VulnerableCode Home +{% endblock %} + +{% block content %} +
+
+
+
+ {% include "package_search_box_v2.html" %} +
+
+
+

+ VulnerableCode aggregates software + vulnerabilities from multiple public advisory sources + and presents their details along with their affected + packages and fixed-by packages identified by + Package URLs (PURLs). +

+

+ What's new in this release: + + Check out the latest updates here! + +

+
+
+
+{% endblock %} \ No newline at end of file diff --git a/vulnerabilities/templates/package_details_v2.html b/vulnerabilities/templates/package_details_v2.html new file mode 100644 index 000000000..54cb8ffed --- /dev/null +++ b/vulnerabilities/templates/package_details_v2.html @@ -0,0 +1,365 @@ +{% extends "base.html" %} +{% load humanize %} +{% load widget_tweaks %} +{% load static %} +{% load url_filters %} + +{% block title %} +VulnerableCode Package Details - {{ package.purl }} +{% endblock %} + +{% block content %} +
+ {% include "package_search_box_v2.html"%} +
+ +{% if package %} +
+
+
+
+ Package details: + {{ package.purl }} + +
+
+ +
+ +
+ +
+
+
+ {% if affected_by_advisories|length != 0 %} +
+ {% else %} +
+ {% endif %} + + + + + + + {% if package.is_ghost %} + + + + + {% endif %} + +
+ + purl + + + {{ fixed_package_details.purl.to_string }} +
+ Tags + + + Ghost + +
+
+ {% if affected_by_advisories|length != 0 %} + +
+ + + + + + + + + + + + + + + +
+ Next non-vulnerable version + + {% if fixed_package_details.next_non_vulnerable.version %} + {{ fixed_package_details.next_non_vulnerable.version }} + {% else %} + None. + {% endif %} +
+ Latest non-vulnerable version + + {% if fixed_package_details.latest_non_vulnerable.version %} + {{ fixed_package_details.latest_non_vulnerable.version }} + {% else %} + None. + {% endif %} +
+ Risk score + + {{package.risk_score}} +
+
+ + {% endif %} + +
+
+ Vulnerabilities affecting this package ({{ affected_by_advisories|length }}) +
+ + + + + + + + + + + + + + {% for advisory in affected_by_advisories %} + + + + + + + + {% empty %} + + + + {% endfor %} + +
AdvisorySourceDate PublishedSummaryFixed in package version
+ + {{advisory.avid }} + +
+ {% if advisory.alias|length != 0 %} + Aliases: + {% endif %} +
+ {% for alias in advisory.alias %} + {% if alias.url %} + {{ alias }} +
+ {% else %} + {{ alias }} +
+ {% endif %} + {% endfor %} +
+ {{advisory.url}} + + {{advisory.date_published}} + + {{ advisory.summary }} + + {% if package.purl == fixed_package_details.purl.to_string %} + {% for key, value in fixed_package_details.items %} + {% if key == "advisories" %} + {% for vuln in value %} + {% if vuln.advisory.advisory_id == advisory.advisory_id %} + {% if vuln.fixed_by_package_details is None %} + There are no reported fixed-by versions. + {% else %} + {% for fixed_pkg in vuln.fixed_by_package_details %} +
+ {% if fixed_pkg.fixed_by_purl_advisories|length == 0 %} + {{ fixed_pkg.fixed_by_purl.version }} +
+ Subject of 0 other advisories. + {% else %} + {{ fixed_pkg.fixed_by_purl.version }} + {% if fixed_pkg.fixed_by_purl_advisories|length != 1 %} +
+ Subject of {{ fixed_pkg.fixed_by_purl_advisories|length }} other + advisories. + {% else %} +
+ Subject of {{ fixed_pkg.fixed_by_purl_advisories|length }} other + advisory. + {% endif %} + + + {% endif %} +
+ {% endfor %} + {% endif %} + {% endif %} + {% endfor %} + {% endif %} + {% endfor %} + {% endif %} +
+ This package is not known to be the subject of any advisories. +
+
+ +
+
+ Vulnerabilities fixed by this package ({{ fixing_advisories|length }}) +
+ + + + + + + + + + + + + {% for advisory in fixing_advisories %} + + + + + + + + {% empty %} + + + + {% endfor %} + +
AdvisorySourceDate PublishedSummaryAliases
+ + {{advisory.avid }} + + + {{advisory.url}} + + {{advisory.date_published}} + + {{ advisory.summary }} + + {% for alias in advisory.alias %} + {% if alias.url %} + {{ alias }} +
+ {% else %} + {{ alias }} +
+ {% endif %} + {% endfor %} +
+ This package is not known to fix any advisories. +
+ +
+
+
+
+ + +
+
+
+
+ +{% endif %} +{% endblock %} diff --git a/vulnerabilities/templates/package_search_box_v2.html b/vulnerabilities/templates/package_search_box_v2.html new file mode 100644 index 000000000..e78d400e6 --- /dev/null +++ b/vulnerabilities/templates/package_search_box_v2.html @@ -0,0 +1,48 @@ +{% load widget_tweaks %} +
+
+ Search for packages + +
+
+
+
+
+
+ {{ package_search_form.search|add_class:"input" }} +
+
+ +
+
+
+
+
+
diff --git a/vulnerabilities/templates/packages_v2.html b/vulnerabilities/templates/packages_v2.html new file mode 100644 index 000000000..fe2b05abe --- /dev/null +++ b/vulnerabilities/templates/packages_v2.html @@ -0,0 +1,84 @@ +{% extends "base.html" %} +{% load humanize %} +{% load widget_tweaks %} + +{% block title %} +VulnerableCode Package Search +{% endblock %} + +{% block content %} +
+ {% include "package_search_box_v2.html" %} +
+ +{% if search %} +
+
+
+
+ {{ page_obj.paginator.count|intcomma }} results +
+ {% if is_paginated %} + {% include 'includes/pagination.html' with page_obj=page_obj %} + {% endif %} +
+
+
+ +
+
+ + + + + + + + + + {% for package in page_obj %} + + + + + + {% empty %} + + + + {% endfor %} + +
+ + Package URL + + + + Affected by vulnerabilities + + + + Fixing vulnerabilities + +
+ {{ package.purl }} + {{ package.vulnerability_count }}{{ package.patched_vulnerability_count }}
+ No packages found. +
+
+ + {% if is_paginated %} + {% include 'includes/pagination.html' with page_obj=page_obj %} + {% endif %} + +
+{% endif %} +{% endblock %} diff --git a/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py new file mode 100644 index 000000000..94454c473 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_apache_httpd_importer_pipeline_v2.py @@ -0,0 +1,161 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import pytest +import requests + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import ApacheHTTPDImporterPipeline +from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import fetch_links +from vulnerabilities.pipelines.v2_importers.apache_httpd_importer import get_weaknesses + + +# Dummy responses +class DummyResponseContent: + def __init__(self, content_bytes): + self.content = content_bytes + + +class DummyResponseJSON: + def __init__(self, json_data): + self._json = json_data + + def json(self): + return self._json + + +# Tests for fetch_links +@pytest.fixture(autouse=True) +def no_requests(monkeypatch): + # Ensure other tests don't hit real HTTP + monkeypatch.setattr( + requests, + "get", + lambda url: (_ for _ in ()).throw(AssertionError(f"Unexpected HTTP GET call to {url}")), + ) + + +def test_fetch_links_filters_and_resolves(monkeypatch): + html = """ + + A1 + A2 + TXT + + """ + base_url = "https://example.com/base/" + # Monkeypatch HTTP GET for HTML + def fake_get(url): + assert url == base_url + return DummyResponseContent(html.encode("utf-8")) + + monkeypatch.setattr(requests, "get", fake_get) + links = fetch_links(base_url) + assert len(links) == 2 + assert links == [ + "https://example.com/base/advisory1.json", + "https://example.com/json/advisory2.json", + ] + + +# Tests for get_weaknesses +def test_get_weaknesses_with_cna_structure(): + mock_data = { + "containers": {"cna": {"problemTypes": [{"descriptions": [{"cweId": "CWE-125"}]}]}} + } + result = get_weaknesses(mock_data) + assert result == [125] + + +def test_get_weaknesses_with_data_meta_structure(): + mock_data = { + "CVE_data_meta": {"ID": "CVE-2020-0001"}, + "problemtype": { + "problemtype_data": [ + {"description": [{"value": "CWE-190 Integer Overflow"}]}, + {"description": [{"value": "CWE-200 Some Issue"}]}, + ] + }, + } + result = get_weaknesses(mock_data) + assert set(result) == {190, 200} + + +# Tests for ApacheHTTPDImporterPipeline +class DummyPipeline(ApacheHTTPDImporterPipeline): + # Expose protected methods for testing + pass + + +@pytest.fixture +def pipeline(monkeypatch): + pipe = DummyPipeline() + # Prevent real HTTP in fetch_links + monkeypatch.setattr( + "vulnerabilities.pipelines.v2_importers.apache_httpd_importer.fetch_links", + lambda url: ["u1", "u2"], + ) + return pipe + + +def test_advisories_count(monkeypatch, pipeline): + # Should use mocked links + count = pipeline.advisories_count() + assert count == 2 + + +def test_collect_advisories_and_to_advisory(monkeypatch, pipeline): + # Prepare two dummy JSONs + sample1 = { + "CVE_data_meta": {"ID": "CVE-1"}, + "description": {"description_data": [{"lang": "eng", "value": "Test desc"}]}, + "impact": [{"other": "5.0"}], + "affects": {"vendor": 
{"vendor_data": []}}, + "timeline": [], + } + sample2 = { + "cveMetadata": {"cveId": "CVE-2"}, + "description": {"description_data": [{"lang": "eng", "value": "Other desc"}]}, + "impact": [{"other": "7.5"}], + "affects": {"vendor": {"vendor_data": []}}, + "timeline": [], + } + # Monkeypatch requests.get to return JSON + def fake_get(u): + if u == "u1": + return DummyResponseJSON(sample1) + elif u == "u2": + return DummyResponseJSON(sample2) + else: + raise AssertionError(f"Unexpected URL {u}") + + monkeypatch.setattr(requests, "get", fake_get) + advisories = list(pipeline.collect_advisories()) + assert len(advisories) == 2 + # Validate first advisory + adv1 = advisories[0] + assert isinstance(adv1, AdvisoryData) + assert adv1.advisory_id == "CVE-1" + assert adv1.summary == "Test desc" + assert adv1.severities and adv1.severities[0].value == "5.0" + assert adv1.url.endswith("CVE-1.json") + # Validate second advisory + adv2 = advisories[1] + assert adv2.advisory_id == "CVE-2" + assert adv2.summary == "Other desc" + assert adv2.severities[0].value == "7.5" + + +# Test version range conversion error +def test_to_version_ranges_unknown_comparator(pipeline): + # version_data with bad comparator + versions_data = [{"version_value": "1.0.0", "version_affected": "<>"}] + fixed_versions = [] + with pytest.raises(ValueError): + pipeline.to_version_ranges(versions_data, fixed_versions) diff --git a/vulnerabilities/tests/pipelines/test_collect_commits_v2.py b/vulnerabilities/tests/pipelines/test_collect_commits_v2.py new file mode 100644 index 000000000..dddec9084 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_collect_commits_v2.py @@ -0,0 +1,131 @@ +from datetime import datetime +from unittest.mock import patch + +import pytest + +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import CodeFixV2 +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines.v2_improvers.collect_commits import CollectFixCommitsPipeline +from vulnerabilities.pipelines.v2_improvers.collect_commits import is_vcs_url +from vulnerabilities.pipelines.v2_improvers.collect_commits import is_vcs_url_already_processed +from vulnerabilities.pipelines.v2_improvers.collect_commits import normalize_vcs_url + + +@pytest.mark.parametrize( + "url,expected", + [ + ("git://github.com/angular/di.js.git", True), + ("github:user/repo", True), + ("user/repo", True), + ("https://github.com/user/repo.git", True), + ("git@github.com:user/repo.git", True), + ("ftp://example.com/not-a-repo", False), + ("random-string", False), + ("https://example.com/not-a-repo", False), + ], +) +def test_is_vcs_url(url, expected): + assert is_vcs_url(url) is expected + + +@pytest.mark.parametrize( + "url,normalized", + [ + ("git@github.com:user/repo.git", "https://github.com/user/repo.git"), + ("github:user/repo", "https://github.com/user/repo"), + ("bitbucket:example/repo", "https://bitbucket.org/example/repo"), + ("user/repo", "https://github.com/user/repo"), + ("https://gitlab.com/foo/bar.git", "https://gitlab.com/foo/bar.git"), + ], +) +def test_normalize_vcs_url(url, normalized): + assert normalize_vcs_url(url) == normalized + + +@pytest.mark.django_db +def test_is_vcs_url_already_processed_true(): + advisory = AdvisoryV2.objects.create( + advisory_id="CVE-2025-9999", + datasource_id="test-ds", + avid="test-ds/CVE-2025-9999", + url="https://example.com/advisory/CVE-2025-9999", + unique_content_id="11111", + date_collected=datetime.now(), + ) + package = 
PackageV2.objects.create( + type="bar", + name="foo", + version="1.0", + ) + advisory.affecting_packages.add(package) + advisory.save() + CodeFixV2.objects.create( + commits=["https://github.com/user/repo/commit/abc123"], + advisory=advisory, + affected_package=package, + ) + assert is_vcs_url_already_processed("https://github.com/user/repo/commit/abc123") is True + + +@pytest.mark.django_db +def test_collect_fix_commits_pipeline_creates_entry(): + advisory = AdvisoryV2.objects.create( + advisory_id="CVE-2025-1000", + datasource_id="test-ds", + avid="test-ds/CVE-2025-1000", + url="https://example.com/advisory/CVE-2025-1000", + unique_content_id="11111", + date_collected=datetime.now(), + ) + package = PackageV2.objects.create( + type="foo", + name="testpkg", + version="1.0", + ) + reference = AdvisoryReference.objects.create( + url="https://github.com/test/testpkg/commit/abc123" + ) + advisory.affecting_packages.add(package) + advisory.references.add(reference) + advisory.save() + + pipeline = CollectFixCommitsPipeline() + pipeline.collect_and_store_fix_commits() + + codefixes = CodeFixV2.objects.all() + assert codefixes.count() == 1 + fix = codefixes.first() + assert "abc123" in fix.commits[0] + assert fix.advisory == advisory + assert fix.affected_package == package + + +@pytest.mark.django_db +def test_collect_fix_commits_pipeline_skips_non_commit_urls(): + advisory = AdvisoryV2.objects.create( + advisory_id="CVE-2025-2000", + datasource_id="test-ds", + avid="test-ds/CVE-2025-2000", + url="https://example.com/advisory/CVE-2025-2000", + unique_content_id="11111", + date_collected=datetime.now(), + ) + package = PackageV2.objects.create( + type="pypi", + name="otherpkg", + version="2.0", + ) + + advisory.affecting_packages.add(package) + + reference = AdvisoryReference.objects.create(url="https://github.com/test/testpkg/issues/12") + + advisory.references.add(reference) + advisory.save() + + pipeline = CollectFixCommitsPipeline() + pipeline.collect_and_store_fix_commits() + + assert CodeFixV2.objects.count() == 0 diff --git a/vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py b/vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py new file mode 100644 index 000000000..4dbfb222a --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_compute_package_risk_v2.py @@ -0,0 +1,69 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# +from datetime import datetime +from decimal import Decimal + +import pytest + +from vulnerabilities.models import AdvisorySeverity +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import AdvisoryWeakness +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines.v2_improvers.compute_package_risk import ComputePackageRiskPipeline +from vulnerabilities.severity_systems import CVSSV3 +from vulnerabilities.severity_systems import GENERIC + + +@pytest.mark.django_db +def test_simple_risk_pipeline(): + pkg = PackageV2.objects.create(type="pypi", name="foo", version="2.3.0") + assert PackageV2.objects.count() == 1 + + adv = AdvisoryV2( + advisory_id="VCID-Existing", + summary="vulnerability description here", + datasource_id="ds", + avid="ds/VCID-Existing", + unique_content_id="ajkef", + url="https://test.com", + date_collected=datetime.now(), + ) + adv.save() + + severity1 = AdvisorySeverity.objects.create( + url="https://nvd.nist.gov/vuln/detail/CVE-xxxx-xxx1", + scoring_system=CVSSV3.identifier, + scoring_elements="CVSS:3.0/AV:P/AC:H/PR:H/UI:R/S:C/C:H/I:H/A:N/E:H/RL:O/RC:R/CR:H/MAC:H/MC:L", + value="6.5", + ) + + severity2 = AdvisorySeverity.objects.create( + url="https://nvd.nist.gov/vuln/detail/CVE-xxxx-xxx1", + scoring_system=GENERIC.identifier, + value="MODERATE", # 6.9 + ) + adv.severities.add(severity1) + adv.severities.add(severity2) + + weaknesses = AdvisoryWeakness.objects.create(cwe_id=119) + adv.weaknesses.add(weaknesses) + + adv.affecting_packages.add(pkg) + adv.save() + + improver = ComputePackageRiskPipeline() + improver.execute() + + assert pkg.risk_score is None + + improver = ComputePackageRiskPipeline() + improver.execute() + + pkg = PackageV2.objects.get(type="pypi", name="foo", version="2.3.0") + assert pkg.risk_score == Decimal("3.1") diff --git a/vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py b/vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py new file mode 100644 index 000000000..eb8d3aebd --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_compute_version_rank_v2.py @@ -0,0 +1,70 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+#
+
+from unittest.mock import patch
+
+import pytest
+from univers.versions import Version
+
+from vulnerabilities.models import PackageV2
+from vulnerabilities.pipelines.v2_improvers.computer_package_version_rank import (
+    ComputeVersionRankPipeline,
+)
+
+
+@pytest.mark.django_db
+class TestComputeVersionRankPipeline:
+    @pytest.fixture
+    def pipeline(self):
+        return ComputeVersionRankPipeline()
+
+    @pytest.fixture
+    def packages(self, db):
+        package_type = "pypi"
+        namespace = "test_namespace"
+        name = "test_package"
+        PackageV2.objects.create(type=package_type, namespace=namespace, name=name, version="1.0.0")
+        PackageV2.objects.create(type=package_type, namespace=namespace, name=name, version="1.1.0")
+        PackageV2.objects.create(type=package_type, namespace=namespace, name=name, version="0.9.0")
+        return PackageV2.objects.filter(type=package_type, namespace=namespace, name=name)
+
+    def test_compute_and_store_version_rank(self, pipeline, packages):
+        with patch.object(pipeline, "log") as mock_log:
+            pipeline.compute_and_store_version_rank()
+            assert mock_log.call_count > 0
+        for package in packages:
+            assert package.version_rank is not None
+
+    def test_update_version_rank_for_group(self, pipeline, packages):
+        with patch.object(PackageV2.objects, "bulk_update") as mock_bulk_update:
+            pipeline.update_version_rank_for_group(packages)
+            mock_bulk_update.assert_called_once()
+            updated_packages = mock_bulk_update.call_args[0][0]
+            assert len(updated_packages) == len(packages)
+            # Ranks are assigned starting at 1 for the oldest version.
+            for idx, package in enumerate(
+                sorted(packages, key=lambda p: Version(p.version)), start=1
+            ):
+                assert updated_packages[idx - 1].version_rank == idx
+
+    def test_sort_packages_by_version(self, pipeline, packages):
+        sorted_packages = pipeline.sort_packages_by_version(packages)
+        versions = [p.version for p in sorted_packages]
+        assert versions == sorted(versions, key=Version)
+
+    def test_sort_packages_by_version_empty(self, pipeline):
+        assert pipeline.sort_packages_by_version([]) == []
+
+    def test_sort_packages_by_version_invalid_scheme(self, pipeline, packages):
+        for package in packages:
+            package.type = "invalid"
+        assert pipeline.sort_packages_by_version(packages) == []
+
+    def test_compute_and_store_version_rank_invalid_scheme(self, pipeline):
+        PackageV2.objects.create(type="invalid", namespace="test", name="package", version="1.0.0")
+        with patch.object(pipeline, "log") as mock_log:
+            pipeline.compute_and_store_version_rank()
+            mock_log.assert_any_call("Successfully populated `version_rank` for all packages.")
diff --git a/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py b/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py
new file mode 100644
index 000000000..96359ca3c
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_elixir_security_v2_importer.py
@@ -0,0 +1,108 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+# + +import shutil +from pathlib import Path +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines.v2_importers.elixir_security_importer import ( + ElixirSecurityImporterPipeline, +) + + +@pytest.fixture +def mock_vcs_response(tmp_path): + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + packages_dir = repo_dir / "packages" / "some_package" + packages_dir.mkdir(parents=True) + + advisory_file = packages_dir / "CVE-2022-9999.yml" + advisory_file.write_text( + """ + cve: "2022-9999" + package: "plug" + description: "Cross-site scripting vulnerability in plug < 1.11.1" + patched_versions: + - ">= 1.11.1" + unaffected_versions: + - "< 1.0.0" + disclosure_date: "2022-12-01" + link: "https://github.com/plug/plug/security/advisories/GHSA-xxxx-yyyy" + """ + ) + + mock = MagicMock() + mock.dest_dir = str(repo_dir) + mock.delete = MagicMock() + return mock + + +@pytest.fixture +def mock_fetch_via_vcs(mock_vcs_response): + with patch( + "vulnerabilities.pipelines.v2_importers.elixir_security_importer.fetch_via_vcs" + ) as mock: + mock.return_value = mock_vcs_response + yield mock + + +def test_advisories_count(mock_fetch_via_vcs, mock_vcs_response): + importer = ElixirSecurityImporterPipeline() + importer.clone() + count = importer.advisories_count() + assert count == 1 + + +def test_collect_advisories(mock_fetch_via_vcs, mock_vcs_response): + importer = ElixirSecurityImporterPipeline() + importer.clone() + advisories = list(importer.collect_advisories()) + + assert len(advisories) == 1 + + advisory: AdvisoryData = advisories[0] + assert advisory.advisory_id == "CVE-2022-9999" + assert advisory.summary.startswith("Cross-site scripting vulnerability") + assert advisory.affected_packages[0].package.name == "plug" + assert advisory.affected_packages[0].package.type == "hex" + assert ( + advisory.references_v2[0].url + == "https://github.com/plug/plug/security/advisories/GHSA-xxxx-yyyy" + ) + assert advisory.date_published.isoformat().startswith("2022-12-01") + + +def test_collect_advisories_skips_invalid_cve(mock_fetch_via_vcs, tmp_path): + repo_dir = tmp_path / "repo" + packages_dir = repo_dir / "packages" + + if packages_dir.exists(): + shutil.rmtree(packages_dir) + packages_dir.mkdir(parents=True, exist_ok=True) + + advisory_file = packages_dir / "bad_advisory.yml" + advisory_file.write_text("cve: BAD-ID\npackage: x\n") + + mock_response = MagicMock() + mock_response.dest_dir = str(repo_dir) + mock_response.delete = MagicMock() + + with patch( + "vulnerabilities.pipelines.v2_importers.elixir_security_importer.fetch_via_vcs" + ) as mock: + mock.return_value = mock_response + importer = ElixirSecurityImporterPipeline() + importer.clone() + advisories = list(importer.collect_advisories()) + assert len(advisories) == 0 diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py b/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py new file mode 100644 index 000000000..865356158 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_enhance_with_exploitdb_v2.py @@ -0,0 +1,56 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+#
+
+import os
+from datetime import datetime
+from unittest import mock
+from unittest.mock import Mock
+
+import pytest
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines.v2_improvers.enhance_with_exploitdb import ExploitDBImproverPipeline
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_DATA = os.path.join(BASE_DIR, "../test_data", "exploitdb_improver/files_exploits.csv")
+
+
+@pytest.mark.django_db
+@mock.patch("requests.get")
+def test_exploit_db_improver(mock_get):
+    mock_response = Mock(status_code=200)
+    with open(TEST_DATA, "r") as f:
+        mock_response.text = f.read()
+    mock_get.return_value = mock_response
+
+    improver = ExploitDBImproverPipeline()
+
+    # Run the improver when there are no matching aliases.
+    improver.execute()
+
+    assert AdvisoryExploit.objects.count() == 0
+
+    adv1 = AdvisoryV2.objects.create(
+        advisory_id="VCIO-123-2002",
+        datasource_id="ds",
+        avid="ds/VCIO-123-2002",
+        unique_content_id="i3giu",
+        url="https://test.com",
+        date_collected=datetime.now(),
+    )
+
+    alias = AdvisoryAlias.objects.create(alias="CVE-2009-3699")
+
+    adv1.aliases.add(alias)
+
+    # Run the Exploit-DB improver again when there are matching aliases.
+    improver.execute()
+    assert AdvisoryExploit.objects.count() == 1
diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py b/vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py
new file mode 100644
index 000000000..bd58fa5fd
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_enhance_with_kev_v2.py
@@ -0,0 +1,57 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import os
+from datetime import datetime
+from unittest import mock
+from unittest.mock import Mock
+
+import pytest
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines.v2_improvers.enhance_with_kev import VulnerabilityKevPipeline
+from vulnerabilities.utils import load_json
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_DATA = os.path.join(BASE_DIR, "../test_data", "kev_data.json")
+
+
+@pytest.mark.django_db
+@mock.patch("requests.get")
+def test_kev_improver(mock_get):
+    mock_response = Mock(status_code=200)
+    mock_response.json.return_value = load_json(TEST_DATA)
+    mock_get.return_value = mock_response
+
+    improver = VulnerabilityKevPipeline()
+
+    # Run the improver when there are no matching aliases.
+    improver.execute()
+
+    assert AdvisoryExploit.objects.count() == 0
+
+    adv1 = AdvisoryV2.objects.create(
+        advisory_id="VCIO-123-2002",
+        datasource_id="ds",
+        avid="ds/VCIO-123-2002",
+        unique_content_id="i3giu",
+        url="https://test.com",
+        date_collected=datetime.now(),
+    )
+
+    alias = AdvisoryAlias.objects.create(alias="CVE-2021-38647")
+
+    adv1.aliases.add(alias)
+
+    # Run the KEV improver again when there are matching aliases.
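+    # CVE-2021-38647 is expected to match an entry in the mocked KEV feed
+    # (kev_data.json), so this second run should record exactly one
+    # AdvisoryExploit for the advisory above.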
+    improver.execute()
+    assert AdvisoryExploit.objects.count() == 1
diff --git a/vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py b/vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py
new file mode 100644
index 000000000..c20437145
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_enhance_with_metasploit_v2.py
@@ -0,0 +1,56 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import os
+from datetime import datetime
+from unittest import mock
+from unittest.mock import Mock
+
+import pytest
+
+from vulnerabilities.models import AdvisoryAlias
+from vulnerabilities.models import AdvisoryExploit
+from vulnerabilities.models import AdvisoryV2
+from vulnerabilities.pipelines.v2_improvers.enhance_with_metasploit import (
+    MetasploitImproverPipeline,
+)
+from vulnerabilities.utils import load_json
+
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+TEST_DATA = os.path.join(BASE_DIR, "../test_data", "metasploit_improver/modules_metadata_base.json")
+
+
+@pytest.mark.django_db
+@mock.patch("requests.get")
+def test_metasploit_improver(mock_get):
+    mock_response = Mock(status_code=200)
+    mock_response.json.return_value = load_json(TEST_DATA)
+    mock_get.return_value = mock_response
+
+    improver = MetasploitImproverPipeline()
+
+    # Run the improver when there are no matching aliases.
+    improver.execute()
+    assert AdvisoryExploit.objects.count() == 0
+
+    adv1 = AdvisoryV2.objects.create(
+        advisory_id="VCIO-123-2002",
+        datasource_id="ds",
+        avid="ds/VCIO-123-2002",
+        unique_content_id="i3giu",
+        url="https://test.com",
+        date_collected=datetime.now(),
+    )
+    alias = AdvisoryAlias.objects.create(alias="CVE-2007-4387")
+
+    adv1.aliases.add(alias)
+
+    # Run the Metasploit improver again when there are matching aliases.
+    improver.execute()
+    assert AdvisoryExploit.objects.count() == 1
diff --git a/vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py b/vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py
new file mode 100644
index 000000000..d082fdc3a
--- /dev/null
+++ b/vulnerabilities/tests/pipelines/test_flag_ghost_packages_v2.py
@@ -0,0 +1,111 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# VulnerableCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/vulnerablecode for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+# + +from unittest.mock import patch + +import pytest +from packageurl import PackageURL + +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines.v2_improvers.flag_ghost_packages import ( + detect_and_flag_ghost_packages, +) +from vulnerabilities.pipelines.v2_improvers.flag_ghost_packages import flag_ghost_packages + + +@pytest.mark.django_db +def test_flag_ghost_package_marked_correctly(): + pkg = PackageV2.objects.create( + type="pypi", + namespace=None, + name="requests", + version="999.999.999", + ) + + with patch( + "vulnerabilities.pipelines.v2_improvers.flag_ghost_packages.get_versions" + ) as mock_get_versions: + mock_get_versions.return_value = {"2.25.1", "2.26.0"} + + base_purl = PackageURL(type="pypi", namespace=None, name="requests") + ghost_count = flag_ghost_packages(base_purl, [pkg]) + + pkg.refresh_from_db() + assert ghost_count == 1 + assert pkg.is_ghost is True + + +@pytest.mark.django_db +def test_flag_non_ghost_package_not_marked(): + pkg = PackageV2.objects.create( + type="pypi", + namespace=None, + name="requests", + version="2.26.0", + ) + + with patch( + "vulnerabilities.pipelines.v2_improvers.flag_ghost_packages.get_versions" + ) as mock_get_versions: + mock_get_versions.return_value = {"2.25.1", "2.26.0"} + + base_purl = PackageURL(type="pypi", namespace=None, name="requests") + ghost_count = flag_ghost_packages(base_purl, [pkg]) + + pkg.refresh_from_db() + assert ghost_count == 0 + assert pkg.is_ghost is False + + +@pytest.mark.django_db +def test_flag_ghost_packages_gracefully_handles_version_fetch_failure(): + pkg = PackageV2.objects.create( + type="pypi", + namespace=None, + name="some-lib", + version="1.0.0", + ) + + with patch( + "vulnerabilities.pipelines.v2_improvers.flag_ghost_packages.get_versions" + ) as mock_get_versions: + mock_get_versions.return_value = None + + base_purl = PackageURL(type="pypi", namespace=None, name="some-lib") + ghost_count = flag_ghost_packages(base_purl, [pkg]) + + pkg.refresh_from_db() + assert ghost_count == 0 + assert pkg.is_ghost is False + + +@pytest.mark.django_db +def test_detect_and_flag_ghost_packages(monkeypatch): + ghost_pkg = PackageV2.objects.create(type="pypi", name="fakepkg", version="9.9.9") + real_pkg = PackageV2.objects.create(type="pypi", name="realpkg", version="1.0.0") + + def fake_versions(purl, logger=None): + if purl.name == "realpkg": + return {"1.0.0"} + if purl.name == "fakepkg": + return {"0.1.0", "0.2.0"} + return set() + + monkeypatch.setattr( + "vulnerabilities.pipelines.v2_improvers.flag_ghost_packages.get_versions", + fake_versions, + ) + + detect_and_flag_ghost_packages() + + ghost_pkg.refresh_from_db() + real_pkg.refresh_from_db() + + assert ghost_pkg.is_ghost is True + assert real_pkg.is_ghost is False diff --git a/vulnerabilities/tests/pipelines/test_github_importer_v2.py b/vulnerabilities/tests/pipelines/test_github_importer_v2.py new file mode 100644 index 000000000..ec3ab5a04 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_github_importer_v2.py @@ -0,0 +1,174 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from unittest.mock import patch + +import pytest +from packageurl import PackageURL + +from vulnerabilities.pipelines.v2_importers.github_importer import GitHubAPIImporterPipeline +from vulnerabilities.pipelines.v2_importers.github_importer import get_cwes_from_github_advisory +from vulnerabilities.pipelines.v2_importers.github_importer import get_purl +from vulnerabilities.utils import get_item + + +@pytest.fixture +def mock_fetch(): + with patch( + "vulnerabilities.pipelines.v2_importers.github_importer.utils.fetch_github_graphql_query" + ) as mock: + yield mock + + +def test_advisories_count(mock_fetch): + # Mock the GraphQL query response for advisory count + mock_fetch.return_value = {"data": {"securityVulnerabilities": {"totalCount": 10}}} + + pipeline = GitHubAPIImporterPipeline() + + count = pipeline.advisories_count() + + # Assert that the count is correct + assert count == 10 + + +def test_collect_advisories(mock_fetch): + # Mock advisory data for GitHub + advisory_data = { + "data": { + "securityVulnerabilities": { + "edges": [ + { + "node": { + "advisory": { + "identifiers": [{"type": "GHSA", "value": "GHSA-1234-ABCD"}], + "summary": "Sample advisory description", + "references": [ + {"url": "https://github.com/advisories/GHSA-1234-ABCD"} + ], + "severity": "HIGH", + "cwes": {"nodes": [{"cweId": "CWE-123"}]}, + "publishedAt": "2023-01-01T00:00:00Z", + }, + "firstPatchedVersion": {"identifier": "1.2.3"}, + "package": {"name": "example-package"}, + "vulnerableVersionRange": ">=1.0.0,<=1.2.0", + } + } + ], + "pageInfo": {"hasNextPage": False, "endCursor": None}, + } + } + } + + # Mock the response from GitHub GraphQL query + mock_fetch.return_value = advisory_data + + # Instantiate the pipeline + pipeline = GitHubAPIImporterPipeline() + + # Collect advisories + advisories = list(pipeline.collect_advisories()) + + # Check if advisories were correctly parsed + assert len(advisories) == 1 + advisory = advisories[0] + + # Validate advisory fields + assert advisory.advisory_id == "GHSA-1234-ABCD" + assert advisory.summary == "Sample advisory description" + assert advisory.url == "https://github.com/advisories/GHSA-1234-ABCD" + assert len(advisory.references_v2) == 1 + assert advisory.references_v2[0].reference_id == "GHSA-1234-ABCD" + assert advisory.severities[0].value == "HIGH" + + # Validate affected package and version range + affected_package = advisory.affected_packages[0] + assert isinstance(affected_package.package, PackageURL) + assert affected_package.package.name == "example-package" + + # Check CWE extraction + assert advisory.weaknesses == [123] + + +def test_get_purl(mock_fetch): + # Test for package URL generation + result = get_purl("cargo", "example/package-name") + + # Validate that the correct PackageURL is generated + assert isinstance(result, PackageURL) + assert result.type == "cargo" + assert result.namespace == None + assert result.name == "example/package-name" + + +def test_process_response(mock_fetch): + # Mock advisory data as input for the process_response function + advisory_data = { + "data": { + "securityVulnerabilities": { + "edges": [ + { + "node": { + "advisory": { + "identifiers": [{"type": "GHSA", "value": "GHSA-5678-EFGH"}], + "summary": "Another advisory", + "references": [ + {"url": "https://github.com/advisories/GHSA-5678-EFGH"} + ], + "severity": "MEDIUM", + "cwes": {"nodes": [{"cweId": "CWE-200"}]}, + "publishedAt": "2023-02-01T00:00:00Z", + }, + "firstPatchedVersion": {"identifier": "2.0.0"}, + "package": {"name": "another-package"}, + 
"vulnerableVersionRange": ">=2.0.0,<=3.0.0", + } + } + ], + "pageInfo": {"hasNextPage": False, "endCursor": None}, + } + } + } + + # Mock the response from GitHub GraphQL query + mock_fetch.return_value = advisory_data + + # Process the mock response + result = list(GitHubAPIImporterPipeline().collect_advisories()) + + # Check the results + assert len(result) == 1 + advisory = result[0] + + # Validate the advisory data + assert advisory.advisory_id == "GHSA-5678-EFGH" + assert advisory.summary == "Another advisory" + assert advisory.url == "https://github.com/advisories/GHSA-5678-EFGH" + + # Check CWE extraction + assert advisory.weaknesses == [200] + + +def test_get_cwes_from_github_advisory(mock_fetch): + # Mock CWEs extraction from GitHub advisory + advisory_data = {"cwes": {"nodes": [{"cweId": "CWE-522"}]}} + + cwes = get_cwes_from_github_advisory(advisory_data) + + # Validate the CWE ID extraction + assert cwes == [522] + + +def test_invalid_package_type_in_get_purl(mock_fetch): + # Test for invalid package type + result = get_purl("invalidpkg", "example/package-name") + + # Assert that None is returned for an invalid package type + assert result is None diff --git a/vulnerabilities/tests/pipelines/test_gitlab_v2_importer.py b/vulnerabilities/tests/pipelines/test_gitlab_v2_importer.py new file mode 100644 index 000000000..6e5c8eb15 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_gitlab_v2_importer.py @@ -0,0 +1,153 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# + +from pathlib import Path +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest + +from vulnerabilities.importer import AdvisoryData + + +@pytest.fixture +def mock_vcs_response(tmp_path): + mock_response = MagicMock() + mock_response.dest_dir = str(tmp_path) + mock_response.delete = MagicMock() + return mock_response + + +@pytest.fixture +def mock_fetch_via_vcs(mock_vcs_response): + with patch("vulnerabilities.pipelines.v2_importers.gitlab_importer.fetch_via_vcs") as mock: + mock.return_value = mock_vcs_response + yield mock + + +@pytest.fixture +def mock_gitlab_yaml(tmp_path): + advisory_dir = tmp_path / "pypi" / "package_name" + advisory_dir.mkdir(parents=True) + + advisory_file = advisory_dir / "CVE-2022-0001.yml" + advisory_file.write_text( + """ + identifier: "CVE-2022-0001" + package_slug: "pypi/package_name" + title: "Example vulnerability" + description: "Example description" + pubdate: "2022-06-15" + affected_range: "<2.0.0" + fixed_versions: + - "2.0.0" + urls: + - "https://example.com/advisory" + cwe_ids: + - "CWE-79" + identifiers: + - "CVE-2022-0001" + """ + ) + return tmp_path + + +def test_clone(mock_fetch_via_vcs, mock_vcs_response): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + pipeline = GitLabImporterPipeline() + pipeline.clone() + + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + assert pipeline.vcs_response == mock_vcs_response + + +def test_advisories_count(mock_gitlab_yaml, mock_vcs_response, mock_fetch_via_vcs): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + mock_vcs_response.dest_dir = str(mock_gitlab_yaml) + + pipeline = GitLabImporterPipeline() + pipeline.clone() + mock_fetch_via_vcs.assert_called_once() + + count = pipeline.advisories_count() + assert count == 1 + + 
+def test_collect_advisories(mock_gitlab_yaml, mock_vcs_response, mock_fetch_via_vcs): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + mock_vcs_response.dest_dir = str(mock_gitlab_yaml) + + pipeline = GitLabImporterPipeline() + pipeline.clone() + + advisories = list(pipeline.collect_advisories()) + assert len(advisories) == 1 + advisory = advisories[0] + + assert isinstance(advisory, AdvisoryData) + assert advisory.advisory_id == "CVE-2022-0001" + assert advisory.summary == "Example vulnerability\nExample description" + assert advisory.references_v2[0].url == "https://example.com/advisory" + assert advisory.affected_packages[0].package.name == "package-name" + assert advisory.affected_packages[0].fixed_version + assert advisory.weaknesses[0] == 79 + + +def test_clean_downloads(mock_vcs_response): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + pipeline = GitLabImporterPipeline() + pipeline.vcs_response = mock_vcs_response + + pipeline.clean_downloads() + mock_vcs_response.delete.assert_called_once() + + +def test_on_failure(mock_vcs_response): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + pipeline = GitLabImporterPipeline() + pipeline.vcs_response = mock_vcs_response + + with patch.object(pipeline, "clean_downloads") as mock_clean: + pipeline.on_failure() + mock_clean.assert_called_once() + + +def test_collect_advisories_with_invalid_yaml( + mock_gitlab_yaml, mock_vcs_response, mock_fetch_via_vcs +): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + # Add an invalid YAML file + invalid_file = Path(mock_gitlab_yaml) / "pypi" / "package_name" / "invalid.yml" + invalid_file.write_text(":::invalid_yaml") + + mock_vcs_response.dest_dir = str(mock_gitlab_yaml) + + pipeline = GitLabImporterPipeline() + pipeline.clone() + + # Should not raise but skip invalid YAML + advisories = list(pipeline.collect_advisories()) + assert len(advisories) == 1 # Only one valid advisory is parsed + + +def test_advisories_count_empty(mock_vcs_response, mock_fetch_via_vcs, tmp_path): + from vulnerabilities.pipelines.v2_importers.gitlab_importer import GitLabImporterPipeline + + mock_vcs_response.dest_dir = str(tmp_path) + + pipeline = GitLabImporterPipeline() + pipeline.clone() + mock_fetch_via_vcs.assert_called_once() + + count = pipeline.advisories_count() + assert count == 0 diff --git a/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py new file mode 100644 index 000000000..7941c9b69 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_npm_importer_pipeline_v2.py @@ -0,0 +1,128 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import json +from types import SimpleNamespace + +import pytz +from packageurl import PackageURL +from univers.version_range import NpmVersionRange +from univers.versions import SemverVersion + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines.v2_importers.npm_importer import NpmImporterPipeline +from vulnerabilities.severity_systems import CVSSV2 +from vulnerabilities.severity_systems import CVSSV3 + + +def test_clone(monkeypatch): + import vulnerabilities.pipelines.v2_importers.npm_importer as npm_mod + + dummy = SimpleNamespace(dest_dir="dummy", delete=lambda: None) + # Patch the name in the npm_importer module, not fetchcode.vcs + monkeypatch.setattr(npm_mod, "fetch_via_vcs", lambda url: dummy) + + p = NpmImporterPipeline() + p.clone() + + assert p.vcs_response is dummy + + +def test_clean_downloads_and_on_failure(): + called = {} + + def delete(): + called["deleted"] = True + + dummy = SimpleNamespace(dest_dir="dummy", delete=delete) + p = NpmImporterPipeline() + p.vcs_response = dummy + p.clean_downloads() + assert called.get("deleted", False) + called.clear() + p.on_failure() + assert called.get("deleted", False) + + +def test_advisories_count_and_collect(tmp_path): + base = tmp_path + vuln_dir = base / "vuln" / "npm" + vuln_dir.mkdir(parents=True) + (vuln_dir / "index.json").write_text("{}") + (vuln_dir / "001.json").write_text(json.dumps({"id": "001"})) + p = NpmImporterPipeline() + p.vcs_response = SimpleNamespace(dest_dir=str(base), delete=lambda: None) + assert p.advisories_count() == 2 + advisories = list(p.collect_advisories()) + # Should yield None for index.json and one AdvisoryData + real = [a for a in advisories if isinstance(a, AdvisoryData)] + assert len(real) == 1 + assert real[0].advisory_id == "npm-001" + + +def test_to_advisory_data_skips_index(tmp_path): + p = NpmImporterPipeline() + file = tmp_path / "index.json" + file.write_text("{}") + assert p.to_advisory_data(file) is None + + +def test_to_advisory_data_full(tmp_path): + data = { + "id": "123", + "overview": "desc", + "title": "ti", + "created_at": "2021-01-01T00:00:00Z", + "cvss_vector": "CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", + "cvss_score": "9.8", + "references": ["http://ref1"], + "module_name": "mypkg", + "vulnerable_versions": "<=1.2.3", + "patched_versions": ">=1.2.4", + "cves": ["CVE-123", "CVE-124"], + } + file = tmp_path / "123.json" + file.write_text(json.dumps(data)) + p = NpmImporterPipeline() + adv = p.to_advisory_data(file) + assert isinstance(adv, AdvisoryData) + assert adv.advisory_id == "npm-123" + assert "ti" in adv.summary and "desc" in adv.summary + assert adv.date_published.tzinfo == pytz.UTC + assert len(adv.severities) == 1 and adv.severities[0].system == CVSSV3 + urls = [r.url for r in adv.references_v2] + assert "http://ref1" in urls + assert f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/123.json" in urls + pkg = adv.affected_packages[0] + assert pkg.package == PackageURL(type="npm", name="mypkg") + assert isinstance(pkg.affected_version_range, NpmVersionRange) + assert pkg.fixed_version == SemverVersion("1.2.4") + assert set(adv.aliases) == {"CVE-123", "CVE-124"} + + +def test_to_advisory_data_cvss_v2(tmp_path): + data = {"id": "124", "cvss_vector": "CVSS:2.0/AV:N/AC:L/Au:N/C:P/I:P/A:P", "cvss_score": "5.5"} + file = tmp_path / "124.json" + file.write_text(json.dumps(data)) + p = NpmImporterPipeline() + adv = p.to_advisory_data(file) + assert len(adv.severities) == 1 and adv.severities[0].system == CVSSV2 + + +def 
test_get_affected_package_special_and_standard(): + p = NpmImporterPipeline() + pkg = p.get_affected_package( + {"vulnerable_versions": "<=99.999.99999", "patched_versions": "<0.0.0"}, "pkg" + ) + assert isinstance(pkg.affected_version_range, NpmVersionRange) + assert pkg.fixed_version is None + data2 = {"vulnerable_versions": "<=2.0.0", "patched_versions": ">=2.0.1"} + pkg2 = p.get_affected_package(data2, "pkg2") + assert isinstance(pkg2.affected_version_range, NpmVersionRange) + assert pkg2.fixed_version == SemverVersion("2.0.1") diff --git a/vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py b/vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py new file mode 100644 index 000000000..da077f3ed --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_postgresql_v2_importer.py @@ -0,0 +1,154 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest +from univers.versions import SemverVersion + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines.v2_importers.postgresql_importer import PostgreSQLImporterPipeline + +HTML_PAGE_WITH_LINKS = """ + + +

+<a href="https://www.postgresql.org/support/security/">Security Advisory</a>
+<a href="/support/security/advisory1.html">Advisory 1</a>
+<a href="/about/news/another.html">Another Advisory</a>
+<a href="/support/security/advisory2.html">Advisory 2</a>
+"""
+
+HTML_ADVISORY = """
+<table>
+  <tbody>
+    <tr>
+      <td>
+        <a href="https://example.com/CVE-2022-1234">CVE-2022-1234</a>
+        <a href="https://www.postgresql.org/about/news/advisory1/">Announcement</a>
+      </td>
+      <td>10.0, 10.1</td>
+      <td>10.2</td>
+      <td>
+        <a href="https://nvd.nist.gov/vuln-metrics/cvss/v3-calculator?name=CVE-2022-1234&vector=AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H&version=3.1">9.8</a>
+      </td>
+      <td>Description of the issue</td>
+    </tr>
+  </tbody>
+</table>
+ + +""" + + +@pytest.fixture +def importer(): + return PostgreSQLImporterPipeline() + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_collect_links(mock_get, importer): + mock_get.return_value.content = HTML_PAGE_WITH_LINKS.encode("utf-8") + + importer.collect_links() + + assert len(importer.links) == 3 # base + 2 new + assert any("advisory1.html" in link for link in importer.links) + assert any("advisory2.html" in link for link in importer.links) + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_advisories_count(mock_get, importer): + mock_get.return_value.content = HTML_PAGE_WITH_LINKS.encode("utf-8") + + count = importer.advisories_count() + assert count >= 3 + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_collect_advisories(mock_get, importer): + importer.links = { + "https://www.postgresql.org/support/security/advisory1.html", + "https://www.postgresql.org/support/security/advisory2.html", + } + + mock_get.return_value.content = HTML_ADVISORY.encode("utf-8") + + advisories = list(importer.collect_advisories()) + + assert len(advisories) == 2 + advisory = advisories[0] + assert isinstance(advisory, AdvisoryData) + assert advisory.advisory_id == "CVE-2022-1234" + assert "Description of the issue" in advisory.summary + assert len(advisory.references_v2) > 0 + assert advisory.affected_packages[0].package.name == "postgresql" + assert str(advisory.affected_packages[0].fixed_version) == "10.2" + assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("10.0.0")) + assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("10.1.0")) + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_collect_advisories_with_no_fixed_version(mock_get, importer): + no_fix_html = """ + + + + + + + + + + + + +
+    <table>
+      <tbody>
+        <tr>
+          <td>
+            <a href="https://example.com/CVE-2023-5678">CVE-2023-5678</a>
+            <a href="https://www.postgresql.org/about/news/advisory2/">Announcement</a>
+          </td>
+          <td>9.5, 9.6</td>
+          <td></td>
+          <td></td>
+          <td>Unpatched issue</td>
+        </tr>
+      </tbody>
+    </table>
+ + + """ + + def side_effect(url, *args, **kwargs): + if "advisory" not in url: + return MagicMock(content=HTML_PAGE_WITH_LINKS.encode("utf-8")) + return MagicMock(content=no_fix_html.encode("utf-8")) + + mock_get.side_effect = side_effect + + advisories = list(importer.collect_advisories()) + + assert len(advisories) == 2 + advisory = advisories[0] + assert advisory.advisory_id == "CVE-2023-5678" + assert advisory.affected_packages[0].fixed_version is None + assert advisory.affected_packages[0].affected_version_range.contains(SemverVersion("9.5")) + + +@patch("vulnerabilities.pipelines.v2_importers.postgresql_importer.requests.get") +def test_cvss_parsing(mock_get, importer): + mock_get.side_effect = lambda url, *args, **kwargs: MagicMock( + content=HTML_ADVISORY.encode("utf-8") + ) + + importer.links = {"https://www.postgresql.org/support/security/advisory1.html"} + + advisories = list(importer.collect_advisories()) + + assert len(advisories) == 1 + reference = advisories[0].references_v2[0] + + severity = reference.severities[0] + assert severity.system.identifier == "cvssv3" + assert severity.value == "9.8" + assert "AV:N/AC:L/PR:N/UI:N" in severity.scoring_elements diff --git a/vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py new file mode 100644 index 000000000..20aa63387 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_pypa_v2_importer_pipeline.py @@ -0,0 +1,173 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest +import saneyaml + +from vulnerabilities.importer import AdvisoryData + + +@pytest.fixture +def mock_vcs_response(): + # Mock the vcs_response from fetch_via_vcs + mock_response = MagicMock() + mock_response.dest_dir = "/mock/repo" + mock_response.delete = MagicMock() + return mock_response + + +@pytest.fixture +def mock_fetch_via_vcs(mock_vcs_response): + with patch("vulnerabilities.pipelines.v2_importers.pypa_importer.fetch_via_vcs") as mock: + mock.return_value = mock_vcs_response + yield mock + + +@pytest.fixture +def mock_pathlib(tmp_path): + # Mock the Path structure to simulate the `vulns` directory and advisory files + vulns_dir = tmp_path / "vulns" + vulns_dir.mkdir() + + advisory_file = vulns_dir / "CVE-2021-1234.yaml" + advisory_file.write_text( + """ + id: CVE-2021-1234 + summary: Sample PyPI vulnerability + references: + - https://pypi.org/advisory/CVE-2021-1234 + """ + ) + return vulns_dir + + +def test_clone(mock_fetch_via_vcs, mock_vcs_response): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Test the `clone` method to ensure it calls `fetch_via_vcs` + pipeline = PyPaImporterPipeline() + pipeline.clone() + + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + assert pipeline.vcs_response == mock_vcs_response + + +def test_advisories_count(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Mock `vcs_response.dest_dir` to point to the temporary directory + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + pipeline = PyPaImporterPipeline() + + # Call clone() to set the vcs_response attribute + pipeline.clone() + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + + count = pipeline.advisories_count() + + # Check that the count matches the number of YAML files in the `vulns` directory + assert count == 1 + + +def test_collect_advisories(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Mock `vcs_response.dest_dir` to point to the temporary directory + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + # Mock `parse_advisory_data` to return an AdvisoryData object + with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse: + mock_parse.return_value = AdvisoryData( + advisory_id="CVE-2021-1234", + summary="Sample PyPI vulnerability", + references_v2=[{"url": "https://pypi.org/advisory/CVE-2021-1234"}], + affected_packages=[], + weaknesses=[], + url="https://pypi.org/advisory/CVE-2021-1234", + ) + + pipeline = PyPaImporterPipeline() + pipeline.clone() + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + advisories = list(pipeline.collect_advisories()) + + # Ensure that advisories are parsed correctly + assert len(advisories) == 1 + advisory = advisories[0] + assert advisory.advisory_id == "CVE-2021-1234" + assert advisory.summary == "Sample PyPI vulnerability" + assert advisory.url == "https://pypi.org/advisory/CVE-2021-1234" + + +def test_clean_downloads(mock_vcs_response): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import 
PyPaImporterPipeline + + # Test the `clean_downloads` method to ensure the repository is deleted + pipeline = PyPaImporterPipeline() + pipeline.vcs_response = mock_vcs_response + + pipeline.clean_downloads() + + mock_vcs_response.delete.assert_called_once() + + +def test_on_failure(mock_vcs_response): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Test the `on_failure` method to ensure `clean_downloads` is called on failure + pipeline = PyPaImporterPipeline() + pipeline.vcs_response = mock_vcs_response + + with patch.object(pipeline, "clean_downloads") as mock_clean: + pipeline.on_failure() + + mock_clean.assert_called_once() + + +def test_collect_advisories_with_invalid_yaml(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Create an invalid YAML file + invalid_file = mock_pathlib / "invalid_file.yaml" + invalid_file.write_text("invalid_yaml") + + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse: + # Mock parse_advisory_data to raise an error on invalid YAML + mock_parse.side_effect = saneyaml.YAMLError("Invalid YAML") + + pipeline = PyPaImporterPipeline() + pipeline.clone() + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + with pytest.raises(saneyaml.YAMLError): + list(pipeline.collect_advisories()) + + +def test_advisories_count_empty(mock_vcs_response, mock_fetch_via_vcs): + # Import inside the test function to avoid circular import + from vulnerabilities.pipelines.v2_importers.pypa_importer import PyPaImporterPipeline + + # Mock an empty 'vulns' directory + mock_vcs_response.dest_dir = "/mock/empty_repo" + pipeline = PyPaImporterPipeline() + pipeline.clone() + # Test that advisories_count returns 0 for an empty directory + count = pipeline.advisories_count() + assert count == 0 diff --git a/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py b/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py new file mode 100644 index 000000000..33c716889 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_pysec_v2_importer.py @@ -0,0 +1,137 @@ +import json +from io import BytesIO +from unittest.mock import patch +from zipfile import ZipFile + +import pytest + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.pipelines.v2_importers.pysec_importer import ( + PyPIImporterPipeline, # Path to the PyPI Importer +) + + +@pytest.fixture +def mock_zip_data(): + # Create mock zip data for testing + zip_buffer = BytesIO() + with ZipFile(zip_buffer, mode="w") as zip_file: + # Create a sample advisory file inside the zip + advisory_data = { + "advisory_id": "PYSEC-1234", + "summary": "Sample PyPI advisory", + "references": [{"url": "https://pypi.org/advisory/PYSEC-1234"}], + "package": {"name": "example-package"}, + "affected_versions": ">=1.0.0,<=2.0.0", + } + # Save the sample advisory as a JSON file + with zip_file.open("PYSEC-1234.json", "w") as f: + f.write(json.dumps(advisory_data).encode("utf-8")) + zip_buffer.seek(0) + return zip_buffer + + +@pytest.fixture +def mock_requests_get(): + with patch("requests.get") as mock: + yield mock + + +def test_fetch_zip(mock_requests_get, mock_zip_data): + # Mock the `requests.get` to return the mock zip data + mock_requests_get.return_value.content = 
mock_zip_data.read() + + pipeline = PyPIImporterPipeline() + + # Call the `fetch_zip` method + pipeline.fetch_zip() + + # Reset the position of mock_zip_data to 0 before comparing + mock_zip_data.seek(0) + + # Verify that the zip file content is correctly assigned + assert pipeline.advisory_zip == mock_zip_data.read() + + +def test_advisories_count(mock_requests_get, mock_zip_data): + # Mock the `requests.get` to return the mock zip data + mock_requests_get.return_value.content = mock_zip_data.read() + + pipeline = PyPIImporterPipeline() + + # Fetch the zip data + pipeline.fetch_zip() + + # Test advisories count + count = pipeline.advisories_count() + + # Verify that it correctly counts the number of advisory files starting with 'PYSEC-' + assert count == 1 + + +def test_collect_advisories(mock_requests_get, mock_zip_data): + # Mock the `requests.get` to return the mock zip data + mock_requests_get.return_value.content = mock_zip_data.read() + + pipeline = PyPIImporterPipeline() + + # Fetch the zip data + pipeline.fetch_zip() + + # Mock the `parse_advisory_data_v2` function to return a dummy AdvisoryData + with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse: + mock_parse.return_value = AdvisoryData( + advisory_id="PYSEC-1234", + summary="Sample PyPI advisory", + references_v2=[{"url": "https://pypi.org/advisory/PYSEC-1234"}], + affected_packages=[], + weaknesses=[], + url="https://pypi.org/advisory/PYSEC-1234", + ) + + # Call the `collect_advisories` method + advisories = list(pipeline.collect_advisories()) + + # Ensure we have 1 advisory + assert len(advisories) == 1 + + # Verify advisory data + advisory = advisories[0] + assert advisory.advisory_id == "PYSEC-1234" + assert advisory.summary == "Sample PyPI advisory" + assert advisory.url == "https://pypi.org/advisory/PYSEC-1234" + + +def test_collect_advisories_invalid_file(mock_requests_get, mock_zip_data): + # Create a mock zip with an invalid file name + zip_buffer = BytesIO() + with ZipFile(zip_buffer, mode="w") as zip_file: + zip_file.writestr("INVALID_FILE.txt", "Invalid content") + + zip_buffer.seek(0) + mock_requests_get.return_value.content = zip_buffer.read() + + pipeline = PyPIImporterPipeline() + + # Fetch the zip data + pipeline.fetch_zip() + + # Mock the `parse_advisory_data_v2` function + with patch("vulnerabilities.importers.osv.parse_advisory_data_v2") as mock_parse: + mock_parse.return_value = AdvisoryData( + advisory_id="PYSEC-1234", + summary="Sample PyPI advisory", + references_v2=[{"url": "https://pypi.org/advisory/PYSEC-1234"}], + affected_packages=[], + weaknesses=[], + url="https://pypi.org/advisory/PYSEC-1234", + ) + + # Call the `collect_advisories` method and check the logging for invalid file + with patch( + "vulnerabilities.pipelines.VulnerableCodeBaseImporterPipelineV2.log" + ) as mock_log: + advisories = list(pipeline.collect_advisories()) + + # Ensure no advisories were yielded due to the invalid file + assert len(advisories) == 0 diff --git a/vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py b/vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py new file mode 100644 index 000000000..f995f0c1f --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_vulnerablecode_importer_v2_pipeline.py @@ -0,0 +1,180 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. 
+# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import logging +from datetime import datetime +from datetime import timedelta +from unittest import mock + +import pytest +from packageurl import PackageURL + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import UnMergeablePackageError +from vulnerabilities.models import AdvisoryV2 +from vulnerabilities.models import PackageV2 +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2 + + +class DummyImporter(VulnerableCodeBaseImporterPipelineV2): + pipeline_id = "dummy" + log_messages = [] + + def log(self, message, level=logging.INFO): + self.log_messages.append((level, message)) + + def collect_advisories(self): + yield from self._advisories + + def advisories_count(self): + return len(self._advisories) + + +@pytest.fixture +def dummy_advisory(): + return AdvisoryData( + summary="Test advisory", + aliases=["CVE-2025-0001"], + references_v2=[], + severities=[], + weaknesses=[], + affected_packages=[], + advisory_id="ADV-123", + date_published=datetime.now() - timedelta(days=10), + url="https://example.com/advisory/1", + ) + + +@pytest.fixture +def dummy_importer(dummy_advisory): + importer = DummyImporter() + importer._advisories = [dummy_advisory] + return importer + + +@pytest.mark.django_db +def test_collect_and_store_advisories(dummy_importer): + dummy_importer.collect_and_store_advisories() + assert len(dummy_importer.log_messages) >= 2 + assert "Successfully collected" in dummy_importer.log_messages[-1][1] + assert AdvisoryV2.objects.count() == 1 + + +def test_get_advisory_packages_basic(dummy_importer): + purl = PackageURL("pypi", None, "dummy", "1.0.0") + affected_package = mock.Mock() + affected_package.package = purl + dummy_importer.unfurl_version_ranges = False + + with mock.patch( + "vulnerabilities.improvers.default.get_exact_purls", return_value=([purl], [purl]) + ): + with mock.patch.object( + PackageV2.objects, "get_or_create_from_purl", return_value=(mock.Mock(), True) + ) as mock_get: + dummy_importer.get_advisory_packages( + advisory_data=mock.Mock(affected_packages=[affected_package]) + ) + assert mock_get.call_count == 2 # one affected, one fixed + + +def test_get_published_package_versions_filters(dummy_importer): + purl = PackageURL("pypi", None, "example", None) + + dummy_versions = [ + mock.Mock(value="1.0.0", release_date=datetime.now() - timedelta(days=5)), + mock.Mock(value="2.0.0", release_date=datetime.now() + timedelta(days=5)), # future + ] + + with mock.patch( + "vulnerabilities.pipelines.package_versions.versions", return_value=dummy_versions + ): + versions = dummy_importer.get_published_package_versions(purl, until=datetime.now()) + assert "1.0.0" in versions + assert "2.0.0" not in versions + + +def test_get_published_package_versions_failure_logs(dummy_importer): + purl = PackageURL("pypi", None, "example", None) + with mock.patch( + "vulnerabilities.pipelines.package_versions.versions", side_effect=Exception("fail") + ): + versions = dummy_importer.get_published_package_versions(purl) + assert versions == [] + assert any("Failed to fetch versions" in msg for lvl, msg in dummy_importer.log_messages) + + +def test_expand_version_range_to_purls(dummy_importer): + purls = list( + dummy_importer.expand_verion_range_to_purls("npm", "lodash", "lodash", ["1.0.0", "1.1.0"]) + ) + assert all(isinstance(p, PackageURL) for p in purls) + assert 
purls[0].name == "lodash" + + +def test_resolve_package_versions(dummy_importer): + dummy_importer.ignorable_versions = [] + dummy_importer.expand_verion_range_to_purls = lambda *args, **kwargs: [ + PackageURL("npm", None, "a", "1.0.0") + ] + + with mock.patch( + "vulnerabilities.pipelines.resolve_version_range", return_value=(["1.0.0"], ["1.1.0"]) + ), mock.patch( + "vulnerabilities.pipelines.get_affected_packages_by_patched_package", + return_value={None: [PackageURL("npm", None, "a", "1.0.0")]}, + ), mock.patch( + "vulnerabilities.pipelines.nearest_patched_package", return_value=[] + ): + aff, fix = dummy_importer.resolve_package_versions( + affected_version_range=">=1.0.0", + pkg_type="npm", + pkg_namespace=None, + pkg_name="a", + valid_versions=["1.0.0", "1.1.0"], + ) + assert any(isinstance(p, PackageURL) for p in aff) + + +def test_get_impacted_packages_mergeable(dummy_importer): + ap = mock.Mock() + ap.package = PackageURL("npm", None, "abc", None) + dummy_importer.get_published_package_versions = lambda package_url, until: ["1.0.0", "1.1.0"] + dummy_importer.resolve_package_versions = lambda **kwargs: ( + [PackageURL("npm", None, "abc", "1.0.0")], + [PackageURL("npm", None, "abc", "1.1.0")], + ) + + with mock.patch( + "vulnerabilities.importer.AffectedPackage.merge", + return_value=(ap.package, [">=1.0.0"], ["1.1.0"]), + ): + aff, fix = dummy_importer.get_impacted_packages([ap], datetime.now()) + assert len(aff) == 1 and aff[0].version == "1.0.0" + assert len(fix) == 1 and fix[0].version == "1.1.0" + + +def test_get_impacted_packages_unmergeable(dummy_importer): + ap = mock.Mock() + ap.package = PackageURL("npm", None, "abc", None) + ap.affected_version_range = ">=1.0.0" + ap.fixed_version = None + + dummy_importer.get_published_package_versions = lambda package_url, until: ["1.0.0", "1.1.0"] + dummy_importer.resolve_package_versions = lambda **kwargs: ( + [PackageURL("npm", None, "abc", "1.0.0")], + [PackageURL("npm", None, "abc", "1.1.0")], + ) + + with mock.patch( + "vulnerabilities.importer.AffectedPackage.merge", side_effect=UnMergeablePackageError + ): + aff, fix = dummy_importer.get_impacted_packages([ap], datetime.utcnow()) + assert len(aff) == 1 + assert aff[0].version == "1.0.0" diff --git a/vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py b/vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py new file mode 100644 index 000000000..f926058c2 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_vulnrichment_v2_importer.py @@ -0,0 +1,205 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. 
+# + +import json +from pathlib import Path +from unittest.mock import MagicMock +from unittest.mock import patch + +import pytest + +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines.v2_importers.vulnrichment_importer import VulnrichImporterPipeline + + +@pytest.fixture +def mock_vcs_response(): + # Mock the vcs_response from fetch_via_vcs + mock_response = MagicMock() + mock_response.dest_dir = "/mock/repo" + mock_response.delete = MagicMock() + return mock_response + + +@pytest.fixture +def mock_fetch_via_vcs(mock_vcs_response): + with patch( + "vulnerabilities.pipelines.v2_importers.vulnrichment_importer.fetch_via_vcs" + ) as mock: + mock.return_value = mock_vcs_response + yield mock + + +@pytest.fixture +def mock_pathlib(tmp_path): + # Create a mock filesystem with a 'vulns' directory and JSON files + vulns_dir = tmp_path / "vulns" + vulns_dir.mkdir() + + advisory_file = vulns_dir / "CVE-2021-1234.json" + advisory_file.write_text( + json.dumps( + { + "cveMetadata": { + "cveId": "CVE-2021-1234", + "state": "PUBLIC", + "datePublished": "2021-01-01", + }, + "containers": { + "cna": { + "descriptions": [{"lang": "en", "value": "Sample PyPI vulnerability"}], + "metrics": [ + { + "cvssV4_0": { + "baseScore": 7.5, + "vectorString": "AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H", + } + } + ], + "affected": [{"cpes": ["cpe:/a:example:package"]}], + "references": [{"url": "https://example.com", "tags": ["exploit"]}], + } + }, + } + ) + ) + return vulns_dir + + +def test_clone(mock_fetch_via_vcs, mock_vcs_response): + # Test the `clone` method to ensure the repository is cloned correctly + pipeline = VulnrichImporterPipeline() + pipeline.clone() + + mock_fetch_via_vcs.assert_called_once_with(pipeline.repo_url) + assert pipeline.vcs_response == mock_vcs_response + + +def test_advisories_count(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + pipeline = VulnrichImporterPipeline() + pipeline.clone() + count = pipeline.advisories_count() + + assert count == 0 + + +def test_collect_advisories(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + # Mock `vcs_response.dest_dir` to point to the temporary directory + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + # Mock `parse_cve_advisory` to return an AdvisoryData object + with patch( + "vulnerabilities.pipelines.v2_importers.vulnrichment_importer.VulnrichImporterPipeline.parse_cve_advisory" + ) as mock_parse: + mock_parse.return_value = AdvisoryData( + advisory_id="CVE-2021-1234", + summary="Sample PyPI vulnerability", + references_v2=[{"url": "https://example.com"}], + affected_packages=[], + weaknesses=[], + url="https://example.com", + severities=[ + VulnerabilitySeverity( + system="cvssv4", + value=7.5, + scoring_elements="AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H", + ) + ], + ) + + pipeline = VulnrichImporterPipeline() + pipeline.clone() + advisories = list(pipeline.collect_advisories()) + + # Ensure that advisories are parsed correctly + assert len(advisories) == 1 + advisory = advisories[0] + assert advisory.advisory_id == "CVE-2021-1234" + assert advisory.summary == "Sample PyPI vulnerability" + assert advisory.url == "https://example.com" + + +def test_clean_downloads(mock_vcs_response, mock_fetch_via_vcs): + # Test the `clean_downloads` method to ensure the repository is deleted + pipeline = VulnrichImporterPipeline() + pipeline.clone() + pipeline.vcs_response = mock_vcs_response + + 
pipeline.clean_downloads() + + mock_vcs_response.delete.assert_called_once() + + +def test_on_failure(mock_vcs_response, mock_fetch_via_vcs): + pipeline = VulnrichImporterPipeline() + pipeline.clone() + pipeline.vcs_response = mock_vcs_response + + with patch.object(pipeline, "clean_downloads") as mock_clean: + pipeline.on_failure() + + mock_clean.assert_called_once() + + +def test_parse_cve_advisory(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + from vulnerabilities.pipelines.v2_importers.vulnrichment_importer import ( + VulnrichImporterPipeline, + ) + + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + raw_data = { + "cveMetadata": {"cveId": "CVE-2021-1234", "state": "PUBLIC", "datePublished": "2021-01-01"}, + "containers": { + "cna": { + "descriptions": [{"lang": "en", "value": "Sample PyPI vulnerability"}], + "metrics": [ + { + "cvssV4_0": { + "baseScore": 7.5, + "vectorString": "AV:N/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H", + } + } + ], + "affected": [{"cpes": ["cpe:/a:example:package"]}], + "references": [{"url": "https://example.com", "tags": ["exploit"]}], + } + }, + } + advisory_url = "https://github.com/cisagov/vulnrichment/blob/develop/CVE-2021-1234.json" + + pipeline = VulnrichImporterPipeline() + pipeline.clone() + advisory = pipeline.parse_cve_advisory(raw_data, advisory_url) + + assert advisory.advisory_id == "CVE-2021-1234" + assert advisory.summary == "Sample PyPI vulnerability" + assert advisory.url == advisory_url + assert len(advisory.severities) == 1 + assert advisory.severities[0].value == 7.5 + + +def test_collect_advisories_with_invalid_json(mock_pathlib, mock_vcs_response, mock_fetch_via_vcs): + invalid_file = mock_pathlib / "invalid_file.json" + invalid_file.write_text("invalid_json") + + mock_vcs_response.dest_dir = str(mock_pathlib.parent) + + with patch( + "vulnerabilities.pipelines.v2_importers.vulnrichment_importer.VulnrichImporterPipeline.parse_cve_advisory" + ) as mock_parse: + mock_parse.side_effect = json.JSONDecodeError("Invalid JSON", "", 0) + + pipeline = VulnrichImporterPipeline() + pipeline.clone() + with pytest.raises(json.JSONDecodeError): + list(pipeline.collect_advisories()) diff --git a/vulnerabilities/tests/pipes/test_advisory.py b/vulnerabilities/tests/pipes/test_advisory.py index ee29a4b8d..72c477455 100644 --- a/vulnerabilities/tests/pipes/test_advisory.py +++ b/vulnerabilities/tests/pipes/test_advisory.py @@ -9,6 +9,7 @@ from datetime import datetime +import pytest from django.core.exceptions import ValidationError from django.test import TestCase from django.utils import timezone @@ -19,6 +20,14 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage from vulnerabilities.importer import Reference +from vulnerabilities.models import AdvisoryAlias +from vulnerabilities.models import AdvisoryReference +from vulnerabilities.models import AdvisorySeverity +from vulnerabilities.models import AdvisoryWeakness +from vulnerabilities.pipes.advisory import get_or_create_advisory_aliases +from vulnerabilities.pipes.advisory import get_or_create_advisory_references +from vulnerabilities.pipes.advisory import get_or_create_advisory_severities +from vulnerabilities.pipes.advisory import get_or_create_advisory_weaknesses from vulnerabilities.pipes.advisory import get_or_create_aliases from vulnerabilities.pipes.advisory import import_advisory from vulnerabilities.utils import compute_content_id @@ -134,3 +143,85 @@ def test_advisory_insert_no_duplicate_content_id(self): date_collected=date, 
created_by="test_pipeline", ) + + +@pytest.fixture +def advisory_aliases(): + return ["CVE-2021-12345", "GHSA-xyz"] + + +@pytest.fixture +def advisory_references(): + return [ + Reference(reference_id="REF-1", url="https://example.com/advisory/1"), + Reference(reference_id="REF-2", url="https://example.com/advisory/2"), + Reference(reference_id="", url="https://example.com/advisory/3"), + Reference(url="https://example.com/advisory/4"), + ] + + +@pytest.fixture +def advisory_severities(): + class Severity: + def __init__(self, system, value, scoring_elements, published_at=None, url=None): + self.system = system + self.value = value + self.scoring_elements = scoring_elements + self.published_at = published_at + self.url = url + + class System: + def __init__(self, identifier): + self.identifier = identifier + + return [ + Severity( + System("CVSSv3"), + "7.5", + "AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", + timezone.now(), + "https://cvss.example.com", + ), + ] + + +@pytest.fixture +def advisory_weaknesses(): + return [79, 89] + + +@pytest.mark.django_db +def test_get_or_create_advisory_aliases(advisory_aliases): + aliases = get_or_create_advisory_aliases(advisory_aliases) + assert len(aliases) == len(advisory_aliases) + for alias_obj in aliases: + assert isinstance(alias_obj, AdvisoryAlias) + assert alias_obj.alias in advisory_aliases + + +@pytest.mark.django_db +def test_get_or_create_advisory_references(advisory_references): + refs = get_or_create_advisory_references(advisory_references) + assert len(refs) == len(advisory_references) + for ref in refs: + assert isinstance(ref, AdvisoryReference) + assert ref.url in [r.url for r in advisory_references] + + +@pytest.mark.django_db +def test_get_or_create_advisory_severities(advisory_severities): + sevs = get_or_create_advisory_severities(advisory_severities) + assert len(sevs) == len(advisory_severities) + for sev in sevs: + assert isinstance(sev, AdvisorySeverity) + assert sev.scoring_system == advisory_severities[0].system.identifier + assert sev.value == advisory_severities[0].value + + +@pytest.mark.django_db +def test_get_or_create_advisory_weaknesses(advisory_weaknesses): + weaknesses = get_or_create_advisory_weaknesses(advisory_weaknesses) + assert len(weaknesses) == len(advisory_weaknesses) + for w in weaknesses: + assert isinstance(w, AdvisoryWeakness) + assert w.cwe_id in advisory_weaknesses diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index 52104b556..3aec1f56c 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -39,7 +39,7 @@ from univers.version_range import NginxVersionRange from univers.version_range import VersionRange -from aboutcode.hashid import build_vcid # NOQA +from aboutcode.hashid import build_vcid logger = logging.getLogger(__name__) @@ -249,6 +249,11 @@ def fetch_github_graphql_query(graphql_query: dict): response = _get_gh_response(gh_token=gh_token, graphql_query=graphql_query) + if not response: + msg = "No response received from GitHub API." 
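+        # _get_gh_response returns None when the POST itself raised (it logs
+        # and swallows the exception), so fail loudly here instead of letting
+        # the .get() calls below crash on None.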
+        logger.error(msg)
+        raise GraphQLError(msg)
+
     message = response.get("message")
     if message and message == "Bad credentials":
         raise GitHubTokenError(f"Invalid GitHub token: {message}")
@@ -266,7 +271,10 @@ def _get_gh_response(gh_token, graphql_query):
     """
     endpoint = "https://api.github.com/graphql"
     headers = {"Authorization": f"bearer {gh_token}"}
-    return requests.post(endpoint, headers=headers, json=graphql_query).json()
+    try:
+        return requests.post(endpoint, headers=headers, json=graphql_query).json()
+    except Exception as e:
+        logger.error(f"Failed to fetch data from GitHub GraphQL API: {e}")
 
 
 def dedupe(original: List) -> List:
@@ -287,9 +295,10 @@ def get_affected_packages_by_patched_package(
     """
     affected_packages_by_patched_package = defaultdict(list)
     for package in affected_packages:
-        affected_packages_by_patched_package[package.patched_package].append(
-            package.vulnerable_package
-        )
+        if package.vulnerable_package:
+            affected_packages_by_patched_package[package.patched_package].append(
+                package.vulnerable_package
+            )
     return affected_packages_by_patched_package
@@ -595,6 +604,7 @@ def compute_content_id(advisory_data):
     # Normalize fields
     from vulnerabilities.importer import AdvisoryData
+    from vulnerabilities.importer import AdvisoryDataV2
     from vulnerabilities.models import Advisory
 
     if isinstance(advisory_data, Advisory):
@@ -610,20 +620,62 @@
         normalized_data["url"] = advisory_data.url
 
     elif isinstance(advisory_data, AdvisoryData):
-        normalized_data = {
-            "aliases": normalize_list(advisory_data.aliases),
-            "summary": normalize_text(advisory_data.summary),
-            "affected_packages": [
-                pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg
-            ],
-            "references": [
-                ref.to_dict() for ref in normalize_list(advisory_data.references) if ref
-            ],
-            "weaknesses": normalize_list(advisory_data.weaknesses),
-        }
+        if advisory_data.references_v2:
+            normalized_data = {
+                "aliases": normalize_list(advisory_data.aliases),
+                "summary": normalize_text(advisory_data.summary),
+                "affected_packages": [
+                    pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg
+                ],
+                "references": [
+                    ref.to_dict() for ref in normalize_list(advisory_data.references_v2) if ref
+                ],
+                "severities": [
+                    sev.to_dict() for sev in normalize_list(advisory_data.severities) if sev
+                ],
+                "weaknesses": normalize_list(advisory_data.weaknesses),
+            }
+        elif advisory_data.references is not None:
+            normalized_data = {
+                "aliases": normalize_list(advisory_data.aliases),
+                "summary": normalize_text(advisory_data.summary),
+                "affected_packages": [
+                    pkg.to_dict() for pkg in normalize_list(advisory_data.affected_packages) if pkg
+                ],
+                "references": [
+                    ref.to_dict() for ref in normalize_list(advisory_data.references) if ref
+                ],
+                "weaknesses": normalize_list(advisory_data.weaknesses),
+            }
+
     normalized_data["url"] = advisory_data.url
 
     normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True)
     content_id = hashlib.sha256(normalized_json.encode("utf-8")).hexdigest()
 
     return content_id
+
+
+def create_registry(pipelines):
+    """
+    Return a mapping of {pipeline ID: pipeline class} for a list of pipelines.
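+
+    For example, for a hypothetical ``DummyPipeline`` subclass of
+    ``VulnerableCodePipeline`` with ``pipeline_id = "dummy"``::
+
+        registry = create_registry([DummyPipeline])
+        assert registry["dummy"] is DummyPipeline
+
+    An empty or duplicate pipeline ID raises an Exception.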
+ """ + from vulnerabilities.pipelines import VulnerableCodePipeline + + registry = {} + for pipeline in pipelines: + if issubclass(pipeline, VulnerableCodePipeline): + key = pipeline.pipeline_id + else: + # For everything legacy use qualified_name + key = pipeline.qualified_name + + if not key: + raise Exception(f"Pipeline ID can not be empty: {pipeline!r}") + + if key in registry: + raise Exception(f"Duplicate pipeline found: {key}") + + registry[key] = pipeline + + return registry diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index e6fb95a94..71534f9fb 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -29,6 +29,7 @@ from vulnerabilities import models from vulnerabilities.forms import AdminLoginForm +from vulnerabilities.forms import AdvisorySearchForm from vulnerabilities.forms import ApiUserCreationForm from vulnerabilities.forms import PackageSearchForm from vulnerabilities.forms import PipelineSchedulePackageForm @@ -71,6 +72,34 @@ def get_queryset(self, query=None): ) +class PackageSearchV2(ListView): + model = models.PackageV2 + template_name = "packages_v2.html" + ordering = ["type", "namespace", "name", "version"] + paginate_by = PAGE_SIZE + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + request_query = self.request.GET + context["package_search_form"] = PackageSearchForm(request_query) + context["search"] = request_query.get("search") + return context + + def get_queryset(self, query=None): + """ + Return a Package queryset for the ``query``. + Make a best effort approach to find matching packages either based + on exact purl, partial purl or just name and namespace. + """ + query = query or self.request.GET.get("search") or "" + return ( + self.model.objects.search(query) + .with_vulnerability_counts() + .prefetch_related() + .order_by("package_url") + ) + + class VulnerabilitySearch(ListView): model = models.Vulnerability template_name = "vulnerabilities.html" @@ -89,6 +118,24 @@ def get_queryset(self, query=None): return self.model.objects.search(query=query).with_package_counts() +class AdvisorySearch(ListView): + model = models.AdvisoryV2 + template_name = "vulnerabilities.html" + ordering = ["advisory_id"] + paginate_by = PAGE_SIZE + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + request_query = self.request.GET + context["advisory_search_form"] = VulnerabilitySearchForm(request_query) + context["search"] = request_query.get("search") + return context + + def get_queryset(self, query=None): + query = query or self.request.GET.get("search") or "" + return self.model.objects.search(query=query).with_package_counts() + + class PackageDetails(DetailView): model = models.Package template_name = "package_details.html" @@ -130,6 +177,47 @@ def get_object(self, queryset=None): return package +class PackageV2Details(DetailView): + model = models.PackageV2 + template_name = "package_details_v2.html" + slug_url_kwarg = "purl" + slug_field = "purl" + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + package = self.object + context["package"] = package + context["affected_by_advisories"] = package.affected_by_advisories.order_by("advisory_id") + # Ghost package should not fix any vulnerability. 
+ context["fixing_advisories"] = ( + None if package.is_ghost else package.fixing_advisories.order_by("advisory_id") + ) + context["package_search_form"] = PackageSearchForm(self.request.GET) + context["fixed_package_details"] = package.fixed_package_details + + # context["history"] = list(package.history) + return context + + def get_object(self, queryset=None): + if queryset is None: + queryset = self.get_queryset() + + purl = self.kwargs.get(self.slug_url_kwarg) + if purl: + queryset = queryset.for_purl(purl) + else: + cls = self.__class__.__name__ + raise AttributeError( + f"Package details view {cls} must be called with a purl, " f"but got: {purl!r}" + ) + + try: + package = queryset.get() + except queryset.model.DoesNotExist: + raise Http404(f"No Package found for purl: {purl}") + return package + + class VulnerabilityDetails(DetailView): model = models.Vulnerability template_name = "vulnerability_details.html" @@ -193,9 +281,11 @@ def get_context_data(self, **kwargs): for severity in valid_severities: try: - vector_values = SCORING_SYSTEMS[severity.scoring_system].get( - severity.scoring_elements - ) + vector_values_system = SCORING_SYSTEMS[severity.scoring_system] + if not vector_values_system: + logging.error(f"Unknown scoring system: {severity.scoring_system}") + continue + vector_values = vector_values_system.get(severity.scoring_elements) if vector_values: severity_vectors.append({"vector": vector_values, "origin": severity.url}) except ( @@ -232,6 +322,112 @@ def get_context_data(self, **kwargs): return context +class AdvisoryDetails(DetailView): + model = models.AdvisoryV2 + template_name = "advisory_detail.html" + slug_url_kwarg = "id" + slug_field = "id" + + def get_queryset(self): + return ( + super() + .get_queryset() + .select_related() + .prefetch_related( + Prefetch( + "references", + queryset=models.AdvisoryReference.objects.only( + "reference_id", "reference_type", "url" + ), + ), + Prefetch( + "aliases", + queryset=models.AdvisoryAlias.objects.only("alias"), + ), + Prefetch( + "weaknesses", + queryset=models.AdvisoryWeakness.objects.only("cwe_id"), + ), + Prefetch( + "severities", + queryset=models.AdvisorySeverity.objects.only( + "scoring_system", "value", "url", "scoring_elements", "published_at" + ), + ), + Prefetch( + "exploits", + queryset=models.AdvisoryExploit.objects.only( + "data_source", "description", "required_action", "due_date", "notes" + ), + ), + ) + ) + + def get_context_data(self, **kwargs): + """ + Build context with preloaded QuerySets and minimize redundant queries. 
+ """ + context = super().get_context_data(**kwargs) + advisory = self.object + + # Pre-fetch and process data in Python instead of the template + weaknesses_present_in_db = [ + weakness_object + for weakness_object in advisory.weaknesses.all() + if weakness_object.weakness + ] + + valid_severities = self.object.severities.exclude(scoring_system=EPSS.identifier).filter( + scoring_elements__isnull=False, scoring_system__in=SCORING_SYSTEMS.keys() + ) + + severity_vectors = [] + + for severity in valid_severities: + try: + vector_values_system = SCORING_SYSTEMS.get(severity.scoring_system) + if not vector_values_system: + logging.error(f"Unknown scoring system: {severity.scoring_system}") + continue + if vector_values_system.identifier in ["cvssv3.1_qr"]: + continue + vector_values = vector_values_system.get(severity.scoring_elements) + if vector_values: + severity_vectors.append({"vector": vector_values, "origin": severity.url}) + logging.error(f"Error processing scoring elements: {severity.scoring_elements}") + except ( + CVSS2MalformedError, + CVSS3MalformedError, + CVSS4MalformedError, + NotImplementedError, + ): + logging.error(f"CVSSMalformedError for {severity.scoring_elements}") + + epss_severity = advisory.severities.filter(scoring_system="epss").first() + epss_data = None + if epss_severity: + epss_data = { + "percentile": epss_severity.scoring_elements, + "score": epss_severity.value, + "published_at": epss_severity.published_at, + } + print(severity_vectors) + context.update( + { + "advisory": advisory, + "severities": list(advisory.severities.all()), + "severity_vectors": severity_vectors, + "references": list(advisory.references.all()), + "aliases": list(advisory.aliases.all()), + "weaknesses": weaknesses_present_in_db, + "status": advisory.get_status_label, + # "history": advisory.history, + "epss_data": epss_data, + } + ) + return context + + class HomePage(View): template_name = "index.html" @@ -245,6 +441,19 @@ def get(self, request): return render(request=request, template_name=self.template_name, context=context) +class HomePageV2(View): + template_name = "index_v2.html" + + def get(self, request): + request_query = request.GET + context = { + "vulnerability_search_form": AdvisorySearchForm(request_query), + "package_search_form": PackageSearchForm(request_query), + "release_url": f"https://github.com/aboutcode-org/vulnerablecode/releases/tag/v{VULNERABLECODE_VERSION}", + } + return render(request=request, template_name=self.template_name, context=context) + + email_template = """ Dear VulnerableCode.io user: @@ -353,6 +562,58 @@ def get_context_data(self, **kwargs): return context +class AdvisoryPackagesDetails(DetailView): + """ + View to display all packages affected by or fixing a specific vulnerability. + URL: /advisories/{id}/packages + """ + + model = models.AdvisoryV2 + template_name = "advisory_package_details.html" + slug_url_kwarg = "id" + slug_field = "id" + + def get_queryset(self): + """ + Prefetch and optimize related data to minimize database hits. + """ + return ( + super() + .get_queryset() + .prefetch_related( + Prefetch( + "affecting_packages", + queryset=models.PackageV2.objects.only("type", "namespace", "name", "version"), + ), + Prefetch( + "fixed_by_packages", + queryset=models.PackageV2.objects.only("type", "namespace", "name", "version"), + ), + ) + ) + + def get_context_data(self, **kwargs): + """ + Build context with preloaded QuerySets and minimize redundant queries. 
+ """ + context = super().get_context_data(**kwargs) + advisory = self.object + ( + sorted_fixed_by_packages, + sorted_affected_packages, + all_affected_fixed_by_matches, + ) = advisory.aggregate_fixed_and_affected_packages() + context.update( + { + "affected_packages": sorted_affected_packages, + "fixed_by_packages": sorted_fixed_by_packages, + "all_affected_fixed_by_matches": all_affected_fixed_by_matches, + "advisory": advisory, + } + ) + return context + + class PipelineScheduleListView(ListView, FormMixin): model = PipelineSchedule context_object_name = "schedule_list" diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index 45a03a28d..245b8e917 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -20,15 +20,21 @@ from vulnerabilities.api import CPEViewSet from vulnerabilities.api import PackageViewSet from vulnerabilities.api import VulnerabilityViewSet +from vulnerabilities.api_v2 import AdvisoriesPackageV2ViewSet from vulnerabilities.api_v2 import CodeFixViewSet from vulnerabilities.api_v2 import PackageV2ViewSet from vulnerabilities.api_v2 import PipelineScheduleV2ViewSet from vulnerabilities.api_v2 import VulnerabilityV2ViewSet from vulnerabilities.views import AdminLoginView +from vulnerabilities.views import AdvisoryDetails +from vulnerabilities.views import AdvisoryPackagesDetails from vulnerabilities.views import ApiUserCreateView from vulnerabilities.views import HomePage +from vulnerabilities.views import HomePageV2 from vulnerabilities.views import PackageDetails from vulnerabilities.views import PackageSearch +from vulnerabilities.views import PackageSearchV2 +from vulnerabilities.views import PackageV2Details from vulnerabilities.views import PipelineRunDetailView from vulnerabilities.views import PipelineRunListView from vulnerabilities.views import PipelineScheduleListView @@ -55,6 +61,9 @@ def __init__(self, *args, **kwargs): api_v2_router = OptionalSlashRouter() api_v2_router.register("packages", PackageV2ViewSet, basename="package-v2") +api_v2_router.register( + "advisories-packages", AdvisoriesPackageV2ViewSet, basename="advisories-package-v2" +) api_v2_router.register("vulnerabilities", VulnerabilityV2ViewSet, basename="vulnerability-v2") api_v2_router.register("codefixes", CodeFixViewSet, basename="codefix") api_v2_router.register("schedule", PipelineScheduleV2ViewSet, basename="schedule") @@ -87,16 +96,36 @@ def __init__(self, *args, **kwargs): PipelineRunDetailView.as_view(), name="run-details", ), + path( + "v2", + HomePageV2.as_view(), + name="home", + ), + path( + "advisories/", + AdvisoryDetails.as_view(), + name="advisory_details", + ), path( "packages/search/", PackageSearch.as_view(), name="package_search", ), + path( + "packages/v2/search/", + PackageSearchV2.as_view(), + name="package_search_v2", + ), re_path( r"^packages/(?Ppkg:.+)$", PackageDetails.as_view(), name="package_details", ), + re_path( + r"^packages/v2/(?Ppkg:.+)$", + PackageV2Details.as_view(), + name="package_details_v2", + ), path( "vulnerabilities/search/", VulnerabilitySearch.as_view(), @@ -112,6 +141,11 @@ def __init__(self, *args, **kwargs): VulnerabilityPackagesDetails.as_view(), name="vulnerability_package_details", ), + path( + "advisories//packages", + AdvisoryPackagesDetails.as_view(), + name="advisory_package_details", + ), path( "api/", include(api_router.urls),