Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions SOURCES.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
.. _anchore:

Anchore NVD Overrides
---------------------
Anchore provides overrides for NVD data to improve accuracy. This importer fetches data from their
`nvd-data-overrides <https://github.com/anchore/nvd-data-overrides>`_ repository.

+----------------+------------------------------------------------------------------------------------------------------+----------------------------------------------------+
|Importer Name | Data Source |Ecosystems Covered |
+================+======================================================================================================+====================================================+
Expand Down
3 changes: 2 additions & 1 deletion vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from vulnerabilities.importers import apache_httpd
from vulnerabilities.importers import apache_kafka
from vulnerabilities.importers import apache_tomcat
Expand Down Expand Up @@ -35,6 +34,7 @@
from vulnerabilities.importers import xen
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.pipelines import alpine_linux_importer
from vulnerabilities.pipelines import anchore_importer
from vulnerabilities.pipelines import github_importer
from vulnerabilities.pipelines import gitlab_importer
from vulnerabilities.pipelines import nginx_importer
Expand Down Expand Up @@ -78,6 +78,7 @@
nvd_importer.NVDImporterPipeline,
pysec_importer.PyPIImporterPipeline,
alpine_linux_importer.AlpineLinuxImporterPipeline,
anchore_importer.AnchoreImporterPipeline,
]

IMPORTERS_REGISTRY = {
Expand Down
87 changes: 87 additions & 0 deletions vulnerabilities/pipelines/anchore_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

from datetime import datetime
from typing import Iterable

import requests
import yaml
from packageurl import PackageURL
from univers.versions import SemverVersion

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Reference
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline


class AnchoreImporterPipeline(VulnerableCodeBaseImporterPipeline):
    """
    Collect advisories from Anchore's NVD overrides.

    The overrides document is downloaded at most once per pipeline instance
    and every usable entry is converted into an ``AdvisoryData``.
    """

    pipeline_id = "anchore_importer"
    root_url = "https://github.com/anchore/nvd-data-overrides"
    license_url = "https://github.com/anchore/nvd-data-overrides/blob/main/LICENSE"
    # License of Anchore's data.
    # NOTE(review): a maintainer suggested the lowercase form "cc0-1.0" in the
    # PR thread; the value is left unchanged here -- confirm which spelling
    # the project expects.
    spdx_license_expression = "CC0-1.0"
    importer_name = "Anchore NVD Overrides Importer"

    @classmethod
    def steps(cls):
        """Return the pipeline steps in the order they must run."""
        return (
            cls.collect_and_store_advisories,
            cls.import_new_advisories,
        )

    def advisories_count(self) -> int:
        """Return the number of entries in the overrides document."""
        return len(self.fetch_data())

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """
        Yield one ``AdvisoryData`` per usable entry of the overrides document.

        Entries that ``parse_advisory_data`` rejects (it returns ``None`` when
        a required field is missing) are skipped instead of being yielded.
        """
        for entry in self.fetch_data():
            advisory = self.parse_advisory_data(entry)
            if advisory is not None:
                yield advisory

    def fetch_data(self):
        """
        Fetch Anchore's NVD overrides from their GitHub repository.

        The parsed document is cached on the instance so that
        ``advisories_count`` and ``collect_advisories`` share one download.
        An empty document is returned as an empty list.

        Raises ``requests.HTTPError`` on a non-success HTTP status.
        """
        cached = getattr(self, "_overrides_data", None)
        if cached is not None:
            return cached

        # NOTE(review): confirm that this file actually exists upstream.
        url = "https://raw.githubusercontent.com/anchore/nvd-data-overrides/main/overrides.yaml"
        # Without a timeout a stalled connection would block the pipeline forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        # NOTE(review): a maintainer asked for saneyaml here instead of yaml;
        # that swap needs a new import and is not done in this block.
        # safe_load() returns None for an empty document; normalize to a list
        # so callers can always use len() and iterate.
        self._overrides_data = yaml.safe_load(response.text) or []
        return self._overrides_data

    def parse_advisory_data(self, raw_data) -> AdvisoryData:
        """
        Parse a single advisory entry into an ``AdvisoryData`` object.

        Return ``None`` when any of the required keys ``cve_id``,
        ``package_name`` or ``affected_versions`` is missing from ``raw_data``.
        """
        required_keys = ("cve_id", "package_name", "affected_versions")
        if any(key not in raw_data for key in required_keys):
            return None

        purl = PackageURL(type="generic", name=raw_data["package_name"])

        # The version range is passed through as the raw value from the entry.
        # NOTE(review): presumably AffectedPackage expects a parsed version
        # range object rather than a string -- confirm against its definition.
        affected_version_range = raw_data["affected_versions"]

        fixed_version = None
        if raw_data.get("fixed_version"):
            fixed_version = SemverVersion(raw_data["fixed_version"])

        affected_package = AffectedPackage(
            package=purl,
            affected_version_range=affected_version_range,
            fixed_version=fixed_version,
        )

        # "references" may be absent or explicitly null; treat both as empty.
        reference_urls = raw_data.get("references") or []
        references = [Reference(url=url) for url in reference_urls if url]

        date_published = None
        if raw_data.get("published_date"):
            date_published = datetime.strptime(raw_data["published_date"], "%Y-%m-%d")

        return AdvisoryData(
            aliases=[raw_data["cve_id"]],
            summary=raw_data.get("description", ""),
            affected_packages=[affected_package],
            references=references,
            date_published=date_published,
        )