29 commits
9ee727c
Add new content ID function
TG1999 Jan 29, 2025
b6e2895
Add tests and address review comments
TG1999 Feb 4, 2025
6e959ee
New content ID pipeline
TG1999 Feb 6, 2025
309f7fd
New content ID pipeline
TG1999 Feb 6, 2025
4083a5b
Address review comments
TG1999 Feb 12, 2025
61a9ed0
Address review comments
TG1999 Feb 12, 2025
8439670
Address review comments
TG1999 Feb 12, 2025
4d892ec
Address review comments
TG1999 Feb 12, 2025
2fb7da1
Address review comments
TG1999 Feb 13, 2025
e0868e9
Address review comments
TG1999 Feb 13, 2025
72d0d94
Address review comments
TG1999 Feb 13, 2025
1c18d9e
Address review comments
TG1999 Feb 13, 2025
c8e3046
Remove unique content ID from unique together
TG1999 Feb 14, 2025
8f35101
Remove unique together from advisories
TG1999 Feb 14, 2025
e596ba9
Fix migrations
TG1999 Feb 14, 2025
7e7f846
Fix pipeline errors
TG1999 Feb 14, 2025
9bb565b
Add filter for fast iteration
TG1999 Feb 15, 2025
80500f1
Increase batch size
TG1999 Feb 15, 2025
161589d
Fix error
TG1999 Feb 15, 2025
f07db19
Add logs
TG1999 Feb 15, 2025
c48348c
Separate pipelines for recomputing and removing duplicates
TG1999 Feb 24, 2025
0fd24ce
Fix type errors
TG1999 Feb 24, 2025
d345e52
Fix formatting errors
TG1999 Feb 24, 2025
08bc72b
Fix tests
TG1999 Feb 26, 2025
7c005fc
Fixes
TG1999 Feb 26, 2025
2a2bbee
Add logs for get_advisory_batches
TG1999 Feb 26, 2025
35e1042
Add logs and run function in a single process
TG1999 Feb 26, 2025
b1b40f5
Change date_imported to date_collected for comparison
TG1999 Feb 26, 2025
e36500b
Change batch size to 200 and enable multiprocessing
TG1999 Feb 26, 2025
63 changes: 46 additions & 17 deletions vulnerabilities/importer.py
@@ -9,6 +9,7 @@

import dataclasses
import datetime
import functools
import logging
import os
import shutil
@@ -46,7 +47,8 @@
logger = logging.getLogger(__name__)


@dataclasses.dataclass(order=True)
@dataclasses.dataclass(eq=True)
@functools.total_ordering
class VulnerabilitySeverity:
# FIXME: this should be named scoring_system, like in the model
system: ScoringSystem
@@ -55,15 +57,26 @@ class VulnerabilitySeverity:
published_at: Optional[datetime.datetime] = None

def to_dict(self):
published_at_dict = (
{"published_at": self.published_at.isoformat()} if self.published_at else {}
)
return {
data = {
"system": self.system.identifier,
"value": self.value,
"scoring_elements": self.scoring_elements,
**published_at_dict,
}
if self.published_at:
if isinstance(self.published_at, datetime.datetime):
data["published_at"] = self.published_at.isoformat()
else:
data["published_at"] = self.published_at
return data

def __lt__(self, other):
if not isinstance(other, VulnerabilitySeverity):
return NotImplemented
return str(self._cmp_key()) < str(other._cmp_key())

# TODO: Add cache
def _cmp_key(self):
return (self.system.identifier, self.value, self.scoring_elements, self.published_at)

@classmethod
def from_dict(cls, severity: dict):
@@ -79,7 +92,8 @@ def from_dict(cls, severity: dict):
)


@dataclasses.dataclass(order=True)
@dataclasses.dataclass(eq=True)
@functools.total_ordering
class Reference:
reference_id: str = ""
reference_type: str = ""
@@ -90,21 +104,22 @@ def __post_init__(self):
if not self.url:
raise TypeError("Reference must have a url")

def normalized(self):
severities = sorted(self.severities)
return Reference(
reference_id=self.reference_id,
url=self.url,
severities=severities,
reference_type=self.reference_type,
)
def __lt__(self, other):
if not isinstance(other, Reference):
return NotImplemented
return str(self._cmp_key()) < str(other._cmp_key())

# TODO: Add cache
def _cmp_key(self):
return (self.reference_id, self.reference_type, self.url, tuple(self.severities))

def to_dict(self):
"""Return a normalized dictionary representation"""
return {
"reference_id": self.reference_id,
"reference_type": self.reference_type,
"url": self.url,
"severities": [severity.to_dict() for severity in self.severities],
"severities": [severity.to_dict() for severity in sorted(self.severities)],
}

@classmethod
@@ -140,7 +155,8 @@ class NoAffectedPackages(Exception):
"""


@dataclasses.dataclass(order=True, frozen=True)
@functools.total_ordering
@dataclasses.dataclass(eq=True)
class AffectedPackage:
"""
Relate a Package URL with a range of affected versions and a fixed version.
@@ -170,6 +186,19 @@ def get_fixed_purl(self):
raise ValueError(f"Affected Package {self.package!r} does not have a fixed version")
return update_purl_version(purl=self.package, version=str(self.fixed_version))

def __lt__(self, other):
if not isinstance(other, AffectedPackage):
return NotImplemented
return str(self._cmp_key()) < str(other._cmp_key())

# TODO: Add cache
def _cmp_key(self):
return (
str(self.package),
str(self.affected_version_range or ""),
str(self.fixed_version or ""),
)

@classmethod
def merge(
cls, affected_packages: Iterable
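The importer.py changes above swap `dataclasses.dataclass(order=True)` for `eq=True` plus `functools.total_ordering` with an explicit `__lt__` built on a `_cmp_key()` tuple that is compared as a string. Plain `order=True` compares fields positionally and raises `TypeError` when a field mixes unorderable values such as `None` and `datetime`; stringifying the key trades type-aware ordering for a comparison that always succeeds, which is what a deterministic sort needs. A minimal sketch of the pattern, with a hypothetical `Item` class standing in for the three dataclasses:

```python
import functools
from dataclasses import dataclass
from typing import Optional

# Minimal sketch of the comparison pattern from the diff above.
# "Item" and its fields are hypothetical; only the pattern comes from the PR.
@functools.total_ordering
@dataclass(eq=True)
class Item:
    name: str = ""
    version: Optional[str] = None  # None vs. str would break plain tuple comparison

    def _cmp_key(self):
        return (self.name, self.version)

    def __lt__(self, other):
        if not isinstance(other, Item):
            return NotImplemented
        # str() of the key tuples avoids TypeError on mixed None/str fields,
        # at the cost of lexicographic rather than type-aware ordering.
        return str(self._cmp_key()) < str(other._cmp_key())


# total_ordering derives <=, >, and >= from __eq__ and __lt__:
print(sorted([Item("b"), Item("a", "1.0")]))  # Item "a" sorts before Item "b"
```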
4 changes: 4 additions & 0 deletions vulnerabilities/improvers/__init__.py
@@ -18,6 +18,8 @@
from vulnerabilities.pipelines import enhance_with_kev
from vulnerabilities.pipelines import enhance_with_metasploit
from vulnerabilities.pipelines import flag_ghost_packages
from vulnerabilities.pipelines import recompute_content_ids
from vulnerabilities.pipelines import remove_duplicate_advisories

IMPROVERS_REGISTRY = [
valid_versions.GitHubBasicImprover,
@@ -45,6 +47,8 @@
compute_package_version_rank.ComputeVersionRankPipeline,
collect_commits.CollectFixCommitsPipeline,
add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline,
recompute_content_ids.RecomputeContentIDPipeline,
remove_duplicate_advisories.RemoveDuplicateAdvisoriesPipeline,
]

IMPROVERS_REGISTRY = {
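The truncated `IMPROVERS_REGISTRY = {` at the end of this hunk re-keys the list into a mapping so the two new pipelines can be looked up by name. The keying attribute is not visible in this diff; a hedged guess at how the comprehension completes:

```python
# Hedged sketch: the dict comprehension is cut off in the diff above, and the
# key attribute (pipeline_id vs. a qualified-name property) is an assumption.
IMPROVERS_REGISTRY = {
    improver.pipeline_id: improver for improver in IMPROVERS_REGISTRY
}
```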
23 changes: 23 additions & 0 deletions vulnerabilities/migrations/0089_alter_advisory_unique_content_id.py
@@ -0,0 +1,23 @@
# Generated by Django 4.2.16 on 2025-02-12 13:41

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("vulnerabilities", "0088_fix_alpine_purl_type"),
]

operations = [
migrations.AlterField(
model_name="advisory",
name="unique_content_id",
field=models.CharField(
blank=True,
db_index=True,
help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex",
max_length=64,
),
),
]
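Migration 0089 widens `unique_content_id` from 32 to 64 characters because the content id moves from an MD5 hex digest (32 characters) to a SHA-256 hex digest (64 characters: 32 bytes, two hex digits each). A quick check of both widths:

```python
import hashlib

content = b"example advisory content"
print(len(hashlib.md5(content).hexdigest()))     # 32 - the old column width
print(len(hashlib.sha256(content).hexdigest()))  # 64 - the new column width
```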
17 changes: 17 additions & 0 deletions vulnerabilities/migrations/0090_alter_advisory_unique_together.py
@@ -0,0 +1,17 @@
# Generated by Django 4.2.16 on 2025-02-14 16:27

from django.db import migrations


class Migration(migrations.Migration):

dependencies = [
("vulnerabilities", "0089_alter_advisory_unique_content_id"),
]

operations = [
migrations.AlterUniqueTogether(
name="advisory",
unique_together=set(),
),
]
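Migration 0090 drops the `unique_together` constraint, so the database no longer rejects duplicate advisory rows; per the commit history, deduplication moves into the new `RemoveDuplicateAdvisoriesPipeline`, which compares rows by `date_collected`. That pipeline's code is not part of this diff; a hedged sketch of the general approach:

```python
from django.db.models import Count

from vulnerabilities.models import Advisory


# Hedged sketch only: the real RemoveDuplicateAdvisoriesPipeline is not shown
# in this diff; the batching and logging added in the commits are omitted.
def remove_duplicate_advisories():
    duplicated_ids = (
        Advisory.objects.values("unique_content_id")
        .annotate(count=Count("id"))
        .filter(count__gt=1)
        .values_list("unique_content_id", flat=True)
    )
    for content_id in duplicated_ids:
        duplicates = Advisory.objects.filter(unique_content_id=content_id)
        # Keep the earliest-collected row and delete the rest (an assumption:
        # the PR only says date_collected is the comparison field).
        keep = duplicates.order_by("date_collected").first()
        duplicates.exclude(pk=keep.pk).delete()
```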
19 changes: 7 additions & 12 deletions vulnerabilities/models.py
@@ -53,6 +53,7 @@
from vulnerabilities import utils
from vulnerabilities.severity_systems import EPSS
from vulnerabilities.severity_systems import SCORING_SYSTEMS
from vulnerabilities.utils import compute_content_id
from vulnerabilities.utils import normalize_purl
from vulnerabilities.utils import purl_to_dict
from vulnerablecode import __version__ as VULNERABLECODE_VERSION
@@ -1315,8 +1316,10 @@ class Advisory(models.Model):
"""

unique_content_id = models.CharField(
max_length=32,
max_length=64,
db_index=True,
blank=True,
help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex",
)
aliases = models.JSONField(blank=True, default=list, help_text="A list of alias strings")
summary = models.TextField(
@@ -1353,20 +1356,12 @@ class Advisory(models.Model):
objects = AdvisoryQuerySet.as_manager()

class Meta:
unique_together = ["aliases", "unique_content_id", "date_published", "url"]
ordering = ["aliases", "date_published", "unique_content_id"]

def save(self, *args, **kwargs):
checksum = hashlib.md5()
for field in (
self.summary,
self.affected_packages,
self.references,
self.weaknesses,
):
value = json.dumps(field, separators=(",", ":")).encode("utf-8")
checksum.update(value)
self.unique_content_id = checksum.hexdigest()
advisory_data = self.to_advisory_data()
if not self.unique_content_id:
self.unique_content_id = compute_content_id(advisory_data, include_metadata=False)
super().save(*args, **kwargs)

def to_advisory_data(self) -> "AdvisoryData":
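`save()` now computes the content id only when it is unset, and delegates to `compute_content_id(advisory_data, include_metadata=False)` from `vulnerabilities.utils` instead of the old inline MD5 over raw `json.dumps` of each field. The helper's body is outside this diff; given the 64-character help text, it presumably canonicalizes the advisory data and hashes it with SHA-256. A hedged sketch:

```python
import hashlib
import json


# Hedged sketch: the real compute_content_id lives in vulnerabilities/utils.py
# and is not shown in this diff. The exact serialization, and what
# include_metadata adds, are assumptions.
def compute_content_id(advisory_data, include_metadata=False):
    content = {
        "summary": advisory_data.summary,
        "affected_packages": [p.to_dict() for p in sorted(advisory_data.affected_packages)],
        "references": [r.to_dict() for r in sorted(advisory_data.references)],
        "weaknesses": advisory_data.weaknesses,
    }
    if include_metadata:
        content["aliases"] = advisory_data.aliases
    canonical = json.dumps(content, separators=(",", ":"), sort_keys=True)
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()  # 64 hex characters
```

Read this way, the `__lt__`/`_cmp_key` additions in importer.py are what make the `sorted(...)` calls deterministic, so two advisories with identical content listed in a different order hash to the same id.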