From 706b4442367b0a16d930532c1ac7248e4477cdda Mon Sep 17 00:00:00 2001 From: jasinner Date: Fri, 22 May 2026 10:34:11 +1000 Subject: [PATCH] Add SBOM compliance checker script (SPDX/CycloneDX, scores 1-10) --- sbom/validate_sbom.py | 976 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 976 insertions(+) create mode 100644 sbom/validate_sbom.py diff --git a/sbom/validate_sbom.py b/sbom/validate_sbom.py new file mode 100644 index 0000000..d38e364 --- /dev/null +++ b/sbom/validate_sbom.py @@ -0,0 +1,976 @@ +#!/usr/bin/env python3 +""" +SBOM Compliance Checker + +Validates an SBOM (SPDX 2.3 or CycloneDX 1.6) against Red Hat's security-data-guidelines: + - https://github.com/RedHatProductSecurity/security-data-guidelines/blob/main/docs/sbom.md + - https://github.com/RedHatProductSecurity/security-data-guidelines/blob/main/docs/purl.md + +Produces a compliance score from 1 (very poor) to 10 (fully compliant) and lists all +deviations with their severity. + +Usage: + python3 validate_sbom.py # .json or .json.gz + +Exit codes: + 0 - Score >= 8 (compliant) + 1 - Score 5-7 (partial compliance) + 2 - Score < 5 (non-compliant) + 3 - File could not be parsed +""" + +import argparse +import gzip +import json +import re +import sys +from dataclasses import dataclass, field +from typing import Any +from urllib.parse import parse_qs, urlparse + + +# --------------------------------------------------------------------------- +# Finding data model +# --------------------------------------------------------------------------- + +CRITICAL = "CRITICAL" +MAJOR = "MAJOR" +MINOR = "MINOR" +INFO = "INFO" + +_SEVERITY_ORDER = {CRITICAL: 0, MAJOR: 1, MINOR: 2, INFO: 3} + +_SEVERITY_COLORS = { + CRITICAL: "\033[1;31m", # bold red + MAJOR: "\033[33m", # yellow + MINOR: "\033[36m", # cyan + INFO: "\033[90m", # dark gray + "RESET": "\033[0m", + "BOLD": "\033[1m", + "GREEN": "\033[32m", + "RED": "\033[31m", +} + + +@dataclass +class Finding: + severity: str + category: str + message: str + location: str = "" + count: int = 1 # how many times this finding occurred (for aggregated display) + sample_locations: list[str] = field(default_factory=list) + + def __str__(self) -> str: + loc = f" [{self.location}]" if self.location else "" + return f"[{self.severity}] {self.category}{loc}: {self.message}" + + +@dataclass +class ValidationResult: + findings: list[Finding] = field(default_factory=list) + score: float = 10.0 + format: str = "unknown" + name: str = "" + parse_failed: bool = False + # Maps (severity, category, message) -> Finding index for deduplication + _dedup: dict = field(default_factory=dict, repr=False) + + def add(self, severity: str, category: str, message: str, location: str = "") -> None: + key = (severity, category, message) + if key in self._dedup: + f = self.findings[self._dedup[key]] + f.count += 1 + if len(f.sample_locations) < 3 and location and location not in f.sample_locations: + f.sample_locations.append(location) + else: + f = Finding(severity, category, message, location, count=1, + sample_locations=[location] if location else []) + self._dedup[key] = len(self.findings) + self.findings.append(f) + + def counts(self) -> dict[str, int]: + """Returns the total count of each severity (including repeated instances).""" + counts: dict[str, int] = {CRITICAL: 0, MAJOR: 0, MINOR: 0, INFO: 0} + for f in self.findings: + counts[f.severity] = counts.get(f.severity, 0) + f.count + return counts + + def unique_counts(self) -> dict[str, int]: + """Returns the count of unique finding types per severity.""" + counts: dict[str, int] = {CRITICAL: 0, MAJOR: 0, MINOR: 0, INFO: 0} + for f in self.findings: + counts[f.severity] = counts.get(f.severity, 0) + 1 + return counts + + def compute_score(self) -> int: + """ + Score based on unique finding types (not total occurrences), so that + a single systemic issue doesn't unfairly dominate the score: + Score = 10 - (unique_critical * 2.5) - (unique_major * 0.75) - (unique_minor * 0.2) + But also penalise pervasiveness: if a finding affects > 20% of packages, add extra penalty. + Floored at 1, rounded to nearest integer. + """ + uc = self.unique_counts() + raw = 10.0 - (uc[CRITICAL] * 2.5) - (uc[MAJOR] * 0.75) - (uc[MINOR] * 0.2) + # Extra penalty if there are any critical/major findings with very high counts + for f in self.findings: + if f.severity in (CRITICAL, MAJOR) and f.count > 50: + raw -= 0.5 # one-time extra for pervasive systemic issues + break + return max(1, round(raw)) + + +# --------------------------------------------------------------------------- +# PURL parsing and validation helpers +# --------------------------------------------------------------------------- + +_PURL_BASE_RE = re.compile( + r"^pkg:" + r"(?P[a-zA-Z][a-zA-Z0-9.+\-]*)/" + r"(?:(?P[^/]+)/)?" + r"(?P[^@#]+)" + r"(?:@(?P.+))?$" +) + +_EPOCH_IN_VERSION_RE = re.compile(r"^\d+:") + + +def parse_purl(purl: str) -> dict[str, Any] | None: + """ + Return a dict of purl components or None if invalid. + + Splits qualifiers and subpath before applying the base regex so that URLs + embedded inside qualifier values (e.g. download_url=https://...) don't + confuse the namespace/name/version parsing. + """ + # Strip scheme prefix + if not purl.startswith("pkg:"): + return None + + # Separate subpath + subpath = "" + if "#" in purl: + purl, subpath = purl.rsplit("#", 1) + + # Separate qualifiers + qualifiers_str = "" + if "?" in purl: + purl, qualifiers_str = purl.split("?", 1) + + m = _PURL_BASE_RE.match(purl) + if not m: + return None + + result = m.groupdict() + result["subpath"] = subpath + result["qualifiers"] = qualifiers_str + + # Parse qualifiers into a dict + quals: dict[str, str] = {} + if qualifiers_str: + for pair in qualifiers_str.split("&"): + if "=" in pair: + k, v = pair.split("=", 1) + quals[k.strip()] = v.strip() + result["qualifiers_dict"] = quals + return result + + +def validate_purl(purl: str, result: ValidationResult, location: str) -> dict[str, Any] | None: + """ + Validate a purl string against Red Hat guidelines. + Returns parsed purl dict or None if fundamentally invalid. + + Finding messages are written as generic descriptions so that identical issues + across many packages get deduplicated — the purl/location provides the example. + """ + parsed = parse_purl(purl) + if not parsed: + result.add(CRITICAL, "PURL", "Cannot parse purl (invalid format)", f"{location} — {purl!r}") + return None + + purl_type = parsed["type"].lower() + qualifiers = parsed["qualifiers_dict"] + + # ---- RPM ---- + if purl_type == "rpm": + namespace = (parsed.get("namespace") or "").lower() + + if namespace not in ("redhat", "fedora"): + result.add( + MAJOR, + "PURL/RPM", + f"RPM purl namespace should be 'redhat' (or 'fedora' for Fedora packages), got {namespace!r}", + location, + ) + elif namespace == "fedora": + result.add( + MAJOR, + "PURL/RPM", + "RPM purl uses 'fedora' namespace — expected 'redhat' for Red Hat-distributed packages", + location, + ) + + version = parsed.get("version") or "" + if _EPOCH_IN_VERSION_RE.match(version): + result.add( + MAJOR, + "PURL/RPM", + "Epoch must use the 'epoch' qualifier, not be embedded in the version string " + "(e.g. use '?epoch=1' instead of '@1:version')", + location, + ) + + if "arch" not in qualifiers: + result.add( + MAJOR, + "PURL/RPM", + "RPM purl is missing the required 'arch' qualifier", + location, + ) + + if "repository_url" in qualifiers: + result.add( + MINOR, + "PURL/RPM", + "RPM purl uses 'repository_url' — Red Hat guidelines recommend 'repository_id' instead", + location, + ) + + if "distro" in qualifiers: + result.add( + MINOR, + "PURL/RPM", + "RPM purl uses 'distro' qualifier — Red Hat guidelines recommend omitting it", + location, + ) + + # ---- OCI ---- + elif purl_type == "oci": + if parsed.get("namespace"): + result.add( + MINOR, + "PURL/OCI", + "OCI purl should not have a namespace component", + location, + ) + + if "repository_url" not in qualifiers: + result.add( + MINOR, + "PURL/OCI", + "OCI purl is missing 'repository_url' qualifier — required for release SBOMs to " + "identify the container registry (may be absent in build-time SBOMs)", + location, + ) + + if "tag" not in qualifiers: + result.add( + MINOR, + "PURL/OCI", + "OCI purl is missing 'tag' qualifier (recommended for unique identification)", + location, + ) + + version = parsed.get("version") or "" + if not version or not version.startswith("sha256"): + result.add( + MAJOR, + "PURL/OCI", + "OCI purl version should be a SHA256 digest (e.g. sha256%3Aabc...)", + location, + ) + elif "%" not in version and ":" in version: + result.add( + MINOR, + "PURL/OCI", + "OCI purl digest colon should be percent-encoded as %3A", + location, + ) + + # ---- Maven ---- + elif purl_type == "maven": + if "repository_url" not in qualifiers: + result.add( + MINOR, + "PURL/Maven", + "Maven purl is missing 'repository_url' qualifier — should point to " + "https://maven.repository.redhat.com/ga/", + location, + ) + elif "maven.repository.redhat.com" not in qualifiers.get("repository_url", ""): + result.add( + MINOR, + "PURL/Maven", + "Maven purl 'repository_url' should point to the Red Hat Maven repo " + "(maven.repository.redhat.com)", + location, + ) + + # ---- Generic ---- + elif purl_type == "generic": + if "download_url" not in qualifiers: + result.add( + MAJOR, + "PURL/Generic", + "Generic purl must include a 'download_url' qualifier with the exact artifact URL", + location, + ) + + return parsed + + +# --------------------------------------------------------------------------- +# SPDX 2.3 validation +# --------------------------------------------------------------------------- + +_UTC_TIMESTAMP_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$") +_RH_NAMESPACES = ( + "https://www.redhat.com/", + "https://security.access.redhat.com/data/sbom/", +) + + +def _pkg_location(pkg: dict) -> str: + spdxid = pkg.get("SPDXID", "?") + name = pkg.get("name", "?") + version = pkg.get("versionInfo", "") + return f"{spdxid} ({name}{'@' + version if version else ''})" + + +def validate_spdx(data: dict, result: ValidationResult) -> None: + result.format = "SPDX" + + # ---- Document-level mandatory fields ---- + if data.get("spdxVersion") != "SPDX-2.3": + result.add( + CRITICAL, + "Document", + f"spdxVersion must be 'SPDX-2.3', got {data.get('spdxVersion')!r}", + ) + + if data.get("dataLicense") != "CC0-1.0": + result.add( + CRITICAL, + "Document", + f"dataLicense must be 'CC0-1.0', got {data.get('dataLicense')!r}", + ) + + if data.get("SPDXID") != "SPDXRef-DOCUMENT": + result.add( + CRITICAL, + "Document", + f"SPDXID must be 'SPDXRef-DOCUMENT', got {data.get('SPDXID')!r}", + ) + + name = data.get("name", "") + result.name = name + if not name: + result.add(MAJOR, "Document", "Document 'name' field is missing or empty") + + ns = data.get("documentNamespace", "") + if not ns: + result.add(CRITICAL, "Document", "documentNamespace is missing") + else: + try: + parsed_ns = urlparse(ns) + if not parsed_ns.scheme or not parsed_ns.netloc: + result.add(CRITICAL, "Document", f"documentNamespace is not a valid URI: {ns!r}") + elif not any(ns.startswith(rh) for rh in _RH_NAMESPACES): + result.add( + MAJOR, + "Document", + f"documentNamespace does not use a known Red Hat namespace " + f"(expected one starting with {_RH_NAMESPACES}): {ns!r}", + ) + except Exception: + result.add(CRITICAL, "Document", f"documentNamespace is not parseable: {ns!r}") + + # ---- creationInfo ---- + ci = data.get("creationInfo") + if not ci: + result.add(CRITICAL, "Document/creationInfo", "creationInfo block is missing") + else: + created = ci.get("created", "") + if not created: + result.add(CRITICAL, "Document/creationInfo", "creationInfo.created is missing") + elif not _UTC_TIMESTAMP_RE.match(created): + result.add( + MAJOR, + "Document/creationInfo", + f"creationInfo.created must be in YYYY-MM-DDThh:mm:ssZ (UTC) format, got {created!r}", + ) + + creators: list[str] = ci.get("creators", []) + if not creators: + result.add(CRITICAL, "Document/creationInfo", "creationInfo.creators is empty or missing") + else: + has_tool = any(c.startswith("Tool:") for c in creators) + has_org_rh = "Organization: Red Hat" in creators + + if not has_tool: + result.add( + MAJOR, + "Document/creationInfo", + "creationInfo.creators must include a 'Tool: ' entry", + ) + else: + # Check if tool has a version + tool_entries = [c for c in creators if c.startswith("Tool:")] + for tool_entry in tool_entries: + tool_value = tool_entry[len("Tool:"):].strip() + if not tool_value: + result.add( + MINOR, + "Document/creationInfo", + "Tool creator entry is empty — should include tool name and version", + ) + + if not has_org_rh: + result.add( + CRITICAL, + "Document/creationInfo", + f"creationInfo.creators must include 'Organization: Red Hat' (required for RTPA2 handling). " + f"Found: {creators}", + ) + else: + # Check for non-Red Hat organization entries alongside Red Hat + non_rh_orgs = [ + c for c in creators if c.startswith("Organization:") and c != "Organization: Red Hat" + ] + if non_rh_orgs: + result.add( + MINOR, + "Document/creationInfo", + f"Additional organization entries found alongside 'Organization: Red Hat': {non_rh_orgs}", + ) + + # ---- Relationships ---- + relationships: list[dict] = data.get("relationships", []) + if not relationships: + result.add(CRITICAL, "Document/Relationships", "No relationships defined in SBOM") + else: + has_describes = any(r.get("relationshipType") == "DESCRIBES" for r in relationships) + if not has_describes: + result.add( + CRITICAL, + "Document/Relationships", + "No DESCRIBES relationship found — document must describe its primary artifact", + ) + + # ---- Packages ---- + packages: list[dict] = data.get("packages", []) + if not packages: + result.add(CRITICAL, "Packages", "No packages defined in SBOM") + return + + # Build a set of all SPDX IDs for relationship validation + all_ids = {pkg.get("SPDXID") for pkg in packages} + + # Detect SBOM type based on purl namespaces present + srpm_id = None + binary_rpm_ids: list[str] = [] + has_upstream_source = False + + # Track deduplication: only report PURL issues once per purl string + seen_purls: set[str] = set() + + for pkg in packages: + loc = _pkg_location(pkg) + + # Mandatory fields + if not pkg.get("SPDXID"): + result.add(CRITICAL, "Packages", "Package missing SPDXID", loc) + + if not pkg.get("name"): + result.add(MAJOR, "Packages", "Package missing 'name' field", loc) + + if not pkg.get("versionInfo"): + result.add(MINOR, "Packages", "Package missing 'versionInfo' field", loc) + + if not pkg.get("downloadLocation"): + result.add(MAJOR, "Packages", "Package missing 'downloadLocation' field", loc) + + # Supplier check — packages identified by pkg:rpm/redhat/* must have Red Hat as supplier + supplier = pkg.get("supplier", "") + ext_refs = pkg.get("externalRefs", []) + all_purls = [ + r.get("referenceLocator", "") + for r in ext_refs + if r.get("referenceType") == "purl" and r.get("referenceCategory") == "PACKAGE-MANAGER" + ] + cpes = [r for r in ext_refs if "cpe" in r.get("referenceType", "").lower()] + is_redhat_rpm = any("pkg:rpm/redhat/" in p for p in all_purls) + is_redhat_oci = any("pkg:oci/" in p and "redhat" in p for p in all_purls) + + if (is_redhat_rpm or is_redhat_oci) and supplier and supplier != "Organization: Red Hat": + result.add( + MAJOR, + "Packages", + f"Supplier is {supplier!r} — expected 'Organization: Red Hat' for Red Hat-distributed packages", + loc, + ) + elif not is_redhat_rpm and not is_redhat_oci and not cpes: + # For non-Red Hat packages, flag if supplier is wrong Org (fedora etc) but not NOASSERTION + if supplier and supplier.startswith("Organization:") and supplier != "Organization: Red Hat": + result.add( + MAJOR, + "Packages", + f"Supplier is {supplier!r} — expected 'Organization: Red Hat' for Red Hat-distributed packages", + loc, + ) + + # License + has_license = pkg.get("licenseConcluded") or pkg.get("licenseDeclared") + if not has_license: + result.add(MINOR, "Packages", "Package has neither licenseConcluded nor licenseDeclared", loc) + + # External references (purls) + ext_refs = pkg.get("externalRefs", []) + purls = [ + r.get("referenceLocator", "") + for r in ext_refs + if r.get("referenceType") == "purl" and r.get("referenceCategory") == "PACKAGE-MANAGER" + ] + cpe_refs = [r for r in ext_refs if "cpe" in r.get("referenceType", "").lower()] + is_product_component = bool(cpe_refs) and not purls + + if not purls and not is_product_component: + result.add( + MAJOR, + "Packages", + "Package has no purl in externalRefs (PACKAGE-MANAGER category) — purl required for all non-product packages", + loc, + ) + else: + for purl in purls: + if purl not in seen_purls: + seen_purls.add(purl) + parsed = validate_purl(purl, result, loc) + if parsed: + ptype = parsed["type"].lower() + if ptype == "rpm": + fname = pkg.get("packageFileName", "") + if fname.endswith(".src.rpm"): + srpm_id = pkg.get("SPDXID") + elif any( + fname.endswith(f".{a}.rpm") + for a in ("x86_64", "aarch64", "ppc64le", "s390x", "i686", "noarch") + ): + binary_rpm_ids.append(pkg.get("SPDXID", "")) + elif ptype == "generic": + has_upstream_source = True + + # Checksums — required for RPMs and OCI images + checksums = pkg.get("checksums", []) + fname = pkg.get("packageFileName", "") + is_rpm = fname.endswith(".rpm") if fname else any( + "rpm" in (r.get("referenceLocator", "")) for r in ext_refs + ) + is_oci = any( + "oci" in (r.get("referenceLocator", "")[:7]) for r in ext_refs + if r.get("referenceType") == "purl" + ) + if (is_rpm or is_oci) and not checksums: + result.add(MINOR, "Packages", "RPM/OCI package is missing checksums", loc) + elif checksums: + has_sha256 = any(c.get("algorithm", "").upper() == "SHA256" for c in checksums) + if not has_sha256 and (is_rpm or is_oci): + result.add( + MINOR, + "Packages", + "SHA256 checksum not found in checksums list", + loc, + ) + + # ---- Relationship completeness checks ---- + if srpm_id: + srpm_rels = [ + r for r in relationships + if r.get("relatedSpdxElement") == srpm_id and r.get("relationshipType") == "GENERATED_FROM" + ] + if not srpm_rels: + result.add( + MINOR, + "Relationships", + f"SRPM {srpm_id!r} has no binary RPMs with GENERATED_FROM relationship pointing to it", + ) + + # Check that SRPM contains source archives (CONTAINS relationships) + srpm_contains = [ + r for r in relationships + if r.get("spdxElementId") == srpm_id and r.get("relationshipType") == "CONTAINS" + ] + if not srpm_contains and has_upstream_source: + result.add( + MINOR, + "Relationships", + f"SRPM {srpm_id!r} has upstream source packages but no CONTAINS relationships to them", + ) + + +# --------------------------------------------------------------------------- +# CycloneDX 1.6 validation +# --------------------------------------------------------------------------- + +_CDX_TOOL_KEY = "tools" + + +def validate_cyclonedx(data: dict, result: ValidationResult) -> None: + result.format = "CycloneDX" + + if data.get("bomFormat") != "CycloneDX": + result.add(CRITICAL, "Document", f"bomFormat must be 'CycloneDX', got {data.get('bomFormat')!r}") + + spec = data.get("specVersion", "") + if spec != "1.6": + result.add( + MAJOR, + "Document", + f"specVersion should be '1.6' (current guideline version), got {spec!r}", + ) + + if not data.get("version"): + result.add(MINOR, "Document", "Document 'version' field is missing (recommended)") + + serial = data.get("serialNumber", "") + if not serial: + result.add(MINOR, "Document", "serialNumber is missing") + elif not serial.startswith("urn:uuid:"): + result.add(MINOR, "Document", f"serialNumber should be a URN UUID (urn:uuid:...), got {serial!r}") + + # ---- metadata ---- + metadata = data.get("metadata") + if not metadata: + result.add(CRITICAL, "Document/metadata", "metadata block is missing") + return + + result.name = (metadata.get("component") or {}).get("name", "") + + ts = metadata.get("timestamp", "") + if not ts: + result.add(MAJOR, "Document/metadata", "metadata.timestamp is missing") + elif not _UTC_TIMESTAMP_RE.match(ts): + result.add( + MAJOR, + "Document/metadata", + f"metadata.timestamp must be in YYYY-MM-DDThh:mm:ssZ format, got {ts!r}", + ) + + # Supplier + supplier = metadata.get("supplier") or {} + supplier_name = supplier.get("name", "") + if not supplier_name: + result.add( + MAJOR, + "Document/metadata", + "metadata.supplier is missing — must identify Red Hat as the supplier", + ) + elif "Red Hat" not in supplier_name: + result.add( + MAJOR, + "Document/metadata", + f"metadata.supplier.name should be 'Red Hat', got {supplier_name!r}", + ) + + # Tools + tools_block = metadata.get("tools") + if not tools_block: + result.add( + MAJOR, + "Document/metadata", + "metadata.tools is missing — tool information is required", + ) + else: + # Tools may be a list (CDX <1.5) or a dict with 'components' + if isinstance(tools_block, dict): + tool_components = tools_block.get("components", []) + else: + tool_components = tools_block + if not tool_components: + result.add(MAJOR, "Document/metadata", "metadata.tools is empty — at least one tool must be listed") + + # Main component + main_component = metadata.get("component") + if not main_component: + result.add(MAJOR, "Document/metadata", "metadata.component is missing — root component not defined") + else: + if not main_component.get("purl"): + result.add( + MAJOR, + "Document/metadata", + "metadata.component is missing a 'purl' identifier", + ) + else: + validate_purl(main_component["purl"], result, "metadata.component") + + # ---- components ---- + components: list[dict] = data.get("components", []) + if not components: + result.add(MINOR, "Components", "No components listed in SBOM") + return + + seen_purls: set[str] = set() + + for comp in components: + bom_ref = comp.get("bom-ref", comp.get("name", "?")) + loc = f"component:{bom_ref}" + + if not comp.get("type"): + result.add(MAJOR, "Components", "Component missing 'type' field", loc) + + if not comp.get("name"): + result.add(MAJOR, "Components", "Component missing 'name' field", loc) + + if not comp.get("version"): + result.add(MINOR, "Components", "Component missing 'version' field", loc) + + purl = comp.get("purl", "") + if not purl: + result.add(MAJOR, "Components", "Component missing 'purl' identifier", loc) + elif purl not in seen_purls: + seen_purls.add(purl) + validate_purl(purl, result, loc) + + # Supplier / manufacturer + if not comp.get("supplier") and not comp.get("manufacturer"): + result.add(MINOR, "Components", "Component has no 'supplier' or 'manufacturer' field", loc) + + # Hashes + hashes = comp.get("hashes", []) + purl_type = (parse_purl(purl) or {}).get("type", "").lower() if purl else "" + if purl_type in ("rpm", "oci") and not hashes: + result.add(MINOR, "Components", "RPM/OCI component is missing hashes", loc) + + # ---- dependencies ---- + dependencies = data.get("dependencies", []) + if not dependencies: + result.add( + INFO, + "Dependencies", + "No dependency graph defined — consider adding top-level dependencies block", + ) + + +# --------------------------------------------------------------------------- +# Auto-detect format and dispatch +# --------------------------------------------------------------------------- + + +def load_sbom(path: str) -> dict: + opener = gzip.open if path.endswith(".gz") else open + with opener(path, "rt", encoding="utf-8") as fh: + return json.load(fh) + + +def detect_format(data: dict) -> str: + if "spdxVersion" in data: + return "spdx" + if data.get("bomFormat") == "CycloneDX": + return "cyclonedx" + return "unknown" + + +def validate(path: str) -> ValidationResult: + result = ValidationResult() + try: + data = load_sbom(path) + except Exception as exc: + result.add(CRITICAL, "Parse", f"Failed to load SBOM: {exc}") + result.parse_failed = True + return result + + fmt = detect_format(data) + if fmt == "spdx": + validate_spdx(data, result) + elif fmt == "cyclonedx": + validate_cyclonedx(data, result) + else: + result.add(CRITICAL, "Parse", "Could not detect SBOM format (expected SPDX or CycloneDX JSON)") + result.parse_failed = True + + return result + + +# --------------------------------------------------------------------------- +# Output +# --------------------------------------------------------------------------- + + +def _c(severity: str, colors: dict[str, str] | None = None) -> str: + mapping = _SEVERITY_COLORS if colors is None else colors + return mapping.get(severity, "") + + +def _reset(colors: dict[str, str] | None = None) -> str: + mapping = _SEVERITY_COLORS if colors is None else colors + return mapping["RESET"] + + +def print_report( + result: ValidationResult, + path: str, + use_color: bool = True, + score: int | None = None, + counts: dict[str, int] | None = None, + unique_counts: dict[str, int] | None = None, +) -> None: + colors = dict(_SEVERITY_COLORS) + if not use_color: + for k in colors: + colors[k] = "" + + bold = colors["BOLD"] + reset = colors["RESET"] + green = colors["GREEN"] + red = colors["RED"] + + score = score if score is not None else result.compute_score() + counts = counts if counts is not None else result.counts() + + print(f"\n{bold}SBOM Compliance Report{reset}") + print(f"{'─' * 60}") + print(f" File : {path}") + print(f" Format : {result.format}") + print(f" Name : {result.name or '(unknown)'}") + print(f"{'─' * 60}") + + # Sort findings: CRITICAL first, then MAJOR, MINOR, INFO + sorted_findings = sorted( + result.findings, key=lambda f: _SEVERITY_ORDER.get(f.severity, 99) + ) + + if not sorted_findings: + print(f"\n {green}No issues found! Fully compliant.{reset}\n") + else: + current_sev = None + for finding in sorted_findings: + if finding.severity != current_sev: + current_sev = finding.severity + print(f"\n {bold}{_c(current_sev, colors)}{current_sev}{reset}") + color = _c(finding.severity, colors) + + if finding.count > 1: + count_note = f" {bold}(×{finding.count}){reset}" + if finding.sample_locations: + samples = ", ".join(finding.sample_locations[:2]) + if finding.count > len(finding.sample_locations): + samples += f", … +{finding.count - len(finding.sample_locations)} more" + loc_str = f" [e.g. {samples}]" + else: + loc_str = "" + else: + count_note = "" + loc_str = f" [{finding.location}]" if finding.location else "" + + print(f" {color}●{reset} {finding.category}{count_note}{loc_str}:") + print(f" {finding.message}") + + ucounts = unique_counts if unique_counts is not None else result.unique_counts() + print(f"\n{'─' * 60}") + print(f" Summary (unique issue types / total occurrences):") + print(f" {_c(CRITICAL, colors)}{bold}CRITICAL{reset}: {ucounts[CRITICAL]} types / {counts[CRITICAL]} occurrences") + print(f" {_c(MAJOR, colors)}{bold}MAJOR{reset} : {ucounts[MAJOR]} types / {counts[MAJOR]} occurrences") + print(f" {_c(MINOR, colors)}{bold}MINOR{reset} : {ucounts[MINOR]} types / {counts[MINOR]} occurrences") + print(f" {_c(INFO, colors)}{bold}INFO{reset} : {ucounts[INFO]} types / {counts[INFO]} occurrences") + + bar_len = 40 + filled = round((score / 10) * bar_len) + bar_color = green if score >= 8 else (_c(MAJOR, colors) if score >= 5 else red) + bar = bar_color + "█" * filled + reset + "░" * (bar_len - filled) + print(f"\n {bold}Compliance Score: {bar_color}{score}/10{reset}") + print(f" [{bar}]") + + if score >= 8: + label = f"{green}COMPLIANT{reset}" + elif score >= 5: + label = f"{_c(MAJOR, colors)}PARTIALLY COMPLIANT{reset}" + else: + label = f"{red}NON-COMPLIANT{reset}" + print(f" Status: {bold}{label}") + print(f"{'─' * 60}\n") + + +# --------------------------------------------------------------------------- +# CLI entry point +# --------------------------------------------------------------------------- + + +def main() -> int: + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("sbom", help="Path to SBOM file (.json or .json.gz)") + parser.add_argument( + "--no-color", action="store_true", help="Disable ANSI color output" + ) + parser.add_argument( + "--json", action="store_true", help="Output findings as JSON (implies --no-color)" + ) + parser.add_argument( + "--min-severity", + choices=[CRITICAL, MAJOR, MINOR, INFO], + default=MINOR, + help="Only show findings at or above this severity (default: MINOR)", + ) + args = parser.parse_args() + + result = validate(args.sbom) + + # Compute score and counts before any filtering so they reflect all findings + final_score = result.compute_score() + counts_before = result.counts() + unique_counts_before = result.unique_counts() + + # Filter displayed findings by min severity + min_level = _SEVERITY_ORDER.get(args.min_severity, 2) + result.findings = [ + f for f in result.findings if _SEVERITY_ORDER.get(f.severity, 99) <= min_level + ] + + if args.json: + output = { + "file": args.sbom, + "format": result.format, + "name": result.name, + "score": final_score, + "counts_total": counts_before, + "counts_unique": unique_counts_before, + "findings": [ + { + "severity": f.severity, + "category": f.category, + "location": f.location, + "message": f.message, + "count": f.count, + "sample_locations": f.sample_locations, + } + for f in result.findings + ], + } + print(json.dumps(output, indent=2)) + else: + print_report( + result, + args.sbom, + use_color=not args.no_color, + score=final_score, + counts=counts_before, + unique_counts=unique_counts_before, + ) + + if result.parse_failed: + return 3 + + if final_score >= 8: + return 0 + elif final_score >= 5: + return 1 + else: + return 2 + + +if __name__ == "__main__": + sys.exit(main())