From 35c92b165597679935ca5c916963bc18bf074998 Mon Sep 17 00:00:00 2001 From: Lukasz Juranek Date: Sat, 31 Jan 2026 08:17:15 +0100 Subject: [PATCH 1/2] Add sbom generation tooling (#2232) --- sbom/BUILD.bazel | 35 + sbom/SBOM_Readme.md | 231 +++++ sbom/cpp_metadata.json | 55 ++ sbom/crates_metadata.json | 806 ++++++++++++++++++ sbom/defs.bzl | 136 +++ sbom/extensions.bzl | 454 ++++++++++ sbom/internal/BUILD | 24 + sbom/internal/__init__.py | 1 + sbom/internal/aspect.bzl | 115 +++ sbom/internal/generator/BUILD | 33 + sbom/internal/generator/__init__.py | 1 + .../internal/generator/cyclonedx_formatter.py | 358 ++++++++ sbom/internal/generator/sbom_generator.py | 744 ++++++++++++++++ sbom/internal/generator/spdx_formatter.py | 180 ++++ sbom/internal/metadata_rule.bzl | 49 ++ sbom/internal/providers.bzl | 28 + sbom/internal/rules.bzl | 267 ++++++ sbom/npm_wrapper.sh | 17 + sbom/scripts/BUILD.bazel | 5 + sbom/scripts/generate_cpp_metadata_cache.py | 112 +++ .../scripts/generate_crates_metadata_cache.py | 332 ++++++++ sbom/tests/BUILD | 19 + sbom/tests/__init__.py | 1 + sbom/tests/test_cyclonedx_formatter.py | 142 +++ sbom/tests/test_spdx_formatter.py | 109 +++ 25 files changed, 4254 insertions(+) create mode 100644 sbom/BUILD.bazel create mode 100644 sbom/SBOM_Readme.md create mode 100644 sbom/cpp_metadata.json create mode 100644 sbom/crates_metadata.json create mode 100644 sbom/defs.bzl create mode 100644 sbom/extensions.bzl create mode 100644 sbom/internal/BUILD create mode 100644 sbom/internal/__init__.py create mode 100644 sbom/internal/aspect.bzl create mode 100644 sbom/internal/generator/BUILD create mode 100644 sbom/internal/generator/__init__.py create mode 100644 sbom/internal/generator/cyclonedx_formatter.py create mode 100644 sbom/internal/generator/sbom_generator.py create mode 100644 sbom/internal/generator/spdx_formatter.py create mode 100644 sbom/internal/metadata_rule.bzl create mode 100644 sbom/internal/providers.bzl create mode 100644 sbom/internal/rules.bzl create mode 100755 sbom/npm_wrapper.sh create mode 100644 sbom/scripts/BUILD.bazel create mode 100644 sbom/scripts/generate_cpp_metadata_cache.py create mode 100755 sbom/scripts/generate_crates_metadata_cache.py create mode 100644 sbom/tests/BUILD create mode 100644 sbom/tests/__init__.py create mode 100644 sbom/tests/test_cyclonedx_formatter.py create mode 100644 sbom/tests/test_spdx_formatter.py diff --git a/sbom/BUILD.bazel b/sbom/BUILD.bazel new file mode 100644 index 0000000..122a5b6 --- /dev/null +++ b/sbom/BUILD.bazel @@ -0,0 +1,35 @@ +# SBOM Generation Package +# +# This package provides Bazel-native SBOM (Software Bill of Materials) generation +# using module extensions and aspects. +# +# Public API: +# - load("@score_tooling//sbom:defs.bzl", "sbom") +# - use_extension("@score_tooling//sbom:extensions.bzl", "sbom_metadata") + +load("@rules_python//python:defs.bzl", "py_library") + +package(default_visibility = ["//visibility:public"]) + +exports_files([ + "defs.bzl", + "extensions.bzl", + "crates_metadata.json", + "cpp_metadata.json", +]) + +# Filegroup for all SBOM-related bzl files +filegroup( + name = "bzl_files", + srcs = [ + "defs.bzl", + "extensions.bzl", + "//sbom/internal:bzl_files", + ], +) + +# npm wrapper (uses system-installed npm from PATH) +sh_binary( + name = "npm_wrapper", + srcs = ["npm_wrapper.sh"], +) diff --git a/sbom/SBOM_Readme.md b/sbom/SBOM_Readme.md new file mode 100644 index 0000000..24059d5 --- /dev/null +++ b/sbom/SBOM_Readme.md @@ -0,0 +1,231 @@ +# SBOM Setup Guide + +## 1. 
Configure MODULE.bazel
+
+Add the SBOM metadata extension in your **root** MODULE.bazel (e.g. `reference_integration/MODULE.bazel`):
+
+```starlark
+# Enable SBOM metadata collection from all modules in the dependency graph
+sbom_ext = use_extension("@score_tooling//sbom:extensions.bzl", "sbom_metadata")
+use_repo(sbom_ext, "sbom_metadata")
+```
+
+No manual license entries are needed — all license metadata is collected automatically.
+
+## 2. Add SBOM Target in BUILD
+
+```starlark
+load("@score_tooling//sbom:defs.bzl", "sbom")
+
+sbom(
+    name = "my_sbom",
+    targets = ["//my/app:binary"],
+    component_name = "my_application",
+    component_version = "1.0.0",
+    # Rust crate metadata from score_crates MODULE.bazel.lock
+    module_lockfile = "@score_crates//:MODULE.bazel.lock",
+    auto_crates_cache = True,
+    auto_cdxgen = True,  # Requires system-installed npm/cdxgen (see below)
+)
+```
+
+### Parameters
+
+| Parameter | Description |
+| :--- | :--- |
+| `targets` | Bazel targets to include in SBOM |
+| `component_name` | Main component name (defaults to rule name) |
+| `component_version` | Version string |
+| `output_formats` | `["spdx", "cyclonedx"]` (default: both) |
+| `module_lockfile` | MODULE.bazel.lock from `score_crates` — contains all resolved Rust crate specs (name, version, sha256). This is the recommended source for Rust crate metadata. |
+| `cargo_lockfile` | Optional Cargo.lock for crates not covered by `module_lockfile`. Usually not needed when `module_lockfile` from `score_crates` is provided, since it is a superset. |
+| `auto_crates_cache` | Auto-generate crates cache when `module_lockfile` or `cargo_lockfile` is set |
+| `auto_cdxgen` | Auto-run cdxgen when no `cdxgen_sbom` is provided |
+
+## 3. Install Prerequisites (for auto_cdxgen)
+
+If using `auto_cdxgen = True` to automatically scan C++ dependencies:
+
+```bash
+# Install Node.js and cdxgen globally
+# Option 1: Using nvm (recommended)
+curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash
+source ~/.bashrc
+nvm install 20
+npm install -g @cyclonedx/cdxgen
+
+# Verify installation
+which cdxgen
+cdxgen --version
+```
+
+**Note:** If you don't have npm/cdxgen installed, set `auto_cdxgen = False` in your SBOM configuration.
+
+## 4. Build
+
+```bash
+bazel build //:my_sbom
+```
+
+## 5. 
Output + +Generated files in `bazel-bin/`: + +- `my_sbom.spdx.json` — SPDX 2.3 format +- `my_sbom.cdx.json` — CycloneDX 1.6 format +- `my_sbom_crates_metadata.json` — Auto-generated Rust crate cache (if `auto_crates_cache = True`) +- `my_sbom_cdxgen.cdx.json` — C++ dependencies from cdxgen (if `auto_cdxgen = True`) + +--- + +## Toolchain Components + +### Core Tools + +| Tool | Role | Required For | +|------|------|--------------| +| [Bazel](https://bazel.build) | Build system — rules, aspects, and module extensions drive dependency discovery and SBOM generation | All SBOM generation | +| [Python 3](https://www.python.org) | Runtime for the SBOM generator, formatters, and metadata extraction scripts | All SBOM generation | +| [crates.io API](https://crates.io) | Rust crate metadata source (license, version, checksums) | Rust metadata extraction when `auto_crates_cache = True` | +| [@cyclonedx/cdxgen](https://github.com/CycloneDX/cdxgen) | C++ dependency scanner and license discovery tool | C++ metadata extraction when `auto_cdxgen = True` | +| [Node.js / npm](https://nodejs.org) | Runtime for cdxgen | C++ metadata extraction when `auto_cdxgen = True` | + +### Five-Phase Architecture + +``` +Phase 1: Loading Phase 2: Analysis +(extensions.bzl) (aspect.bzl) + +MODULE.bazel Bazel targets + | | + v v +sbom_metadata ext SbomDepsInfo aspect + | | + v v +metadata.json _deps.json + (external repos, + dep edges, + target labels) + + +Phase 3: Metadata Extraction (rules.bzl, parallel) + +Branch A (Rust): Branch B (C++): +MODULE.bazel.lock Source tree +(from score_crates) (C++, CMake, LICENSE) + | | + v v +generate_crates_cache.py cdxgen --deep -r +(+ crates.io API) | + | v + v cdxgen.cdx.json +crates_metadata.json + + +Phase 4: Resolution (sbom_generator.py) + +_deps.json -------+ +metadata.json ----+--> Match & Resolve +crates_cache -----+ (for each dep in +cdxgen.cdx.json --+ _deps.json, look up + metadata from caches) + | + v + SBOM components + (license, PURL, + version, hash) + + +Phase 5: Generation (formatters) + +SBOM components + | + +--> spdx_formatter --> .spdx.json + +--> cdx_formatter --> .cdx.json +``` + +### What Is Excluded from SBOM + +- Dependencies not in the transitive dep graph of your `targets` +- Build toolchain repos matching `exclude_patterns` (e.g. `rules_rust`, `rules_cc`, `bazel_tools`, `platforms`) + +## Example + +See [reference_integration/BUILD](../../reference_integration/BUILD) for working SBOM targets using `module_lockfile` from `score_crates` with both `auto_crates_cache` and `auto_cdxgen` enabled, and [reference_integration/MODULE.bazel](../../reference_integration/MODULE.bazel) for the metadata extension setup. + +### score_crates Integration + +The `score_crates` module provides centralized Rust crate management for the SCORE project. Its `MODULE.bazel.lock` file contains all resolved crate specs (name, version, sha256) generated by `cargo-bazel`. This lock file is used as the primary data source for Rust crate SBOM metadata, replacing the need for individual `Cargo.lock` files. + +## CISA 2025 Element Coverage (CycloneDX) + +The table below maps the CISA 2025 draft elements to CycloneDX fields and notes current support in this SBOM generator. + +| CISA 2025 Element | CycloneDX Field (JSON) | Support | Notes | +|---|---|---|---| +| Software Producer | `components[].supplier.name` (or manufacturer) | **Supported** | Component `supplier` is emitted when provided. Root producer is in `metadata.component.supplier`. 
| Component Name | `components[].name` | **Supported** | Single name; aliases are stored as `properties` with `cdx:alias`. |
+| Component Version | `components[].version` | **Supported** | If unknown and the source is a git repository with a `commit_date`, the version falls back to that date. |
+| Software Identifiers | `components[].purl`, `components[].cpe` | **Supported (PURL)** / **Optional (CPE)** | PURL is generated for all components. CPE is emitted only when provided in metadata. |
+| Component Hash | `components[].hashes` | **Supported** | SHA-256 supported (Cargo lock + http_archive `sha256` + repo metadata). |
+| License | `components[].licenses` | **Supported when known** | Requires license metadata from `sbom_ext.license(...)`, repo metadata, or caches. |
+| Dependency Relationship | `dependencies` | **Supported** | Uses external repo dependency edges from the Bazel aspect. |
+| Pedigree / Derivation | `components[].pedigree` | **Supported (manual)** | Must be provided via metadata (`pedigree_*` fields). Not auto-deduced. |
+| SBOM Author | `metadata.authors` | **Supported** | Set via `sbom_authors` in the `sbom()` rule. |
+| Tool Name | `metadata.tools` | **Supported** | Always includes `score-sbom-generator`; extra tools via `sbom_tools`. |
+| Timestamp | `metadata.timestamp` | **Supported** | ISO 8601 UTC timestamp generated at build time. |
+| Generation Context | `metadata.lifecycles` | **Supported** | Set via `generation_context` in the `sbom()` rule (`pre-build`, `build`, `post-build`); see the example after the notes below. |
+
+### Notes on Missing Data
+
+If a field is absent in the output, it usually means the source metadata was not provided:
+- Licenses and suppliers require `sbom_ext.license(...)` or repo metadata.
+- CPE, aliases, and pedigree are optional and must be explicitly set.
+- Rust crate licenses require a crates metadata cache; this is generated automatically when `module_lockfile` (or `cargo_lockfile`) is provided to `sbom()`. The `score_crates` MODULE.bazel.lock is the recommended source as it contains all resolved crate specs. 
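+
+The author, tool, and lifecycle rows above are driven by parameters of the `sbom()` rule rather than by the metadata extension. A minimal sketch follows; the target label and the plain-string values for `sbom_authors` and `sbom_tools` are illustrative assumptions, not taken from a real build:
+
+```starlark
+sbom(
+    name = "my_sbom_with_doc_metadata",   # illustrative target name
+    targets = ["//my/app:binary"],
+    component_version = "1.0.0",
+    sbom_authors = ["SCORE SBOM Team"],   # -> metadata.authors
+    sbom_tools = ["cdxgen"],              # -> metadata.tools, alongside score-sbom-generator
+    generation_context = "build",         # -> metadata.lifecycles (pre-build | build | post-build)
+)
+```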
+ +Examples (add to `MODULE.bazel`): + +```starlark +# bazel_dep module (version from module graph) +sbom_ext.license( + name = "googletest", + license = "BSD-3-Clause", + supplier = "Google LLC", +) + +# http_archive dependency (explicit version) +sbom_ext.license( + name = "boost", + license = "BSL-1.0", + version = "1.87.0", + supplier = "Boost.org", +) + +# git_repository dependency +sbom_ext.license( + name = "iceoryx2", + license = "Apache-2.0", + version = "0.7.0", + supplier = "Eclipse Foundation", + remote = "https://github.com/eclipse-iceoryx/iceoryx2.git", +) + +# Rust crate (type = "cargo") +sbom_ext.license( + name = "tokio", + license = "MIT", + version = "1.10.0", + type = "cargo", + supplier = "Tokio Contributors", +) + +# Optional metadata (CPE, aliases, pedigree) +sbom_ext.license( + name = "linux-kernel", + license = "GPL-2.0-only", + version = "5.10.120", + cpe = "cpe:2.3:o:linux:linux_kernel:*:*:*:*:*:*:*:*", + aliases = ["linux", "kernel"], + pedigree_ancestors = ["pkg:generic/linux-kernel@5.10.130"], + pedigree_notes = "Backported CVE-2025-12345 fix from 5.10.130", +) +``` diff --git a/sbom/cpp_metadata.json b/sbom/cpp_metadata.json new file mode 100644 index 0000000..b6703c6 --- /dev/null +++ b/sbom/cpp_metadata.json @@ -0,0 +1,55 @@ +{ + "boost": { + "version": "1.87.0", + "license": "BSL-1.0", + "supplier": "Boost.org", + "purl": "pkg:conan/boost@1.87.0", + "url": "https://www.boost.org/" + }, + "nlohmann-json": { + "version": "3.11.3", + "license": "MIT", + "supplier": "Niels Lohmann", + "purl": "pkg:conan/nlohmann_json@3.11.3", + "url": "https://github.com/nlohmann/json" + }, + "googletest": { + "version": "1.17.0", + "license": "BSD-3-Clause", + "supplier": "Google LLC", + "purl": "pkg:github/google/googletest@1.17.0", + "url": "https://github.com/google/googletest" + }, + "google_benchmark": { + "version": "1.9.4", + "license": "Apache-2.0", + "supplier": "Google LLC", + "purl": "pkg:github/google/benchmark@1.9.4", + "url": "https://github.com/google/benchmark" + }, + "flatbuffers": { + "version": "25.2.10", + "license": "Apache-2.0", + "supplier": "Google LLC", + "purl": "pkg:github/google/flatbuffers@25.2.10", + "url": "https://github.com/google/flatbuffers" + }, + "vsomeip": { + "version": "3.6.0", + "license": "MPL-2.0", + "supplier": "COVESA", + "purl": "pkg:github/COVESA/vsomeip@3.6.0", + "url": "https://github.com/COVESA/vsomeip" + }, + "json_schema_validator": { + "version": "2.1.0", + "license": "MIT", + "supplier": "Patrick Boettcher", + "purl": "pkg:github/pboettch/json-schema-validator@2.1.0" + }, + "bazel_skylib": { + "version": "1.8.1", + "license": "Apache-2.0", + "purl": "pkg:github/bazelbuild/bazel-skylib@1.8.1" + } +} diff --git a/sbom/crates_metadata.json b/sbom/crates_metadata.json new file mode 100644 index 0000000..2f1b7b6 --- /dev/null +++ b/sbom/crates_metadata.json @@ -0,0 +1,806 @@ +{ + "aho-corasick": { + "checksum": "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301", + "license": "Unlicense OR MIT", + "name": "aho-corasick", + "purl": "pkg:cargo/aho-corasick@1.1.4", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.1.4" + }, + "bindgen": { + "checksum": "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895", + "license": "BSD-3-Clause", + "name": "bindgen", + "purl": "pkg:cargo/bindgen@0.72.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.72.1" + }, + "bitflags": { + "checksum": 
"812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3", + "license": "MIT OR Apache-2.0", + "name": "bitflags", + "purl": "pkg:cargo/bitflags@2.10.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.10.0" + }, + "byteorder": { + "checksum": "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b", + "license": "Unlicense OR MIT", + "name": "byteorder", + "purl": "pkg:cargo/byteorder@1.5.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.5.0" + }, + "cc": { + "checksum": "90583009037521a116abf44494efecd645ba48b6622457080f080b85544e2215", + "license": "MIT OR Apache-2.0", + "name": "cc", + "purl": "pkg:cargo/cc@1.2.49", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.2.49" + }, + "cdr": { + "checksum": "9617422bf43fde9280707a7e90f8f7494389c182f5c70b0f67592d0f06d41dfa", + "license": "Apache-2.0 OR MIT", + "name": "cdr", + "purl": "pkg:cargo/cdr@0.2.4", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.4" + }, + "cexpr": { + "checksum": "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766", + "license": "Apache-2.0 OR MIT", + "name": "cexpr", + "purl": "pkg:cargo/cexpr@0.6.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.6.0" + }, + "cfg-if": { + "checksum": "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801", + "license": "MIT OR Apache-2.0", + "name": "cfg-if", + "purl": "pkg:cargo/cfg-if@1.0.4", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.4" + }, + "clang-sys": { + "checksum": "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4", + "license": "Apache-2.0", + "name": "clang-sys", + "purl": "pkg:cargo/clang-sys@1.8.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.8.1" + }, + "cobs": { + "checksum": "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1", + "license": "MIT OR Apache-2.0", + "name": "cobs", + "purl": "pkg:cargo/cobs@0.3.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.0" + }, + "crossbeam-channel": { + "checksum": "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2", + "license": "MIT OR Apache-2.0", + "name": "crossbeam-channel", + "purl": "pkg:cargo/crossbeam-channel@0.5.15", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.5.15" + }, + "crossbeam-utils": { + "checksum": "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28", + "license": "MIT OR Apache-2.0", + "name": "crossbeam-utils", + "purl": "pkg:cargo/crossbeam-utils@0.8.21", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.8.21" + }, + "deranged": { + "checksum": "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587", + "license": "MIT OR Apache-2.0", + "name": "deranged", + "purl": "pkg:cargo/deranged@0.5.5", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.5.5" + }, + "either": { + "checksum": "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719", + "license": "MIT OR Apache-2.0", + "name": "either", + "purl": "pkg:cargo/either@1.15.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.15.0" + }, + "embedded-io": { + "checksum": "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d", + "license": "MIT OR Apache-2.0", + "name": 
"embedded-io", + "purl": "pkg:cargo/embedded-io@0.6.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.6.1" + }, + "enum-iterator": { + "checksum": "a4549325971814bda7a44061bf3fe7e487d447cba01e4220a4b454d630d7a016", + "license": "0BSD OR MIT OR Apache-2.0", + "name": "enum-iterator", + "purl": "pkg:cargo/enum-iterator@2.3.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.3.0" + }, + "enum-iterator-derive": { + "checksum": "685adfa4d6f3d765a26bc5dbc936577de9abf756c1feeb3089b01dd395034842", + "license": "0BSD OR MIT OR Apache-2.0", + "name": "enum-iterator-derive", + "purl": "pkg:cargo/enum-iterator-derive@1.5.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.5.0" + }, + "equivalent": { + "checksum": "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f", + "license": "Apache-2.0 OR MIT", + "name": "equivalent", + "purl": "pkg:cargo/equivalent@1.0.2", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.2" + }, + "find-msvc-tools": { + "checksum": "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844", + "license": "MIT OR Apache-2.0", + "name": "find-msvc-tools", + "purl": "pkg:cargo/find-msvc-tools@0.1.5", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.5" + }, + "futures": { + "checksum": "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876", + "license": "MIT OR Apache-2.0", + "name": "futures", + "purl": "pkg:cargo/futures@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-channel": { + "checksum": "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10", + "license": "MIT OR Apache-2.0", + "name": "futures-channel", + "purl": "pkg:cargo/futures-channel@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-core": { + "checksum": "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e", + "license": "MIT OR Apache-2.0", + "name": "futures-core", + "purl": "pkg:cargo/futures-core@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-executor": { + "checksum": "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f", + "license": "MIT OR Apache-2.0", + "name": "futures-executor", + "purl": "pkg:cargo/futures-executor@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-io": { + "checksum": "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6", + "license": "MIT OR Apache-2.0", + "name": "futures-io", + "purl": "pkg:cargo/futures-io@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-macro": { + "checksum": "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650", + "license": "MIT OR Apache-2.0", + "name": "futures-macro", + "purl": "pkg:cargo/futures-macro@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-sink": { + "checksum": "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7", + "license": "MIT OR Apache-2.0", + "name": "futures-sink", + "purl": "pkg:cargo/futures-sink@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-task": { + "checksum": 
"f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988", + "license": "MIT OR Apache-2.0", + "name": "futures-task", + "purl": "pkg:cargo/futures-task@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "futures-util": { + "checksum": "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81", + "license": "MIT OR Apache-2.0", + "name": "futures-util", + "purl": "pkg:cargo/futures-util@0.3.31", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.31" + }, + "generator": { + "checksum": "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2", + "license": "Apache-2.0 OR MIT", + "name": "generator", + "purl": "pkg:cargo/generator@0.8.7", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.8.7" + }, + "glob": { + "checksum": "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280", + "license": "MIT OR Apache-2.0", + "name": "glob", + "purl": "pkg:cargo/glob@0.3.3", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.3" + }, + "hashbrown": { + "checksum": "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100", + "license": "MIT OR Apache-2.0", + "name": "hashbrown", + "purl": "pkg:cargo/hashbrown@0.16.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.16.1" + }, + "indexmap": { + "checksum": "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2", + "license": "Apache-2.0 OR MIT", + "name": "indexmap", + "purl": "pkg:cargo/indexmap@2.12.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.12.1" + }, + "itertools": { + "checksum": "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186", + "license": "MIT OR Apache-2.0", + "name": "itertools", + "purl": "pkg:cargo/itertools@0.13.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.13.0" + }, + "itoa": { + "checksum": "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c", + "license": "MIT OR Apache-2.0", + "name": "itoa", + "purl": "pkg:cargo/itoa@1.0.15", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.15" + }, + "lazy_static": { + "checksum": "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe", + "license": "MIT OR Apache-2.0", + "name": "lazy_static", + "purl": "pkg:cargo/lazy_static@1.5.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.5.0" + }, + "libc": { + "checksum": "37c93d8daa9d8a012fd8ab92f088405fb202ea0b6ab73ee2482ae66af4f42091", + "license": "MIT OR Apache-2.0", + "name": "libc", + "purl": "pkg:cargo/libc@0.2.178", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.178" + }, + "libloading": { + "checksum": "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55", + "license": "ISC", + "name": "libloading", + "purl": "pkg:cargo/libloading@0.8.9", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.8.9" + }, + "log": { + "checksum": "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897", + "license": "MIT OR Apache-2.0", + "name": "log", + "purl": "pkg:cargo/log@0.4.29", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.4.29" + }, + "loom": { + "checksum": "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca", + "license": "MIT", + "name": "loom", + "purl": 
"pkg:cargo/loom@0.7.2", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.7.2" + }, + "matchers": { + "checksum": "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9", + "license": "MIT", + "name": "matchers", + "purl": "pkg:cargo/matchers@0.2.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.0" + }, + "memchr": { + "checksum": "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273", + "license": "Unlicense OR MIT", + "name": "memchr", + "purl": "pkg:cargo/memchr@2.7.6", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.7.6" + }, + "minimal-lexical": { + "checksum": "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a", + "license": "MIT OR Apache-2.0", + "name": "minimal-lexical", + "purl": "pkg:cargo/minimal-lexical@0.2.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.1" + }, + "nom": { + "checksum": "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a", + "license": "MIT", + "name": "nom", + "purl": "pkg:cargo/nom@7.1.3", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "7.1.3" + }, + "nu-ansi-term": { + "checksum": "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5", + "license": "MIT", + "name": "nu-ansi-term", + "purl": "pkg:cargo/nu-ansi-term@0.50.3", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.50.3" + }, + "num-conv": { + "checksum": "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9", + "license": "MIT OR Apache-2.0", + "name": "num-conv", + "purl": "pkg:cargo/num-conv@0.1.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.0" + }, + "once_cell": { + "checksum": "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d", + "license": "MIT OR Apache-2.0", + "name": "once_cell", + "purl": "pkg:cargo/once_cell@1.21.3", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.21.3" + }, + "pin-project-lite": { + "checksum": "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b", + "license": "Apache-2.0 OR MIT", + "name": "pin-project-lite", + "purl": "pkg:cargo/pin-project-lite@0.2.16", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.16" + }, + "pin-utils": { + "checksum": "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184", + "name": "pin-utils", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.0" + }, + "postcard": { + "checksum": "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24", + "name": "postcard", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.1.3" + }, + "powerfmt": { + "checksum": "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391", + "name": "powerfmt", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.0" + }, + "prettyplease": { + "checksum": "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b", + "license": "MIT OR Apache-2.0", + "name": "prettyplease", + "purl": "pkg:cargo/prettyplease@0.2.37", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.37" + }, + "proc-macro2": { + "checksum": "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8", + "license": "MIT OR Apache-2.0", + "name": "proc-macro2", + "purl": 
"pkg:cargo/proc-macro2@1.0.103", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.103" + }, + "quote": { + "checksum": "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f", + "license": "MIT OR Apache-2.0", + "name": "quote", + "purl": "pkg:cargo/quote@1.0.42", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.42" + }, + "regex": { + "checksum": "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4", + "license": "MIT OR Apache-2.0", + "name": "regex", + "purl": "pkg:cargo/regex@1.12.2", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.12.2" + }, + "regex-automata": { + "checksum": "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c", + "name": "regex-automata", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.4.13" + }, + "regex-syntax": { + "checksum": "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58", + "name": "regex-syntax", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.8.8" + }, + "rustc-hash": { + "checksum": "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d", + "license": "Apache-2.0 OR MIT", + "name": "rustc-hash", + "purl": "pkg:cargo/rustc-hash@2.1.1", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.1.1" + }, + "rustversion": { + "checksum": "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d", + "name": "rustversion", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.22" + }, + "ryu": { + "checksum": "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f", + "name": "ryu", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.20" + }, + "scoped-tls": { + "checksum": "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294", + "name": "scoped-tls", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.1" + }, + "serde": { + "checksum": "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e", + "license": "MIT OR Apache-2.0", + "name": "serde", + "purl": "pkg:cargo/serde@1.0.228", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.228" + }, + "serde_core": { + "checksum": "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad", + "name": "serde_core", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.228" + }, + "serde_derive": { + "checksum": "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79", + "license": "MIT OR Apache-2.0", + "name": "serde_derive", + "purl": "pkg:cargo/serde_derive@1.0.228", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.228" + }, + "serde_json": { + "checksum": "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c", + "license": "MIT OR Apache-2.0", + "name": "serde_json", + "purl": "pkg:cargo/serde_json@1.0.145", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.145" + }, + "serde_spanned": { + "checksum": "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3", + "name": "serde_spanned", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.6.9" + }, + "sha1_smol": { + "checksum": "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d", + "name": "sha1_smol", + "source": 
"registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.1" + }, + "sharded-slab": { + "checksum": "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6", + "name": "sharded-slab", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.7" + }, + "shlex": { + "checksum": "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64", + "license": "MIT OR Apache-2.0", + "name": "shlex", + "purl": "pkg:cargo/shlex@1.3.0", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.3.0" + }, + "slab": { + "checksum": "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589", + "name": "slab", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.4.11" + }, + "smallvec": { + "checksum": "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03", + "name": "smallvec", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.15.1" + }, + "syn": { + "checksum": "390cc9a294ab71bdb1aa2e99d13be9c753cd2d7bd6560c77118597410c4d2e87", + "license": "MIT OR Apache-2.0", + "name": "syn", + "purl": "pkg:cargo/syn@2.0.111", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.0.111" + }, + "thiserror": { + "checksum": "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8", + "license": "MIT OR Apache-2.0", + "name": "thiserror", + "purl": "pkg:cargo/thiserror@2.0.17", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.0.17" + }, + "thiserror-impl": { + "checksum": "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913", + "name": "thiserror-impl", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "2.0.17" + }, + "thread_local": { + "checksum": "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185", + "name": "thread_local", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.1.9" + }, + "time": { + "checksum": "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d", + "license": "MIT OR Apache-2.0", + "name": "time", + "purl": "pkg:cargo/time@0.3.44", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.44" + }, + "time-core": { + "checksum": "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b", + "license": "MIT OR Apache-2.0", + "name": "time-core", + "purl": "pkg:cargo/time-core@0.1.6", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.6" + }, + "time-macros": { + "checksum": "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3", + "license": "MIT OR Apache-2.0", + "name": "time-macros", + "purl": "pkg:cargo/time-macros@0.2.24", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.24" + }, + "tiny-fn": { + "checksum": "9659b108631d1e1cf3e8e489f894bee40bc9d68fd6cc67ec4d4ce9b72d565228", + "name": "tiny-fn", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.9" + }, + "toml": { + "checksum": "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362", + "license": "MIT OR Apache-2.0", + "name": "toml", + "purl": "pkg:cargo/toml@0.8.23", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.8.23" + }, + "toml_datetime": { + "checksum": "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c", + "name": "toml_datetime", + "source": 
"registry+https://github.com/rust-lang/crates.io-index", + "version": "0.6.11" + }, + "toml_edit": { + "checksum": "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a", + "name": "toml_edit", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.22.27" + }, + "toml_write": { + "checksum": "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801", + "name": "toml_write", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.2" + }, + "tracing": { + "checksum": "2d15d90a0b5c19378952d479dc858407149d7bb45a14de0142f6c534b16fc647", + "license": "MIT", + "name": "tracing", + "purl": "pkg:cargo/tracing@0.1.43", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.43" + }, + "tracing-appender": { + "checksum": "786d480bce6247ab75f005b14ae1624ad978d3029d9113f0a22fa1ac773faeaf", + "name": "tracing-appender", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.4" + }, + "tracing-attributes": { + "checksum": "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da", + "name": "tracing-attributes", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.31" + }, + "tracing-core": { + "checksum": "7a04e24fab5c89c6a36eb8558c9656f30d81de51dfa4d3b45f26b21d61fa0a6c", + "license": "MIT", + "name": "tracing-core", + "purl": "pkg:cargo/tracing-core@0.1.35", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.35" + }, + "tracing-log": { + "checksum": "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3", + "name": "tracing-log", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.0" + }, + "tracing-serde": { + "checksum": "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1", + "name": "tracing-serde", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.0" + }, + "tracing-subscriber": { + "checksum": "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e", + "license": "MIT", + "name": "tracing-subscriber", + "purl": "pkg:cargo/tracing-subscriber@0.3.22", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.22" + }, + "unicode-ident": { + "checksum": "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5", + "license": "MIT OR Apache-2.0 AND Unicode-3.0", + "name": "unicode-ident", + "purl": "pkg:cargo/unicode-ident@1.0.22", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "1.0.22" + }, + "valuable": { + "checksum": "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65", + "name": "valuable", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.1" + }, + "windows": { + "checksum": "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893", + "name": "windows", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.61.3" + }, + "windows-collections": { + "checksum": "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8", + "name": "windows-collections", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.0" + }, + "windows-core": { + "checksum": "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3", + "name": "windows-core", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.61.2" + }, + "windows-future": { + "checksum": 
"fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e", + "name": "windows-future", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.1" + }, + "windows-implement": { + "checksum": "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf", + "name": "windows-implement", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.60.2" + }, + "windows-interface": { + "checksum": "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358", + "name": "windows-interface", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.59.3" + }, + "windows-link": { + "checksum": "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5", + "name": "windows-link", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.1" + }, + "windows-numerics": { + "checksum": "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1", + "name": "windows-numerics", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.2.0" + }, + "windows-result": { + "checksum": "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6", + "name": "windows-result", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.3.4" + }, + "windows-strings": { + "checksum": "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57", + "name": "windows-strings", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.4.2" + }, + "windows-sys": { + "checksum": "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc", + "name": "windows-sys", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.61.2" + }, + "windows-targets": { + "checksum": "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c", + "name": "windows-targets", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows-threading": { + "checksum": "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6", + "name": "windows-threading", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.1.0" + }, + "windows_aarch64_gnullvm": { + "checksum": "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8", + "name": "windows_aarch64_gnullvm", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows_aarch64_msvc": { + "checksum": "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc", + "name": "windows_aarch64_msvc", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows_i686_gnu": { + "checksum": "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e", + "name": "windows_i686_gnu", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows_i686_msvc": { + "checksum": "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406", + "name": "windows_i686_msvc", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows_x86_64_gnu": { + "checksum": "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e", + "name": "windows_x86_64_gnu", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows_x86_64_gnullvm": { + "checksum": "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc", 
+ "name": "windows_x86_64_gnullvm", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "windows_x86_64_msvc": { + "checksum": "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538", + "name": "windows_x86_64_msvc", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.48.5" + }, + "winnow": { + "checksum": "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829", + "name": "winnow", + "source": "registry+https://github.com/rust-lang/crates.io-index", + "version": "0.7.14" + } +} \ No newline at end of file diff --git a/sbom/defs.bzl b/sbom/defs.bzl new file mode 100644 index 0000000..b67489c --- /dev/null +++ b/sbom/defs.bzl @@ -0,0 +1,136 @@ +"""Public API for SBOM generation. + +This module provides the sbom() macro, which is the main entry point for +generating Software Bill of Materials for Bazel targets. + +Example usage: + load("@score_tooling//sbom:defs.bzl", "sbom") + + sbom( + name = "product_sbom", + targets = [ + "//feature_showcase/rust:orch_per_example", + "//feature_showcase/rust:kyron_example", + ], + component_version = "1.0.0", + ) +""" + +load("//sbom/internal:rules.bzl", "sbom_rule") + +def sbom( + name, + targets, + metadata_json = "@sbom_metadata//:metadata.json", + dep_module_files = None, + cdxgen_sbom = None, + auto_cdxgen = False, + cargo_lockfile = None, + module_lockfile = None, + auto_crates_cache = True, + output_formats = ["spdx", "cyclonedx"], + producer_name = "Eclipse Foundation", + producer_url = "https://projects.eclipse.org/projects/automotive.score", + component_name = None, + component_version = None, + sbom_authors = None, + generation_context = None, + sbom_tools = None, + namespace = None, + exclude_patterns = None, + **kwargs): + """Generates SBOM for specified targets. + + This macro creates an SBOM (Software Bill of Materials) for the specified + targets, traversing their transitive dependencies and generating output + in SPDX 2.3 and/or CycloneDX 1.6 format. 
+ + License metadata is collected automatically: + - Rust crates: from crates_metadata.json cache (bundled with tooling) + - C++ deps: from cpp_metadata.json cache (bundled with tooling) + - Bazel modules: version/PURL auto-extracted from module graph + + Prerequisites: + In your MODULE.bazel, you must enable the sbom_metadata extension: + ``` + sbom_ext = use_extension("@score_tooling//sbom:extensions.bzl", "sbom_metadata") + use_repo(sbom_ext, "sbom_metadata") + ``` + + Args: + name: Rule name, also used as output filename prefix + targets: List of targets to include in SBOM + metadata_json: Label to the metadata.json file from sbom_metadata extension + dep_module_files: MODULE.bazel files from dependency modules for automatic version extraction + cdxgen_sbom: Optional label to CycloneDX JSON from cdxgen for C++ enrichment + auto_cdxgen: Run cdxgen automatically when no cdxgen_sbom is provided + cargo_lockfile: Optional Cargo.lock for crates metadata cache generation + module_lockfile: Optional MODULE.bazel.lock for additional crates (e.g., from score_crates) + auto_crates_cache: Run crates metadata cache generation when cargo_lockfile or module_lockfile is provided + output_formats: List of formats to generate ("spdx", "cyclonedx") + producer_name: SBOM producer organization name + producer_url: SBOM producer URL + component_name: Main component name (defaults to rule name) + component_version: Component version string + namespace: SBOM namespace URI (defaults to https://eclipse.dev/score) + exclude_patterns: Repo patterns to exclude (e.g., build tools) + **kwargs: Additional arguments passed to the underlying rule + + Outputs: + {name}.spdx.json - SPDX 2.3 format (if "spdx" in output_formats) + {name}.cdx.json - CycloneDX 1.6 format (if "cyclonedx" in output_formats) + + Example: + # Single target SBOM + sbom( + name = "my_app_sbom", + targets = ["//src:my_app"], + component_version = "1.0.0", + ) + + # Multi-target SBOM + sbom( + name = "product_sbom", + targets = [ + "//feature_showcase/rust:orch_per_example", + "//feature_showcase/rust:kyron_example", + ], + component_name = "score_reference_integration", + component_version = "0.5.0-beta", + ) + """ + default_exclude_patterns = [ + "rules_rust", + "rules_cc", + "bazel_tools", + "platforms", + "bazel_skylib", + "rules_python", + "rules_proto", + "protobuf", + "local_config_", + "remote_", + ] + + sbom_rule( + name = name, + targets = targets, + metadata_json = metadata_json, + dep_module_files = dep_module_files if dep_module_files else [], + cdxgen_sbom = cdxgen_sbom, + auto_cdxgen = auto_cdxgen, + cargo_lockfile = cargo_lockfile, + module_lockfile = module_lockfile, + auto_crates_cache = auto_crates_cache, + output_formats = output_formats, + producer_name = producer_name, + producer_url = producer_url, + component_name = component_name if component_name else name, + component_version = component_version if component_version else "", + sbom_authors = sbom_authors if sbom_authors else [], + generation_context = generation_context if generation_context else "", + sbom_tools = sbom_tools if sbom_tools else [], + namespace = namespace if namespace else "https://eclipse.dev/score", + exclude_patterns = exclude_patterns if exclude_patterns else default_exclude_patterns, + **kwargs + ) diff --git a/sbom/extensions.bzl b/sbom/extensions.bzl new file mode 100644 index 0000000..63c0f06 --- /dev/null +++ b/sbom/extensions.bzl @@ -0,0 +1,454 @@ +"""Module extension to collect dependency metadata from bzlmod. 
+ +This extension collects version and metadata information for all modules +and other dependencies in the workspace, making it available for +SBOM generation. License metadata is collected automatically from +bundled caches (crates_metadata.json, cpp_metadata.json). + +Usage in MODULE.bazel: + sbom_ext = use_extension("@score_tooling//sbom:extensions.bzl", "sbom_metadata") + use_repo(sbom_ext, "sbom_metadata") +""" + +def _generate_purl_from_url(url, name, version): + """Generate Package URL from download URL.""" + if not url: + return "pkg:generic/{}@{}".format(name, version or "unknown") + + version_str = version or "unknown" + + # GitHub + if "github.com" in url: + parts = url.split("github.com/") + if len(parts) > 1: + path_parts = parts[1].split("/") + if len(path_parts) >= 2: + owner = path_parts[0] + repo = path_parts[1].split(".")[0].split("/")[0] + return "pkg:github/{}/{}@{}".format(owner, repo, version_str) + + # GitLab + if "gitlab.com" in url or "gitlab" in url: + if "gitlab.com/" in url: + parts = url.split("gitlab.com/") + if len(parts) > 1: + path_parts = parts[1].split("/") + if len(path_parts) >= 2: + owner = path_parts[0] + repo = path_parts[1].split(".")[0] + return "pkg:gitlab/{}/{}@{}".format(owner, repo, version_str) + + return "pkg:generic/{}@{}".format(name, version_str) + +def _generate_purl_from_git(remote, name, version): + """Generate Package URL from git remote.""" + if not remote: + return "pkg:generic/{}@{}".format(name, version or "unknown") + + version_str = version or "unknown" + + # GitHub (https or ssh) + if "github.com" in remote: + if "github.com:" in remote: + path = remote.split("github.com:")[-1] + else: + path = remote.split("github.com/")[-1] + parts = path.replace(".git", "").split("/") + if len(parts) >= 2: + return "pkg:github/{}/{}@{}".format(parts[0], parts[1], version_str) + + # GitLab + if "gitlab" in remote: + if "gitlab.com:" in remote: + path = remote.split("gitlab.com:")[-1] + elif "gitlab.com/" in remote: + path = remote.split("gitlab.com/")[-1] + else: + return "pkg:generic/{}@{}".format(name, version_str) + parts = path.replace(".git", "").split("/") + if len(parts) >= 2: + return "pkg:gitlab/{}/{}@{}".format(parts[0], parts[1], version_str) + + return "pkg:generic/{}@{}".format(name, version_str) + +def _extract_version_from_url(url): + """Extract version from URL patterns.""" + if not url: + return None + + # Try common patterns + for sep in ["/v", "/archive/v", "/archive/", "/releases/download/v", "/releases/download/"]: + if sep in url: + rest = url.split(sep)[-1] + version = rest.split("/")[0].split(".tar")[0].split(".zip")[0] + if version and len(version) > 0 and (version[0].isdigit() or version[0] == "v"): + return version.lstrip("v") + + # Try filename pattern: name-version.tar.gz + filename = url.split("/")[-1] + if "-" in filename: + parts = filename.rsplit("-", 1) + if len(parts) == 2: + version = parts[1].split(".tar")[0].split(".zip")[0] + if version and version[0].isdigit(): + return version + + return None + +def _parse_version_from_module_bazel(content): + """Parse module name and version from MODULE.bazel content using string ops. + + Starlark doesn't have regex, so we parse with string find/split operations. 
+ + Args: + content: String content of a MODULE.bazel file + + Returns: + Tuple of (name, version) or (None, None) if not found + """ + idx = content.find("module(") + if idx < 0: + return None, None + + # Find the closing paren for the module() call + block_end = content.find(")", idx) + if block_end < 0: + return None, None + + block = content[idx:block_end] + + # Extract name + name = None + for quote in ['"', "'"]: + marker = "name = " + quote + name_idx = block.find(marker) + if name_idx >= 0: + name_start = name_idx + len(marker) + name_end = block.find(quote, name_start) + if name_end > name_start: + name = block[name_start:name_end] + break + + # Extract version + version = None + for quote in ['"', "'"]: + marker = "version = " + quote + ver_idx = block.find(marker) + if ver_idx >= 0: + ver_start = ver_idx + len(marker) + ver_end = block.find(quote, ver_start) + if ver_end > ver_start: + version = block[ver_start:ver_end] + break + + return name, version + +def _sbom_metadata_repo_impl(repository_ctx): + """Implementation of the sbom_metadata repository rule.""" + + # Start with metadata from the extension + metadata = json.decode(repository_ctx.attr.metadata_content) + modules = metadata.get("modules", {}) + + # Read MODULE.bazel from tracked dependency modules to extract versions + # Use canonical labels (@@module+) to bypass repo visibility restrictions + for module_name in repository_ctx.attr.tracked_modules: + if module_name in modules: + continue # Already have this module's info + + # Try to read the module's MODULE.bazel file using canonical label + label = Label("@@{}+//:MODULE.bazel".format(module_name)) + path = repository_ctx.path(label) + if path.exists: + content = repository_ctx.read(path) + parsed_name, parsed_version = _parse_version_from_module_bazel(content) + if parsed_name and parsed_version: + modules[parsed_name] = { + "version": parsed_version, + "purl": "pkg:bazel/{}@{}".format(parsed_name, parsed_version), + } + + metadata["modules"] = modules + repository_ctx.file("metadata.json", json.encode(metadata)) + repository_ctx.file("BUILD.bazel", """\ +# Generated SBOM metadata repository +exports_files(["metadata.json"]) +""") + +_sbom_metadata_repo = repository_rule( + implementation = _sbom_metadata_repo_impl, + attrs = { + "metadata_content": attr.string(mandatory = True), + "tracked_modules": attr.string_list(default = []), + }, +) + +def _sbom_metadata_impl(module_ctx): + """Collects SBOM metadata from all modules in dependency graph.""" + all_http_archives = {} + all_git_repos = {} + all_modules = {} + all_crates = {} + all_licenses = {} + tracked_modules = [] + + for mod in module_ctx.modules: + # Collect tracked module names for version extraction + for tag in mod.tags.track_module: + if tag.name not in tracked_modules: + tracked_modules.append(tag.name) + module_name = mod.name + module_version = mod.version + + # Collect module info from bazel_dep automatically + if module_name and module_version: + all_modules[module_name] = { + "version": module_version, + "purl": "pkg:bazel/{}@{}".format(module_name, module_version), + } + + # Collect http_archive metadata + for tag in mod.tags.http_archive: + url = tag.urls[0] if tag.urls else (tag.url if hasattr(tag, "url") and tag.url else "") + version = tag.version if tag.version else _extract_version_from_url(url) + purl = tag.purl if tag.purl else _generate_purl_from_url(url, tag.name, version) + + all_http_archives[tag.name] = { + "version": version or "unknown", + "url": url, + "purl": purl, + "license": 
tag.license if tag.license else "", + "supplier": tag.supplier if tag.supplier else "", + "sha256": tag.sha256 if tag.sha256 else "", + "cpe": tag.cpe if hasattr(tag, "cpe") and tag.cpe else "", + "aliases": tag.aliases if hasattr(tag, "aliases") and tag.aliases else [], + "pedigree_ancestors": tag.pedigree_ancestors if hasattr(tag, "pedigree_ancestors") and tag.pedigree_ancestors else [], + "pedigree_descendants": tag.pedigree_descendants if hasattr(tag, "pedigree_descendants") and tag.pedigree_descendants else [], + "pedigree_variants": tag.pedigree_variants if hasattr(tag, "pedigree_variants") and tag.pedigree_variants else [], + "pedigree_notes": tag.pedigree_notes if hasattr(tag, "pedigree_notes") and tag.pedigree_notes else "", + "declared_by": module_name, + } + + # Collect git_repository metadata + for tag in mod.tags.git_repository: + version = tag.tag if tag.tag else (tag.commit[:12] if tag.commit else "unknown") + purl = tag.purl if tag.purl else _generate_purl_from_git(tag.remote, tag.name, version) + + all_git_repos[tag.name] = { + "version": version, + "remote": tag.remote, + "commit": tag.commit if tag.commit else "", + "commit_date": tag.commit_date if hasattr(tag, "commit_date") and tag.commit_date else "", + "tag": tag.tag if tag.tag else "", + "purl": purl, + "license": tag.license if tag.license else "", + "supplier": tag.supplier if tag.supplier else "", + "cpe": tag.cpe if hasattr(tag, "cpe") and tag.cpe else "", + "aliases": tag.aliases if hasattr(tag, "aliases") and tag.aliases else [], + "pedigree_ancestors": tag.pedigree_ancestors if hasattr(tag, "pedigree_ancestors") and tag.pedigree_ancestors else [], + "pedigree_descendants": tag.pedigree_descendants if hasattr(tag, "pedigree_descendants") and tag.pedigree_descendants else [], + "pedigree_variants": tag.pedigree_variants if hasattr(tag, "pedigree_variants") and tag.pedigree_variants else [], + "pedigree_notes": tag.pedigree_notes if hasattr(tag, "pedigree_notes") and tag.pedigree_notes else "", + "declared_by": module_name, + } + + # Collect license info for bazel_dep modules, http_archive, git_repository, and crate deps + for tag in mod.tags.license: + dep_type = tag.type if hasattr(tag, "type") and tag.type else "" + url = "" + if hasattr(tag, "urls") and tag.urls: + url = tag.urls[0] + elif hasattr(tag, "url") and tag.url: + url = tag.url + remote = tag.remote if hasattr(tag, "remote") and tag.remote else "" + + explicit_version = tag.version if hasattr(tag, "version") and tag.version else "" + supplier = tag.supplier if hasattr(tag, "supplier") and tag.supplier else "" + cpe = tag.cpe if hasattr(tag, "cpe") and tag.cpe else "" + aliases = tag.aliases if hasattr(tag, "aliases") and tag.aliases else [] + pedigree_ancestors = tag.pedigree_ancestors if hasattr(tag, "pedigree_ancestors") and tag.pedigree_ancestors else [] + pedigree_descendants = tag.pedigree_descendants if hasattr(tag, "pedigree_descendants") and tag.pedigree_descendants else [] + pedigree_variants = tag.pedigree_variants if hasattr(tag, "pedigree_variants") and tag.pedigree_variants else [] + pedigree_notes = tag.pedigree_notes if hasattr(tag, "pedigree_notes") and tag.pedigree_notes else "" + + if dep_type == "cargo": + version = explicit_version if explicit_version else "unknown" + all_crates[tag.name] = { + "version": version, + "purl": tag.purl if tag.purl else "pkg:cargo/{}@{}".format(tag.name, version), + "license": tag.license, + "supplier": supplier, + "cpe": cpe, + "aliases": aliases, + "pedigree_ancestors": pedigree_ancestors, + 
"pedigree_descendants": pedigree_descendants, + "pedigree_variants": pedigree_variants, + "pedigree_notes": pedigree_notes, + } + elif url or (explicit_version and not remote): + version = explicit_version if explicit_version else _extract_version_from_url(url) + purl = tag.purl if tag.purl else _generate_purl_from_url(url, tag.name, version) + all_http_archives[tag.name] = { + "version": version or "unknown", + "url": url, + "purl": purl, + "license": tag.license, + "supplier": supplier, + "cpe": cpe, + "aliases": aliases, + "pedigree_ancestors": pedigree_ancestors, + "pedigree_descendants": pedigree_descendants, + "pedigree_variants": pedigree_variants, + "pedigree_notes": pedigree_notes, + "declared_by": module_name, + } + elif remote: + version = explicit_version if explicit_version else "unknown" + purl = tag.purl if tag.purl else _generate_purl_from_git(remote, tag.name, version) + all_git_repos[tag.name] = { + "version": version, + "remote": remote, + "commit": "", + "tag": "", + "purl": purl, + "license": tag.license, + "supplier": supplier, + "cpe": cpe, + "aliases": aliases, + "pedigree_ancestors": pedigree_ancestors, + "pedigree_descendants": pedigree_descendants, + "pedigree_variants": pedigree_variants, + "pedigree_notes": pedigree_notes, + "declared_by": module_name, + } + else: + all_licenses[tag.name] = { + "license": tag.license, + "supplier": supplier, + "purl": tag.purl if tag.purl else "", + "cpe": cpe, + "aliases": aliases, + "pedigree_ancestors": pedigree_ancestors, + "pedigree_descendants": pedigree_descendants, + "pedigree_variants": pedigree_variants, + "pedigree_notes": pedigree_notes, + } + + # Apply license/supplier overrides to modules + for name, license_info in all_licenses.items(): + if name in all_modules: + all_modules[name]["license"] = license_info["license"] + if license_info.get("supplier"): + all_modules[name]["supplier"] = license_info["supplier"] + if license_info.get("purl"): + all_modules[name]["purl"] = license_info["purl"] + if license_info.get("cpe"): + all_modules[name]["cpe"] = license_info["cpe"] + if license_info.get("aliases"): + all_modules[name]["aliases"] = license_info["aliases"] + if license_info.get("pedigree_ancestors"): + all_modules[name]["pedigree_ancestors"] = license_info["pedigree_ancestors"] + if license_info.get("pedigree_descendants"): + all_modules[name]["pedigree_descendants"] = license_info["pedigree_descendants"] + if license_info.get("pedigree_variants"): + all_modules[name]["pedigree_variants"] = license_info["pedigree_variants"] + if license_info.get("pedigree_notes"): + all_modules[name]["pedigree_notes"] = license_info["pedigree_notes"] + + # Generate metadata JSON + metadata_content = json.encode({ + "modules": all_modules, + "http_archives": all_http_archives, + "git_repositories": all_git_repos, + "crates": all_crates, + "licenses": all_licenses, + }) + + _sbom_metadata_repo( + name = "sbom_metadata", + metadata_content = metadata_content, + tracked_modules = tracked_modules, + ) + +# Tag for http_archive dependencies - mirrors http_archive attributes +_http_archive_tag = tag_class( + doc = "SBOM metadata for http_archive dependency (mirrors http_archive attrs)", + attrs = { + "name": attr.string(mandatory = True, doc = "Repository name"), + "urls": attr.string_list(doc = "Download URLs"), + "url": attr.string(doc = "Single download URL (alternative to urls)"), + "version": attr.string(doc = "Version (auto-extracted from URL if not provided)"), + "sha256": attr.string(doc = "SHA256 checksum"), + "license": 
attr.string(doc = "SPDX license identifier"), + "supplier": attr.string(doc = "Supplier/organization name"), + "purl": attr.string(doc = "Package URL (auto-generated if not provided)"), + "cpe": attr.string(doc = "CPE identifier"), + "aliases": attr.string_list(doc = "Alternate component names"), + "pedigree_ancestors": attr.string_list(doc = "Pedigree ancestor identifiers (PURL or name)"), + "pedigree_descendants": attr.string_list(doc = "Pedigree descendant identifiers (PURL or name)"), + "pedigree_variants": attr.string_list(doc = "Pedigree variant identifiers (PURL or name)"), + "pedigree_notes": attr.string(doc = "Pedigree notes"), + }, +) + +# Tag for git_repository dependencies - mirrors git_repository attributes +_git_repository_tag = tag_class( + doc = "SBOM metadata for git_repository dependency (mirrors git_repository attrs)", + attrs = { + "name": attr.string(mandatory = True, doc = "Repository name"), + "remote": attr.string(mandatory = True, doc = "Git remote URL"), + "commit": attr.string(doc = "Git commit hash"), + "tag": attr.string(doc = "Git tag"), + "commit_date": attr.string(doc = "Git commit date (ISO 8601)"), + "license": attr.string(doc = "SPDX license identifier"), + "supplier": attr.string(doc = "Supplier/organization name"), + "purl": attr.string(doc = "Package URL (auto-generated if not provided)"), + "cpe": attr.string(doc = "CPE identifier"), + "aliases": attr.string_list(doc = "Alternate component names"), + "pedigree_ancestors": attr.string_list(doc = "Pedigree ancestor identifiers (PURL or name)"), + "pedigree_descendants": attr.string_list(doc = "Pedigree descendant identifiers (PURL or name)"), + "pedigree_variants": attr.string_list(doc = "Pedigree variant identifiers (PURL or name)"), + "pedigree_notes": attr.string(doc = "Pedigree notes"), + }, +) + +# Tag to add license info to any dependency (bazel_dep, http_archive, git_repository, or crate) +_license_tag = tag_class( + doc = "Add license/supplier metadata for any dependency", + attrs = { + "name": attr.string(mandatory = True, doc = "Dependency name"), + "license": attr.string(mandatory = True, doc = "SPDX license identifier"), + "supplier": attr.string(doc = "Supplier/organization name (e.g., 'Boost.org', 'Google LLC')"), + "version": attr.string(doc = "Version string (for http_archive/git_repository/crate; auto-extracted for bazel_dep)"), + "type": attr.string(doc = "Dependency type: 'cargo' for Rust crates (affects PURL generation). 
Leave empty for auto-detection."), + "purl": attr.string(doc = "Override Package URL"), + "url": attr.string(doc = "Download URL for http_archive (for PURL generation)"), + "urls": attr.string_list(doc = "Download URLs for http_archive (for PURL generation)"), + "remote": attr.string(doc = "Git remote URL for git_repository (for PURL generation)"), + "cpe": attr.string(doc = "CPE identifier"), + "aliases": attr.string_list(doc = "Alternate component names"), + "pedigree_ancestors": attr.string_list(doc = "Pedigree ancestor identifiers (PURL or name)"), + "pedigree_descendants": attr.string_list(doc = "Pedigree descendant identifiers (PURL or name)"), + "pedigree_variants": attr.string_list(doc = "Pedigree variant identifiers (PURL or name)"), + "pedigree_notes": attr.string(doc = "Pedigree notes"), + }, +) + +# Tag to track a dependency module for automatic version extraction +_track_module_tag = tag_class( + doc = "Track a bazel_dep module for automatic version extraction from its MODULE.bazel", + attrs = { + "name": attr.string(mandatory = True, doc = "Module name (as declared in bazel_dep)"), + }, +) + +sbom_metadata = module_extension( + implementation = _sbom_metadata_impl, + tag_classes = { + "http_archive": _http_archive_tag, + "git_repository": _git_repository_tag, + "license": _license_tag, + "track_module": _track_module_tag, + }, + doc = "Collects SBOM metadata from dependency declarations", +) diff --git a/sbom/internal/BUILD b/sbom/internal/BUILD new file mode 100644 index 0000000..6237649 --- /dev/null +++ b/sbom/internal/BUILD @@ -0,0 +1,24 @@ +# Internal SBOM implementation package +# +# This package contains internal implementation details for SBOM generation. +# External consumers should use the public API in //sbom:defs.bzl + +package(default_visibility = ["//sbom:__subpackages__"]) + +exports_files([ + "aspect.bzl", + "metadata_rule.bzl", + "providers.bzl", + "rules.bzl", +]) + +# Filegroup for all internal bzl files +filegroup( + name = "bzl_files", + srcs = [ + "aspect.bzl", + "metadata_rule.bzl", + "providers.bzl", + "rules.bzl", + ], +) diff --git a/sbom/internal/__init__.py b/sbom/internal/__init__.py new file mode 100644 index 0000000..bd5f6fd --- /dev/null +++ b/sbom/internal/__init__.py @@ -0,0 +1 @@ +"""SBOM internal implementation package.""" diff --git a/sbom/internal/aspect.bzl b/sbom/internal/aspect.bzl new file mode 100644 index 0000000..cf68edc --- /dev/null +++ b/sbom/internal/aspect.bzl @@ -0,0 +1,115 @@ +"""Aspect to traverse and collect transitive dependencies of a target. + +This aspect traverses the dependency graph of specified targets and collects +information about all dependencies, including external repositories, which +is essential for SBOM generation. +""" + +load(":providers.bzl", "SbomDepsInfo") + +def _sbom_aspect_impl(target, ctx): + """Collects transitive dependency information for SBOM generation. 
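+
+    External repository names and cross-repository dependency edges are
+    accumulated alongside the visited labels so that the SBOM generator can
+    later resolve them against the metadata collected by the module extension.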
+ + Args: + target: The target being analyzed + ctx: The aspect context + + Returns: + A list containing SbomDepsInfo provider + """ + direct_deps = [] + transitive_deps_list = [] + external_repos_list = [] + external_repos_direct = [] + external_dep_edges_direct = [] + external_dep_edges_list = [] + + # Get this target's label info + label = target.label + if label.workspace_name: + # This is an external dependency + external_repos_direct.append(label.workspace_name) + from_repo = label.workspace_name + else: + from_repo = "" + + # Collect from rule attributes that represent dependencies + dep_attrs = ["deps", "srcs", "data", "proc_macro_deps", "crate_root", "compile_data"] + for attr_name in dep_attrs: + if hasattr(ctx.rule.attr, attr_name): + attr_val = getattr(ctx.rule.attr, attr_name) + if type(attr_val) == "list": + for dep in attr_val: + if hasattr(dep, "label"): + direct_deps.append(dep.label) + if from_repo and dep.label.workspace_name: + external_dep_edges_direct.append( + "{}::{}".format(from_repo, dep.label.workspace_name), + ) + if SbomDepsInfo in dep: + # Propagate transitive deps from dependencies + transitive_deps_list.append(dep[SbomDepsInfo].transitive_deps) + external_repos_list.append(dep[SbomDepsInfo].external_repos) + external_dep_edges_list.append(dep[SbomDepsInfo].external_dep_edges) + elif attr_val != None and hasattr(attr_val, "label"): + # Single target attribute (e.g., crate_root) + direct_deps.append(attr_val.label) + if from_repo and attr_val.label.workspace_name: + external_dep_edges_direct.append( + "{}::{}".format(from_repo, attr_val.label.workspace_name), + ) + if SbomDepsInfo in attr_val: + transitive_deps_list.append(attr_val[SbomDepsInfo].transitive_deps) + external_repos_list.append(attr_val[SbomDepsInfo].external_repos) + external_dep_edges_list.append(attr_val[SbomDepsInfo].external_dep_edges) + + # Handle cc_library specific attributes + cc_dep_attrs = ["hdrs", "textual_hdrs", "implementation_deps"] + for attr_name in cc_dep_attrs: + if hasattr(ctx.rule.attr, attr_name): + attr_val = getattr(ctx.rule.attr, attr_name) + if type(attr_val) == "list": + for dep in attr_val: + if hasattr(dep, "label"): + direct_deps.append(dep.label) + if from_repo and dep.label.workspace_name: + external_dep_edges_direct.append( + "{}::{}".format(from_repo, dep.label.workspace_name), + ) + if SbomDepsInfo in dep: + transitive_deps_list.append(dep[SbomDepsInfo].transitive_deps) + external_repos_list.append(dep[SbomDepsInfo].external_repos) + external_dep_edges_list.append(dep[SbomDepsInfo].external_dep_edges) + + return [SbomDepsInfo( + direct_deps = depset(direct_deps), + transitive_deps = depset( + direct = [label], + transitive = transitive_deps_list, + ), + external_repos = depset( + direct = external_repos_direct, + transitive = external_repos_list, + ), + external_dep_edges = depset( + direct = external_dep_edges_direct, + transitive = external_dep_edges_list, + ), + )] + +sbom_aspect = aspect( + implementation = _sbom_aspect_impl, + attr_aspects = [ + "deps", + "srcs", + "data", + "proc_macro_deps", + "crate_root", + "compile_data", + "hdrs", + "textual_hdrs", + "implementation_deps", + ], + provides = [SbomDepsInfo], + doc = "Traverses target dependencies and collects SBOM-relevant information", +) diff --git a/sbom/internal/generator/BUILD b/sbom/internal/generator/BUILD new file mode 100644 index 0000000..8c6afa0 --- /dev/null +++ b/sbom/internal/generator/BUILD @@ -0,0 +1,33 @@ +# SBOM Generator Python package +# +# This package contains the Python tools for 
generating SBOM files +# in SPDX 2.3 and CycloneDX 1.6 formats. + +load("@rules_python//python:defs.bzl", "py_binary", "py_library") + +package(default_visibility = ["//sbom:__subpackages__"]) + +py_binary( + name = "sbom_generator", + srcs = ["sbom_generator.py"], + data = [ + "//sbom:cpp_metadata.json", + "//sbom:crates_metadata.json", + ], + main = "sbom_generator.py", + deps = [ + ":cyclonedx_formatter", + ":spdx_formatter", + ], +) + +py_library( + name = "spdx_formatter", + srcs = ["spdx_formatter.py"], +) + +py_library( + name = "cyclonedx_formatter", + srcs = ["cyclonedx_formatter.py"], +) + diff --git a/sbom/internal/generator/__init__.py b/sbom/internal/generator/__init__.py new file mode 100644 index 0000000..a34c1c3 --- /dev/null +++ b/sbom/internal/generator/__init__.py @@ -0,0 +1 @@ +"""SBOM generator package.""" diff --git a/sbom/internal/generator/cyclonedx_formatter.py b/sbom/internal/generator/cyclonedx_formatter.py new file mode 100644 index 0000000..4f22eea --- /dev/null +++ b/sbom/internal/generator/cyclonedx_formatter.py @@ -0,0 +1,358 @@ +"""CycloneDX 1.6 JSON formatter for SBOM generation. + +This module generates CycloneDX 1.6 compliant JSON output from the component +information collected by the Bazel aspect and module extension. + +CycloneDX 1.6 Specification: https://cyclonedx.org/docs/1.6/json/ +""" + +import uuid +from typing import Any + + +def generate_cyclonedx( + components: list[dict[str, Any]], + config: dict[str, Any], + timestamp: str, + external_dep_edges: list[str] | None = None, +) -> dict[str, Any]: + """Generate CycloneDX 1.6 JSON document. + + Args: + components: List of component dictionaries + config: Configuration dictionary with producer info + timestamp: ISO 8601 timestamp + + Returns: + CycloneDX 1.6 compliant dictionary + """ + component_name = config.get("component_name", "unknown") + component_version = config.get("component_version", "") + producer_name = config.get("producer_name", "Eclipse Foundation") + producer_url = config.get("producer_url", "") + + # Generate serial number (URN UUID) + serial_number = f"urn:uuid:{uuid.uuid4()}" + + cdx_doc: dict[str, Any] = { + "$schema": "http://cyclonedx.org/schema/bom-1.6.schema.json", + "bomFormat": "CycloneDX", + "specVersion": "1.6", + "serialNumber": serial_number, + "version": 1, + "metadata": { + "timestamp": timestamp, + "tools": { + "components": [ + { + "type": "application", + "name": "score-sbom-generator", + "description": "Eclipse SCORE SBOM Generator (Bazel-native)", + "publisher": producer_name, + } + ] + }, + "component": { + "type": "application", + "name": component_name, + "version": component_version if component_version else "unversioned", + "bom-ref": _generate_bom_ref(component_name, component_version), + "purl": f"pkg:github/eclipse-score/{component_name}@{component_version}" + if component_version + else None, + "supplier": { + "name": producer_name, + "url": [producer_url] if producer_url else [], + }, + }, + "supplier": { + "name": producer_name, + "url": [producer_url] if producer_url else [], + }, + }, + "components": [], + "dependencies": [], + } + + # Clean up None values from metadata.component + if cdx_doc["metadata"]["component"].get("purl") is None: + del cdx_doc["metadata"]["component"]["purl"] + + # Add authors if provided + authors = config.get("sbom_authors", []) + if authors: + cdx_doc["metadata"]["authors"] = [_author_entry(a) for a in authors] + + # Add generation lifecycle if provided + generation_context = config.get("generation_context", "") + if 
generation_context: + cdx_doc["metadata"]["lifecycles"] = [{"phase": generation_context}] + + # Add extra tool names if provided + extra_tools = config.get("sbom_tools", []) + if extra_tools: + for tool_name in extra_tools: + cdx_doc["metadata"]["tools"]["components"].append( + { + "type": "application", + "name": tool_name, + } + ) + + # Root component bom-ref for dependencies + root_bom_ref = _generate_bom_ref(component_name, component_version) + + # Add components + dependency_refs = [] + for comp in components: + cdx_component = _create_cdx_component(comp) + cdx_doc["components"].append(cdx_component) + dependency_refs.append(cdx_component["bom-ref"]) + + # Build dependency graph + depends_map: dict[str, set[str]] = {} + if external_dep_edges: + for edge in external_dep_edges: + if "::" not in edge: + continue + src, dst = edge.split("::", 1) + if not src or not dst: + continue + src_ref = _generate_bom_ref(src, _component_version_lookup(components, src)) + dst_ref = _generate_bom_ref(dst, _component_version_lookup(components, dst)) + depends_map.setdefault(src_ref, set()).add(dst_ref) + + # Add root dependency (main component depends on all components) + cdx_doc["dependencies"].append( + { + "ref": root_bom_ref, + "dependsOn": dependency_refs, + } + ) + + # Add each component's dependency entry + for comp in components: + name = comp.get("name", "") + version = comp.get("version", "") + bom_ref = _generate_bom_ref(name, version) + cdx_doc["dependencies"].append( + { + "ref": bom_ref, + "dependsOn": sorted(depends_map.get(bom_ref, set())), + } + ) + + return cdx_doc + + +def _create_cdx_component(component: dict[str, Any]) -> dict[str, Any]: + """Create a CycloneDX component from component data. + + Args: + component: Component dictionary + + Returns: + CycloneDX component dictionary + """ + name = component.get("name", "unknown") + version = component.get("version", "unknown") + purl = component.get("purl", "") + license_id = component.get("license", "") + supplier = component.get("supplier", "") + comp_type = component.get("type", "library") + source = component.get("source", "") + url = component.get("url", "") + checksum = component.get("checksum", "") + cpe = component.get("cpe", "") + aliases = component.get("aliases", []) + pedigree_ancestors = component.get("pedigree_ancestors", []) + pedigree_descendants = component.get("pedigree_descendants", []) + pedigree_variants = component.get("pedigree_variants", []) + pedigree_notes = component.get("pedigree_notes", "") + + cdx_comp: dict[str, Any] = { + "type": _map_type_to_cdx_type(comp_type), + "name": name, + "version": version, + "bom-ref": _generate_bom_ref(name, version), + } + + # Add PURL + if purl: + cdx_comp["purl"] = purl + + # Add license + if license_id: + cdx_comp["licenses"] = [ + { + "license": { + "id": license_id, + } + } + ] + + # Add supplier + if supplier: + cdx_comp["supplier"] = { + "name": supplier, + } + + # Add hashes (SHA-256 from Cargo.lock) + if checksum: + cdx_comp["hashes"] = [ + { + "alg": "SHA-256", + "content": checksum, + } + ] + if cpe: + cdx_comp["cpe"] = cpe + + if aliases: + cdx_comp["properties"] = [ + {"name": "cdx:alias", "value": alias} for alias in aliases + ] + + pedigree = _build_pedigree( + pedigree_ancestors, + pedigree_descendants, + pedigree_variants, + pedigree_notes, + ) + if pedigree: + cdx_comp["pedigree"] = pedigree + + # Add external references + external_refs = [] + + # Add download/source URL + if url: + external_refs.append( + { + "type": "distribution", + "url": url, + } + ) 
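+    # No explicit download URL: fall back to the crates.io page for
+    # components sourced from crates.io.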
+ elif source == "crates.io": + external_refs.append( + { + "type": "distribution", + "url": f"https://crates.io/crates/{name}/{version}", + } + ) + + # Add VCS URL for git sources + if source == "git" and url: + external_refs.append( + { + "type": "vcs", + "url": url, + } + ) + + if external_refs: + cdx_comp["externalReferences"] = external_refs + + return cdx_comp + + +def _map_type_to_cdx_type(comp_type: str) -> str: + """Map component type to CycloneDX component type. + + Args: + comp_type: Component type string + + Returns: + CycloneDX component type string + """ + type_mapping = { + "application": "application", + "library": "library", + "framework": "framework", + "file": "file", + "container": "container", + "firmware": "firmware", + "device": "device", + "data": "data", + "operating-system": "operating-system", + "device-driver": "device-driver", + "machine-learning-model": "machine-learning-model", + "platform": "platform", + } + return type_mapping.get(comp_type, "library") + + +def _generate_bom_ref(name: str, version: str) -> str: + """Generate a unique bom-ref for a component. + + Args: + name: Component name + version: Component version + + Returns: + Unique bom-ref string + """ + # Create a deterministic but unique reference + sanitized_name = _sanitize_name(name) + sanitized_version = _sanitize_name(version) if version else "unknown" + return f"{sanitized_name}@{sanitized_version}" + + +def _sanitize_name(value: str) -> str: + """Sanitize a string for use in bom-ref. + + Args: + value: String to sanitize + + Returns: + Sanitized string + """ + result = [] + for char in value: + if char.isalnum() or char in (".", "-", "_"): + result.append(char) + elif char in (" ", "/"): + result.append("-") + return "".join(result) or "unknown" + + +def _author_entry(value: str) -> dict[str, Any]: + """Create author entry from a string.""" + value = value.strip() + if "<" in value and ">" in value: + name, rest = value.split("<", 1) + email = rest.split(">", 1)[0].strip() + return {"name": name.strip(), "email": email} + return {"name": value} + + +def _build_pedigree( + ancestors: list[str], + descendants: list[str], + variants: list[str], + notes: str, +) -> dict[str, Any] | None: + pedigree: dict[str, Any] = {} + if ancestors: + pedigree["ancestors"] = [_pedigree_ref(a) for a in ancestors] + if descendants: + pedigree["descendants"] = [_pedigree_ref(d) for d in descendants] + if variants: + pedigree["variants"] = [_pedigree_ref(v) for v in variants] + if notes: + pedigree["notes"] = notes + return pedigree or None + + +def _pedigree_ref(value: str) -> dict[str, Any]: + value = value.strip() + if value.startswith("pkg:"): + return {"purl": value} + return {"name": value} + + +def _component_version_lookup(components: list[dict[str, Any]], name: str) -> str: + for comp in components: + if comp.get("name") == name: + return comp.get("version", "") + return "" diff --git a/sbom/internal/generator/sbom_generator.py b/sbom/internal/generator/sbom_generator.py new file mode 100644 index 0000000..3510e67 --- /dev/null +++ b/sbom/internal/generator/sbom_generator.py @@ -0,0 +1,744 @@ +#!/usr/bin/env python3 +"""SBOM generator - creates SPDX and CycloneDX output from Bazel aspect data. + +This is the main entry point for SBOM generation. It reads dependency +information collected by the Bazel aspect and metadata from the module +extension, then generates SBOM files in SPDX 2.3 and CycloneDX 1.6 formats. 
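+
+Example invocation (normally driven by the sbom rule; the file names below are
+illustrative):
+
+    sbom_generator --input deps.json --metadata metadata.json \
+        --spdx-output out.spdx.json --cyclonedx-output out.cdx.json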
+""" + +import argparse +import json +import re +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from sbom.internal.generator.spdx_formatter import generate_spdx +from sbom.internal.generator.cyclonedx_formatter import generate_cyclonedx + + +def parse_module_bazel_files(file_paths: list[str]) -> dict[str, dict[str, str]]: + """Parse MODULE.bazel files to extract module name and version. + + Reads each MODULE.bazel file and extracts the module() call's name and + version fields. This allows automatic version detection for bazel_dep + modules that don't appear in the sbom_metadata extension's module list + (because they don't use_extension for sbom_metadata). + + Args: + file_paths: List of paths to MODULE.bazel files + + Returns: + Dict mapping module name to {"version": ..., "purl": ...} + """ + modules: dict[str, dict[str, str]] = {} + for fpath in file_paths: + try: + with open(fpath, encoding="utf-8") as f: + content = f.read() + except OSError: + continue + + # Extract module(name = "...", version = "...") + module_match = re.search( + r"module\s*\((.*?)\)", + content, + re.DOTALL, + ) + if not module_match: + continue + + module_block = module_match.group(1) + name_match = re.search(r'name\s*=\s*["\']([^"\']+)["\']', module_block) + version_match = re.search(r'version\s*=\s*["\']([^"\']+)["\']', module_block) + + if name_match and version_match: + name = name_match.group(1) + version = version_match.group(1) + modules[name] = { + "version": version, + "purl": f"pkg:bazel/{name}@{version}", + } + + return modules + + +def load_crates_cache(override_path: str | None = None) -> dict[str, Any]: + """Load pre-generated crates metadata cache. + + Returns: + Dict mapping crate name to metadata (license, checksum, etc.) + """ + # Try multiple paths for cache file + possible_paths = [] + if override_path: + possible_paths.append(Path(override_path)) + possible_paths += [ + # Bazel runfiles location + Path(__file__).parent.parent.parent.parent.parent / "crates_metadata.json", + # Development/source tree location + Path(__file__).parent.parent.parent / "crates_metadata.json", + # Same directory as script + Path(__file__).parent / "crates_metadata.json", + ] + + for cache_path in possible_paths: + if cache_path.exists(): + try: + with open(cache_path, encoding="utf-8") as f: + return json.load(f) + except (OSError, json.JSONDecodeError): + continue + + # No cache found + return {} + + +def load_cpp_cache() -> dict[str, Any]: + """Load pre-generated C++ dependency metadata cache. + + Returns: + Dict mapping dependency name to metadata (license, supplier, version, etc.) + """ + possible_paths = [ + Path(__file__).parent.parent.parent.parent.parent / "cpp_metadata.json", + Path(__file__).parent.parent.parent / "cpp_metadata.json", + Path(__file__).parent / "cpp_metadata.json", + ] + + for cache_path in possible_paths: + if cache_path.exists(): + try: + with open(cache_path, encoding="utf-8") as f: + return json.load(f) + except (OSError, json.JSONDecodeError): + continue + + return {} + + +def cpp_cache_to_components(cpp_cache: dict[str, Any]) -> list[dict[str, Any]]: + """Convert C++ metadata cache to component list for enrichment. 
+ + Args: + cpp_cache: Dict mapping dep name to metadata + + Returns: + List of component dicts in internal format + """ + components = [] + for name, data in cpp_cache.items(): + version = data.get("version", "unknown") + component = { + "name": name, + "version": version, + "purl": data.get("purl", f"pkg:generic/{name}@{version}"), + "type": "library", + "license": data.get("license", ""), + "supplier": data.get("supplier", ""), + "cpe": data.get("cpe", ""), + "aliases": data.get("aliases", []), + "pedigree_ancestors": data.get("pedigree_ancestors", []), + "pedigree_descendants": data.get("pedigree_descendants", []), + "pedigree_variants": data.get("pedigree_variants", []), + "pedigree_notes": data.get("pedigree_notes", ""), + } + if data.get("url"): + component["url"] = data["url"] + components.append(component) + return components + + +def normalize_name(name: str) -> str: + """Normalize a dependency name for fuzzy matching. + + Handles naming differences between Bazel repos and C++ metadata cache: + e.g. nlohmann_json vs nlohmann-json, libfmt vs fmt. + + Args: + name: Dependency name to normalize + + Returns: + Normalized name string for comparison + """ + n = name.lower().strip() + for prefix in ("lib", "lib_"): + if n.startswith(prefix) and len(n) > len(prefix): + n = n[len(prefix) :] + n = n.replace("-", "").replace("_", "").replace(".", "") + return n + + +def enrich_components_from_cpp_cache( + components: list[dict[str, Any]], + cpp_components: list[dict[str, Any]], + metadata: dict[str, Any], +) -> list[dict[str, Any]]: + """Enrich Bazel-discovered components with C++ metadata cache. + + For each Bazel component, finds a matching C++ cache entry by normalized + name and fills in missing fields (license, supplier, version, purl). + Unmatched cache entries are appended. + + Args: + components: Bazel-discovered components to enrich + cpp_components: Components from C++ metadata cache + metadata: Metadata dict + + Returns: + Enriched list of components + """ + # Build lookup: normalized_name -> cache component + cpp_by_name: dict[str, dict[str, Any]] = {} + for cc in cpp_components: + norm = normalize_name(cc["name"]) + cpp_by_name[norm] = cc + cpp_by_name[cc["name"].lower()] = cc + + matched_norms: set[str] = set() + + for comp in components: + comp_name = comp.get("name", "") + norm_name = normalize_name(comp_name) + + cpp_match = cpp_by_name.get(norm_name) or cpp_by_name.get(comp_name.lower()) + # Try parent name match (e.g., boost.config+ -> boost) + if not cpp_match: + base_name = comp_name.rstrip("+") + if "." 
in base_name: + parent = base_name.split(".")[0] + cpp_match = cpp_by_name.get(normalize_name(parent)) + if not cpp_match: + continue + + matched_norms.add(normalize_name(cpp_match["name"])) + + # Enrich missing fields only + if not comp.get("license") and cpp_match.get("license"): + comp["license"] = cpp_match["license"] + + if not comp.get("supplier") and cpp_match.get("supplier"): + comp["supplier"] = cpp_match["supplier"] + + if comp.get("version") in ("unknown", "") and cpp_match.get("version") not in ( + "unknown", + "", + ): + comp["version"] = cpp_match["version"] + + if comp.get("purl", "").endswith("@unknown") and cpp_match.get("purl"): + comp["purl"] = cpp_match["purl"] + + if not comp.get("url") and cpp_match.get("url"): + comp["url"] = cpp_match["url"] + + if not comp.get("checksum") and cpp_match.get("checksum"): + comp["checksum"] = cpp_match["checksum"] + + # Append unmatched cache components not already in Bazel's graph + existing_norms = {normalize_name(c.get("name", "")) for c in components} + for cc in cpp_components: + norm = normalize_name(cc["name"]) + if norm not in existing_norms and norm not in matched_norms: + cc["source"] = "cdxgen" + components.append(cc) + + return components + + +def load_cdxgen_sbom(cdxgen_path: str) -> list[dict[str, Any]]: + """Load and convert cdxgen CycloneDX SBOM to component list. + + Args: + cdxgen_path: Path to cdxgen-generated CycloneDX JSON file + + Returns: + List of component dicts in internal format + """ + try: + with open(cdxgen_path, encoding="utf-8") as f: + cdx_data = json.load(f) + except (OSError, json.JSONDecodeError): + return [] + + components = [] + for comp in cdx_data.get("components", []): + # Extract license information + licenses = comp.get("licenses", []) + license_str = "" + if licenses: + # Take first license + lic = licenses[0] + if isinstance(lic, dict): + license_str = lic.get("license", {}).get("id", "") or lic.get( + "license", {} + ).get("name", "") + + # Extract purl + purl = comp.get("purl", "") + + # Build component + component = { + "name": comp.get("name", ""), + "version": comp.get("version", "unknown"), + "purl": purl, + "type": comp.get("type", "library"), + "license": license_str, + "supplier": comp.get("supplier", {}).get("name", "") + if isinstance(comp.get("supplier"), dict) + else "", + "cpe": comp.get("cpe", ""), + "url": "", + } + + # Add component if it has a name + if component["name"]: + components.append(component) + + return components + + +def main() -> int: + """Main entry point for SBOM generation.""" + parser = argparse.ArgumentParser(description="Generate SBOM from Bazel deps") + parser.add_argument("--input", required=True, help="Input JSON from Bazel rule") + parser.add_argument( + "--metadata", required=True, help="Metadata JSON from module extension" + ) + parser.add_argument("--spdx-output", help="SPDX 2.3 JSON output file") + parser.add_argument("--cyclonedx-output", help="CycloneDX 1.6 output file") + parser.add_argument("--crates-cache", help="Path to crates_metadata.json override") + parser.add_argument( + "--cdxgen-sbom", + help="Path to cdxgen-generated CycloneDX JSON for C++ enrichment", + ) + args = parser.parse_args() + + # Load dependency data from Bazel + with open(args.input, encoding="utf-8") as f: + data = json.load(f) + + # Load metadata from module extension + with open(args.metadata, encoding="utf-8") as f: + metadata = json.load(f) + + # Parse MODULE.bazel files from dependency modules for version extraction + # This fills in versions for bazel_dep modules 
that don't use the sbom_metadata extension + dep_module_files = data.get("dep_module_files", []) + if dep_module_files: + dep_modules = parse_module_bazel_files(dep_module_files) + if "modules" not in metadata: + metadata["modules"] = {} + for name, mod_data in dep_modules.items(): + # Don't override entries already in metadata (from the extension) + if name not in metadata["modules"]: + metadata["modules"][name] = mod_data + + # Load crates metadata cache (licenses + checksums + versions) + crates_cache = load_crates_cache(args.crates_cache) + + # Add crates cache to metadata + if crates_cache: + if "crates" not in metadata: + metadata["crates"] = {} + for name, cache_data in crates_cache.items(): + metadata["crates"].setdefault(name, cache_data) + + # Load C++ metadata cache (auto-discovered, like crates cache) + cpp_cache = load_cpp_cache() + cpp_components = cpp_cache_to_components(cpp_cache) if cpp_cache else [] + + # Load cdxgen SBOM if provided (overrides/supplements cpp_cache) + if args.cdxgen_sbom: + cdxgen_components = load_cdxgen_sbom(args.cdxgen_sbom) + if cdxgen_components: + # Merge with cpp_components, preferring cdxgen data + cpp_components = cdxgen_components + cpp_components + + # Filter external repos (exclude build tools) + external_repos = data.get("external_repos", []) + exclude_patterns = data.get("exclude_patterns", []) + filtered_repos = filter_repos(external_repos, exclude_patterns) + + # Build component list with metadata + components = [] + + for repo in filtered_repos: + component = resolve_component(repo, metadata) + if component: + components.append(component) + + # Deduplicate components by name + components = deduplicate_components(components) + + # Enrich components with C++ metadata cache + if cpp_components: + components = enrich_components_from_cpp_cache( + components, cpp_components, metadata + ) + components = deduplicate_components(components) + + # Generate timestamp in SPDX-compliant format (YYYY-MM-DDTHH:MM:SSZ) + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + # Get configuration + config = data.get("config", {}) + + # Auto-detect component_version from metadata if not explicitly set + component_name = config.get("component_name", "") + if not config.get("component_version") and component_name: + modules = metadata.get("modules", {}) + if component_name in modules: + config["component_version"] = modules[component_name].get("version", "") + + # Filter out the main component from the dependency list to avoid self-dependency + # (e.g., sbom for score_kyron should not list score_kyron as its own dependency) + if component_name: + components = [ + c for c in components if c.get("name") != component_name + ] + + # Generate outputs + if args.spdx_output: + spdx = generate_spdx(components, config, timestamp) + Path(args.spdx_output).write_text(json.dumps(spdx, indent=2), encoding="utf-8") + + if args.cyclonedx_output: + cdx = generate_cyclonedx( + components, + config, + timestamp, + external_dep_edges=data.get("external_dep_edges", []), + ) + Path(args.cyclonedx_output).write_text( + json.dumps(cdx, indent=2), encoding="utf-8" + ) + + return 0 + + +def filter_repos(repos: list[str], exclude_patterns: list[str]) -> list[str]: + """Filter out build tool repositories based on exclude patterns. + + Crates from crate_universe are always kept even if they match exclude patterns, + since they are legitimate dependencies, not build tools. 
+ + Args: + repos: List of repository names + exclude_patterns: Patterns to exclude + + Returns: + Filtered list of repository names + """ + filtered = [] + for repo in repos: + # Always keep crates from crate_universe - these are real dependencies + if "crate_index__" in repo or "crates_io__" in repo or "_crates__" in repo: + filtered.append(repo) + continue + + should_exclude = False + for pattern in exclude_patterns: + if pattern in repo: + should_exclude = True + break + if not should_exclude: + filtered.append(repo) + return filtered + + +def resolve_component( + repo_name: str, metadata: dict[str, Any] +) -> dict[str, Any] | None: + """Resolve repository to component with version and PURL. + + Args: + repo_name: Name of the repository + metadata: Metadata dictionary from module extension + + Returns: + Component dictionary or None if not resolved + """ + # Normalize repo name - bzlmod adds "+" suffix to module repos + normalized_name = repo_name.rstrip("+") + + # Check if it's a bazel_dep module + modules = metadata.get("modules", {}) + if normalized_name in modules: + mod = modules[normalized_name] + return { + "name": normalized_name, + "version": mod.get("version", "unknown"), + "purl": mod.get("purl", f"pkg:bazel/{normalized_name}@unknown"), + "type": "library", + "supplier": mod.get("supplier", ""), + "license": mod.get("license", ""), + "cpe": mod.get("cpe", ""), + "aliases": mod.get("aliases", []), + "pedigree_ancestors": mod.get("pedigree_ancestors", []), + "pedigree_descendants": mod.get("pedigree_descendants", []), + "pedigree_variants": mod.get("pedigree_variants", []), + "pedigree_notes": mod.get("pedigree_notes", ""), + } + + # Check if it's an http_archive dependency + http_archives = metadata.get("http_archives", {}) + if normalized_name in http_archives: + archive = http_archives[normalized_name] + result = { + "name": normalized_name, + "version": archive.get("version", "unknown"), + "purl": archive.get("purl", f"pkg:generic/{normalized_name}@unknown"), + "type": "library", + "url": archive.get("url", ""), + "license": archive.get("license", ""), + "supplier": archive.get("supplier", ""), + "cpe": archive.get("cpe", ""), + "aliases": archive.get("aliases", []), + "pedigree_ancestors": archive.get("pedigree_ancestors", []), + "pedigree_descendants": archive.get("pedigree_descendants", []), + "pedigree_variants": archive.get("pedigree_variants", []), + "pedigree_notes": archive.get("pedigree_notes", ""), + } + if archive.get("sha256"): + result["checksum"] = archive["sha256"] + return result + + # Check if it's a git_repository dependency + git_repos = metadata.get("git_repositories", {}) + if normalized_name in git_repos: + repo = git_repos[normalized_name] + result = { + "name": normalized_name, + "version": repo.get("version", "unknown"), + "purl": repo.get("purl", f"pkg:generic/{normalized_name}@unknown"), + "type": "library", + "url": repo.get("remote", ""), + "license": repo.get("license", ""), + "supplier": repo.get("supplier", ""), + "cpe": repo.get("cpe", ""), + "aliases": repo.get("aliases", []), + "pedigree_ancestors": repo.get("pedigree_ancestors", []), + "pedigree_descendants": repo.get("pedigree_descendants", []), + "pedigree_variants": repo.get("pedigree_variants", []), + "pedigree_notes": repo.get("pedigree_notes", ""), + } + commit_date = repo.get("commit_date", "") + if result.get("version") in ("unknown", "") and commit_date: + result["version"] = commit_date + return result + + # Check if it's a crate from the metadata cache + # Cargo.lock uses 
underscores, Bazel uses hyphens — try both + crates = metadata.get("crates", {}) + crate_key = ( + normalized_name + if normalized_name in crates + else normalized_name.replace("-", "_") + ) + if crate_key in crates: + crate = crates[crate_key] + result = { + "name": normalized_name, + "version": crate.get("version", "unknown"), + "purl": crate.get("purl", f"pkg:cargo/{normalized_name}@unknown"), + "type": "library", + "license": crate.get("license", ""), + "supplier": crate.get("supplier", ""), + "cpe": crate.get("cpe", ""), + "aliases": crate.get("aliases", []), + "pedigree_ancestors": crate.get("pedigree_ancestors", []), + "pedigree_descendants": crate.get("pedigree_descendants", []), + "pedigree_variants": crate.get("pedigree_variants", []), + "pedigree_notes": crate.get("pedigree_notes", ""), + } + if crate.get("checksum"): + result["checksum"] = crate["checksum"] + return result + + # Handle score_ prefixed repos that might be modules + if normalized_name.startswith("score_"): + return { + "name": normalized_name, + "version": "unknown", + "purl": f"pkg:github/eclipse-score/{normalized_name}@unknown", + "type": "library", + "supplier": "Eclipse Foundation", + "license": "", + "cpe": "", + "aliases": [], + "pedigree_ancestors": [], + "pedigree_descendants": [], + "pedigree_variants": [], + "pedigree_notes": "", + } + + # Handle crate universe repos - bzlmod format + # e.g., rules_rust++crate+crate_index__serde-1.0.228 + # e.g., rules_rust++crate+crate_index__iceoryx2-qnx8-0.7.0 + cached_crates = metadata.get("crates", {}) + + if "crate_index__" in repo_name or "crate+" in repo_name: + # Extract the crate info part after crate_index__ + if "crate_index__" in repo_name: + crate_part = repo_name.split("crate_index__")[-1] + else: + crate_part = repo_name.split("+")[-1] + + # Parse name-version format (e.g., "serde-1.0.228") + # Handle complex names like "iceoryx2-qnx8-0.7.0" where last part is version + parts = crate_part.split("-") + if len(parts) >= 2: + # Find the version part (starts with a digit) + version_idx = -1 + for i, part in enumerate(parts): + if part and part[0].isdigit(): + version_idx = i + break + + if version_idx > 0: + crate_name = "-".join(parts[:version_idx]).replace("_", "-") + version = "-".join(parts[version_idx:]) + + # Look up crate metadata from cache + # Cargo.lock uses underscores, Bazel uses hyphens — try both + crate_meta = cached_crates.get(crate_name) or cached_crates.get( + crate_name.replace("-", "_"), {} + ) + + result = { + "name": crate_name, + "version": version, + "purl": f"pkg:cargo/{crate_name}@{version}", + "type": "library", + } + if crate_meta.get("license"): + result["license"] = crate_meta["license"] + if crate_meta.get("supplier"): + result["supplier"] = crate_meta["supplier"] + if crate_meta.get("cpe"): + result["cpe"] = crate_meta["cpe"] + if crate_meta.get("aliases"): + result["aliases"] = crate_meta["aliases"] + if crate_meta.get("pedigree_ancestors"): + result["pedigree_ancestors"] = crate_meta["pedigree_ancestors"] + if crate_meta.get("pedigree_descendants"): + result["pedigree_descendants"] = crate_meta["pedigree_descendants"] + if crate_meta.get("pedigree_variants"): + result["pedigree_variants"] = crate_meta["pedigree_variants"] + if crate_meta.get("pedigree_notes"): + result["pedigree_notes"] = crate_meta["pedigree_notes"] + if crate_meta.get("repository"): + result["url"] = crate_meta["repository"] + if crate_meta.get("checksum"): + result["checksum"] = crate_meta["checksum"] + return result + + # Handle legacy crate universe 
format (e.g., crates_io__tokio-1.10.0) + if repo_name.startswith("crates_io__") or "_crates__" in repo_name: + parts = repo_name.split("__") + if len(parts) >= 2: + crate_info = parts[-1] + # Try to split by last hyphen to get name-version + last_hyphen = crate_info.rfind("-") + if last_hyphen > 0: + crate_name = crate_info[:last_hyphen].replace("_", "-") + version = crate_info[last_hyphen + 1 :] + + # Look up crate metadata from cache + # Cargo.lock uses underscores, Bazel uses hyphens — try both + crate_meta = cached_crates.get(crate_name) or cached_crates.get( + crate_name.replace("-", "_"), {} + ) + + result = { + "name": crate_name, + "version": version, + "purl": f"pkg:cargo/{crate_name}@{version}", + "type": "library", + } + if crate_meta.get("license"): + result["license"] = crate_meta["license"] + if crate_meta.get("supplier"): + result["supplier"] = crate_meta["supplier"] + if crate_meta.get("cpe"): + result["cpe"] = crate_meta["cpe"] + if crate_meta.get("aliases"): + result["aliases"] = crate_meta["aliases"] + if crate_meta.get("pedigree_ancestors"): + result["pedigree_ancestors"] = crate_meta["pedigree_ancestors"] + if crate_meta.get("pedigree_descendants"): + result["pedigree_descendants"] = crate_meta["pedigree_descendants"] + if crate_meta.get("pedigree_variants"): + result["pedigree_variants"] = crate_meta["pedigree_variants"] + if crate_meta.get("pedigree_notes"): + result["pedigree_notes"] = crate_meta["pedigree_notes"] + if crate_meta.get("repository"): + result["url"] = crate_meta["repository"] + if crate_meta.get("checksum"): + result["checksum"] = crate_meta["checksum"] + return result + + # Check if repo is a sub-library of a known parent (e.g., boost.config+ -> boost) + # rules_boost splits Boost into individual repos like boost.config+, boost.assert+, etc. + if "." in normalized_name: + parent_name = normalized_name.split(".")[0].rstrip("+") + # Look up parent in all metadata sources + parent = None + if parent_name in modules: + parent = modules[parent_name] + elif parent_name in http_archives: + parent = http_archives[parent_name] + elif parent_name in git_repos: + parent = git_repos[parent_name] + if parent: + return { + "name": normalized_name, + "version": parent.get("version", "unknown"), + "purl": f"pkg:generic/{normalized_name}@{parent.get('version', 'unknown')}", + "type": "library", + "license": parent.get("license", ""), + "supplier": parent.get("supplier", ""), + } + + # Unknown repository - return with unknown version + return { + "name": repo_name, + "version": "unknown", + "purl": f"pkg:generic/{repo_name}@unknown", + "type": "library", + } + + +def deduplicate_components(components: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Remove duplicate components, keeping the one with most metadata. 
+ + Args: + components: List of component dictionaries + + Returns: + Deduplicated list of components + """ + seen: dict[str, dict[str, Any]] = {} + for comp in components: + name = comp.get("name", "") + if name not in seen: + seen[name] = comp + else: + # Keep the one with more information (non-unknown version preferred) + existing = seen[name] + if ( + existing.get("version") == "unknown" + and comp.get("version") != "unknown" + ): + seen[name] = comp + elif comp.get("license") and not existing.get("license"): + # Prefer component with license info + seen[name] = comp + + return list(seen.values()) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/sbom/internal/generator/spdx_formatter.py b/sbom/internal/generator/spdx_formatter.py new file mode 100644 index 0000000..c36ff7a --- /dev/null +++ b/sbom/internal/generator/spdx_formatter.py @@ -0,0 +1,180 @@ +"""SPDX 2.3 JSON formatter for SBOM generation. + +This module generates SPDX 2.3 compliant JSON output from the component +information collected by the Bazel aspect and module extension. + +SPDX 2.3 Specification: https://spdx.github.io/spdx-spec/v2.3/ +""" + +import uuid +from typing import Any + + +def generate_spdx( + components: list[dict[str, Any]], + config: dict[str, Any], + timestamp: str, +) -> dict[str, Any]: + """Generate SPDX 2.3 JSON document. + + Args: + components: List of component dictionaries + config: Configuration dictionary with producer info + timestamp: ISO 8601 timestamp + + Returns: + SPDX 2.3 compliant dictionary + """ + + namespace = config.get("namespace", "https://eclipse.dev/score") + component_name = config.get("component_name", "unknown") + component_version = config.get("component_version", "") + producer_name = config.get("producer_name", "Eclipse Foundation") + + doc_uuid = uuid.uuid4() + + packages: list[dict[str, Any]] = [] + relationships: list[dict[str, Any]] = [] + + # Root package + root_spdx_id = "SPDXRef-RootPackage" + root_package: dict[str, Any] = { + "SPDXID": root_spdx_id, + "name": component_name, + "versionInfo": component_version if component_version else "unversioned", + "downloadLocation": "https://github.com/eclipse-score", + "supplier": f"Organization: {producer_name}", + "primaryPackagePurpose": "APPLICATION", + "filesAnalyzed": False, + "licenseConcluded": "NOASSERTION", + "licenseDeclared": "NOASSERTION", + "copyrightText": "NOASSERTION", + } + packages.append(root_package) + + # DESCRIBES relationship + relationships.append( + { + "spdxElementId": "SPDXRef-DOCUMENT", + "relationshipType": "DESCRIBES", + "relatedSpdxElement": root_spdx_id, + } + ) + + # Add dependency packages + for comp in components: + pkg, spdx_id = _create_spdx_package(comp) + packages.append(pkg) + + # Root depends on each component + relationships.append( + { + "spdxElementId": root_spdx_id, + "relationshipType": "DEPENDS_ON", + "relatedSpdxElement": spdx_id, + } + ) + + return { + "spdxVersion": "SPDX-2.3", + "dataLicense": "CC0-1.0", + "SPDXID": "SPDXRef-DOCUMENT", + "name": f"SBOM for {component_name}", + "documentNamespace": f"{namespace}/spdx/{_sanitize_id(component_name)}-{doc_uuid}", + "creationInfo": { + "created": timestamp, + "creators": [ + f"Organization: {producer_name}", + "Tool: score-sbom-generator", + ], + }, + "packages": packages, + "relationships": relationships, + } + + +def _create_spdx_package( + component: dict[str, Any], +) -> tuple[dict[str, Any], str]: + """Create an SPDX 2.3 Package for a component. 
+ + Args: + component: Component dictionary + + Returns: + Tuple of (SPDX Package dictionary, spdx_id string) + """ + name = component.get("name", "unknown") + version = component.get("version", "unknown") + purl = component.get("purl", "") + license_id = component.get("license", "") + supplier = component.get("supplier", "") + comp_type = component.get("type", "library") + + spdx_id = f"SPDXRef-{_sanitize_id(name)}-{_sanitize_id(version)}" + + # Determine download location + url = component.get("url", "") + source = component.get("source", "") + if url: + download_location = url + elif source == "crates.io": + download_location = f"https://crates.io/crates/{name}/{version}" + else: + download_location = "NOASSERTION" + + package: dict[str, Any] = { + "SPDXID": spdx_id, + "name": name, + "versionInfo": version, + "downloadLocation": download_location, + "primaryPackagePurpose": _map_type_to_purpose(comp_type), + "filesAnalyzed": False, + "licenseConcluded": license_id if license_id else "NOASSERTION", + "licenseDeclared": license_id if license_id else "NOASSERTION", + "copyrightText": "NOASSERTION", + } + + if supplier: + package["supplier"] = f"Organization: {supplier}" + + # Add PURL as external reference + if purl: + package["externalRefs"] = [ + { + "referenceCategory": "PACKAGE-MANAGER", + "referenceType": "purl", + "referenceLocator": purl, + }, + ] + + return package, spdx_id + + +def _map_type_to_purpose(comp_type: str) -> str: + """Map component type to SPDX 2.3 primary package purpose.""" + type_mapping = { + "application": "APPLICATION", + "library": "LIBRARY", + "framework": "FRAMEWORK", + "file": "FILE", + "container": "CONTAINER", + "firmware": "FIRMWARE", + "device": "DEVICE", + "data": "DATA", + } + return type_mapping.get(comp_type, "LIBRARY") + + +def _sanitize_id(value: str) -> str: + """Sanitize a string for use in SPDX IDs. + + SPDX 2.3 IDs must match [a-zA-Z0-9.-]+ + """ + result = [] + for char in value: + if char.isalnum() or char in (".", "-"): + result.append(char) + elif char in ("_", " ", "/", "@"): + result.append("-") + return "".join(result) or "unknown" diff --git a/sbom/internal/metadata_rule.bzl b/sbom/internal/metadata_rule.bzl new file mode 100644 index 0000000..7d3ffc3 --- /dev/null +++ b/sbom/internal/metadata_rule.bzl @@ -0,0 +1,49 @@ +"""Rule to expose SBOM metadata collected by the module extension. + +This rule wraps the metadata JSON file generated by the module extension +and makes it available for the SBOM generation action. +""" + +load(":providers.bzl", "SbomMetadataInfo") + +def _sbom_metadata_rule_impl(ctx): + """Implementation of sbom_metadata_rule. + + The metadata is passed as a JSON file to the SBOM generator action, + rather than being parsed at analysis time. + + Args: + ctx: The rule context + + Returns: + A list of providers including SbomMetadataInfo with file reference + """ + metadata_file = ctx.file.metadata_json + + # We can't read files at analysis time in Bazel rules, so we pass + # the file reference and let the generator read it at execution time. + # The SbomMetadataInfo provider carries empty dicts here - the actual + # metadata is read by the Python generator from the JSON file. 
+ return [ + DefaultInfo(files = depset([metadata_file])), + SbomMetadataInfo( + modules = {}, + crates = {}, + http_archives = {}, + ), + # Also provide the file itself for the rule to use + OutputGroupInfo(metadata_file = depset([metadata_file])), + ] + +sbom_metadata_rule = rule( + implementation = _sbom_metadata_rule_impl, + attrs = { + "metadata_json": attr.label( + mandatory = True, + allow_single_file = [".json"], + doc = "JSON file containing SBOM metadata", + ), + }, + provides = [SbomMetadataInfo], + doc = "Exposes SBOM metadata collected by the module extension", +) diff --git a/sbom/internal/providers.bzl b/sbom/internal/providers.bzl new file mode 100644 index 0000000..e2d909a --- /dev/null +++ b/sbom/internal/providers.bzl @@ -0,0 +1,28 @@ +"""Providers for SBOM data propagation. + +This module defines the providers used to pass SBOM-related information +between different phases of the build: +- SbomDepsInfo: Collected by aspect - deps of a specific target +- SbomMetadataInfo: Collected by extension - metadata for all modules +""" + +# Collected by aspect - deps of a specific target +SbomDepsInfo = provider( + doc = "Transitive dependency information for SBOM generation", + fields = { + "direct_deps": "depset of direct dependency labels", + "transitive_deps": "depset of all transitive dependency labels", + "external_repos": "depset of external repository names used", + "external_dep_edges": "depset of external repo dependency edges (from::to)", + }, +) + +# Collected by extension - metadata for all modules +SbomMetadataInfo = provider( + doc = "Metadata about all available modules/crates", + fields = { + "modules": "dict of module_name -> {version, commit, registry, purl}", + "crates": "dict of crate_name -> {version, checksum, purl}", + "http_archives": "dict of repo_name -> {url, version, sha256, purl}", + }, +) diff --git a/sbom/internal/rules.bzl b/sbom/internal/rules.bzl new file mode 100644 index 0000000..28d08c2 --- /dev/null +++ b/sbom/internal/rules.bzl @@ -0,0 +1,267 @@ +"""SBOM generation rule implementation. + +This module contains the main _sbom_impl rule that combines data from +the aspect (target dependencies) with metadata from the module extension +to generate SPDX and CycloneDX format SBOMs. +""" + +load(":aspect.bzl", "sbom_aspect") +load(":providers.bzl", "SbomDepsInfo") + +def _sbom_impl(ctx): + """Generates SBOM by combining aspect data with extension metadata. 
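+
+    The external repositories and dependency edges reported by the aspect are
+    serialized to a JSON file and passed, together with the metadata file from
+    the module extension, to the Python generator action that writes the
+    SPDX/CycloneDX outputs.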
+ + Args: + ctx: The rule context + + Returns: + DefaultInfo with generated SBOM files + """ + + # Collect all external repos used by targets + all_external_repos = depset(transitive = [ + target[SbomDepsInfo].external_repos + for target in ctx.attr.targets + ]) + + # Collect all transitive deps + all_transitive_deps = depset(transitive = [ + target[SbomDepsInfo].transitive_deps + for target in ctx.attr.targets + ]) + + # Collect external dependency edges + all_external_dep_edges = depset(transitive = [ + target[SbomDepsInfo].external_dep_edges + for target in ctx.attr.targets + ]) + + # Get the metadata JSON file from the extension + metadata_file = ctx.file.metadata_json + + # Create input file with dependency info for Python generator + deps_json = ctx.actions.declare_file(ctx.attr.name + "_deps.json") + + # Build target labels list + target_labels = [str(t.label) for t in ctx.attr.targets] + + # Build exclude patterns list + exclude_patterns = ctx.attr.exclude_patterns + + # Collect MODULE.bazel files from dependency modules for version extraction + dep_module_paths = [f.path for f in ctx.files.dep_module_files] + + deps_data = { + "external_repos": all_external_repos.to_list(), + "transitive_deps": [str(d) for d in all_transitive_deps.to_list()], + "external_dep_edges": all_external_dep_edges.to_list(), + "target_labels": target_labels, + "exclude_patterns": exclude_patterns, + "dep_module_files": dep_module_paths, + "config": { + "producer_name": ctx.attr.producer_name, + "producer_url": ctx.attr.producer_url, + "component_name": ctx.attr.component_name if ctx.attr.component_name else ctx.attr.name, + "component_version": ctx.attr.component_version, + "namespace": ctx.attr.namespace, + "sbom_authors": ctx.attr.sbom_authors, + "generation_context": ctx.attr.generation_context, + "sbom_tools": ctx.attr.sbom_tools, + }, + } + + ctx.actions.write( + output = deps_json, + content = json.encode(deps_data), + ) + + # Declare outputs + outputs = [] + args = ctx.actions.args() + args.add("--input", deps_json) + args.add("--metadata", metadata_file) + + if "spdx" in ctx.attr.output_formats: + spdx_out = ctx.actions.declare_file(ctx.attr.name + ".spdx.json") + outputs.append(spdx_out) + args.add("--spdx-output", spdx_out) + + if "cyclonedx" in ctx.attr.output_formats: + cdx_out = ctx.actions.declare_file(ctx.attr.name + ".cdx.json") + outputs.append(cdx_out) + args.add("--cyclonedx-output", cdx_out) + + # Build inputs list + generator_inputs = [deps_json, metadata_file] + ctx.files.dep_module_files + + # Auto-generate crates metadata cache if enabled and a lockfile is provided + crates_cache = None + if (ctx.file.cargo_lockfile or ctx.file.module_lockfile) and ctx.attr.auto_crates_cache: + crates_cache = ctx.actions.declare_file(ctx.attr.name + "_crates_metadata.json") + cache_inputs = [ctx.file._crates_cache_script] + cache_cmd = "set -euo pipefail\npython3 {} {}".format( + ctx.file._crates_cache_script.path, + crates_cache.path, + ) + if ctx.file.cargo_lockfile: + cache_inputs.append(ctx.file.cargo_lockfile) + cache_cmd += " --cargo-lock {}".format(ctx.file.cargo_lockfile.path) + if ctx.file.module_lockfile: + cache_inputs.append(ctx.file.module_lockfile) + cache_cmd += " --module-lock {}".format(ctx.file.module_lockfile.path) + ctx.actions.run_shell( + inputs = cache_inputs, + outputs = [crates_cache], + command = cache_cmd, + mnemonic = "CratesCacheGenerate", + progress_message = "Generating crates metadata cache for %s" % ctx.attr.name, + execution_requirements = {"requires-network": ""}, 
+ ) + + # Add cdxgen SBOM if provided; otherwise auto-generate if enabled + cdxgen_sbom = ctx.file.cdxgen_sbom + if not cdxgen_sbom and ctx.attr.auto_cdxgen: + cdxgen_sbom = ctx.actions.declare_file(ctx.attr.name + "_cdxgen.cdx.json") + ctx.actions.run( + outputs = [cdxgen_sbom], + executable = ctx.executable._npm, + arguments = [ + "exec", + "--", + "@cyclonedx/cdxgen", + "-t", + "cpp", + "--deep", + "-r", + "-o", + cdxgen_sbom.path, + ], + mnemonic = "CdxgenGenerate", + progress_message = "Generating cdxgen SBOM for %s" % ctx.attr.name, + ) + + if cdxgen_sbom: + args.add("--cdxgen-sbom", cdxgen_sbom) + generator_inputs.append(cdxgen_sbom) + + if crates_cache: + args.add("--crates-cache", crates_cache) + generator_inputs.append(crates_cache) + + # Run Python generator + ctx.actions.run( + inputs = generator_inputs, + outputs = outputs, + executable = ctx.executable._generator, + arguments = [args], + mnemonic = "SbomGenerate", + progress_message = "Generating SBOM for %s" % ctx.attr.name, + ) + + return [DefaultInfo(files = depset(outputs))] + +sbom_rule = rule( + implementation = _sbom_impl, + attrs = { + "targets": attr.label_list( + mandatory = True, + aspects = [sbom_aspect], + doc = "Targets to generate SBOM for", + ), + "output_formats": attr.string_list( + default = ["spdx", "cyclonedx"], + doc = "Output formats: spdx, cyclonedx", + ), + "producer_name": attr.string( + default = "Eclipse Foundation", + doc = "SBOM producer organization name", + ), + "producer_url": attr.string( + default = "https://projects.eclipse.org/projects/automotive.score", + doc = "SBOM producer URL", + ), + "component_name": attr.string( + doc = "Component name (defaults to rule name)", + ), + "component_version": attr.string( + default = "", + doc = "Component version", + ), + "sbom_authors": attr.string_list( + default = [], + doc = "SBOM author(s) (distinct from software producers)", + ), + "generation_context": attr.string( + default = "", + doc = "SBOM generation context: pre-build, build, post-build", + ), + "sbom_tools": attr.string_list( + default = [], + doc = "Additional SBOM generation tool names", + ), + "namespace": attr.string( + default = "https://eclipse.dev/score", + doc = "SBOM namespace URI", + ), + "exclude_patterns": attr.string_list( + default = [ + "rules_rust", + "rules_cc", + "bazel_tools", + "platforms", + "bazel_skylib", + "rules_python", + "rules_proto", + "protobuf", + "local_config_", + "remote_", + ], + doc = "External repo patterns to exclude (build tools)", + ), + "metadata_json": attr.label( + mandatory = True, + allow_single_file = [".json"], + doc = "Metadata JSON file from sbom_metadata extension", + ), + "dep_module_files": attr.label_list( + allow_files = True, + default = [], + doc = "MODULE.bazel files from dependency modules for automatic version extraction", + ), + "cargo_lockfile": attr.label( + allow_single_file = True, + doc = "Optional Cargo.lock file for automatic crate metadata extraction", + ), + "module_lockfile": attr.label( + allow_single_file = True, + doc = "Optional MODULE.bazel.lock for additional crates (e.g., from score_crates)", + ), + "cdxgen_sbom": attr.label( + allow_single_file = [".json"], + doc = "Optional CycloneDX JSON from cdxgen for C++ dependency enrichment", + ), + "auto_cdxgen": attr.bool( + default = False, + doc = "Automatically run cdxgen when no cdxgen_sbom is provided", + ), + "_npm": attr.label( + default = "//sbom:npm_wrapper", + executable = True, + cfg = "exec", + ), + "auto_crates_cache": attr.bool( + default = True, + doc = 
"Automatically build crates metadata cache when cargo_lockfile or module_lockfile is provided", + ), + "_crates_cache_script": attr.label( + default = "//sbom/scripts:generate_crates_metadata_cache.py", + allow_single_file = True, + ), + "_generator": attr.label( + default = "//sbom/internal/generator:sbom_generator", + executable = True, + cfg = "exec", + ), + }, + doc = "Generates SBOM for specified targets in SPDX and CycloneDX formats", +) diff --git a/sbom/npm_wrapper.sh b/sbom/npm_wrapper.sh new file mode 100755 index 0000000..0c312a6 --- /dev/null +++ b/sbom/npm_wrapper.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Wrapper to use system-installed npm/cdxgen +# This relies on npm/cdxgen being available in the system PATH + +# Add common Node.js installation paths to PATH +export PATH="/home/lj/.nvm/versions/node/v24.13.0/bin:$PATH" +export PATH="$HOME/.nvm/versions/node/v24.13.0/bin:$PATH" +export PATH="/usr/local/bin:/usr/bin:/bin:$PATH" + +# If called with "exec -- @cyclonedx/cdxgen", just run cdxgen directly +if [[ "$1" == "exec" && "$2" == "--" && "$3" == "@cyclonedx/cdxgen" ]]; then + shift 3 # Remove "exec -- @cyclonedx/cdxgen" + exec cdxgen "$@" +else + # Otherwise, run npm normally + exec npm "$@" +fi diff --git a/sbom/scripts/BUILD.bazel b/sbom/scripts/BUILD.bazel new file mode 100644 index 0000000..c33b3c6 --- /dev/null +++ b/sbom/scripts/BUILD.bazel @@ -0,0 +1,5 @@ +package(default_visibility = ["//sbom:__subpackages__"]) + +exports_files([ + "generate_crates_metadata_cache.py", +]) diff --git a/sbom/scripts/generate_cpp_metadata_cache.py b/sbom/scripts/generate_cpp_metadata_cache.py new file mode 100644 index 0000000..cbc0ea1 --- /dev/null +++ b/sbom/scripts/generate_cpp_metadata_cache.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 +"""Generate cpp_metadata.json cache from cdxgen CycloneDX output. 
+ +Usage: + # Generate from cdxgen output: + npx @cyclonedx/cdxgen -t cpp --deep -r -o cdxgen_output.cdx.json + python3 generate_cpp_metadata_cache.py cdxgen_output.cdx.json ../cpp_metadata.json + + # Or pipe directly: + npx @cyclonedx/cdxgen -t cpp --deep -r | python3 generate_cpp_metadata_cache.py - ../cpp_metadata.json +""" + +import argparse +import json +import sys + + +def convert_cdxgen_to_cache(cdxgen_path: str) -> dict: + """Convert CycloneDX JSON from cdxgen to internal cache format.""" + if cdxgen_path == "-": + cdx_data = json.load(sys.stdin) + else: + with open(cdxgen_path, encoding="utf-8") as f: + cdx_data = json.load(f) + + if cdx_data.get("bomFormat") != "CycloneDX": + print("Error: Input is not a CycloneDX JSON file", file=sys.stderr) + sys.exit(1) + + cache = {} + for comp in cdx_data.get("components", []): + name = comp.get("name", "") + if not name: + continue + + entry = { + "version": comp.get("version", "unknown"), + } + + # License + licenses = comp.get("licenses", []) + if licenses: + first = licenses[0] + lic_obj = first.get("license", {}) + lic_id = lic_obj.get("id", "") or lic_obj.get("name", "") + if not lic_id: + lic_id = first.get("expression", "") + if lic_id: + entry["license"] = lic_id + + # Supplier + supplier = comp.get("supplier", {}) + if supplier and supplier.get("name"): + entry["supplier"] = supplier["name"] + elif comp.get("publisher"): + entry["supplier"] = comp["publisher"] + + # PURL + if comp.get("purl"): + entry["purl"] = comp["purl"] + + # URL from externalReferences + for ref in comp.get("externalReferences", []): + if ref.get("type") in ("website", "distribution", "vcs") and ref.get("url"): + entry["url"] = ref["url"] + break + + cache[name] = entry + + return cache + + +def main(): + parser = argparse.ArgumentParser( + description="Convert cdxgen CycloneDX output to cpp_metadata.json cache" + ) + parser.add_argument("input", help="cdxgen CycloneDX JSON file (or - for stdin)") + parser.add_argument( + "output", + nargs="?", + default="cpp_metadata.json", + help="Output cache file (default: cpp_metadata.json)", + ) + parser.add_argument( + "--merge", + help="Merge with existing cache file (existing entries take precedence)", + ) + args = parser.parse_args() + + cache = convert_cdxgen_to_cache(args.input) + + if args.merge: + try: + with open(args.merge, encoding="utf-8") as f: + existing = json.load(f) + # Existing entries take precedence + for name, data in cache.items(): + if name not in existing: + existing[name] = data + cache = existing + except (OSError, json.JSONDecodeError): + pass + + with open(args.output, "w", encoding="utf-8") as f: + json.dump(cache, f, indent=2, sort_keys=True) + f.write("\n") + + print(f"Generated {args.output} with {len(cache)} C++ dependencies") + + +if __name__ == "__main__": + main() diff --git a/sbom/scripts/generate_crates_metadata_cache.py b/sbom/scripts/generate_crates_metadata_cache.py new file mode 100755 index 0000000..7e052b7 --- /dev/null +++ b/sbom/scripts/generate_crates_metadata_cache.py @@ -0,0 +1,332 @@ +#!/usr/bin/env python3 +"""Generate crates.io metadata cache for SBOM generation. + +This script parses Cargo.lock files and/or MODULE.bazel.lock files, +fetches license metadata from crates.io, and creates a cache file +that can be used during SBOM generation without requiring network +access at build time. 
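+
+The resulting crates_metadata.json maps each crate name to version, checksum,
+purl, license, repository, description, and homepage fields; sbom_generator.py
+reads it via the --crates-cache argument.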
+ +Usage: + python3 generate_crates_metadata_cache.py --module-lock + python3 generate_crates_metadata_cache.py --cargo-lock + python3 generate_crates_metadata_cache.py --cargo-lock --module-lock + +Example: + python3 generate_crates_metadata_cache.py crates_metadata.json \\ + --module-lock ../../score-crates/MODULE.bazel.lock + python3 generate_crates_metadata_cache.py crates_metadata.json \\ + --cargo-lock ../../orchestrator/Cargo.lock \\ + --module-lock ../../score-crates/MODULE.bazel.lock +""" + +import argparse +import json +import re +import sys +import urllib.request +import urllib.error +from pathlib import Path +from typing import Dict, Any + + +def parse_cargo_lock(lockfile_path: str) -> Dict[str, Dict[str, Any]]: + """Parse Cargo.lock and extract crate information. + + Args: + lockfile_path: Path to Cargo.lock file + + Returns: + Dict mapping crate name to {version, checksum, source} + """ + try: + import tomllib as tomli # Python 3.11+ + except ImportError: + try: + import tomli + except ImportError: + print( + "ERROR: tomli/tomllib library not found. Use Python 3.11+ or install tomli", + file=sys.stderr, + ) + sys.exit(1) + + with open(lockfile_path, "rb") as f: + lock_data = tomli.load(f) + + crates = {} + for package in lock_data.get("package", []): + name = package["name"] + source = package.get("source", "") + + # Only include crates from crates.io + if "registry+https://github.com/rust-lang/crates.io-index" in source: + crates[name] = { + "name": name, + "version": package["version"], + "checksum": package.get("checksum", ""), + "source": source, + } + + return crates + + +def parse_module_bazel_lock(lockfile_path: str) -> Dict[str, Dict[str, Any]]: + """Parse MODULE.bazel.lock and extract crate information from cargo-bazel resolution. + + The MODULE.bazel.lock (from score_crates or similar) contains resolved crate + specs under moduleExtensions -> crate_universe -> generatedRepoSpecs. + Each crate entry has name, version, sha256, and download URL. 
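+
+    Illustrative excerpt of the layout this parser expects (keys abbreviated and
+    values are placeholders; the exact structure depends on the cargo-bazel /
+    crate_universe version that wrote the lockfile):
+
+        "moduleExtensions": {
+            "...crate_universe...": {
+                "general": {
+                    "generatedRepoSpecs": {
+                        "crate_index__serde-1.0.228": {
+                            "attributes": {"sha256": "<hex digest>"}
+                        }
+                    }
+                }
+            }
+        }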
+ + Args: + lockfile_path: Path to MODULE.bazel.lock file + + Returns: + Dict mapping crate name to {version, checksum, source} + """ + with open(lockfile_path, encoding="utf-8") as f: + lock_data = json.load(f) + + crates = {} + extensions = lock_data.get("moduleExtensions", {}) + + # Find the crate_universe extension (key contains "crate_universe" or "crate") + crate_ext = None + for ext_key, ext_val in extensions.items(): + if "crate" in ext_key.lower(): + crate_ext = ext_val + break + + if not crate_ext: + print( + " WARNING: No crate extension found in MODULE.bazel.lock", file=sys.stderr + ) + return crates + + # Get generatedRepoSpecs from 'general' (or the first available key) + general = crate_ext.get("general", {}) + specs = general.get("generatedRepoSpecs", {}) + + for repo_name, spec in specs.items(): + # Skip the crate_index meta-repo itself + if repo_name == "crate_index" or not repo_name.startswith("crate_index__"): + continue + + crate_part = repo_name.replace("crate_index__", "") + + # Parse name-version (e.g., "serde-1.0.228", "iceoryx2-qnx8-0.7.0") + m = re.match(r"^(.+?)-(\d+\.\d+\.\d+.*)$", crate_part) + if not m: + continue + + name = m.group(1) + version = m.group(2) + attrs = spec.get("attributes", {}) + sha256 = attrs.get("sha256", "") + + crates[name] = { + "name": name, + "version": version, + "checksum": sha256, + "source": "module-bazel-lock", + } + + return crates + + +def fetch_crate_metadata_from_crates_io( + crate_name: str, crate_version: str = "" +) -> Dict[str, Any]: + """Fetch crate metadata from crates.io API. + + Args: + crate_name: Name of the crate + crate_version: Version of the crate (used to find matching version license) + + Returns: + Dict with license, repository, description, etc. + """ + url = f"https://crates.io/api/v1/crates/{crate_name}" + + try: + req = urllib.request.Request(url) + req.add_header("User-Agent", "SCORE-SBOM-Generator/1.0") + + with urllib.request.urlopen(req, timeout=10) as response: + data = json.loads(response.read().decode("utf-8")) + crate = data.get("crate", {}) + + # License is per-version in the crates.io API. + # Try to find the license for the specific version first, + # then fall back to the latest version. + license_str = crate.get("license") or "" + versions = data.get("versions", []) + if versions and not license_str: + # Try to find matching version + for v in versions: + if v.get("num") == crate_version: + license_str = v.get("license", "") + break + # Fall back to latest version + if not license_str: + license_str = versions[0].get("license", "") + + return { + "license": license_str, + "repository": crate.get("repository"), + "description": crate.get("description"), + "homepage": crate.get("homepage"), + "documentation": crate.get("documentation"), + } + except urllib.error.HTTPError as e: + if e.code == 404: + print( + f" WARNING: Crate '{crate_name}' not found on crates.io", + file=sys.stderr, + ) + else: + print( + f" WARNING: HTTP error {e.code} fetching '{crate_name}'", + file=sys.stderr, + ) + return {} + except urllib.error.URLError as e: + print(f" WARNING: Network error fetching '{crate_name}': {e}", file=sys.stderr) + return {} + except Exception as e: + print(f" WARNING: Error fetching '{crate_name}': {e}", file=sys.stderr) + return {} + + +def generate_cache( + cargo_lock_path: str = None, use_network: bool = True, module_lock_path: str = None +) -> Dict[str, Dict[str, Any]]: + """Generate metadata cache from Cargo.lock and/or MODULE.bazel.lock. 
+ + At least one of cargo_lock_path or module_lock_path must be provided. + + Args: + cargo_lock_path: Optional path to Cargo.lock file + use_network: If True, fetch metadata from crates.io; if False, use checksums only + module_lock_path: Optional path to MODULE.bazel.lock for additional crates + + Returns: + Dict mapping crate name to metadata + """ + crates = {} + + if cargo_lock_path: + print(f"Parsing {cargo_lock_path}...") + crates = parse_cargo_lock(cargo_lock_path) + print(f"Found {len(crates)} crates from Cargo.lock") + + # Merge crates from MODULE.bazel.lock (score_crates or similar) + if module_lock_path: + print(f"Parsing {module_lock_path}...") + module_crates = parse_module_bazel_lock(module_lock_path) + added = 0 + for name, info in module_crates.items(): + if name not in crates: + crates[name] = info + added += 1 + print(f"Found {len(module_crates)} crates in MODULE.bazel.lock ({added} new)") + + if not use_network: + print("Network lookups disabled. Using checksums only.") + return crates + + print("Fetching license metadata from crates.io...") + cache = {} + + for i, (name, info) in enumerate(crates.items(), 1): + print(f" [{i}/{len(crates)}] {name} {info['version']}...", end="", flush=True) + + metadata = fetch_crate_metadata_from_crates_io(name, info["version"]) + + # Merge Cargo.lock data with crates.io metadata + cache[name] = { + "version": info["version"], + "checksum": info["checksum"], + "purl": f"pkg:cargo/{name}@{info['version']}", + "license": metadata.get("license", ""), + "repository": metadata.get("repository", ""), + "description": metadata.get("description", ""), + "homepage": metadata.get("homepage", ""), + } + + if cache[name]["license"]: + print(f" ✓ {cache[name]['license']}") + else: + print(" (no license)") + + return cache + + +def main(): + parser = argparse.ArgumentParser( + description="Generate crates.io metadata cache for SBOM generation" + ) + parser.add_argument( + "output", + nargs="?", + default="crates_metadata.json", + help="Output JSON file (default: crates_metadata.json)", + ) + parser.add_argument("--cargo-lock", help="Path to Cargo.lock file") + parser.add_argument( + "--no-network", + action="store_true", + help="Skip network lookups (checksums only)", + ) + parser.add_argument( + "--module-lock", + help="Path to MODULE.bazel.lock for additional crates (e.g., from score_crates)", + ) + parser.add_argument( + "--merge", help="Merge with existing cache file instead of overwriting" + ) + + args = parser.parse_args() + + if not args.cargo_lock and not args.module_lock: + parser.error("At least one of --cargo-lock or --module-lock is required") + + # Generate new cache + cache = generate_cache( + cargo_lock_path=args.cargo_lock, + use_network=not args.no_network, + module_lock_path=args.module_lock, + ) + + # Merge with existing cache if requested + if args.merge and Path(args.merge).exists(): + print(f"\nMerging with existing cache: {args.merge}") + with open(args.merge) as f: + existing = json.load(f) + + # Prefer new data, but keep entries not in current Cargo.lock + merged = existing.copy() + merged.update(cache) + cache = merged + print(f"Merged cache now contains {len(cache)} entries") + + # Write cache + print(f"\nWriting cache to {args.output}...") + with open(args.output, "w") as f: + json.dump(cache, f, indent=2, sort_keys=True) + + # Print statistics + with_license = sum(1 for c in cache.values() if c.get("license")) + with_checksum = sum(1 for c in cache.values() if c.get("checksum")) + + print(f"\n✓ Cache generated 
successfully!") + print(f" Total crates: {len(cache)}") + print(f" With licenses: {with_license} ({with_license / len(cache) * 100:.1f}%)") + print( + f" With checksums: {with_checksum} ({with_checksum / len(cache) * 100:.1f}%)" + ) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/sbom/tests/BUILD b/sbom/tests/BUILD new file mode 100644 index 0000000..d2b23e0 --- /dev/null +++ b/sbom/tests/BUILD @@ -0,0 +1,19 @@ +# SBOM Tests Package +# +# This package contains tests for the SBOM generation system. + +load("@rules_python//python:defs.bzl", "py_test") + +package(default_visibility = ["//visibility:private"]) + +py_test( + name = "test_spdx_formatter", + srcs = ["test_spdx_formatter.py"], + deps = ["//sbom/internal/generator:spdx_formatter"], +) + +py_test( + name = "test_cyclonedx_formatter", + srcs = ["test_cyclonedx_formatter.py"], + deps = ["//sbom/internal/generator:cyclonedx_formatter"], +) diff --git a/sbom/tests/__init__.py b/sbom/tests/__init__.py new file mode 100644 index 0000000..b82b623 --- /dev/null +++ b/sbom/tests/__init__.py @@ -0,0 +1 @@ +"""SBOM tests package.""" diff --git a/sbom/tests/test_cyclonedx_formatter.py b/sbom/tests/test_cyclonedx_formatter.py new file mode 100644 index 0000000..1af33b3 --- /dev/null +++ b/sbom/tests/test_cyclonedx_formatter.py @@ -0,0 +1,142 @@ +"""Tests for CycloneDX 1.6 formatter.""" + +import unittest +from datetime import datetime, timezone + +from sbom.internal.generator.cyclonedx_formatter import generate_cyclonedx + + +class TestCycloneDXFormatter(unittest.TestCase): + """Tests for CycloneDX 1.6 generation.""" + + def setUp(self): + """Set up test fixtures.""" + self.timestamp = datetime( + 2024, 1, 15, 12, 0, 0, tzinfo=timezone.utc + ).isoformat() + self.config = { + "component_name": "test-component", + "component_version": "1.0.0", + "producer_name": "Eclipse Foundation", + "producer_url": "https://eclipse.dev/score", + "namespace": "https://eclipse.dev/score", + } + self.components = [ + { + "name": "tokio", + "version": "1.10.0", + "purl": "pkg:cargo/tokio@1.10.0", + "type": "library", + "license": "MIT", + "source": "crates.io", + }, + { + "name": "serde", + "version": "1.0.0", + "purl": "pkg:cargo/serde@1.0.0", + "type": "library", + "license": "MIT OR Apache-2.0", + "source": "crates.io", + }, + ] + + def test_generate_cyclonedx_structure(self): + """Test that generated CycloneDX has correct structure.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + self.assertEqual(cdx["bomFormat"], "CycloneDX") + self.assertEqual(cdx["specVersion"], "1.6") + self.assertIn("serialNumber", cdx) + self.assertTrue(cdx["serialNumber"].startswith("urn:uuid:")) + self.assertEqual(cdx["version"], 1) + + def test_generate_cyclonedx_metadata(self): + """Test that CycloneDX metadata is correct.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + metadata = cdx["metadata"] + self.assertEqual(metadata["timestamp"], self.timestamp) + self.assertIn("tools", metadata) + self.assertIn("component", metadata) + + root_component = metadata["component"] + self.assertEqual(root_component["name"], "test-component") + self.assertEqual(root_component["version"], "1.0.0") + self.assertEqual(root_component["type"], "application") + + def test_generate_cyclonedx_components(self): + """Test that components are properly added.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + components = cdx["components"] + self.assertEqual(len(components), 2) + + 
component_names = {c["name"] for c in components} + self.assertEqual(component_names, {"tokio", "serde"}) + + def test_generate_cyclonedx_component_details(self): + """Test that component details are correct.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + tokio = next(c for c in cdx["components"] if c["name"] == "tokio") + + self.assertEqual(tokio["version"], "1.10.0") + self.assertEqual(tokio["type"], "library") + self.assertEqual(tokio["purl"], "pkg:cargo/tokio@1.10.0") + self.assertIn("bom-ref", tokio) + + def test_generate_cyclonedx_licenses(self): + """Test that licenses are properly set.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + tokio = next(c for c in cdx["components"] if c["name"] == "tokio") + + self.assertIn("licenses", tokio) + self.assertEqual(len(tokio["licenses"]), 1) + self.assertEqual(tokio["licenses"][0]["license"]["id"], "MIT") + + def test_generate_cyclonedx_dependencies(self): + """Test that dependencies are created.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + dependencies = cdx["dependencies"] + + # Should have root + 2 component dependency entries + self.assertEqual(len(dependencies), 3) + + # Find root dependency + root_dep = next(d for d in dependencies if d["ref"] == "test-component@1.0.0") + self.assertEqual(len(root_dep["dependsOn"]), 2) + + def test_generate_cyclonedx_external_references(self): + """Test that external references are added for crates.io sources.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + tokio = next(c for c in cdx["components"] if c["name"] == "tokio") + + self.assertIn("externalReferences", tokio) + ext_refs = tokio["externalReferences"] + + distribution_ref = next( + (r for r in ext_refs if r["type"] == "distribution"), None + ) + self.assertIsNotNone(distribution_ref) + self.assertIn("crates.io", distribution_ref["url"]) + + def test_generate_cyclonedx_with_empty_components(self): + """Test generating CycloneDX with no components.""" + cdx = generate_cyclonedx([], self.config, self.timestamp) + + self.assertEqual(len(cdx["components"]), 0) + self.assertEqual(len(cdx["dependencies"]), 1) # Just root + + def test_generate_cyclonedx_bom_refs_unique(self): + """Test that bom-refs are unique across components.""" + cdx = generate_cyclonedx(self.components, self.config, self.timestamp) + + bom_refs = [c["bom-ref"] for c in cdx["components"]] + self.assertEqual(len(bom_refs), len(set(bom_refs))) + + +if __name__ == "__main__": + unittest.main() diff --git a/sbom/tests/test_spdx_formatter.py b/sbom/tests/test_spdx_formatter.py new file mode 100644 index 0000000..2a0af5a --- /dev/null +++ b/sbom/tests/test_spdx_formatter.py @@ -0,0 +1,109 @@ +"""Tests for SPDX 2.3 formatter.""" + +import unittest +from datetime import datetime, timezone + +from sbom.internal.generator.spdx_formatter import generate_spdx + + +class TestSpdxFormatter(unittest.TestCase): + """Tests for SPDX 2.3 generation.""" + + def setUp(self): + """Set up test fixtures.""" + self.timestamp = datetime( + 2024, 1, 15, 12, 0, 0, tzinfo=timezone.utc + ).isoformat() + self.config = { + "component_name": "test-component", + "component_version": "1.0.0", + "producer_name": "Eclipse Foundation", + "producer_url": "https://eclipse.dev/score", + "namespace": "https://eclipse.dev/score", + } + self.components = [ + { + "name": "tokio", + "version": "1.10.0", + "purl": "pkg:cargo/tokio@1.10.0", + "type": "library", + "license": "MIT", + }, + { + "name": 
"serde", + "version": "1.0.0", + "purl": "pkg:cargo/serde@1.0.0", + "type": "library", + "license": "MIT OR Apache-2.0", + }, + ] + + def test_generate_spdx_structure(self): + """Test that generated SPDX has correct structure.""" + spdx = generate_spdx(self.components, self.config, self.timestamp) + + self.assertEqual(spdx["spdxVersion"], "SPDX-2.3") + self.assertEqual(spdx["dataLicense"], "CC0-1.0") + self.assertEqual(spdx["SPDXID"], "SPDXRef-DOCUMENT") + self.assertIn("documentNamespace", spdx) + self.assertIn("packages", spdx) + self.assertIn("relationships", spdx) + + def test_generate_spdx_document_info(self): + """Test that SPDX document has correct metadata.""" + spdx = generate_spdx(self.components, self.config, self.timestamp) + + self.assertEqual(spdx["name"], "SBOM for test-component") + creation_info = spdx["creationInfo"] + self.assertEqual(creation_info["created"], self.timestamp) + creators = creation_info["creators"] + self.assertIn("Organization: Eclipse Foundation", creators) + self.assertIn("Tool: score-sbom-generator", creators) + + def test_generate_spdx_components(self): + """Test that components are properly added to SPDX.""" + spdx = generate_spdx(self.components, self.config, self.timestamp) + + packages = spdx["packages"] + # root package + 2 components + self.assertEqual(len(packages), 3) + + def test_generate_spdx_relationships(self): + """Test that dependency relationships are created.""" + spdx = generate_spdx(self.components, self.config, self.timestamp) + + relationships = spdx["relationships"] + # DESCRIBES + 2 DEPENDS_ON + describes = [r for r in relationships if r["relationshipType"] == "DESCRIBES"] + depends_on = [r for r in relationships if r["relationshipType"] == "DEPENDS_ON"] + + self.assertEqual(len(describes), 1) + self.assertEqual(len(depends_on), 2) + + def test_generate_spdx_with_empty_components(self): + """Test generating SPDX with no components.""" + spdx = generate_spdx([], self.config, self.timestamp) + + packages = spdx["packages"] + # Only root package + self.assertEqual(len(packages), 1) + + def test_generate_spdx_component_purl(self): + """Test that component PURLs are properly set.""" + spdx = generate_spdx(self.components, self.config, self.timestamp) + + packages = spdx["packages"] + tokio_pkg = next((p for p in packages if p["name"] == "tokio"), None) + + self.assertIsNotNone(tokio_pkg) + ext_refs = tokio_pkg.get("externalRefs", []) + purl_ref = next( + (r for r in ext_refs if r.get("referenceType") == "purl"), + None, + ) + self.assertIsNotNone(purl_ref) + self.assertEqual(purl_ref["referenceLocator"], "pkg:cargo/tokio@1.10.0") + + +if __name__ == "__main__": + unittest.main() From 3d5b273ecd3555b4f7412e72ccfd69a73891460b Mon Sep 17 00:00:00 2001 From: Lukasz Juranek Date: Tue, 10 Feb 2026 22:34:10 +0100 Subject: [PATCH 2/2] Add license dash support for license data extraction (#2232) --- sbom/BUILD.bazel | 2 - sbom/SBOM_Readme.md | 222 +++++----- sbom/defs.bzl | 8 +- sbom/internal/generator/BUILD | 4 - .../internal/generator/cyclonedx_formatter.py | 17 +- sbom/internal/generator/sbom_generator.py | 253 ++++++----- sbom/internal/generator/spdx_formatter.py | 46 +- sbom/internal/rules.bzl | 35 +- sbom/scripts/generate_cpp_metadata_cache.py | 4 + .../scripts/generate_crates_metadata_cache.py | 405 +++++++++++++----- 10 files changed, 659 insertions(+), 337 deletions(-) diff --git a/sbom/BUILD.bazel b/sbom/BUILD.bazel index 122a5b6..ec94784 100644 --- a/sbom/BUILD.bazel +++ b/sbom/BUILD.bazel @@ -14,8 +14,6 @@ 
package(default_visibility = ["//visibility:public"]) exports_files([ "defs.bzl", "extensions.bzl", - "crates_metadata.json", - "cpp_metadata.json", ]) # Filegroup for all SBOM-related bzl files diff --git a/sbom/SBOM_Readme.md b/sbom/SBOM_Readme.md index 24059d5..c41e5b5 100644 --- a/sbom/SBOM_Readme.md +++ b/sbom/SBOM_Readme.md @@ -22,8 +22,11 @@ sbom( targets = ["//my/app:binary"], component_name = "my_application", component_version = "1.0.0", - # Rust crate metadata from score_crates MODULE.bazel.lock - module_lockfile = "@score_crates//:MODULE.bazel.lock", + # Rust crate metadata from multiple MODULE.bazel.lock files + module_lockfiles = [ + "@score_crates//:MODULE.bazel.lock", + ":MODULE.bazel.lock", # workspace's own lockfile for additional crates + ], auto_crates_cache = True, auto_cdxgen = True, # Requires system-installed npm/cdxgen (see below) ) @@ -37,12 +40,34 @@ sbom( | `component_name` | Main component name (defaults to rule name) | | `component_version` | Version string | | `output_formats` | `["spdx", "cyclonedx"]` (default: both) | -| `module_lockfile` | MODULE.bazel.lock from `score_crates` — contains all resolved Rust crate specs (name, version, sha256). This is the recommended source for Rust crate metadata. | -| `cargo_lockfile` | Optional Cargo.lock for additional crates `module_lockfile`. Usually not needed when `module_lockfile` from `score_crates` is provided, since it is a superset. | -| `auto_crates_cache` | Auto-generate crates cache when `module_lockfile` or `cargo_lockfile` is set | +| `module_lockfiles` | List of MODULE.bazel.lock files for Rust crate metadata. Pass `@score_crates//:MODULE.bazel.lock` (centralized crate specs) and `:MODULE.bazel.lock` (workspace-local crates). Each lockfile is parsed for crate name, version, and sha256. | +| `cargo_lockfile` | Optional Cargo.lock for additional crates. Usually not needed when `module_lockfiles` covers all crates. | +| `auto_crates_cache` | Auto-generate crates cache when `module_lockfiles` or `cargo_lockfile` is set | | `auto_cdxgen` | Auto-run cdxgen when no `cdxgen_sbom` is provided | -## 3. Install Prerequisites (for auto_cdxgen) +## 3. Install Prerequisites + +### For `auto_crates_cache` (Rust crate metadata) + +License data for Rust crates is fetched via [dash-license-scan](https://github.com/eclipse-score/dash-license-scan). Description and supplier metadata is fetched from the crates.io API (parallel, ~10 concurrent requests). Requires: + +```bash +# Install uv (Python package runner) +curl -LsSf https://astral.sh/uv/install.sh | sh + +# Install Java >= 11 (required by Eclipse dash-licenses JAR) +# Option 1: Ubuntu/Debian +sudo apt install openjdk-11-jre-headless + +# Option 2: Fedora/RHEL +sudo dnf install java-11-openjdk-headless + +# Verify installation +uvx dash-license-scan --help +java -version +``` + +### For `auto_cdxgen` (C++ dependency scanning) If using `auto_cdxgen = True` to automatically scan C++ dependencies: @@ -60,6 +85,7 @@ cdxgen --version ``` **Note:** If you don't have npm/cdxgen installed, set `auto_cdxgen = False` in your SBOM configuration. +When `auto_cdxgen` is enabled, the SBOM rule runs cdxgen against the repository path of the selected Bazel targets (for example `external/score_baselibs+` for `@score_baselibs//...` targets). ## 4. 
Build @@ -86,64 +112,67 @@ Generated files in `bazel-bin/`: |------|------|--------------| | [Bazel](https://bazel.build) | Build system — rules, aspects, and module extensions drive dependency discovery and SBOM generation | All SBOM generation | | [Python 3](https://www.python.org) | Runtime for the SBOM generator, formatters, and metadata extraction scripts | All SBOM generation | -| [crates.io API](https://crates.io) | Rust crate metadata source (license, version, checksums) | Rust metadata extraction when `auto_crates_cache = True` | +| [dash-license-scan](https://github.com/eclipse-score/dash-license-scan) | Rust crate license metadata via Eclipse Foundation + ClearlyDefined | Rust metadata extraction when `auto_crates_cache = True` | +| [uv / uvx](https://docs.astral.sh/uv/) | Python package runner for dash-license-scan | Rust metadata extraction when `auto_crates_cache = True` | +| [Java >= 11](https://openjdk.org) | Runtime for Eclipse dash-licenses JAR (used by dash-license-scan) | Rust metadata extraction when `auto_crates_cache = True` | +| [crates.io API](https://crates.io) | Description and supplier metadata for Rust crates (parallel fetching) | Rust metadata extraction when `auto_crates_cache = True` | | [@cyclonedx/cdxgen](https://github.com/CycloneDX/cdxgen) | C++ dependency scanner and license discovery tool | C++ metadata extraction when `auto_cdxgen = True` | | [Node.js / npm](https://nodejs.org) | Runtime for cdxgen | C++ metadata extraction when `auto_cdxgen = True` | -### Five-Phase Architecture +### Architecture ``` -Phase 1: Loading Phase 2: Analysis -(extensions.bzl) (aspect.bzl) - -MODULE.bazel Bazel targets - | | - v v -sbom_metadata ext SbomDepsInfo aspect - | | - v v -metadata.json _deps.json - (external repos, - dep edges, - target labels) - - -Phase 3: Metadata Extraction (rules.bzl, parallel) - -Branch A (Rust): Branch B (C++): -MODULE.bazel.lock Source tree -(from score_crates) (C++, CMake, LICENSE) - | | - v v -generate_crates_cache.py cdxgen --deep -r -(+ crates.io API) | - | v - v cdxgen.cdx.json -crates_metadata.json - - -Phase 4: Resolution (sbom_generator.py) - -_deps.json -------+ -metadata.json ----+--> Match & Resolve -crates_cache -----+ (for each dep in -cdxgen.cdx.json --+ _deps.json, look up - metadata from caches) - | - v - SBOM components - (license, PURL, - version, hash) - - -Phase 5: Generation (formatters) - -SBOM components - | - +--> spdx_formatter --> .spdx.json - +--> cdx_formatter --> .cdx.json + ┌──────────────────┐ + │ Bazel build │ + └────────┬─────────┘ + │ + ┌───────────────┼───────────────┐ + │ │ │ + v v v + MODULE.bazel Bazel targets Lockfiles + │ │ │ + v v v + metadata.json _deps.json License + metadata + (module versions) (dep graph, (dash-license-scan + dep edges) + crates.io API + │ │ + cdxgen) + └───────────────┼───────────────┘ + │ + v + ┌──────────────────┐ + │ sbom_generator │ + │ (match & resolve)│ + └────────┬─────────┘ + │ + ┌────────┴────────┐ + v v + .spdx.json .cdx.json ``` +**Data sources:** +- **Bazel module graph** — version, PURL, and registry info for `bazel_dep` modules +- **Bazel aspect** — transitive dependency graph and external repo dependency edges +- **dash-license-scan** — Rust crate licenses via Eclipse Foundation + ClearlyDefined (from MODULE.bazel.lock or Cargo.lock) +- **crates.io API** — description and supplier for Rust crates (supplier extracted from GitHub repository URL) +- **cdxgen** — C++ dependency licenses, descriptions, and suppliers (from source tree scan) + +### Automatically 
Populated Fields + +The following SBOM fields are populated automatically without manual configuration: + +| Field | Rust Crates | C++ Dependencies | Bazel Modules | +|-------|-------------|------------------|---------------| +| License | dash-license-scan | cdxgen | — | +| Description | crates.io API | cdxgen (falls back to `"Missing"` when unavailable) | — | +| Supplier | crates.io API (GitHub org from repository URL) | cdxgen | — | +| Version | MODULE.bazel.lock / Cargo.lock | cdxgen (with MODULE.bazel.lock fallback for Bazel modules) | Bazel module graph | +| Checksum (SHA-256) | MODULE.bazel.lock / Cargo.lock | BCR `source.json` `sha256` + cdxgen `hashes` (when present) | http_archive `sha256` + MODULE.bazel.lock BCR `source.json` | +| PURL | Auto-generated (`pkg:cargo/...`) | cdxgen | Auto-generated | + +### Platform-Specific Crate Handling + +Crates with platform-specific suffixes (e.g. `iceoryx2-bb-lock-free-qnx8`) that don't exist on crates.io are handled by stripping the suffix and falling back to the base crate name for description and supplier lookup. + ### What Is Excluded from SBOM - Dependencies not in the transitive dep graph of your `targets` @@ -151,11 +180,13 @@ SBOM components ## Example -See [reference_integration/BUILD](../../reference_integration/BUILD) for working SBOM targets using `module_lockfile` from `score_crates` with both `auto_crates_cache` and `auto_cdxgen` enabled, and [reference_integration/MODULE.bazel](../../reference_integration/MODULE.bazel) for the metadata extension setup. +See [reference_integration/BUILD](../../reference_integration/BUILD) for working SBOM targets and [reference_integration/MODULE.bazel](../../reference_integration/MODULE.bazel) for the metadata extension setup. + +Each SBOM target uses `module_lockfiles` to provide crate version/checksum data from multiple lockfiles and `auto_crates_cache = True` to automatically fetch license, description, and supplier data. ### score_crates Integration -The `score_crates` module provides centralized Rust crate management for the SCORE project. Its `MODULE.bazel.lock` file contains all resolved crate specs (name, version, sha256) generated by `cargo-bazel`. This lock file is used as the primary data source for Rust crate SBOM metadata, replacing the need for individual `Cargo.lock` files. +The `score_crates` module provides centralized Rust crate management for the SCORE project. Its `MODULE.bazel.lock` file contains the majority of resolved crate specs (name, version, sha256) generated by `cargo-bazel`. The workspace's own `MODULE.bazel.lock` may contain additional crates not in `score_crates`. Both lockfiles should be passed via `module_lockfiles` to ensure complete coverage. ## CISA 2025 Element Coverage (CycloneDX) @@ -163,69 +194,52 @@ The table below maps the CISA 2025 draft elements to CycloneDX fields and notes | CISA 2025 Element | CycloneDX Field (JSON) | Support | Notes | |---|---|---|---| -| Software Producer | `components[].supplier.name` (or manufacturer) | **Supported** | Component `supplier` is emitted when provided. Root producer is in `metadata.component.supplier`. | +| Software Producer | `components[].supplier.name` | **Supported** | Root producer is set in `metadata.component.supplier`. For components, supplier is auto-extracted from crates.io repository URL (Rust) or from cdxgen (C++); in the current baselibs example, Boost BCR modules have no supplier because cdxgen does not provide one. 
| | Component Name | `components[].name` | **Supported** | Single name; aliases are stored as `properties` with `cdx:alias`. | | Component Version | `components[].version` | **Supported** | If unknown and source is git repo with `commit_date`, version can fall back to that date. | | Software Identifiers | `components[].purl`, `components[].cpe` | **Supported (PURL)** / **Optional (CPE)** | PURL is generated for all components. CPE is optional if provided in metadata. | -| Component Hash | `components[].hashes` | **Supported** | SHA-256 supported (Cargo lock + http_archive `sha256` + repo metadata). | -| License | `components[].licenses` | **Supported when known** | Requires license metadata from `sbom_ext.license(...)`, repo metadata, or caches. | -| Dependency Relationship | `dependencies` | **Supported** | Uses external repo dependency edges from Bazel aspect. | -| Pedigree / Derivation | `components[].pedigree` | **Supported (manual)** | Must be provided via metadata (`pedigree_*` fields). Not auto-deduced. | -| SBOM Author | `metadata.authors` | **Supported** | Set via `sbom_authors` in `sbom()` rule. | -| Tool Name | `metadata.tools` | **Supported** | Always includes `score-sbom-generator`; extra tools via `sbom_tools`. | +| Component Hash | `components[].hashes` | **Supported** | SHA-256 is populated for Rust crates (from lockfiles) and for BCR / http_archive / some cdxgen-backed C++ components. In the current examples, Rust crates and Boost BCR modules have hashes; some QNX-specific crates and other C++ deps may not. | +| License | `components[].licenses` | **Supported (Rust) / Best-effort (C++)** | Rust licenses are auto-fetched via dash-license-scan and are present for most crates (e.g. Kyron SBOM); some crates like `iceoryx2-*` may still lack licenses. For C++ components, licenses are only present when cdxgen (or an upstream SBOM) provides them; in the current baselibs example, Boost BCR modules have empty `licenses`. Compound SPDX expressions (AND/OR) use the `expression` field per CycloneDX spec. | +| Component Description | `components[].description` | **Supported** | Auto-fetched from crates.io API (Rust) and cdxgen (C++), with C++ falling back to `"Missing"` when no description is available (as seen for Boost in the baselibs SBOM). | +| Dependency Relationship | `dependencies` | **Supported** | Uses external repo dependency edges from Bazel aspect; both Kyron and baselibs SBOMs include a dependency graph for the root component. | +| Pedigree / Derivation | `components[].pedigree` | **Supported (manual)** | Must be provided via `sbom_ext.license()` with `pedigree_*` fields. Not auto-deduced. | +| SBOM Author | `metadata.authors` | **Supported** | Set via `sbom_authors` in `sbom()` rule (e.g. `"Eclipse SCORE Team"` in the examples). | +| Tool Name | `metadata.tools` | **Supported** | Always includes `score-sbom-generator`; extra tools can be added via `sbom_tools`. | | Timestamp | `metadata.timestamp` | **Supported** | ISO 8601 UTC timestamp generated at build time. | | Generation Context | `metadata.lifecycles` | **Supported** | Set via `generation_context` in `sbom()` rule (`pre-build`, `build`, `post-build`). | +### SPDX-Specific Notes + +- **LicenseRef-* declarations**: Any `LicenseRef-*` identifiers used in license fields are automatically declared in `hasExtractedLicensingInfos` as required by SPDX 2.3. +- **Supplier**: Emitted as `Organization: ` in the SPDX `supplier` field. 
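+
+A minimal sketch of how these two notes show up in the generated SPDX JSON
+(names and values below are placeholders, not output from a real build):
+
+```json
+{
+  "hasExtractedLicensingInfos": [
+    {
+      "licenseId": "LicenseRef-Example",
+      "extractedText": "License text that could not be mapped to an SPDX identifier"
+    }
+  ],
+  "packages": [
+    {
+      "SPDXID": "SPDXRef-Package-example-dep",
+      "name": "example-dep",
+      "supplier": "Organization: Example Org",
+      "licenseConcluded": "LicenseRef-Example"
+    }
+  ]
+}
+```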
+ ### Notes on Missing Data If a field is absent in output, it usually means the source metadata was not provided: -- Licenses and suppliers require `sbom_ext.license(...)` or repo metadata. -- CPE, aliases, and pedigree are optional and must be explicitly set. - - Rust crate licenses require a crates metadata cache; this is generated automatically when `module_lockfile` (or `cargo_lockfile`) is provided to `sbom()`. The `score_crates` MODULE.bazel.lock is the recommended source as it contains all resolved crate specs. +- Licenses and suppliers are auto-populated from dash-license-scan (Rust) or cdxgen (C++). For C++ dependencies, licenses and suppliers are available only when cdxgen can resolve the component; Bazel Central Registry modules like `boost.*` may have empty licenses if cdxgen cannot infer them. +- CPE, aliases, and pedigree are optional and must be explicitly set via `sbom_ext.license()`. +- Rust crate licenses require a crates metadata cache; this is generated automatically when `module_lockfiles` (or `cargo_lockfile`) is provided to `sbom()`. License data is fetched via `dash-license-scan` (Eclipse Foundation + ClearlyDefined). The `score_crates` MODULE.bazel.lock combined with the workspace's MODULE.bazel.lock provides complete coverage. +- If cdxgen cannot resolve C++ package metadata for a Bazel-only dependency graph, SBOM generation sets C++ dependency descriptions to `"Missing"`. Examples (add to `MODULE.bazel`): ```starlark -# bazel_dep module (version from module graph) -sbom_ext.license( - name = "googletest", - license = "BSD-3-Clause", - supplier = "Google LLC", -) - -# http_archive dependency (explicit version) -sbom_ext.license( - name = "boost", - license = "BSL-1.0", - version = "1.87.0", - supplier = "Boost.org", -) - -# git_repository dependency -sbom_ext.license( - name = "iceoryx2", - license = "Apache-2.0", - version = "0.7.0", - supplier = "Eclipse Foundation", - remote = "https://github.com/eclipse-iceoryx/iceoryx2.git", -) - -# Rust crate (type = "cargo") -sbom_ext.license( - name = "tokio", - license = "MIT", - version = "1.10.0", - type = "cargo", - supplier = "Tokio Contributors", -) - # Optional metadata (CPE, aliases, pedigree) +# Note: sbom_ext.license() should only be used for pedigree, CPE, and aliases. +# Licenses and suppliers are auto-populated from dash-license-scan (Rust) or cdxgen (C++). sbom_ext.license( name = "linux-kernel", - license = "GPL-2.0-only", - version = "5.10.120", cpe = "cpe:2.3:o:linux:linux_kernel:*:*:*:*:*:*:*:*", aliases = ["linux", "kernel"], pedigree_ancestors = ["pkg:generic/linux-kernel@5.10.130"], pedigree_notes = "Backported CVE-2025-12345 fix from 5.10.130", ) ``` + +### C++ license data and dash-license-scan + +- **Rust crates** + Rust licenses are obtained via `generate_crates_metadata_cache.py`, which reads `MODULE.bazel.lock` / `Cargo.lock`, builds a synthetic `Cargo.lock`, runs `uvx dash-license-scan` (backed by Eclipse dash-licenses), and writes a `crates_metadata.json` cache that `sbom_generator.py` consumes. + +- **C++ dependencies** + C++ licenses and suppliers are populated **only from cdxgen output**; there is currently **no dash-license-scan integration for C++ SBOMs**. cdxgen works best for ecosystems it knows; for Bazel Central Registry C++ modules such as `boost.*`, it typically cannot infer a license, so those components may have empty `licenses`. 
`dash-license-scan` currently only understands purls like `pkg:cargo/...`, `pkg:pypi/...`, `pkg:npm/...`, and `pkg:maven/...`, so running it directly on the C++ CycloneDX SBOM (which uses `pkg:bazel/...` for these modules) does not improve C++ license coverage. + diff --git a/sbom/defs.bzl b/sbom/defs.bzl index b67489c..c60233d 100644 --- a/sbom/defs.bzl +++ b/sbom/defs.bzl @@ -26,7 +26,7 @@ def sbom( cdxgen_sbom = None, auto_cdxgen = False, cargo_lockfile = None, - module_lockfile = None, + module_lockfiles = None, auto_crates_cache = True, output_formats = ["spdx", "cyclonedx"], producer_name = "Eclipse Foundation", @@ -65,8 +65,8 @@ def sbom( cdxgen_sbom: Optional label to CycloneDX JSON from cdxgen for C++ enrichment auto_cdxgen: Run cdxgen automatically when no cdxgen_sbom is provided cargo_lockfile: Optional Cargo.lock for crates metadata cache generation - module_lockfile: Optional MODULE.bazel.lock for additional crates (e.g., from score_crates) - auto_crates_cache: Run crates metadata cache generation when cargo_lockfile or module_lockfile is provided + module_lockfiles: MODULE.bazel.lock files for crate metadata extraction (e.g., from score_crates and workspace) + auto_crates_cache: Run crates metadata cache generation when cargo_lockfile or module_lockfiles is provided output_formats: List of formats to generate ("spdx", "cyclonedx") producer_name: SBOM producer organization name producer_url: SBOM producer URL @@ -120,7 +120,7 @@ def sbom( cdxgen_sbom = cdxgen_sbom, auto_cdxgen = auto_cdxgen, cargo_lockfile = cargo_lockfile, - module_lockfile = module_lockfile, + module_lockfiles = module_lockfiles if module_lockfiles else [], auto_crates_cache = auto_crates_cache, output_formats = output_formats, producer_name = producer_name, diff --git a/sbom/internal/generator/BUILD b/sbom/internal/generator/BUILD index 8c6afa0..8655e1d 100644 --- a/sbom/internal/generator/BUILD +++ b/sbom/internal/generator/BUILD @@ -10,10 +10,6 @@ package(default_visibility = ["//sbom:__subpackages__"]) py_binary( name = "sbom_generator", srcs = ["sbom_generator.py"], - data = [ - "//sbom:cpp_metadata.json", - "//sbom:crates_metadata.json", - ], main = "sbom_generator.py", deps = [ ":cyclonedx_formatter", diff --git a/sbom/internal/generator/cyclonedx_formatter.py b/sbom/internal/generator/cyclonedx_formatter.py index 4f22eea..ad59e16 100644 --- a/sbom/internal/generator/cyclonedx_formatter.py +++ b/sbom/internal/generator/cyclonedx_formatter.py @@ -158,6 +158,7 @@ def _create_cdx_component(component: dict[str, Any]) -> dict[str, Any]: version = component.get("version", "unknown") purl = component.get("purl", "") license_id = component.get("license", "") + description = component.get("description", "") supplier = component.get("supplier", "") comp_type = component.get("type", "library") source = component.get("source", "") @@ -177,19 +178,21 @@ def _create_cdx_component(component: dict[str, Any]) -> dict[str, Any]: "bom-ref": _generate_bom_ref(name, version), } + # Add description + if description: + cdx_comp["description"] = description + # Add PURL if purl: cdx_comp["purl"] = purl # Add license if license_id: - cdx_comp["licenses"] = [ - { - "license": { - "id": license_id, - } - } - ] + if " AND " in license_id or " OR " in license_id: + # Compound SPDX expression must use "expression", not "license.id" + cdx_comp["licenses"] = [{"expression": license_id}] + else: + cdx_comp["licenses"] = [{"license": {"id": license_id}}] # Add supplier if supplier: diff --git 
a/sbom/internal/generator/sbom_generator.py b/sbom/internal/generator/sbom_generator.py index 3510e67..90a0d0a 100644 --- a/sbom/internal/generator/sbom_generator.py +++ b/sbom/internal/generator/sbom_generator.py @@ -64,90 +64,93 @@ def parse_module_bazel_files(file_paths: list[str]) -> dict[str, dict[str, str]] return modules -def load_crates_cache(override_path: str | None = None) -> dict[str, Any]: - """Load pre-generated crates metadata cache. +def parse_module_lockfiles(file_paths: list[str]) -> dict[str, dict[str, str]]: + """Parse MODULE.bazel.lock files to infer module versions and checksums. - Returns: - Dict mapping crate name to metadata (license, checksum, etc.) - """ - # Try multiple paths for cache file - possible_paths = [] - if override_path: - possible_paths.append(Path(override_path)) - possible_paths += [ - # Bazel runfiles location - Path(__file__).parent.parent.parent.parent.parent / "crates_metadata.json", - # Development/source tree location - Path(__file__).parent.parent.parent / "crates_metadata.json", - # Same directory as script - Path(__file__).parent / "crates_metadata.json", - ] - - for cache_path in possible_paths: - if cache_path.exists(): - try: - with open(cache_path, encoding="utf-8") as f: - return json.load(f) - except (OSError, json.JSONDecodeError): - continue + Uses registry URL keys from lockfiles. Only modules with a single unique + observed version are emitted to avoid ambiguous version selection. - # No cache found - return {} + For modules coming from the Bazel Central Registry, this also extracts the + SHA-256 checksum from the corresponding ``source.json`` entry so that + CycloneDX hashes can be populated for C/C++ dependencies. + """ + # Track all observed versions per module and (optional) sha256 per + # (module, version) tuple. + module_versions: dict[str, set[str]] = {} + module_sha256: dict[tuple[str, str], str] = {} + for fpath in file_paths: + try: + with open(fpath, encoding="utf-8") as f: + lock_data = json.load(f) + except (OSError, json.JSONDecodeError): + continue -def load_cpp_cache() -> dict[str, Any]: - """Load pre-generated C++ dependency metadata cache. + registry_hashes = lock_data.get("registryFileHashes", {}) + if not isinstance(registry_hashes, dict): + continue - Returns: - Dict mapping dependency name to metadata (license, supplier, version, etc.) - """ - possible_paths = [ - Path(__file__).parent.parent.parent.parent.parent / "cpp_metadata.json", - Path(__file__).parent.parent.parent / "cpp_metadata.json", - Path(__file__).parent / "cpp_metadata.json", - ] - - for cache_path in possible_paths: - if cache_path.exists(): - try: - with open(cache_path, encoding="utf-8") as f: - return json.load(f) - except (OSError, json.JSONDecodeError): + for url, sha in registry_hashes.items(): + if not isinstance(url, str) or not isinstance(sha, str): continue - return {} + # MODULE.bazel entry – records which version was selected. + module_match = re.search( + r"/modules/([^/]+)/([^/]+)/MODULE\.bazel$", + url, + ) + if module_match: + module_name, version = module_match.groups() + module_versions.setdefault(module_name, set()).add(version) + + # source.json entry – carries the sha256 of the downloaded source + # tarball for this module@version. Use it as the component hash. 
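+            # Illustrative key shape matched by this pattern (placeholder URL):
+            #   "https://bcr.bazel.build/modules/<module>/<version>/source.json"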
+ source_match = re.search( + r"/modules/([^/]+)/([^/]+)/source\.json$", + url, + ) + if source_match: + src_module, src_version = source_match.groups() + module_sha256[(src_module, src_version)] = sha + + modules: dict[str, dict[str, str]] = {} + for name, versions in module_versions.items(): + if len(versions) != 1: + # Skip modules with ambiguous versions. + continue + version = next(iter(versions)) + entry: dict[str, str] = { + "version": version, + "purl": f"pkg:bazel/{name}@{version}", + } + sha = module_sha256.get((name, version)) + if sha: + # Expose as sha256 so downstream code can turn it into a CycloneDX + # SHA-256 hash entry. + entry["sha256"] = sha + modules[name] = entry + + return modules -def cpp_cache_to_components(cpp_cache: dict[str, Any]) -> list[dict[str, Any]]: - """Convert C++ metadata cache to component list for enrichment. +def load_crates_cache(cache_path: str | None = None) -> dict[str, Any]: + """Load crates metadata cache generated at build time. Args: - cpp_cache: Dict mapping dep name to metadata + cache_path: Path to crates_metadata.json (from --crates-cache) Returns: - List of component dicts in internal format + Dict mapping crate name to metadata (license, checksum, etc.) """ - components = [] - for name, data in cpp_cache.items(): - version = data.get("version", "unknown") - component = { - "name": name, - "version": version, - "purl": data.get("purl", f"pkg:generic/{name}@{version}"), - "type": "library", - "license": data.get("license", ""), - "supplier": data.get("supplier", ""), - "cpe": data.get("cpe", ""), - "aliases": data.get("aliases", []), - "pedigree_ancestors": data.get("pedigree_ancestors", []), - "pedigree_descendants": data.get("pedigree_descendants", []), - "pedigree_variants": data.get("pedigree_variants", []), - "pedigree_notes": data.get("pedigree_notes", ""), - } - if data.get("url"): - component["url"] = data["url"] - components.append(component) - return components + if not cache_path: + return {} + try: + with open(cache_path, encoding="utf-8") as f: + return json.load(f) + except (OSError, json.JSONDecodeError): + return {} + + def normalize_name(name: str) -> str: @@ -179,7 +182,7 @@ def enrich_components_from_cpp_cache( For each Bazel component, finds a matching C++ cache entry by normalized name and fills in missing fields (license, supplier, version, purl). - Unmatched cache entries are appended. + Components not present in Bazel's discovered dependency graph are ignored. 
Args: components: Bazel-discovered components to enrich @@ -196,8 +199,6 @@ def enrich_components_from_cpp_cache( cpp_by_name[norm] = cc cpp_by_name[cc["name"].lower()] = cc - matched_norms: set[str] = set() - for comp in components: comp_name = comp.get("name", "") norm_name = normalize_name(comp_name) @@ -212,12 +213,13 @@ def enrich_components_from_cpp_cache( if not cpp_match: continue - matched_norms.add(normalize_name(cpp_match["name"])) - # Enrich missing fields only if not comp.get("license") and cpp_match.get("license"): comp["license"] = cpp_match["license"] + if not comp.get("description") and cpp_match.get("description"): + comp["description"] = cpp_match["description"] + if not comp.get("supplier") and cpp_match.get("supplier"): comp["supplier"] = cpp_match["supplier"] @@ -236,14 +238,6 @@ def enrich_components_from_cpp_cache( if not comp.get("checksum") and cpp_match.get("checksum"): comp["checksum"] = cpp_match["checksum"] - # Append unmatched cache components not already in Bazel's graph - existing_norms = {normalize_name(c.get("name", "")) for c in components} - for cc in cpp_components: - norm = normalize_name(cc["name"]) - if norm not in existing_norms and norm not in matched_norms: - cc["source"] = "cdxgen" - components.append(cc) - return components @@ -262,7 +256,7 @@ def load_cdxgen_sbom(cdxgen_path: str) -> list[dict[str, Any]]: except (OSError, json.JSONDecodeError): return [] - components = [] + components: list[dict[str, Any]] = [] for comp in cdx_data.get("components", []): # Extract license information licenses = comp.get("licenses", []) @@ -271,13 +265,24 @@ def load_cdxgen_sbom(cdxgen_path: str) -> list[dict[str, Any]]: # Take first license lic = licenses[0] if isinstance(lic, dict): - license_str = lic.get("license", {}).get("id", "") or lic.get( - "license", {} - ).get("name", "") + license_str = ( + lic.get("expression", "") + or lic.get("license", {}).get("id", "") + or lic.get("license", {}).get("name", "") + ) # Extract purl purl = comp.get("purl", "") + # Extract SHA-256 hash if present + checksum = "" + for h in comp.get("hashes", []): + if not isinstance(h, dict): + continue + if h.get("alg") == "SHA-256" and h.get("content"): + checksum = str(h["content"]) + break + # Build component component = { "name": comp.get("name", ""), @@ -285,11 +290,13 @@ def load_cdxgen_sbom(cdxgen_path: str) -> list[dict[str, Any]]: "purl": purl, "type": comp.get("type", "library"), "license": license_str, + "description": comp.get("description", ""), "supplier": comp.get("supplier", {}).get("name", "") if isinstance(comp.get("supplier"), dict) else "", "cpe": comp.get("cpe", ""), "url": "", + "checksum": checksum, } # Add component if it has a name @@ -299,6 +306,20 @@ def load_cdxgen_sbom(cdxgen_path: str) -> list[dict[str, Any]]: return components +def mark_missing_cpp_descriptions(components: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Mark missing descriptions for non-Rust libraries as 'Missing'.""" + for comp in components: + if comp.get("description"): + continue + if comp.get("type") != "library": + continue + purl = comp.get("purl", "") + if purl.startswith("pkg:cargo/"): + continue + comp["description"] = "Missing" + return components + + def main() -> int: """Main entry point for SBOM generation.""" parser = argparse.ArgumentParser(description="Generate SBOM from Bazel deps") @@ -335,6 +356,18 @@ def main() -> int: if name not in metadata["modules"]: metadata["modules"][name] = mod_data + # Parse MODULE.bazel.lock files to infer selected module 
versions. + # This helps for modules that don't participate in the sbom_metadata + # extension (for example, transitive Bazel modules like boost.*). + module_lockfiles = data.get("module_lockfiles", []) + if module_lockfiles: + lock_modules = parse_module_lockfiles(module_lockfiles) + if "modules" not in metadata: + metadata["modules"] = {} + for name, mod_data in lock_modules.items(): + if name not in metadata["modules"]: + metadata["modules"][name] = mod_data + # Load crates metadata cache (licenses + checksums + versions) crates_cache = load_crates_cache(args.crates_cache) @@ -345,16 +378,10 @@ def main() -> int: for name, cache_data in crates_cache.items(): metadata["crates"].setdefault(name, cache_data) - # Load C++ metadata cache (auto-discovered, like crates cache) - cpp_cache = load_cpp_cache() - cpp_components = cpp_cache_to_components(cpp_cache) if cpp_cache else [] - - # Load cdxgen SBOM if provided (overrides/supplements cpp_cache) + # Load cdxgen SBOM if provided (C++ dependency enrichment) + cpp_components = [] if args.cdxgen_sbom: - cdxgen_components = load_cdxgen_sbom(args.cdxgen_sbom) - if cdxgen_components: - # Merge with cpp_components, preferring cdxgen data - cpp_components = cdxgen_components + cpp_components + cpp_components = load_cdxgen_sbom(args.cdxgen_sbom) # Filter external repos (exclude build tools) external_repos = data.get("external_repos", []) @@ -379,6 +406,9 @@ def main() -> int: ) components = deduplicate_components(components) + # Mark missing C++ descriptions explicitly when cdxgen has no description. + components = mark_missing_cpp_descriptions(components) + # Generate timestamp in SPDX-compliant format (YYYY-MM-DDTHH:MM:SSZ) timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") @@ -467,7 +497,7 @@ def resolve_component( modules = metadata.get("modules", {}) if normalized_name in modules: mod = modules[normalized_name] - return { + result: dict[str, Any] = { "name": normalized_name, "version": mod.get("version", "unknown"), "purl": mod.get("purl", f"pkg:bazel/{normalized_name}@unknown"), @@ -481,6 +511,11 @@ def resolve_component( "pedigree_variants": mod.get("pedigree_variants", []), "pedigree_notes": mod.get("pedigree_notes", ""), } + # MODULE.bazel.lock can provide a sha256 via source.json; expose it as + # checksum so CycloneDX hashes are populated for C/C++ modules. 
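+        # e.g. a lock entry with {"version": "1.3.1", "sha256": "<hex digest>"}
+        # (illustrative values) ends up with checksum = "<hex digest>" below.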
+ if mod.get("sha256"): + result["checksum"] = mod["sha256"] + return result # Check if it's an http_archive dependency http_archives = metadata.get("http_archives", {}) @@ -545,6 +580,7 @@ def resolve_component( "purl": crate.get("purl", f"pkg:cargo/{normalized_name}@unknown"), "type": "library", "license": crate.get("license", ""), + "description": crate.get("description", ""), "supplier": crate.get("supplier", ""), "cpe": crate.get("cpe", ""), "aliases": crate.get("aliases", []), @@ -615,6 +651,8 @@ def resolve_component( } if crate_meta.get("license"): result["license"] = crate_meta["license"] + if crate_meta.get("description"): + result["description"] = crate_meta["description"] if crate_meta.get("supplier"): result["supplier"] = crate_meta["supplier"] if crate_meta.get("cpe"): @@ -660,6 +698,8 @@ def resolve_component( } if crate_meta.get("license"): result["license"] = crate_meta["license"] + if crate_meta.get("description"): + result["description"] = crate_meta["description"] if crate_meta.get("supplier"): result["supplier"] = crate_meta["supplier"] if crate_meta.get("cpe"): @@ -684,7 +724,8 @@ def resolve_component( # rules_boost splits Boost into individual repos like boost.config+, boost.assert+, etc. if "." in normalized_name: parent_name = normalized_name.split(".")[0].rstrip("+") - # Look up parent in all metadata sources + # Look up parent in all metadata sources (modules, http_archives, git_repos, licenses) + licenses = metadata.get("licenses", {}) parent = None if parent_name in modules: parent = modules[parent_name] @@ -692,15 +733,25 @@ def resolve_component( parent = http_archives[parent_name] elif parent_name in git_repos: parent = git_repos[parent_name] + elif parent_name in licenses: + parent = licenses[parent_name] if parent: - return { + parent_version = parent.get("version", "unknown") + result: dict[str, Any] = { "name": normalized_name, - "version": parent.get("version", "unknown"), - "purl": f"pkg:generic/{normalized_name}@{parent.get('version', 'unknown')}", + "version": parent_version, + "purl": f"pkg:generic/{normalized_name}@{parent_version}", "type": "library", "license": parent.get("license", ""), "supplier": parent.get("supplier", ""), } + # Propagate checksum from parent if available (e.g., http_archive + # sha256 or module sha256 from MODULE.bazel.lock). 
+ if parent.get("sha256"): + result["checksum"] = parent["sha256"] + elif parent.get("checksum"): + result["checksum"] = parent["checksum"] + return result # Unknown repository - return with unknown version return { diff --git a/sbom/internal/generator/spdx_formatter.py b/sbom/internal/generator/spdx_formatter.py index c36ff7a..5e235d1 100644 --- a/sbom/internal/generator/spdx_formatter.py +++ b/sbom/internal/generator/spdx_formatter.py @@ -6,6 +6,7 @@ SPDX 2.3 Specification: https://spdx.github.io/spdx-spec/v2.3/ """ +import re import uuid from typing import Any @@ -75,7 +76,10 @@ def generate_spdx( } ) - return { + # Collect LicenseRef-* identifiers used in packages and declare them + extracted = _collect_extracted_license_infos(packages) + + doc: dict[str, Any] = { "spdxVersion": "SPDX-2.3", "dataLicense": "CC0-1.0", "SPDXID": "SPDXRef-DOCUMENT", @@ -92,6 +96,11 @@ def generate_spdx( "relationships": relationships, } + if extracted: + doc["hasExtractedLicensingInfos"] = extracted + + return doc + def _create_spdx_package( component: dict[str, Any], @@ -108,6 +117,7 @@ def _create_spdx_package( version = component.get("version", "unknown") purl = component.get("purl", "") license_id = component.get("license", "") + description = component.get("description", "") supplier = component.get("supplier", "") comp_type = component.get("type", "library") @@ -135,6 +145,9 @@ def _create_spdx_package( "copyrightText": "NOASSERTION", } + if description: + package["description"] = description + if supplier: package["supplier"] = f"Organization: {supplier}" @@ -166,6 +179,37 @@ def _map_type_to_purpose(comp_type: str) -> str: return type_mapping.get(comp_type, "LIBRARY") +def _collect_extracted_license_infos( + packages: list[dict[str, Any]], +) -> list[dict[str, str]]: + """Collect LicenseRef-* identifiers from packages and build declarations. + + SPDX requires every LicenseRef-* used in license expressions to be + declared in hasExtractedLicensingInfos. + + Args: + packages: List of SPDX package dicts + + Returns: + List of extractedLicensingInfo entries + """ + license_refs: set[str] = set() + pattern = re.compile(r"LicenseRef-[A-Za-z0-9\-.]+") + + for pkg in packages: + for field in ("licenseConcluded", "licenseDeclared"): + value = pkg.get(field, "") + license_refs.update(pattern.findall(value)) + + return [ + { + "licenseId": ref, + "extractedText": f"See package metadata for license details ({ref})", + } + for ref in sorted(license_refs) + ] + + def _sanitize_id(value: str) -> str: """Sanitize a string for use in SPDX IDs. diff --git a/sbom/internal/rules.bzl b/sbom/internal/rules.bzl index 28d08c2..80918d8 100644 --- a/sbom/internal/rules.bzl +++ b/sbom/internal/rules.bzl @@ -45,11 +45,24 @@ def _sbom_impl(ctx): # Build target labels list target_labels = [str(t.label) for t in ctx.attr.targets] + # Infer scan root for cdxgen: + # - If all targets come from the same external repo, scan that repo tree. + # - Otherwise scan the current execroot. + target_repos = [] + for t in ctx.attr.targets: + repo = t.label.workspace_name + if repo and repo not in target_repos: + target_repos.append(repo) + cdxgen_scan_root = "." 
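+    # e.g. if every target lives in @some_repo (hypothetical), the scan root
+    # becomes "external/some_repo" below; otherwise the execroot default "." is kept.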
+ if len(target_repos) == 1: + cdxgen_scan_root = "external/{}".format(target_repos[0]) + # Build exclude patterns list exclude_patterns = ctx.attr.exclude_patterns # Collect MODULE.bazel files from dependency modules for version extraction dep_module_paths = [f.path for f in ctx.files.dep_module_files] + module_lock_paths = [f.path for f in ctx.files.module_lockfiles] deps_data = { "external_repos": all_external_repos.to_list(), @@ -58,6 +71,7 @@ def _sbom_impl(ctx): "target_labels": target_labels, "exclude_patterns": exclude_patterns, "dep_module_files": dep_module_paths, + "module_lockfiles": module_lock_paths, "config": { "producer_name": ctx.attr.producer_name, "producer_url": ctx.attr.producer_url, @@ -92,11 +106,11 @@ def _sbom_impl(ctx): args.add("--cyclonedx-output", cdx_out) # Build inputs list - generator_inputs = [deps_json, metadata_file] + ctx.files.dep_module_files + generator_inputs = [deps_json, metadata_file] + ctx.files.dep_module_files + ctx.files.module_lockfiles # Auto-generate crates metadata cache if enabled and a lockfile is provided crates_cache = None - if (ctx.file.cargo_lockfile or ctx.file.module_lockfile) and ctx.attr.auto_crates_cache: + if (ctx.file.cargo_lockfile or ctx.files.module_lockfiles) and ctx.attr.auto_crates_cache: crates_cache = ctx.actions.declare_file(ctx.attr.name + "_crates_metadata.json") cache_inputs = [ctx.file._crates_cache_script] cache_cmd = "set -euo pipefail\npython3 {} {}".format( @@ -106,9 +120,9 @@ def _sbom_impl(ctx): if ctx.file.cargo_lockfile: cache_inputs.append(ctx.file.cargo_lockfile) cache_cmd += " --cargo-lock {}".format(ctx.file.cargo_lockfile.path) - if ctx.file.module_lockfile: - cache_inputs.append(ctx.file.module_lockfile) - cache_cmd += " --module-lock {}".format(ctx.file.module_lockfile.path) + for lock in ctx.files.module_lockfiles: + cache_inputs.append(lock) + cache_cmd += " --module-lock {}".format(lock.path) ctx.actions.run_shell( inputs = cache_inputs, outputs = [crates_cache], @@ -116,6 +130,7 @@ def _sbom_impl(ctx): mnemonic = "CratesCacheGenerate", progress_message = "Generating crates metadata cache for %s" % ctx.attr.name, execution_requirements = {"requires-network": ""}, + use_default_shell_env = True, ) # Add cdxgen SBOM if provided; otherwise auto-generate if enabled @@ -135,9 +150,13 @@ def _sbom_impl(ctx): "-r", "-o", cdxgen_sbom.path, + cdxgen_scan_root, ], mnemonic = "CdxgenGenerate", progress_message = "Generating cdxgen SBOM for %s" % ctx.attr.name, + # cdxgen needs to recursively scan source trees. Running sandboxed with + # only declared file inputs makes the scan effectively empty. 
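+        # Disabling the sandbox trades hermeticity for a complete scan; this relies
+        # on a host-installed npm/cdxgen being available on PATH.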
+ execution_requirements = {"no-sandbox": "1"}, ) if cdxgen_sbom: @@ -232,9 +251,9 @@ sbom_rule = rule( allow_single_file = True, doc = "Optional Cargo.lock file for automatic crate metadata extraction", ), - "module_lockfile": attr.label( - allow_single_file = True, - doc = "Optional MODULE.bazel.lock for additional crates (e.g., from score_crates)", + "module_lockfiles": attr.label_list( + allow_files = True, + doc = "MODULE.bazel.lock files for crate metadata extraction (e.g., from score_crates and workspace)", ), "cdxgen_sbom": attr.label( allow_single_file = [".json"], diff --git a/sbom/scripts/generate_cpp_metadata_cache.py b/sbom/scripts/generate_cpp_metadata_cache.py index cbc0ea1..a45ec38 100644 --- a/sbom/scripts/generate_cpp_metadata_cache.py +++ b/sbom/scripts/generate_cpp_metadata_cache.py @@ -48,6 +48,10 @@ def convert_cdxgen_to_cache(cdxgen_path: str) -> dict: if lic_id: entry["license"] = lic_id + # Description + if comp.get("description"): + entry["description"] = comp["description"] + # Supplier supplier = comp.get("supplier", {}) if supplier and supplier.get("name"): diff --git a/sbom/scripts/generate_crates_metadata_cache.py b/sbom/scripts/generate_crates_metadata_cache.py index 7e052b7..952f56b 100755 --- a/sbom/scripts/generate_crates_metadata_cache.py +++ b/sbom/scripts/generate_crates_metadata_cache.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 -"""Generate crates.io metadata cache for SBOM generation. +"""Generate crates metadata cache for SBOM generation. -This script parses Cargo.lock files and/or MODULE.bazel.lock files, -fetches license metadata from crates.io, and creates a cache file -that can be used during SBOM generation without requiring network -access at build time. +This script parses Cargo.lock files and/or MODULE.bazel.lock files for +crate version/checksum data, then fetches license metadata via +dash-license-scan (Eclipse Foundation + ClearlyDefined) and creates a +cache file for SBOM generation. Usage: python3 generate_crates_metadata_cache.py --module-lock @@ -14,19 +14,20 @@ Example: python3 generate_crates_metadata_cache.py crates_metadata.json \\ --module-lock ../../score-crates/MODULE.bazel.lock - python3 generate_crates_metadata_cache.py crates_metadata.json \\ - --cargo-lock ../../orchestrator/Cargo.lock \\ - --module-lock ../../score-crates/MODULE.bazel.lock """ import argparse import json +import os import re +import shutil +import subprocess import sys +import tempfile import urllib.request -import urllib.error +from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path -from typing import Dict, Any +from typing import Any, Dict def parse_cargo_lock(lockfile_path: str) -> Dict[str, Dict[str, Any]]: @@ -133,94 +134,271 @@ def parse_module_bazel_lock(lockfile_path: str) -> Dict[str, Dict[str, Any]]: return crates -def fetch_crate_metadata_from_crates_io( - crate_name: str, crate_version: str = "" -) -> Dict[str, Any]: - """Fetch crate metadata from crates.io API. +def generate_synthetic_cargo_lock( + crates: Dict[str, Dict[str, Any]], output_path: str +) -> None: + """Generate a minimal synthetic Cargo.lock from parsed crate data. + + The dash-license-scan parser splits on '[[package]]' blocks and extracts + name, version, and source fields. Source must contain 'crates' if present. 
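+
+    Each emitted block looks like this (illustrative crate):
+
+        [[package]]
+        name = "serde"
+        version = "1.0.0"
+        source = "registry+https://github.com/rust-lang/crates.io-index"
+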
Args: - crate_name: Name of the crate - crate_version: Version of the crate (used to find matching version license) + crates: Dict mapping crate name to {name, version, checksum, source} + output_path: Path to write the synthetic Cargo.lock + """ + lines = ["version = 4", ""] + for _name, info in sorted(crates.items()): + lines.append("[[package]]") + lines.append(f'name = "{info["name"]}"') + lines.append(f'version = "{info["version"]}"') + lines.append( + 'source = "registry+https://github.com/rust-lang/crates.io-index"' + ) + lines.append("") - Returns: - Dict with license, repository, description, etc. + with open(output_path, "w", encoding="utf-8") as f: + f.write("\n".join(lines)) + + +def _find_uvx() -> str: + """Locate the uvx binary, checking PATH and common install locations.""" + found = shutil.which("uvx") + if found: + return found + + # Standard uv install location (works inside Bazel sandbox where PATH is minimal) + home = os.environ.get("HOME", os.path.expanduser("~")) + candidate = os.path.join(home, ".local", "bin", "uvx") + if os.path.isfile(candidate) and os.access(candidate, os.X_OK): + return candidate + + return "uvx" # fall back, will raise FileNotFoundError in subprocess + + +def run_dash_license_scan( + cargo_lock_path: str, summary_output_path: str +) -> None: + """Invoke dash-license-scan via uvx and write summary to file. + + Args: + cargo_lock_path: Path to (real or synthetic) Cargo.lock + summary_output_path: Path to write the dash-licenses summary CSV + + Raises: + SystemExit: If uvx/dash-license-scan is not found or fatally crashes """ - url = f"https://crates.io/api/v1/crates/{crate_name}" + uvx = _find_uvx() + cmd = [ + uvx, + "--from", + "dash-license-scan@git+https://github.com/eclipse-score/dash-license-scan", + "dash-license-scan", + "--summary", + summary_output_path, + cargo_lock_path, + ] + print(f"Running: {' '.join(cmd)}") + + # Redirect uv's cache and tool directories to writable temp locations. + # Inside Bazel sandbox, ~/.cache and ~/.local/share are read-only. + env = os.environ.copy() + uv_tmp = os.path.join(tempfile.gettempdir(), "uv_sbom") + if "UV_CACHE_DIR" not in env: + env["UV_CACHE_DIR"] = os.path.join(uv_tmp, "cache") + if "UV_TOOL_DIR" not in env: + env["UV_TOOL_DIR"] = os.path.join(uv_tmp, "tools") try: - req = urllib.request.Request(url) - req.add_header("User-Agent", "SCORE-SBOM-Generator/1.0") + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=600, + env=env, + ) + except FileNotFoundError: + print( + "ERROR: 'uvx' not found on PATH or ~/.local/bin/. Install uv: https://docs.astral.sh/uv/", + file=sys.stderr, + ) + sys.exit(1) + except subprocess.TimeoutExpired: + print( + "ERROR: dash-license-scan timed out after 600 seconds", file=sys.stderr + ) + sys.exit(1) + + # dash-license-scan exits with returncode = number of restricted items. + # This is normal behavior, not an error. Only signal kills are fatal. 
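+    # e.g. returncode 3 means three crates were flagged "restricted", while
+    # returncode -15 means the subprocess was killed by SIGTERM (signal 15).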
+    if result.returncode < 0:
+        print(
+            f"ERROR: dash-license-scan killed by signal {-result.returncode}",
+            file=sys.stderr,
+        )
+        if result.stderr:
+            print(result.stderr, file=sys.stderr)
+        sys.exit(1)
+
+    if result.stderr:
+        # Print dash-license-scan's own output (INFO lines from the JAR)
+        for line in result.stderr.splitlines():
+            print(f"  {line}")
+
+    if not os.path.exists(summary_output_path):
+        print(
+            f"ERROR: dash-license-scan did not produce summary file: {summary_output_path}",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    if result.returncode > 0:
+        print(
+            f"  NOTE: {result.returncode} crate(s) have 'restricted' license status"
+        )
+
+
+def parse_dash_summary(summary_path: str) -> Dict[str, str]:
+    """Parse the dash-licenses summary CSV file into a license lookup dict.
+
+    Each line has format:
+        crate/cratesio/-/<name>/<version>, <license>, <status>, <source>
+
+    Args:
+        summary_path: Path to the dash-licenses summary file
+
+    Returns:
+        Dict mapping crate name to SPDX license expression string
+    """
+    licenses: Dict[str, str] = {}
+    with open(summary_path, encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            parts = [p.strip() for p in line.split(",")]
+            if len(parts) < 4:
+                continue
+
+            content_id = parts[0]
+            license_expr = parts[1].strip()
+
+            # Extract crate name from content_id: "crate/cratesio/-/<name>/<version>"
+            id_parts = content_id.split("/")
+            if len(id_parts) >= 5 and id_parts[0] == "crate":
+                crate_name = id_parts[3]
+                if license_expr:
+                    licenses[crate_name] = license_expr
+
+    return licenses
+
+
+def _extract_supplier(repository_url: str) -> str:
+    """Extract supplier (GitHub org/user) from a repository URL.
+
+    Examples:
+        https://github.com/serde-rs/serde -> serde-rs
+        https://github.com/eclipse-iceoryx/iceoryx2 -> eclipse-iceoryx
+    """
+    if not repository_url:
+        return ""
+    m = re.match(r"https?://github\.com/([^/]+)/", repository_url)
+    return m.group(1) if m else ""
+

-        with urllib.request.urlopen(req, timeout=10) as response:
-            data = json.loads(response.read().decode("utf-8"))
+def _fetch_one_crate_meta(name: str) -> tuple[str, Dict[str, str]]:
+    """Fetch metadata for a single crate from crates.io API.
+
+    Returns (name, {description, supplier}) dict.
+    If the crate isn't found, retries with platform suffixes stripped
+    (e.g. "-qnx8") to find the upstream crate.
+    """
+    candidates = [name]
+    # Platform-specific forks (e.g. iceoryx2-bb-lock-free-qnx8 -> iceoryx2-bb-lock-free)
+    for suffix in ("-qnx8",):
+        if name.endswith(suffix):
+            candidates.append(name[: -len(suffix)])
+
+    for candidate in candidates:
+        url = f"https://crates.io/api/v1/crates/{candidate}"
+        req = urllib.request.Request(
+            url,
+            headers={"User-Agent": "score-sbom-tool (https://eclipse.dev/score)"},
+        )
+        try:
+            with urllib.request.urlopen(req, timeout=10) as resp:
+                data = json.loads(resp.read())
                 crate = data.get("crate", {})
+                desc = (crate.get("description") or "").strip()
+                supplier = _extract_supplier(crate.get("repository", ""))
+                if desc or supplier:
+                    return name, {"description": desc, "supplier": supplier}
+        except Exception:
+            continue
+    return name, {}
 
-    # License is per-version in the crates.io API.
-    # Try to find the license for the specific version first,
-    # then fall back to the latest version.
- license_str = crate.get("license") or "" - versions = data.get("versions", []) - if versions and not license_str: - # Try to find matching version - for v in versions: - if v.get("num") == crate_version: - license_str = v.get("license", "") - break - # Fall back to latest version - if not license_str: - license_str = versions[0].get("license", "") - - return { - "license": license_str, - "repository": crate.get("repository"), - "description": crate.get("description"), - "homepage": crate.get("homepage"), - "documentation": crate.get("documentation"), - } - except urllib.error.HTTPError as e: - if e.code == 404: - print( - f" WARNING: Crate '{crate_name}' not found on crates.io", - file=sys.stderr, - ) - else: - print( - f" WARNING: HTTP error {e.code} fetching '{crate_name}'", - file=sys.stderr, - ) - return {} - except urllib.error.URLError as e: - print(f" WARNING: Network error fetching '{crate_name}': {e}", file=sys.stderr) - return {} - except Exception as e: - print(f" WARNING: Error fetching '{crate_name}': {e}", file=sys.stderr) - return {} + +def fetch_crate_metadata_from_cratesio( + crate_names: list[str], +) -> Dict[str, Dict[str, str]]: + """Fetch metadata (description, supplier) from crates.io API (parallel). + + Args: + crate_names: List of crate names to look up + + Returns: + Dict mapping crate name to {description, supplier} + """ + total = len(crate_names) + print(f"Fetching metadata from crates.io for {total} crates...") + + metadata: Dict[str, Dict[str, str]] = {} + done = 0 + with ThreadPoolExecutor(max_workers=10) as pool: + futures = {pool.submit(_fetch_one_crate_meta, n): n for n in crate_names} + for future in as_completed(futures): + name, meta = future.result() + if meta: + metadata[name] = meta + done += 1 + if done % 50 == 0: + print(f" ... {done}/{total} crates queried") + + with_desc = sum(1 for m in metadata.values() if m.get("description")) + with_supplier = sum(1 for m in metadata.values() if m.get("supplier")) + print( + f"Retrieved from crates.io: {with_desc} descriptions, {with_supplier} suppliers" + ) + return metadata def generate_cache( - cargo_lock_path: str = None, use_network: bool = True, module_lock_path: str = None + cargo_lock_path: str | None = None, + module_lock_paths: list[str] | None = None, ) -> Dict[str, Dict[str, Any]]: - """Generate metadata cache from Cargo.lock and/or MODULE.bazel.lock. + """Generate metadata cache from lockfiles + dash-license-scan. - At least one of cargo_lock_path or module_lock_path must be provided. + 1. Parse Cargo.lock and/or MODULE.bazel.lock files for crate names, versions, checksums + 2. Generate a synthetic Cargo.lock combining all crates + 3. Run dash-license-scan for license data + 4. Fetch descriptions from crates.io (parallel) + 5. 
Combine version/checksum from lockfile with license and description Args: cargo_lock_path: Optional path to Cargo.lock file - use_network: If True, fetch metadata from crates.io; if False, use checksums only - module_lock_path: Optional path to MODULE.bazel.lock for additional crates + module_lock_paths: Optional list of paths to MODULE.bazel.lock files Returns: Dict mapping crate name to metadata """ - crates = {} + crates: Dict[str, Dict[str, Any]] = {} if cargo_lock_path: print(f"Parsing {cargo_lock_path}...") crates = parse_cargo_lock(cargo_lock_path) print(f"Found {len(crates)} crates from Cargo.lock") - # Merge crates from MODULE.bazel.lock (score_crates or similar) - if module_lock_path: + # Merge crates from MODULE.bazel.lock files + for module_lock_path in (module_lock_paths or []): print(f"Parsing {module_lock_path}...") module_crates = parse_module_bazel_lock(module_lock_path) added = 0 @@ -228,42 +406,52 @@ def generate_cache( if name not in crates: crates[name] = info added += 1 - print(f"Found {len(module_crates)} crates in MODULE.bazel.lock ({added} new)") - - if not use_network: - print("Network lookups disabled. Using checksums only.") - return crates + print(f"Found {len(module_crates)} crates in {module_lock_path} ({added} new)") - print("Fetching license metadata from crates.io...") - cache = {} + if not crates: + print("No crates found in lockfiles.") + return {} - for i, (name, info) in enumerate(crates.items(), 1): - print(f" [{i}/{len(crates)}] {name} {info['version']}...", end="", flush=True) + # Generate synthetic Cargo.lock containing only crates.io crates. + # This avoids dash-license-scan's ValueError on non-crates.io sources + # (git dependencies, path dependencies) that may be in a real Cargo.lock. + temp_dir = tempfile.mkdtemp(prefix="sbom_dash_") + synthetic_path = os.path.join(temp_dir, "Cargo.lock") + generate_synthetic_cargo_lock(crates, synthetic_path) + print(f"Generated synthetic Cargo.lock with {len(crates)} crates") - metadata = fetch_crate_metadata_from_crates_io(name, info["version"]) + summary_path = os.path.join(temp_dir, "dash_summary.txt") - # Merge Cargo.lock data with crates.io metadata + try: + print("Fetching license data via dash-license-scan...") + run_dash_license_scan(synthetic_path, summary_path) + license_map = parse_dash_summary(summary_path) + print(f"Retrieved licenses for {len(license_map)} crates") + finally: + shutil.rmtree(temp_dir, ignore_errors=True) + + # Fetch descriptions + suppliers from crates.io (parallel, ~10 concurrent requests) + cratesio_meta = fetch_crate_metadata_from_cratesio(list(crates.keys())) + + # Build final cache + cache: Dict[str, Dict[str, Any]] = {} + for name, info in crates.items(): + meta = cratesio_meta.get(name, {}) cache[name] = { "version": info["version"], "checksum": info["checksum"], "purl": f"pkg:cargo/{name}@{info['version']}", - "license": metadata.get("license", ""), - "repository": metadata.get("repository", ""), - "description": metadata.get("description", ""), - "homepage": metadata.get("homepage", ""), + "license": license_map.get(name, ""), + "description": meta.get("description", ""), + "supplier": meta.get("supplier", ""), } - if cache[name]["license"]: - print(f" ✓ {cache[name]['license']}") - else: - print(" (no license)") - return cache def main(): parser = argparse.ArgumentParser( - description="Generate crates.io metadata cache for SBOM generation" + description="Generate crates metadata cache for SBOM generation (via dash-license-scan)" ) parser.add_argument( "output", @@ 
-272,14 +460,11 @@ def main(): help="Output JSON file (default: crates_metadata.json)", ) parser.add_argument("--cargo-lock", help="Path to Cargo.lock file") - parser.add_argument( - "--no-network", - action="store_true", - help="Skip network lookups (checksums only)", - ) parser.add_argument( "--module-lock", - help="Path to MODULE.bazel.lock for additional crates (e.g., from score_crates)", + action="append", + default=[], + help="Path to MODULE.bazel.lock for additional crates (can be repeated)", ) parser.add_argument( "--merge", help="Merge with existing cache file instead of overwriting" @@ -293,8 +478,7 @@ def main(): # Generate new cache cache = generate_cache( cargo_lock_path=args.cargo_lock, - use_network=not args.no_network, - module_lock_path=args.module_lock, + module_lock_paths=args.module_lock, ) # Merge with existing cache if requested @@ -303,27 +487,36 @@ def main(): with open(args.merge) as f: existing = json.load(f) - # Prefer new data, but keep entries not in current Cargo.lock + # Prefer new data, but keep entries not in current lockfiles merged = existing.copy() merged.update(cache) cache = merged print(f"Merged cache now contains {len(cache)} entries") + if not cache: + print("\nNo crates to write.") + with open(args.output, "w") as f: + json.dump({}, f) + return 0 + # Write cache print(f"\nWriting cache to {args.output}...") with open(args.output, "w") as f: json.dump(cache, f, indent=2, sort_keys=True) # Print statistics + total = len(cache) with_license = sum(1 for c in cache.values() if c.get("license")) with_checksum = sum(1 for c in cache.values() if c.get("checksum")) + with_desc = sum(1 for c in cache.values() if c.get("description")) + with_supplier = sum(1 for c in cache.values() if c.get("supplier")) print(f"\n✓ Cache generated successfully!") - print(f" Total crates: {len(cache)}") - print(f" With licenses: {with_license} ({with_license / len(cache) * 100:.1f}%)") - print( - f" With checksums: {with_checksum} ({with_checksum / len(cache) * 100:.1f}%)" - ) + print(f" Total crates: {total}") + print(f" With licenses: {with_license} ({with_license / total * 100:.1f}%)") + print(f" With checksums: {with_checksum} ({with_checksum / total * 100:.1f}%)") + print(f" With descriptions: {with_desc} ({with_desc / total * 100:.1f}%)") + print(f" With suppliers: {with_supplier} ({with_supplier / total * 100:.1f}%)") return 0
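For orientation, the sketch below shows the shape of a single entry in the crates_metadata.json cache that `generate_cache` assembles above. It is a minimal illustration, not output from a real run: the crate name "serde" and every field value are invented, and only the field names (version, checksum, purl, license, description, supplier) and the purl format follow the cache-building loop in this patch.

```python
import json

# Minimal sketch of one crates_metadata.json entry (all values invented).
example_cache = {
    "serde": {
        "version": "1.0.0",
        "checksum": "<sha256 hex digest>",            # from Cargo.lock / MODULE.bazel.lock, if known
        "purl": "pkg:cargo/serde@1.0.0",
        "license": "MIT OR Apache-2.0",               # from the dash-license-scan summary
        "description": "A serialization framework",   # from the crates.io API
        "supplier": "serde-rs",                       # GitHub org parsed from the repository URL
    }
}

print(json.dumps(example_cache, indent=2, sort_keys=True))
```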