diff --git a/src/run.rs b/src/run.rs deleted file mode 100644 index a7f56ba..0000000 --- a/src/run.rs +++ /dev/null @@ -1,1326 +0,0 @@ -use std::fs; -use std::io::IsTerminal; -use std::path::Path; - -use anyhow::{Context, Result}; - -use crate::cli::{BaselineAction, Cli, Command, DiffArgs, FailOn, InitArgs, OutputFormat}; -use crate::diff::ChangeSet; -use crate::enrich::{Enrichment, Severity}; -use crate::{ - attestation, baseline, cli, clock, config, diff, enrich, model, parse, plugin, refresh, render, - vex, -}; - -/// Process exit code emitted when `--fail-on` trips. Distinct from clap's -/// usage-error exit (`2`-ish on parse failure) because clap exits before -/// `run` is called — there's no overlap window where this code is ambiguous. -pub const FAIL_ON_EXIT_CODE: i32 = 2; - -pub fn run(cli: Cli) -> Result<()> { - match cli.command { - Command::Diff(args) => run_diff(*args), - Command::RefreshTyposquat(args) => refresh::run(args), - Command::Baseline { action } => run_baseline(action), - Command::Init(args) => run_init(args), - } -} - -fn run_init(args: InitArgs) -> Result<()> { - write_scaffold_file(Path::new(".bomdrift.toml"), INIT_CONFIG, args.force)?; - if !args.config_only { - write_scaffold_file( - Path::new(".github/workflows/sbom-diff.yml"), - INIT_SBOM_WORKFLOW, - args.force, - )?; - write_scaffold_file( - Path::new(".github/workflows/bomdrift-suppress.yml"), - INIT_SUPPRESS_WORKFLOW, - args.force, - )?; - } - eprintln!("bomdrift: initialized repository files"); - Ok(()) -} - -fn write_scaffold_file(path: &Path, contents: &str, force: bool) -> Result<()> { - if path.exists() && !force { - anyhow::bail!( - "{} already exists; re-run with --force to overwrite", - path.display() - ); - } - if let Some(parent) = path.parent().filter(|p| !p.as_os_str().is_empty()) { - fs::create_dir_all(parent) - .with_context(|| format!("creating parent directory: {}", parent.display()))?; - } - fs::write(path, contents).with_context(|| format!("writing scaffold file: {}", path.display())) -} - -fn run_baseline(action: BaselineAction) -> Result<()> { - match action { - BaselineAction::Add(args) => { - // Validate --expires upfront so a typo'd date doesn't write a - // bad entry that errors on the NEXT diff load. - if let Some(s) = &args.expires { - clock::parse_ymd(s) - .with_context(|| format!("--expires must be YYYY-MM-DD, got {s:?}"))?; - } - - // --from-comment overrides positional id/reason. Used by the - // GitLab webhook bridge (Phase L). Non-zero exit when the - // body has no directive — silent no-op would let mis-configured - // bridges look like they worked. - let (id, reason_owned) = if let Some(body) = &args.from_comment { - match baseline::parse_comment_directive(body)? { - Some((id, reason)) => (id, reason), - None => { - eprintln!( - "bomdrift: --from-comment body contained no `/bomdrift suppress ` directive" - ); - std::process::exit(1); - } - } - } else { - let Some(id) = args.id.clone() else { - eprintln!( - "bomdrift baseline add: missing required ADVISORY_ID (use a positional argument or --from-comment )" - ); - std::process::exit(2); - }; - (id, args.reason.clone()) - }; - - let outcome = baseline::add_suppression_full( - &args.path, - &id, - args.expires.as_deref(), - reason_owned.as_deref(), - )?; - match outcome { - baseline::AddOutcome::Added => { - eprintln!( - "bomdrift: added '{id}' to {path}", - id = id.trim(), - path = args.path.display(), - ); - } - baseline::AddOutcome::AlreadyPresent => { - eprintln!( - "bomdrift: '{id}' already present in {path}; no change", - id = id.trim(), - path = args.path.display(), - ); - } - } - Ok(()) - } - } -} - -fn run_diff(mut args: DiffArgs) -> Result<()> { - config::apply_diff_config(&mut args)?; - - if args.require_attestation - && (args.before_attestation.is_none() || args.after_attestation.is_none()) - { - anyhow::bail!( - "--require-attestation needs both --before-attestation and --after-attestation" - ); - } - - let output = args.output.unwrap_or(OutputFormat::Terminal); - let format = args.format.unwrap_or(cli::InputFormat::Auto); - let fail_on = args.fail_on.unwrap_or(FailOn::None); - - let format_hint = format.to_sbom_format(); - let before = load_sbom_or_attestation( - args.before.as_deref(), - args.before_attestation.as_deref(), - args.cosign_identity.as_deref(), - args.cosign_issuer.as_deref(), - format_hint, - args.include_file_components, - "before", - args.debug_calibration, - args.debug_calibration_format, - )?; - let after = load_sbom_or_attestation( - args.after.as_deref(), - args.after_attestation.as_deref(), - args.cosign_identity.as_deref(), - args.cosign_issuer.as_deref(), - format_hint, - args.include_file_components, - "after", - args.debug_calibration, - args.debug_calibration_format, - )?; - - let mut cs = diff::diff(&before, &after); - - let mut enrichment = if args.no_osv { - enrich::Enrichment::default() - } else { - // OSV enrichment is best-effort. Network failures must not block the diff - // from rendering — a PR review is still useful without CVE data. - match enrich::osv::enrich_cached_with_ttl(&cs, args.no_osv_cache, args.cache_ttl_hours) { - Ok(e) => e, - Err(err) => { - eprintln!("warning: OSV enrichment failed, continuing without it: {err:#}"); - enrich::Enrichment::default() - } - } - }; - - // EPSS / KEV enrichment piggyback on OSV's VulnRefs and only have - // anything to do when there are CVE-aliased advisories. Skip both if - // there are no vulns. - if !args.no_epss - && !enrichment.vulns.is_empty() - && let Err(err) = enrich::epss::enrich_with_ttl(&mut enrichment, args.cache_ttl_hours) - { - eprintln!("warning: EPSS enrichment failed, continuing without it: {err:#}"); - } - if !args.no_kev - && !enrichment.vulns.is_empty() - && let Err(err) = enrich::kev::enrich_with_ttl(&mut enrichment, args.cache_ttl_hours) - { - eprintln!("warning: KEV enrichment failed, continuing without it: {err:#}"); - } - - // Typosquat detection is pure-compute (embedded reference list) and always - // runs, regardless of `--no-osv`. Findings are informational. - enrichment.typosquats = - enrich::typosquat::enrich_with_threshold(&cs, args.typosquat_similarity_threshold); - - // Multi-major version-jump detection is pure-compute and also always runs. - // Findings are informational. - enrichment.version_jumps = enrich::version_jump::enrich_with(&cs, args.multi_major_delta); - - // Maintainer-age enrichment hits the GitHub REST API; gated behind - // `--no-maintainer-age` for offline runs. Best-effort: failures warn and - // continue, mirroring the OSV enricher's contract. - if !args.no_maintainer_age { - match enrich::maintainer::enrich_with_hosts( - &cs, - "https://api.github.com", - std::time::Duration::from_secs(15), - args.young_maintainer_days, - ) { - Ok(findings) => enrichment.maintainer_age = findings, - Err(err) => { - eprintln!( - "warning: maintainer-age enrichment failed, continuing without it: {err:#}" - ); - } - } - } - - // License-policy enrichment (Phase D, v0.8). Pure-compute, runs after - // OSV/EPSS/KEV. Empty allow + empty deny means "no policy" — the - // enricher returns no violations. - let license_policy = enrich::license::Policy { - allow: args.allow_licenses.clone(), - deny: args.deny_licenses.clone(), - allow_ambiguous: args.allow_ambiguous_licenses, - allow_exceptions: args.allow_exception.clone(), - deny_exceptions: args.deny_exception.clone(), - }; - enrichment.license_violations = enrich::license::enrich(&cs, &license_policy); - - // Registry-metadata enrichers (Phase K, v0.9). Best-effort — a - // registry timeout returns Ok with no findings. - if !args.no_registry { - let findings = - enrich::registry::enrich(&cs, args.recently_published_days, args.cache_ttl_hours); - enrichment.recently_published = findings.recently_published; - enrichment.deprecated = findings.deprecated; - enrichment.maintainer_set_changed = findings.maintainer_set_changed; - } - - // Plugin findings (Phase C, v0.9.6). Run after every built-in - // enricher so plugins observe the same `cs` view bomdrift renders; - // before baseline so plugin findings can be baselined too. Plugin - // failures degrade gracefully — a malformed manifest aborts the - // run (config error), but plugin runtime failures emit only a - // BOMDRIFT_DEBUG-gated stderr warning and contribute no findings. - if !args.plugin.is_empty() { - let mut manifests = Vec::with_capacity(args.plugin.len()); - for path in &args.plugin { - let manifest = plugin::load_manifest(path) - .with_context(|| format!("loading --plugin {}", path.display()))?; - manifests.push(manifest); - } - enrichment.plugin_findings = plugin::run_plugins(&manifests, &cs); - } - - // Apply the baseline AFTER all enrichers run — suppression operates on - // the realized finding set, not on intermediate inputs. This keeps the - // baseline file format stable as new enrichers are added: a new finding - // type that the baseline doesn't know about simply isn't suppressed. - let mut baseline_entries: Vec = Vec::new(); - if let Some(path) = &args.baseline { - let baseline = baseline::Baseline::load(path)?; - for ent in &baseline.expired_entries { - eprintln!( - "warning: baseline entry {id}{purl} expired {expires}; finding will surface in this run{reason}", - id = ent.id, - purl = ent - .purl - .as_deref() - .map(|p| format!(" ({p})")) - .unwrap_or_default(), - expires = ent.expires.as_deref().unwrap_or(""), - reason = ent - .reason - .as_deref() - .map(|r| format!(" — was: {r}")) - .unwrap_or_default(), - ); - } - baseline_entries = baseline.entries.clone(); - baseline::apply(&mut cs, &mut enrichment, &baseline); - } - - // VEX consumption (Phase G, v0.9). Applied AFTER baseline so VEX - // statements operate on the post-baseline view — this matches what - // a downstream tool would see and avoids double-counting "already - // suppressed" findings in the VEX-suppressed tally. - if !args.vex.is_empty() { - match vex::load(&args.vex) { - Ok(stmts) => { - let idx = vex::VexIndex::build(stmts); - vex::apply(&mut enrichment, &idx); - } - Err(err) => { - eprintln!("warning: VEX load failed, continuing without VEX filtering: {err:#}"); - } - } - } - - // VEX emission (Phase H, v0.9). Writes a single OpenVEX 0.2.0 doc - // to the requested path, covering baseline-suppressed entries and - // un-suppressed findings. Byte-deterministic when SOURCE_DATE_EPOCH - // is set. - if let Some(path) = &args.emit_vex { - let author = args - .vex_author - .clone() - .or_else(|| args.repo_url.clone()) - .or_else(|| std::env::var("BOMDRIFT_REPO_URL").ok()) - .filter(|s| !s.is_empty()) - .unwrap_or_else(|| "bomdrift".to_string()); - let default_just = args - .vex_default_justification - .clone() - .unwrap_or_else(|| "vulnerable_code_not_in_execute_path".to_string()); - let opts = vex::EmitOptions { - author: &author, - default_justification: &default_just, - baseline_entries: &baseline_entries, - }; - let body = vex::emit(&cs, &enrichment, &opts); - std::fs::write(path, body) - .with_context(|| format!("writing --emit-vex {}", path.display()))?; - } - - // Calibration tap. Off by default; opt-in via `--debug-calibration`. - // Emits one CSV-friendly line per finding to stderr so an adopter - // can run the flag across a representative N PRs and feed the - // resulting CSV back as tuning data (issue #5). The output is - // deliberately plain — no JSON, no schema versioning — because the - // intended consumer is a one-off awk/jq pipeline, not a long-lived - // integration. Format: `kind|key|score|threshold`. No telemetry: the - // user owns the bytes and pipes them wherever they want. - if args.debug_calibration { - write_calibration_lines( - &enrichment, - &mut std::io::stderr(), - args.debug_calibration_format, - CalibrationOverrides { - similarity_threshold: args.typosquat_similarity_threshold, - young_maintainer_days: args.young_maintainer_days, - multi_major_delta: args.multi_major_delta, - }, - ); - } - - // CLI flag wins; otherwise the env var supplies the default. Empty - // strings are treated as unset to match shell-script callers that - // pass `BOMDRIFT_REPO_URL=` to clear the value rather than `unset`. - // GitLab CI exposes the project URL as `CI_PROJECT_URL` (analog of - // GitHub's `GITHUB_REPOSITORY`-derived URL); honor it as a third - // fallback so users on the GitLab template don't have to plumb - // `BOMDRIFT_REPO_URL` themselves. - let repo_url = args - .repo_url - .clone() - .or_else(|| std::env::var("BOMDRIFT_REPO_URL").ok()) - .or_else(|| std::env::var("CI_PROJECT_URL").ok()) - .or_else(|| std::env::var("BITBUCKET_GIT_HTTP_ORIGIN").ok()) - .or_else(|| std::env::var("BUILD_REPOSITORY_URI").ok()) - .filter(|s| !s.is_empty()); - - // Platform precedence: explicit `--platform` (or `[diff] platform` - // in `.bomdrift.toml`, already merged into `args.platform`) wins; - // otherwise auto-detect from CI env. Detection order: GitLab - // (`GITLAB_CI=true`), Bitbucket (`BITBUCKET_BUILD_NUMBER`), Azure - // DevOps (`TF_BUILD`), then default GitHub. - let platform = args.platform.unwrap_or_else(|| { - if std::env::var("GITLAB_CI").is_ok_and(|v| v == "true") { - crate::cli::Platform::GitLab - } else if std::env::var("BITBUCKET_BUILD_NUMBER").is_ok() { - crate::cli::Platform::Bitbucket - } else if std::env::var("TF_BUILD").is_ok() { - crate::cli::Platform::AzureDevOps - } else { - crate::cli::Platform::GitHub - } - }); - let md_options = render::markdown::Options { - summary_only: args.summary_only, - findings_only: args.findings_only, - repo_url, - platform: platform.into(), - }; - let rendered = match output { - OutputFormat::Terminal => { - // ANSI escapes are only safe on a real TTY. Piped/redirected stdout - // (e.g. captured by a CI step that posts a PR comment) must stay - // plain markdown so it renders correctly in a comment body. - if std::io::stdout().is_terminal() { - render::term::render(&cs, &enrichment) - } else { - render::markdown::render_with_options(&cs, &enrichment, md_options) - } - } - OutputFormat::Markdown => { - render::markdown::render_with_options(&cs, &enrichment, md_options) - } - OutputFormat::Json => render::json::render(&cs, &enrichment), - OutputFormat::Sarif => render::sarif::render(&cs, &enrichment), - OutputFormat::Html => render::html::render(&cs, &enrichment), - }; - - if let Some(path) = &args.output_file { - std::fs::write(path, &rendered) - .with_context(|| format!("writing --output-file {}", path.display()))?; - } else { - print!("{rendered}"); - } - - // Body must be fully written before we exit-2 — the action's `tee` - // wrapper still wants the comment posted even when fail-on trips. - let budget_tripped = budget_tripped( - &cs, - args.max_added, - args.max_removed, - args.max_version_changed, - ); - if budget_tripped { - log_budget_trips( - &cs, - args.max_added, - args.max_removed, - args.max_version_changed, - ); - } - - let epss_tripped = args - .fail_on_epss - .is_some_and(|threshold| any_epss_at_or_above(&enrichment, threshold)); - if epss_tripped { - let threshold = args.fail_on_epss.unwrap_or(0.0); - eprintln!( - "bomdrift: policy gate tripped: --fail-on-epss {threshold:.2} (one or more advisories at or above this score)" - ); - } - - if tripped(&cs, &enrichment, fail_on) || budget_tripped || epss_tripped { - std::process::exit(FAIL_ON_EXIT_CODE); - } - - Ok(()) -} - -/// Pure helper: does this `(changeset, enrichment)` pair trip the configured -/// fail-on threshold? Side-effect-free so the policy is easy to unit-test -/// without spinning up the full pipeline. -/// -/// `FailOn::CriticalCve` filters on real severity now that OSV `/v1/vulns/{id}` -/// is fetched; only advisories with [`Severity::High`] or higher trip it. -/// (High is included because GHSA's `CRITICAL` label is relatively rare — -/// many actively-exploited supply-chain advisories ship as `HIGH`. Treating -/// "critical-cve" as "high-or-critical" matches what the option's name -/// communicates to a CI policy author: "block on the actionable bucket".) -pub fn tripped(cs: &ChangeSet, e: &Enrichment, threshold: FailOn) -> bool { - match threshold { - FailOn::None => false, - FailOn::Cve => !e.vulns.is_empty(), - FailOn::CriticalCve => any_advisory_at_or_above(e, Severity::High), - FailOn::Typosquat => !e.typosquats.is_empty(), - FailOn::LicenseChange => !cs.license_changed.is_empty(), - FailOn::Kev => any_kev(e), - FailOn::LicenseViolation => !e.license_violations.is_empty(), - FailOn::RecentlyPublished => !e.recently_published.is_empty(), - FailOn::Deprecated => !e.deprecated.is_empty(), - FailOn::Any => e.has_findings() || !cs.license_changed.is_empty() || any_kev(e), - } -} - -/// True when any advisory across all components has its CISA KEV flag set. -pub fn any_kev(e: &Enrichment) -> bool { - e.vulns.values().any(|refs| refs.iter().any(|r| r.kev)) -} - -/// True when any advisory has an EPSS score >= the threshold. -pub fn any_epss_at_or_above(e: &Enrichment, threshold: f32) -> bool { - e.vulns.values().any(|refs| { - refs.iter() - .any(|r| r.epss_score.is_some_and(|s| s >= threshold)) - }) -} - -pub fn budget_tripped( - cs: &ChangeSet, - max_added: Option, - max_removed: Option, - max_version_changed: Option, -) -> bool { - max_added.is_some_and(|max| cs.added.len() > max) - || max_removed.is_some_and(|max| cs.removed.len() > max) - || max_version_changed.is_some_and(|max| cs.version_changed.len() > max) -} - -/// Emit one CSV-friendly line per finding to the given writer, capturing -/// the score and the constant it was compared against. Off by default -/// (driven by `--debug-calibration`); when set, the user pipes stderr -/// to a file and feeds the resulting CSV back as tuning data. -/// -/// Schema: `kind|key|score|threshold` — pipe-delimited because purls -/// already contain commas (`pkg:npm/@scope/name`) which would force CSV -/// quoting. `kind` ∈ {`typosquat`, `version-jump`, `maintainer-age`, -/// `cve`}. `score` is the underlying numeric the enricher computed -/// (similarity for typosquat, major-version delta for version-jump, -/// days-old for maintainer-age, max CVSS-equivalent for cve); -/// `threshold` is the constant the score was gated against. CVE rows -/// surface every advisory (no internal threshold) so adopters can see -/// the score distribution before tuning `--fail-on critical-cve`. -/// Active overrides for the configurable calibration thresholds. Threaded -/// into [`write_calibration_lines`] so emitted rows reflect the effective -/// threshold the enricher actually used, not the unconditional const default. -#[derive(Debug, Default, Clone, Copy)] -pub(crate) struct CalibrationOverrides { - pub similarity_threshold: Option, - pub young_maintainer_days: Option, - pub multi_major_delta: Option, -} - -fn write_calibration_lines( - e: &Enrichment, - out: &mut W, - format: crate::cli::DebugFormat, - overrides: CalibrationOverrides, -) { - use crate::enrich::maintainer::YOUNG_MAINTAINER_DAYS; - use crate::enrich::typosquat::SIMILARITY_THRESHOLD; - use crate::enrich::version_jump::MIN_MAJOR_DELTA; - - let active_similarity = overrides - .similarity_threshold - .unwrap_or(SIMILARITY_THRESHOLD); - let active_young = overrides - .young_maintainer_days - .unwrap_or(YOUNG_MAINTAINER_DAYS); - let active_major_delta = overrides.multi_major_delta.unwrap_or(MIN_MAJOR_DELTA); - - for f in &e.typosquats { - write_calibration_row( - out, - "typosquat", - f.component - .purl - .as_deref() - .unwrap_or(f.component.name.as_str()), - CalibrationScore::Float(f.score), - CalibrationThreshold::Float(active_similarity), - format, - ); - } - for f in &e.version_jumps { - write_calibration_row( - out, - "version-jump", - f.after.purl.as_deref().unwrap_or(f.after.name.as_str()), - CalibrationScore::Int(f.after_major.saturating_sub(f.before_major) as i64), - CalibrationThreshold::Int(active_major_delta as i64), - format, - ); - } - for f in &e.maintainer_age { - write_calibration_row( - out, - "maintainer-age", - f.component - .purl - .as_deref() - .unwrap_or(f.component.name.as_str()), - CalibrationScore::Int(f.days_old), - CalibrationThreshold::Int(active_young), - format, - ); - } - for (purl, refs) in &e.vulns { - for vuln in refs { - // Severity has no numeric score in our model; emit the bucket - // label as a non-numeric "score" so the row stays well-formed - // (string in JSONL, plain token in pipe). - write_calibration_row( - out, - "cve", - &format!("{purl}#{}", vuln.id), - CalibrationScore::Text(vuln.severity.as_str()), - CalibrationThreshold::Text("high+"), - format, - ); - for cve in vuln.cves() { - if let Some(score) = vuln.epss_score { - write_calibration_row( - out, - "epss", - &format!("{purl}+{cve}"), - CalibrationScore::Float(score as f64), - CalibrationThreshold::Float(0.5), - format, - ); - } - if vuln.kev { - write_calibration_row( - out, - "kev", - &format!("{purl}+{cve}"), - CalibrationScore::Text("true"), - CalibrationThreshold::Text("kev"), - format, - ); - } - } - } - } - for v in &e.license_violations { - // Threshold field carries the precise matched_rule (e.g. - // "deny: GPL-3.0-only" or "exception:LLVM-exception denied") - // so calibration consumers see the WHY, not just the kind tag. - write_calibration_row( - out, - "license", - v.component - .purl - .as_deref() - .unwrap_or(v.component.name.as_str()), - CalibrationScore::Text(&v.license), - CalibrationThreshold::Text(&v.matched_rule), - format, - ); - } - for f in &e.recently_published { - write_calibration_row( - out, - "recently-published", - f.component - .purl - .as_deref() - .unwrap_or(f.component.name.as_str()), - CalibrationScore::Int(f.days_old), - CalibrationThreshold::Int(crate::enrich::registry::MIN_PUBLISHED_AGE_DAYS), - format, - ); - } - for f in &e.deprecated { - write_calibration_row( - out, - "deprecated", - f.component - .purl - .as_deref() - .unwrap_or(f.component.name.as_str()), - CalibrationScore::Text(f.message.as_deref().unwrap_or("(deprecated)")), - CalibrationThreshold::Text("any"), - format, - ); - } - for f in &e.maintainer_set_changed { - write_calibration_row( - out, - "maintainer-set-changed", - f.after.purl.as_deref().unwrap_or(f.after.name.as_str()), - CalibrationScore::Int((f.added.len() + f.removed.len()) as i64), - CalibrationThreshold::Int(1), - format, - ); - } -} - -/// Numeric or symbolic score for a calibration row. Float/Int rendered -/// without quotes in JSONL; Text rendered as a JSON string. -pub(crate) enum CalibrationScore<'a> { - Float(f64), - Int(i64), - Text(&'a str), -} - -pub(crate) enum CalibrationThreshold<'a> { - Float(f64), - Int(i64), - Text(&'a str), -} - -/// Single dispatch point for both pipe and JSONL calibration formats. -/// Adding a new finding kind is one call site, not two — the format -/// branches stay localized to this helper. -pub(crate) fn write_calibration_row( - out: &mut W, - kind: &str, - key: &str, - score: CalibrationScore<'_>, - threshold: CalibrationThreshold<'_>, - format: crate::cli::DebugFormat, -) { - match format { - crate::cli::DebugFormat::Pipe => { - let score_s = match score { - CalibrationScore::Float(v) => format!("{v:.4}"), - CalibrationScore::Int(v) => v.to_string(), - CalibrationScore::Text(s) => s.to_string(), - }; - let thr_s = match threshold { - CalibrationThreshold::Float(v) => format!("{v:.4}"), - CalibrationThreshold::Int(v) => v.to_string(), - CalibrationThreshold::Text(s) => s.to_string(), - }; - let _ = writeln!(out, "{kind}|{key}|{score_s}|{thr_s}"); - } - crate::cli::DebugFormat::Jsonl => { - let score_v = match score { - CalibrationScore::Float(v) => serde_json::Value::from(v), - CalibrationScore::Int(v) => serde_json::Value::from(v), - CalibrationScore::Text(s) => serde_json::Value::from(s), - }; - let thr_v = match threshold { - CalibrationThreshold::Float(v) => serde_json::Value::from(v), - CalibrationThreshold::Int(v) => serde_json::Value::from(v), - CalibrationThreshold::Text(s) => serde_json::Value::from(s), - }; - let line = serde_json::json!({ - "kind": kind, - "key": key, - "score": score_v, - "threshold": thr_v, - }); - let _ = writeln!(out, "{line}"); - } - } -} - -fn log_budget_trips( - cs: &ChangeSet, - max_added: Option, - max_removed: Option, - max_version_changed: Option, -) { - if let Some(max) = max_added.filter(|max| cs.added.len() > *max) { - eprintln!( - "bomdrift: policy gate tripped: added count {} exceeds --max-added {}", - cs.added.len(), - max - ); - } - if let Some(max) = max_removed.filter(|max| cs.removed.len() > *max) { - eprintln!( - "bomdrift: policy gate tripped: removed count {} exceeds --max-removed {}", - cs.removed.len(), - max - ); - } - if let Some(max) = max_version_changed.filter(|max| cs.version_changed.len() > *max) { - eprintln!( - "bomdrift: policy gate tripped: version-changed count {} exceeds --max-version-changed {}", - cs.version_changed.len(), - max - ); - } -} - -fn any_advisory_at_or_above(e: &Enrichment, threshold: Severity) -> bool { - e.vulns.values().flatten().any(|v| v.severity >= threshold) -} - -const INIT_CONFIG: &str = r#"# bomdrift repo policy. -# CLI flags override these defaults for one-off runs. - -[diff] -fail_on = "critical-cve" -baseline = ".bomdrift/baseline.json" -findings_only = false - -# Optional churn budgets. Uncomment to fail the workflow when a PR changes too -# many dependencies at once. -# max_added = 25 -# max_removed = 50 -# max_version_changed = 10 -"#; - -const INIT_SBOM_WORKFLOW: &str = r#"name: SBOM diff - -on: pull_request - -permissions: - contents: read - pull-requests: write - -jobs: - diff: - runs-on: ubuntu-latest - steps: - - uses: Metbcy/bomdrift@v1 - with: - config: .bomdrift.toml -"#; - -const INIT_SUPPRESS_WORKFLOW: &str = r#"name: bomdrift suppress - -on: - issue_comment: - types: [created] - -permissions: - contents: write - pull-requests: write - -jobs: - suppress: - if: | - github.event.issue.pull_request && - startsWith(github.event.comment.body, '/bomdrift suppress ') - runs-on: ubuntu-latest - steps: - - uses: Metbcy/bomdrift/comment-suppress@v1 -"#; - -fn load_sbom( - path: &Path, - format_hint: Option, - include_file_components: bool, -) -> Result { - let raw = fs::read_to_string(path) - .with_context(|| format!("reading SBOM file: {}", path.display()))?; - parse_sbom_bytes( - &raw, - &path.display().to_string(), - format_hint, - include_file_components, - ) -} - -fn parse_sbom_bytes( - raw: &str, - source_label: &str, - format_hint: Option, - include_file_components: bool, -) -> Result { - let value: serde_json::Value = - serde_json::from_str(raw).with_context(|| format!("parsing JSON in: {source_label}"))?; - let mut sbom = parse::parse_with_format(value, format_hint) - .with_context(|| format!("normalizing SBOM from: {source_label}"))?; - if !include_file_components { - parse::filter_file_components(&mut sbom); - } - Ok(sbom) -} - -#[allow(clippy::too_many_arguments)] -fn load_sbom_or_attestation( - path: Option<&Path>, - oci_ref: Option<&str>, - cosign_identity: Option<&str>, - cosign_issuer: Option<&str>, - format_hint: Option, - include_file_components: bool, - side: &str, - debug_calibration: bool, - debug_format: crate::cli::DebugFormat, -) -> Result { - if let Some(oci) = oci_ref { - let identity = cosign_identity.ok_or_else(|| { - anyhow::anyhow!( - "--{side}-attestation requires --cosign-identity (regex passed to cosign --certificate-identity-regexp)" - ) - })?; - let issuer = cosign_issuer.ok_or_else(|| { - anyhow::anyhow!( - "--{side}-attestation requires --cosign-issuer (URL passed to cosign --certificate-oidc-issuer)" - ) - })?; - let body = attestation::fetch_verified_sbom(oci, identity, issuer) - .with_context(|| format!("fetching --{side}-attestation {oci}"))?; - if debug_calibration { - // One row per verified attestation; surfaces the cert - // regex cosign accepted so adopters can confirm policy. - let _ = - write_attestation_calibration(&mut std::io::stderr(), oci, identity, debug_format); - } - return parse_sbom_bytes( - &body, - &format!("attestation:{oci}"), - format_hint, - include_file_components, - ); - } - let path = path.ok_or_else(|| { - anyhow::anyhow!( - "internal: {side} requires either a positional path or --{side}-attestation" - ) - })?; - load_sbom(path, format_hint, include_file_components) -} - -fn write_attestation_calibration( - out: &mut W, - oci_ref: &str, - identity: &str, - format: crate::cli::DebugFormat, -) -> std::io::Result<()> { - match format { - crate::cli::DebugFormat::Pipe => { - writeln!(out, "attestation|{oci_ref}|verified|{identity}") - } - crate::cli::DebugFormat::Jsonl => { - let row = serde_json::json!({ - "kind": "attestation", - "key": oci_ref, - "score": "verified", - "threshold": identity, - }); - writeln!(out, "{row}") - } - } -} - -#[cfg(test)] -mod tests { - #![allow( - clippy::unwrap_used, - clippy::expect_used, - clippy::panic, - clippy::todo, - clippy::unimplemented - )] - use super::*; - use std::collections::HashMap; - - use crate::enrich::typosquat::TyposquatFinding; - use crate::enrich::version_jump::VersionJumpFinding; - use crate::enrich::{LicenseViolation, Severity, VulnRef}; - use crate::model::{Component, Ecosystem, Relationship}; - - fn comp(name: &str) -> Component { - Component { - name: name.to_string(), - version: "1.0.0".to_string(), - ecosystem: Ecosystem::Npm, - purl: Some(format!("pkg:npm/{name}@1.0.0")), - licenses: Vec::new(), - supplier: None, - hashes: Vec::new(), - relationship: Relationship::Unknown, - source_url: None, - bom_ref: None, - } - } - - fn enrichment_with_cve_at(severity: Severity) -> Enrichment { - let mut vulns: HashMap> = HashMap::new(); - vulns.insert( - "pkg:npm/foo@1.0.0".into(), - vec![VulnRef { - id: "CVE-2025-1".into(), - severity, - aliases: Vec::new(), - epss_score: None, - kev: false, - }], - ); - Enrichment { - vulns, - ..Default::default() - } - } - - fn enrichment_with_cve() -> Enrichment { - // Severity::None is what every v0.2-era test implicitly assumed — the - // pre-severity world. Tests that don't care about the bucket use this. - enrichment_with_cve_at(Severity::None) - } - - fn enrichment_with_typosquat() -> Enrichment { - Enrichment { - typosquats: vec![TyposquatFinding { - component: comp("plain-crypto-js"), - closest: "crypto-js".to_string(), - score: 0.95, - }], - ..Default::default() - } - } - - fn enrichment_with_version_jump() -> Enrichment { - Enrichment { - version_jumps: vec![VersionJumpFinding { - before: comp("foo"), - after: comp("foo"), - before_major: 1, - after_major: 4, - }], - ..Default::default() - } - } - - fn cs_with_license_change() -> ChangeSet { - let mut before = comp("foo"); - before.licenses = vec!["MIT".into()]; - let mut after = comp("foo"); - after.licenses = vec!["GPL-3.0".into()]; - ChangeSet { - license_changed: vec![(before, after)], - ..Default::default() - } - } - - #[test] - fn fail_on_none_never_trips() { - assert!(!tripped( - &ChangeSet::default(), - &Enrichment::default(), - FailOn::None - )); - assert!(!tripped( - &cs_with_license_change(), - &enrichment_with_cve(), - FailOn::None - )); - } - - #[test] - fn fail_on_cve_trips_only_on_cve_findings() { - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_cve(), - FailOn::Cve - )); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_typosquat(), - FailOn::Cve - )); - assert!(!tripped( - &ChangeSet::default(), - &Enrichment::default(), - FailOn::Cve - )); - } - - #[test] - fn fail_on_critical_cve_filters_on_severity_high_or_above() { - // Critical and High advisories trip; Medium / Low / None don't. The - // doc on `tripped()` explains why High is included in the - // "critical-cve" bucket. - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_cve_at(Severity::Critical), - FailOn::CriticalCve - )); - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_cve_at(Severity::High), - FailOn::CriticalCve - )); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_cve_at(Severity::Medium), - FailOn::CriticalCve - )); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_cve_at(Severity::None), - FailOn::CriticalCve - )); - } - - #[test] - fn fail_on_cve_still_trips_on_severity_none_advisories() { - // --fail-on cve is the broad "any advisory" bucket; severity threading - // doesn't change its semantics. An advisory with unresolved severity - // still trips it (the alternative — silent suppression — would be the - // real footgun). - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_cve_at(Severity::None), - FailOn::Cve - )); - } - - #[test] - fn fail_on_typosquat_trips_only_on_typosquat_findings() { - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_typosquat(), - FailOn::Typosquat - )); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_cve(), - FailOn::Typosquat - )); - } - - #[test] - fn fail_on_any_trips_on_each_finding_kind_and_license_changes() { - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_cve(), - FailOn::Any - )); - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_typosquat(), - FailOn::Any - )); - assert!(tripped( - &ChangeSet::default(), - &enrichment_with_version_jump(), - FailOn::Any - )); - // license-changed-without-version-bump alone trips Any (the suspicious - // case lives on the ChangeSet, not the enrichment). - assert!(tripped( - &cs_with_license_change(), - &Enrichment::default(), - FailOn::Any - )); - assert!(!tripped( - &ChangeSet::default(), - &Enrichment::default(), - FailOn::Any - )); - } - - #[test] - fn fail_on_license_change_trips_only_on_license_changes() { - assert!(tripped( - &cs_with_license_change(), - &Enrichment::default(), - FailOn::LicenseChange - )); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_cve(), - FailOn::LicenseChange - )); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_typosquat(), - FailOn::LicenseChange - )); - } - - #[test] - fn fail_on_typosquat_ignores_license_change() { - // license_changed is a ChangeSet field, not an enrichment. The - // typosquat threshold is strictly about typosquat findings — license - // drift must NOT trip it (otherwise consumers using --fail-on=typosquat - // get unexpected exit-2's on every license correction). - assert!(!tripped( - &cs_with_license_change(), - &Enrichment::default(), - FailOn::Typosquat - )); - } - - #[test] - fn budget_trips_when_counts_exceed_limits() { - let cs = ChangeSet { - added: vec![comp("a"), comp("b")], - removed: vec![comp("c")], - version_changed: vec![(comp("d"), comp("d"))], - ..Default::default() - }; - assert!(budget_tripped(&cs, Some(1), None, None)); - assert!(budget_tripped(&cs, None, Some(0), None)); - assert!(budget_tripped(&cs, None, None, Some(0))); - assert!(!budget_tripped(&cs, Some(2), Some(1), Some(1))); - } - - #[test] - fn calibration_pipe_format_matches_v0_7_layout() { - let e = enrichment_with_typosquat(); - let mut buf = Vec::new(); - write_calibration_lines( - &e, - &mut buf, - crate::cli::DebugFormat::Pipe, - CalibrationOverrides::default(), - ); - let s = String::from_utf8(buf).unwrap(); - assert!(s.starts_with("typosquat|"), "got: {s}"); - assert_eq!( - s.matches('|').count(), - 3, - "pipe row has 4 fields → 3 separators; got: {s}" - ); - } - - #[test] - fn calibration_jsonl_format_emits_one_object_per_line() { - let e = enrichment_with_typosquat(); - let mut buf = Vec::new(); - write_calibration_lines( - &e, - &mut buf, - crate::cli::DebugFormat::Jsonl, - CalibrationOverrides::default(), - ); - let s = String::from_utf8(buf).unwrap(); - let lines: Vec<&str> = s.lines().collect(); - assert_eq!(lines.len(), 1); - let v: serde_json::Value = serde_json::from_str(lines[0]).expect("valid jsonl"); - assert_eq!(v["kind"], "typosquat"); - assert!(v["score"].is_number(), "numeric score in jsonl"); - assert!(v["threshold"].is_number()); - assert!(v["key"].is_string()); - } - - #[test] - fn calibration_jsonl_keeps_severity_label_as_string() { - let e = enrichment_with_cve_at(Severity::High); - let mut buf = Vec::new(); - write_calibration_lines( - &e, - &mut buf, - crate::cli::DebugFormat::Jsonl, - CalibrationOverrides::default(), - ); - let s = String::from_utf8(buf).unwrap(); - let v: serde_json::Value = serde_json::from_str(s.trim()).unwrap(); - assert_eq!(v["kind"], "cve"); - assert_eq!(v["score"], "HIGH"); - assert_eq!(v["threshold"], "high+"); - } - - #[test] - fn fail_on_kev_trips_when_any_advisory_kev_set() { - let mut e = enrichment_with_cve_at(Severity::Medium); - // Flip the kev flag on the single advisory. - for refs in e.vulns.values_mut() { - refs[0].kev = true; - } - assert!(tripped(&ChangeSet::default(), &e, FailOn::Kev)); - assert!(!tripped( - &ChangeSet::default(), - &enrichment_with_cve_at(Severity::Medium), - FailOn::Kev - )); - } - - #[test] - fn any_epss_threshold_gating() { - let mut e = enrichment_with_cve_at(Severity::Medium); - for refs in e.vulns.values_mut() { - refs[0].epss_score = Some(0.6); - } - assert!(any_epss_at_or_above(&e, 0.5)); - assert!(any_epss_at_or_above(&e, 0.6)); - assert!(!any_epss_at_or_above(&e, 0.7)); - } - - #[test] - fn calibration_emits_epss_and_kev_rows_when_set() { - let mut e = enrichment_with_cve_at(Severity::High); - for refs in e.vulns.values_mut() { - refs[0].epss_score = Some(0.87); - refs[0].kev = true; - } - let mut buf = Vec::new(); - write_calibration_lines( - &e, - &mut buf, - crate::cli::DebugFormat::Pipe, - CalibrationOverrides::default(), - ); - let s = String::from_utf8(buf).unwrap(); - assert!(s.contains("epss|"), "missing epss row: {s}"); - assert!(s.contains("kev|"), "missing kev row: {s}"); - } - - #[test] - fn calibration_license_row_includes_exception_detail() { - // v0.9.5: matched_rule on an exception-driven license violation - // must surface the exception identifier in the calibration tap - // so operators tuning policy see why a row fired. - let mut e = Enrichment::default(); - let component = crate::model::Component { - name: "llvm-sys".into(), - version: "1.0.0".into(), - ecosystem: crate::model::Ecosystem::Cargo, - purl: Some("pkg:cargo/llvm-sys@1.0.0".into()), - licenses: vec!["Apache-2.0 WITH LLVM-exception".into()], - supplier: None, - hashes: Vec::new(), - relationship: crate::model::Relationship::Unknown, - source_url: None, - bom_ref: None, - }; - e.license_violations.push(LicenseViolation { - component, - license: "Apache-2.0 WITH LLVM-exception".into(), - matched_rule: "exception:LLVM-exception denied".into(), - kind: crate::enrich::LicenseViolationKind::Deny, - }); - let mut buf = Vec::new(); - write_calibration_lines( - &e, - &mut buf, - crate::cli::DebugFormat::Pipe, - CalibrationOverrides::default(), - ); - let s = String::from_utf8(buf).unwrap(); - assert!( - s.contains("license|"), - "missing license calibration row: {s}" - ); - assert!( - s.contains("exception:LLVM-exception denied"), - "row must surface matched_rule with exception detail: {s}" - ); - } - - #[test] - fn fail_on_license_violation_trips() { - use crate::enrich::{LicenseViolation, LicenseViolationKind}; - let mut e = Enrichment::default(); - e.license_violations.push(LicenseViolation { - component: comp("foo"), - license: "GPL-3.0-only".into(), - matched_rule: "deny: GPL-3.0-only".into(), - kind: LicenseViolationKind::Deny, - }); - assert!(tripped(&ChangeSet::default(), &e, FailOn::LicenseViolation)); - assert!(tripped(&ChangeSet::default(), &e, FailOn::Any)); - assert!(!tripped( - &ChangeSet::default(), - &Enrichment::default(), - FailOn::LicenseViolation - )); - } -} diff --git a/src/run/baseline.rs b/src/run/baseline.rs new file mode 100644 index 0000000..b1cb96d --- /dev/null +++ b/src/run/baseline.rs @@ -0,0 +1,66 @@ +use anyhow::{Context, Result}; + +use crate::baseline; +use crate::cli::BaselineAction; +use crate::clock; + +pub(super) fn run_baseline(action: BaselineAction) -> Result<()> { + match action { + BaselineAction::Add(args) => { + // Validate --expires upfront so a typo'd date doesn't write a + // bad entry that errors on the NEXT diff load. + if let Some(s) = &args.expires { + clock::parse_ymd(s) + .with_context(|| format!("--expires must be YYYY-MM-DD, got {s:?}"))?; + } + + // --from-comment overrides positional id/reason. Used by the + // GitLab webhook bridge (Phase L). Non-zero exit when the + // body has no directive — silent no-op would let mis-configured + // bridges look like they worked. + let (id, reason_owned) = if let Some(body) = &args.from_comment { + match baseline::parse_comment_directive(body)? { + Some((id, reason)) => (id, reason), + None => { + eprintln!( + "bomdrift: --from-comment body contained no `/bomdrift suppress ` directive" + ); + std::process::exit(1); + } + } + } else { + let Some(id) = args.id.clone() else { + eprintln!( + "bomdrift baseline add: missing required ADVISORY_ID (use a positional argument or --from-comment )" + ); + std::process::exit(2); + }; + (id, args.reason.clone()) + }; + + let outcome = baseline::add_suppression_full( + &args.path, + &id, + args.expires.as_deref(), + reason_owned.as_deref(), + )?; + match outcome { + baseline::AddOutcome::Added => { + eprintln!( + "bomdrift: added '{id}' to {path}", + id = id.trim(), + path = args.path.display(), + ); + } + baseline::AddOutcome::AlreadyPresent => { + eprintln!( + "bomdrift: '{id}' already present in {path}; no change", + id = id.trim(), + path = args.path.display(), + ); + } + } + Ok(()) + } + } +} diff --git a/src/run/calibration.rs b/src/run/calibration.rs new file mode 100644 index 0000000..5eef520 --- /dev/null +++ b/src/run/calibration.rs @@ -0,0 +1,231 @@ +use crate::enrich::Enrichment; + +/// Emit one CSV-friendly line per finding to the given writer, capturing +/// the score and the constant it was compared against. Off by default +/// (driven by `--debug-calibration`); when set, the user pipes stderr +/// to a file and feeds the resulting CSV back as tuning data. +/// +/// Schema: `kind|key|score|threshold` — pipe-delimited because purls +/// already contain commas (`pkg:npm/@scope/name`) which would force CSV +/// quoting. `kind` ∈ {`typosquat`, `version-jump`, `maintainer-age`, +/// `cve`}. `score` is the underlying numeric the enricher computed +/// (similarity for typosquat, major-version delta for version-jump, +/// days-old for maintainer-age, max CVSS-equivalent for cve); +/// `threshold` is the constant the score was gated against. CVE rows +/// surface every advisory (no internal threshold) so adopters can see +/// the score distribution before tuning `--fail-on critical-cve`. +/// Active overrides for the configurable calibration thresholds. Threaded +/// into [`write_calibration_lines`] so emitted rows reflect the effective +/// threshold the enricher actually used, not the unconditional const default. +#[derive(Debug, Default, Clone, Copy)] +pub(crate) struct CalibrationOverrides { + pub similarity_threshold: Option, + pub young_maintainer_days: Option, + pub multi_major_delta: Option, +} + +pub(super) fn write_calibration_lines( + e: &Enrichment, + out: &mut W, + format: crate::cli::DebugFormat, + overrides: CalibrationOverrides, +) { + use crate::enrich::maintainer::YOUNG_MAINTAINER_DAYS; + use crate::enrich::typosquat::SIMILARITY_THRESHOLD; + use crate::enrich::version_jump::MIN_MAJOR_DELTA; + + let active_similarity = overrides + .similarity_threshold + .unwrap_or(SIMILARITY_THRESHOLD); + let active_young = overrides + .young_maintainer_days + .unwrap_or(YOUNG_MAINTAINER_DAYS); + let active_major_delta = overrides.multi_major_delta.unwrap_or(MIN_MAJOR_DELTA); + + for f in &e.typosquats { + write_calibration_row( + out, + "typosquat", + f.component + .purl + .as_deref() + .unwrap_or(f.component.name.as_str()), + CalibrationScore::Float(f.score), + CalibrationThreshold::Float(active_similarity), + format, + ); + } + for f in &e.version_jumps { + write_calibration_row( + out, + "version-jump", + f.after.purl.as_deref().unwrap_or(f.after.name.as_str()), + CalibrationScore::Int(f.after_major.saturating_sub(f.before_major) as i64), + CalibrationThreshold::Int(active_major_delta as i64), + format, + ); + } + for f in &e.maintainer_age { + write_calibration_row( + out, + "maintainer-age", + f.component + .purl + .as_deref() + .unwrap_or(f.component.name.as_str()), + CalibrationScore::Int(f.days_old), + CalibrationThreshold::Int(active_young), + format, + ); + } + for (purl, refs) in &e.vulns { + for vuln in refs { + // Severity has no numeric score in our model; emit the bucket + // label as a non-numeric "score" so the row stays well-formed + // (string in JSONL, plain token in pipe). + write_calibration_row( + out, + "cve", + &format!("{purl}#{}", vuln.id), + CalibrationScore::Text(vuln.severity.as_str()), + CalibrationThreshold::Text("high+"), + format, + ); + for cve in vuln.cves() { + if let Some(score) = vuln.epss_score { + write_calibration_row( + out, + "epss", + &format!("{purl}+{cve}"), + CalibrationScore::Float(score as f64), + CalibrationThreshold::Float(0.5), + format, + ); + } + if vuln.kev { + write_calibration_row( + out, + "kev", + &format!("{purl}+{cve}"), + CalibrationScore::Text("true"), + CalibrationThreshold::Text("kev"), + format, + ); + } + } + } + } + for v in &e.license_violations { + // Threshold field carries the precise matched_rule (e.g. + // "deny: GPL-3.0-only" or "exception:LLVM-exception denied") + // so calibration consumers see the WHY, not just the kind tag. + write_calibration_row( + out, + "license", + v.component + .purl + .as_deref() + .unwrap_or(v.component.name.as_str()), + CalibrationScore::Text(&v.license), + CalibrationThreshold::Text(&v.matched_rule), + format, + ); + } + for f in &e.recently_published { + write_calibration_row( + out, + "recently-published", + f.component + .purl + .as_deref() + .unwrap_or(f.component.name.as_str()), + CalibrationScore::Int(f.days_old), + CalibrationThreshold::Int(crate::enrich::registry::MIN_PUBLISHED_AGE_DAYS), + format, + ); + } + for f in &e.deprecated { + write_calibration_row( + out, + "deprecated", + f.component + .purl + .as_deref() + .unwrap_or(f.component.name.as_str()), + CalibrationScore::Text(f.message.as_deref().unwrap_or("(deprecated)")), + CalibrationThreshold::Text("any"), + format, + ); + } + for f in &e.maintainer_set_changed { + write_calibration_row( + out, + "maintainer-set-changed", + f.after.purl.as_deref().unwrap_or(f.after.name.as_str()), + CalibrationScore::Int((f.added.len() + f.removed.len()) as i64), + CalibrationThreshold::Int(1), + format, + ); + } +} + +/// Numeric or symbolic score for a calibration row. Float/Int rendered +/// without quotes in JSONL; Text rendered as a JSON string. +pub(crate) enum CalibrationScore<'a> { + Float(f64), + Int(i64), + Text(&'a str), +} + +pub(crate) enum CalibrationThreshold<'a> { + Float(f64), + Int(i64), + Text(&'a str), +} + +/// Single dispatch point for both pipe and JSONL calibration formats. +/// Adding a new finding kind is one call site, not two — the format +/// branches stay localized to this helper. +pub(crate) fn write_calibration_row( + out: &mut W, + kind: &str, + key: &str, + score: CalibrationScore<'_>, + threshold: CalibrationThreshold<'_>, + format: crate::cli::DebugFormat, +) { + match format { + crate::cli::DebugFormat::Pipe => { + let score_s = match score { + CalibrationScore::Float(v) => format!("{v:.4}"), + CalibrationScore::Int(v) => v.to_string(), + CalibrationScore::Text(s) => s.to_string(), + }; + let thr_s = match threshold { + CalibrationThreshold::Float(v) => format!("{v:.4}"), + CalibrationThreshold::Int(v) => v.to_string(), + CalibrationThreshold::Text(s) => s.to_string(), + }; + let _ = writeln!(out, "{kind}|{key}|{score_s}|{thr_s}"); + } + crate::cli::DebugFormat::Jsonl => { + let score_v = match score { + CalibrationScore::Float(v) => serde_json::Value::from(v), + CalibrationScore::Int(v) => serde_json::Value::from(v), + CalibrationScore::Text(s) => serde_json::Value::from(s), + }; + let thr_v = match threshold { + CalibrationThreshold::Float(v) => serde_json::Value::from(v), + CalibrationThreshold::Int(v) => serde_json::Value::from(v), + CalibrationThreshold::Text(s) => serde_json::Value::from(s), + }; + let line = serde_json::json!({ + "kind": kind, + "key": key, + "score": score_v, + "threshold": thr_v, + }); + let _ = writeln!(out, "{line}"); + } + } +} diff --git a/src/run/diff.rs b/src/run/diff.rs new file mode 100644 index 0000000..b82d16b --- /dev/null +++ b/src/run/diff.rs @@ -0,0 +1,467 @@ +use std::fs; +use std::io::IsTerminal; +use std::path::Path; + +use anyhow::{Context, Result}; + +use crate::cli::{self, DiffArgs, FailOn, OutputFormat}; +use crate::diff::ChangeSet; +use crate::{attestation, baseline, config, diff, enrich, model, parse, plugin, render, vex}; + +use super::FAIL_ON_EXIT_CODE; +use super::calibration::{CalibrationOverrides, write_calibration_lines}; +use super::predicates::{any_epss_at_or_above, budget_tripped, tripped}; + +pub(super) fn run_diff(mut args: DiffArgs) -> Result<()> { + config::apply_diff_config(&mut args)?; + + if args.require_attestation + && (args.before_attestation.is_none() || args.after_attestation.is_none()) + { + anyhow::bail!( + "--require-attestation needs both --before-attestation and --after-attestation" + ); + } + + let output = args.output.unwrap_or(OutputFormat::Terminal); + let format = args.format.unwrap_or(cli::InputFormat::Auto); + let fail_on = args.fail_on.unwrap_or(FailOn::None); + + let format_hint = format.to_sbom_format(); + let before = load_sbom_or_attestation( + args.before.as_deref(), + args.before_attestation.as_deref(), + args.cosign_identity.as_deref(), + args.cosign_issuer.as_deref(), + format_hint, + args.include_file_components, + "before", + args.debug_calibration, + args.debug_calibration_format, + )?; + let after = load_sbom_or_attestation( + args.after.as_deref(), + args.after_attestation.as_deref(), + args.cosign_identity.as_deref(), + args.cosign_issuer.as_deref(), + format_hint, + args.include_file_components, + "after", + args.debug_calibration, + args.debug_calibration_format, + )?; + + let mut cs = diff::diff(&before, &after); + + let mut enrichment = if args.no_osv { + enrich::Enrichment::default() + } else { + // OSV enrichment is best-effort. Network failures must not block the diff + // from rendering — a PR review is still useful without CVE data. + match enrich::osv::enrich_cached_with_ttl(&cs, args.no_osv_cache, args.cache_ttl_hours) { + Ok(e) => e, + Err(err) => { + eprintln!("warning: OSV enrichment failed, continuing without it: {err:#}"); + enrich::Enrichment::default() + } + } + }; + + // EPSS / KEV enrichment piggyback on OSV's VulnRefs and only have + // anything to do when there are CVE-aliased advisories. Skip both if + // there are no vulns. + if !args.no_epss + && !enrichment.vulns.is_empty() + && let Err(err) = enrich::epss::enrich_with_ttl(&mut enrichment, args.cache_ttl_hours) + { + eprintln!("warning: EPSS enrichment failed, continuing without it: {err:#}"); + } + if !args.no_kev + && !enrichment.vulns.is_empty() + && let Err(err) = enrich::kev::enrich_with_ttl(&mut enrichment, args.cache_ttl_hours) + { + eprintln!("warning: KEV enrichment failed, continuing without it: {err:#}"); + } + + // Typosquat detection is pure-compute (embedded reference list) and always + // runs, regardless of `--no-osv`. Findings are informational. + enrichment.typosquats = + enrich::typosquat::enrich_with_threshold(&cs, args.typosquat_similarity_threshold); + + // Multi-major version-jump detection is pure-compute and also always runs. + // Findings are informational. + enrichment.version_jumps = enrich::version_jump::enrich_with(&cs, args.multi_major_delta); + + // Maintainer-age enrichment hits the GitHub REST API; gated behind + // `--no-maintainer-age` for offline runs. Best-effort: failures warn and + // continue, mirroring the OSV enricher's contract. + if !args.no_maintainer_age { + match enrich::maintainer::enrich_with_hosts( + &cs, + "https://api.github.com", + std::time::Duration::from_secs(15), + args.young_maintainer_days, + ) { + Ok(findings) => enrichment.maintainer_age = findings, + Err(err) => { + eprintln!( + "warning: maintainer-age enrichment failed, continuing without it: {err:#}" + ); + } + } + } + + // License-policy enrichment (Phase D, v0.8). Pure-compute, runs after + // OSV/EPSS/KEV. Empty allow + empty deny means "no policy" — the + // enricher returns no violations. + let license_policy = enrich::license::Policy { + allow: args.allow_licenses.clone(), + deny: args.deny_licenses.clone(), + allow_ambiguous: args.allow_ambiguous_licenses, + allow_exceptions: args.allow_exception.clone(), + deny_exceptions: args.deny_exception.clone(), + }; + enrichment.license_violations = enrich::license::enrich(&cs, &license_policy); + + // Registry-metadata enrichers (Phase K, v0.9). Best-effort — a + // registry timeout returns Ok with no findings. + if !args.no_registry { + let findings = + enrich::registry::enrich(&cs, args.recently_published_days, args.cache_ttl_hours); + enrichment.recently_published = findings.recently_published; + enrichment.deprecated = findings.deprecated; + enrichment.maintainer_set_changed = findings.maintainer_set_changed; + } + + // Plugin findings (Phase C, v0.9.6). Run after every built-in + // enricher so plugins observe the same `cs` view bomdrift renders; + // before baseline so plugin findings can be baselined too. Plugin + // failures degrade gracefully — a malformed manifest aborts the + // run (config error), but plugin runtime failures emit only a + // BOMDRIFT_DEBUG-gated stderr warning and contribute no findings. + if !args.plugin.is_empty() { + let mut manifests = Vec::with_capacity(args.plugin.len()); + for path in &args.plugin { + let manifest = plugin::load_manifest(path) + .with_context(|| format!("loading --plugin {}", path.display()))?; + manifests.push(manifest); + } + enrichment.plugin_findings = plugin::run_plugins(&manifests, &cs); + } + + // Apply the baseline AFTER all enrichers run — suppression operates on + // the realized finding set, not on intermediate inputs. This keeps the + // baseline file format stable as new enrichers are added: a new finding + // type that the baseline doesn't know about simply isn't suppressed. + let mut baseline_entries: Vec = Vec::new(); + if let Some(path) = &args.baseline { + let baseline = baseline::Baseline::load(path)?; + for ent in &baseline.expired_entries { + eprintln!( + "warning: baseline entry {id}{purl} expired {expires}; finding will surface in this run{reason}", + id = ent.id, + purl = ent + .purl + .as_deref() + .map(|p| format!(" ({p})")) + .unwrap_or_default(), + expires = ent.expires.as_deref().unwrap_or(""), + reason = ent + .reason + .as_deref() + .map(|r| format!(" — was: {r}")) + .unwrap_or_default(), + ); + } + baseline_entries = baseline.entries.clone(); + baseline::apply(&mut cs, &mut enrichment, &baseline); + } + + // VEX consumption (Phase G, v0.9). Applied AFTER baseline so VEX + // statements operate on the post-baseline view — this matches what + // a downstream tool would see and avoids double-counting "already + // suppressed" findings in the VEX-suppressed tally. + if !args.vex.is_empty() { + match vex::load(&args.vex) { + Ok(stmts) => { + let idx = vex::VexIndex::build(stmts); + vex::apply(&mut enrichment, &idx); + } + Err(err) => { + eprintln!("warning: VEX load failed, continuing without VEX filtering: {err:#}"); + } + } + } + + // VEX emission (Phase H, v0.9). Writes a single OpenVEX 0.2.0 doc + // to the requested path, covering baseline-suppressed entries and + // un-suppressed findings. Byte-deterministic when SOURCE_DATE_EPOCH + // is set. + if let Some(path) = &args.emit_vex { + let author = args + .vex_author + .clone() + .or_else(|| args.repo_url.clone()) + .or_else(|| std::env::var("BOMDRIFT_REPO_URL").ok()) + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| "bomdrift".to_string()); + let default_just = args + .vex_default_justification + .clone() + .unwrap_or_else(|| "vulnerable_code_not_in_execute_path".to_string()); + let opts = vex::EmitOptions { + author: &author, + default_justification: &default_just, + baseline_entries: &baseline_entries, + }; + let body = vex::emit(&cs, &enrichment, &opts); + std::fs::write(path, body) + .with_context(|| format!("writing --emit-vex {}", path.display()))?; + } + + // Calibration tap. Off by default; opt-in via `--debug-calibration`. + // Emits one CSV-friendly line per finding to stderr so an adopter + // can run the flag across a representative N PRs and feed the + // resulting CSV back as tuning data (issue #5). The output is + // deliberately plain — no JSON, no schema versioning — because the + // intended consumer is a one-off awk/jq pipeline, not a long-lived + // integration. Format: `kind|key|score|threshold`. No telemetry: the + // user owns the bytes and pipes them wherever they want. + if args.debug_calibration { + write_calibration_lines( + &enrichment, + &mut std::io::stderr(), + args.debug_calibration_format, + CalibrationOverrides { + similarity_threshold: args.typosquat_similarity_threshold, + young_maintainer_days: args.young_maintainer_days, + multi_major_delta: args.multi_major_delta, + }, + ); + } + + // CLI flag wins; otherwise the env var supplies the default. Empty + // strings are treated as unset to match shell-script callers that + // pass `BOMDRIFT_REPO_URL=` to clear the value rather than `unset`. + // GitLab CI exposes the project URL as `CI_PROJECT_URL` (analog of + // GitHub's `GITHUB_REPOSITORY`-derived URL); honor it as a third + // fallback so users on the GitLab template don't have to plumb + // `BOMDRIFT_REPO_URL` themselves. + let repo_url = args + .repo_url + .clone() + .or_else(|| std::env::var("BOMDRIFT_REPO_URL").ok()) + .or_else(|| std::env::var("CI_PROJECT_URL").ok()) + .or_else(|| std::env::var("BITBUCKET_GIT_HTTP_ORIGIN").ok()) + .or_else(|| std::env::var("BUILD_REPOSITORY_URI").ok()) + .filter(|s| !s.is_empty()); + + // Platform precedence: explicit `--platform` (or `[diff] platform` + // in `.bomdrift.toml`, already merged into `args.platform`) wins; + // otherwise auto-detect from CI env. Detection order: GitLab + // (`GITLAB_CI=true`), Bitbucket (`BITBUCKET_BUILD_NUMBER`), Azure + // DevOps (`TF_BUILD`), then default GitHub. + let platform = args.platform.unwrap_or_else(|| { + if std::env::var("GITLAB_CI").is_ok_and(|v| v == "true") { + crate::cli::Platform::GitLab + } else if std::env::var("BITBUCKET_BUILD_NUMBER").is_ok() { + crate::cli::Platform::Bitbucket + } else if std::env::var("TF_BUILD").is_ok() { + crate::cli::Platform::AzureDevOps + } else { + crate::cli::Platform::GitHub + } + }); + let md_options = render::markdown::Options { + summary_only: args.summary_only, + findings_only: args.findings_only, + repo_url, + platform: platform.into(), + }; + let rendered = match output { + OutputFormat::Terminal => { + // ANSI escapes are only safe on a real TTY. Piped/redirected stdout + // (e.g. captured by a CI step that posts a PR comment) must stay + // plain markdown so it renders correctly in a comment body. + if std::io::stdout().is_terminal() { + render::term::render(&cs, &enrichment) + } else { + render::markdown::render_with_options(&cs, &enrichment, md_options) + } + } + OutputFormat::Markdown => { + render::markdown::render_with_options(&cs, &enrichment, md_options) + } + OutputFormat::Json => render::json::render(&cs, &enrichment), + OutputFormat::Sarif => render::sarif::render(&cs, &enrichment), + OutputFormat::Html => render::html::render(&cs, &enrichment), + }; + + if let Some(path) = &args.output_file { + std::fs::write(path, &rendered) + .with_context(|| format!("writing --output-file {}", path.display()))?; + } else { + print!("{rendered}"); + } + + // Body must be fully written before we exit-2 — the action's `tee` + // wrapper still wants the comment posted even when fail-on trips. + let budget_tripped = budget_tripped( + &cs, + args.max_added, + args.max_removed, + args.max_version_changed, + ); + if budget_tripped { + log_budget_trips( + &cs, + args.max_added, + args.max_removed, + args.max_version_changed, + ); + } + + let epss_tripped = args + .fail_on_epss + .is_some_and(|threshold| any_epss_at_or_above(&enrichment, threshold)); + if epss_tripped { + let threshold = args.fail_on_epss.unwrap_or(0.0); + eprintln!( + "bomdrift: policy gate tripped: --fail-on-epss {threshold:.2} (one or more advisories at or above this score)" + ); + } + + if tripped(&cs, &enrichment, fail_on) || budget_tripped || epss_tripped { + std::process::exit(FAIL_ON_EXIT_CODE); + } + + Ok(()) +} + +fn log_budget_trips( + cs: &ChangeSet, + max_added: Option, + max_removed: Option, + max_version_changed: Option, +) { + if let Some(max) = max_added.filter(|max| cs.added.len() > *max) { + eprintln!( + "bomdrift: policy gate tripped: added count {} exceeds --max-added {}", + cs.added.len(), + max + ); + } + if let Some(max) = max_removed.filter(|max| cs.removed.len() > *max) { + eprintln!( + "bomdrift: policy gate tripped: removed count {} exceeds --max-removed {}", + cs.removed.len(), + max + ); + } + if let Some(max) = max_version_changed.filter(|max| cs.version_changed.len() > *max) { + eprintln!( + "bomdrift: policy gate tripped: version-changed count {} exceeds --max-version-changed {}", + cs.version_changed.len(), + max + ); + } +} + +fn load_sbom( + path: &Path, + format_hint: Option, + include_file_components: bool, +) -> Result { + let raw = fs::read_to_string(path) + .with_context(|| format!("reading SBOM file: {}", path.display()))?; + parse_sbom_bytes( + &raw, + &path.display().to_string(), + format_hint, + include_file_components, + ) +} + +fn parse_sbom_bytes( + raw: &str, + source_label: &str, + format_hint: Option, + include_file_components: bool, +) -> Result { + let value: serde_json::Value = + serde_json::from_str(raw).with_context(|| format!("parsing JSON in: {source_label}"))?; + let mut sbom = parse::parse_with_format(value, format_hint) + .with_context(|| format!("normalizing SBOM from: {source_label}"))?; + if !include_file_components { + parse::filter_file_components(&mut sbom); + } + Ok(sbom) +} + +#[allow(clippy::too_many_arguments)] +fn load_sbom_or_attestation( + path: Option<&Path>, + oci_ref: Option<&str>, + cosign_identity: Option<&str>, + cosign_issuer: Option<&str>, + format_hint: Option, + include_file_components: bool, + side: &str, + debug_calibration: bool, + debug_format: crate::cli::DebugFormat, +) -> Result { + if let Some(oci) = oci_ref { + let identity = cosign_identity.ok_or_else(|| { + anyhow::anyhow!( + "--{side}-attestation requires --cosign-identity (regex passed to cosign --certificate-identity-regexp)" + ) + })?; + let issuer = cosign_issuer.ok_or_else(|| { + anyhow::anyhow!( + "--{side}-attestation requires --cosign-issuer (URL passed to cosign --certificate-oidc-issuer)" + ) + })?; + let body = attestation::fetch_verified_sbom(oci, identity, issuer) + .with_context(|| format!("fetching --{side}-attestation {oci}"))?; + if debug_calibration { + // One row per verified attestation; surfaces the cert + // regex cosign accepted so adopters can confirm policy. + let _ = + write_attestation_calibration(&mut std::io::stderr(), oci, identity, debug_format); + } + return parse_sbom_bytes( + &body, + &format!("attestation:{oci}"), + format_hint, + include_file_components, + ); + } + let path = path.ok_or_else(|| { + anyhow::anyhow!( + "internal: {side} requires either a positional path or --{side}-attestation" + ) + })?; + load_sbom(path, format_hint, include_file_components) +} + +fn write_attestation_calibration( + out: &mut W, + oci_ref: &str, + identity: &str, + format: crate::cli::DebugFormat, +) -> std::io::Result<()> { + match format { + crate::cli::DebugFormat::Pipe => { + writeln!(out, "attestation|{oci_ref}|verified|{identity}") + } + crate::cli::DebugFormat::Jsonl => { + let row = serde_json::json!({ + "kind": "attestation", + "key": oci_ref, + "score": "verified", + "threshold": identity, + }); + writeln!(out, "{row}") + } + } +} diff --git a/src/run/init.rs b/src/run/init.rs new file mode 100644 index 0000000..513782f --- /dev/null +++ b/src/run/init.rs @@ -0,0 +1,90 @@ +use std::fs; +use std::path::Path; + +use anyhow::{Context, Result}; + +use crate::cli::InitArgs; + +pub(super) fn run_init(args: InitArgs) -> Result<()> { + write_scaffold_file(Path::new(".bomdrift.toml"), INIT_CONFIG, args.force)?; + if !args.config_only { + write_scaffold_file( + Path::new(".github/workflows/sbom-diff.yml"), + INIT_SBOM_WORKFLOW, + args.force, + )?; + write_scaffold_file( + Path::new(".github/workflows/bomdrift-suppress.yml"), + INIT_SUPPRESS_WORKFLOW, + args.force, + )?; + } + eprintln!("bomdrift: initialized repository files"); + Ok(()) +} + +fn write_scaffold_file(path: &Path, contents: &str, force: bool) -> Result<()> { + if path.exists() && !force { + anyhow::bail!( + "{} already exists; re-run with --force to overwrite", + path.display() + ); + } + if let Some(parent) = path.parent().filter(|p| !p.as_os_str().is_empty()) { + fs::create_dir_all(parent) + .with_context(|| format!("creating parent directory: {}", parent.display()))?; + } + fs::write(path, contents).with_context(|| format!("writing scaffold file: {}", path.display())) +} + +const INIT_CONFIG: &str = r#"# bomdrift repo policy. +# CLI flags override these defaults for one-off runs. + +[diff] +fail_on = "critical-cve" +baseline = ".bomdrift/baseline.json" +findings_only = false + +# Optional churn budgets. Uncomment to fail the workflow when a PR changes too +# many dependencies at once. +# max_added = 25 +# max_removed = 50 +# max_version_changed = 10 +"#; + +const INIT_SBOM_WORKFLOW: &str = r#"name: SBOM diff + +on: pull_request + +permissions: + contents: read + pull-requests: write + +jobs: + diff: + runs-on: ubuntu-latest + steps: + - uses: Metbcy/bomdrift@v1 + with: + config: .bomdrift.toml +"#; + +const INIT_SUPPRESS_WORKFLOW: &str = r#"name: bomdrift suppress + +on: + issue_comment: + types: [created] + +permissions: + contents: write + pull-requests: write + +jobs: + suppress: + if: | + github.event.issue.pull_request && + startsWith(github.event.comment.body, '/bomdrift suppress ') + runs-on: ubuntu-latest + steps: + - uses: Metbcy/bomdrift/comment-suppress@v1 +"#; diff --git a/src/run/mod.rs b/src/run/mod.rs new file mode 100644 index 0000000..4efa47d --- /dev/null +++ b/src/run/mod.rs @@ -0,0 +1,34 @@ +mod baseline; +mod calibration; +mod diff; +mod init; +mod predicates; +#[cfg(test)] +mod tests; + +use anyhow::Result; + +use crate::cli::{Cli, Command}; +use crate::refresh; + +/// Process exit code emitted when `--fail-on` trips. Distinct from clap's +/// usage-error exit (`2`-ish on parse failure) because clap exits before +/// `run` is called — there's no overlap window where this code is ambiguous. +pub const FAIL_ON_EXIT_CODE: i32 = 2; + +pub use predicates::{any_epss_at_or_above, any_kev, budget_tripped, tripped}; +// Re-export crate-private calibration helpers so they remain reachable as +// `crate::run::` for tests and any future cross-module consumers. +#[allow(unused_imports)] +pub(crate) use calibration::{ + CalibrationOverrides, CalibrationScore, CalibrationThreshold, write_calibration_row, +}; + +pub fn run(cli: Cli) -> Result<()> { + match cli.command { + Command::Diff(args) => diff::run_diff(*args), + Command::RefreshTyposquat(args) => refresh::run(args), + Command::Baseline { action } => baseline::run_baseline(action), + Command::Init(args) => init::run_init(args), + } +} diff --git a/src/run/predicates.rs b/src/run/predicates.rs new file mode 100644 index 0000000..ada446a --- /dev/null +++ b/src/run/predicates.rs @@ -0,0 +1,56 @@ +use crate::cli::FailOn; +use crate::diff::ChangeSet; +use crate::enrich::{Enrichment, Severity}; + +/// Pure helper: does this `(changeset, enrichment)` pair trip the configured +/// fail-on threshold? Side-effect-free so the policy is easy to unit-test +/// without spinning up the full pipeline. +/// +/// `FailOn::CriticalCve` filters on real severity now that OSV `/v1/vulns/{id}` +/// is fetched; only advisories with [`Severity::High`] or higher trip it. +/// (High is included because GHSA's `CRITICAL` label is relatively rare — +/// many actively-exploited supply-chain advisories ship as `HIGH`. Treating +/// "critical-cve" as "high-or-critical" matches what the option's name +/// communicates to a CI policy author: "block on the actionable bucket".) +pub fn tripped(cs: &ChangeSet, e: &Enrichment, threshold: FailOn) -> bool { + match threshold { + FailOn::None => false, + FailOn::Cve => !e.vulns.is_empty(), + FailOn::CriticalCve => any_advisory_at_or_above(e, Severity::High), + FailOn::Typosquat => !e.typosquats.is_empty(), + FailOn::LicenseChange => !cs.license_changed.is_empty(), + FailOn::Kev => any_kev(e), + FailOn::LicenseViolation => !e.license_violations.is_empty(), + FailOn::RecentlyPublished => !e.recently_published.is_empty(), + FailOn::Deprecated => !e.deprecated.is_empty(), + FailOn::Any => e.has_findings() || !cs.license_changed.is_empty() || any_kev(e), + } +} + +/// True when any advisory across all components has its CISA KEV flag set. +pub fn any_kev(e: &Enrichment) -> bool { + e.vulns.values().any(|refs| refs.iter().any(|r| r.kev)) +} + +/// True when any advisory has an EPSS score >= the threshold. +pub fn any_epss_at_or_above(e: &Enrichment, threshold: f32) -> bool { + e.vulns.values().any(|refs| { + refs.iter() + .any(|r| r.epss_score.is_some_and(|s| s >= threshold)) + }) +} + +pub fn budget_tripped( + cs: &ChangeSet, + max_added: Option, + max_removed: Option, + max_version_changed: Option, +) -> bool { + max_added.is_some_and(|max| cs.added.len() > max) + || max_removed.is_some_and(|max| cs.removed.len() > max) + || max_version_changed.is_some_and(|max| cs.version_changed.len() > max) +} + +pub(super) fn any_advisory_at_or_above(e: &Enrichment, threshold: Severity) -> bool { + e.vulns.values().flatten().any(|v| v.severity >= threshold) +} diff --git a/src/run/tests.rs b/src/run/tests.rs new file mode 100644 index 0000000..014fd86 --- /dev/null +++ b/src/run/tests.rs @@ -0,0 +1,417 @@ +#![allow( + clippy::unwrap_used, + clippy::expect_used, + clippy::panic, + clippy::todo, + clippy::unimplemented +)] +use std::collections::HashMap; + +use super::calibration::{CalibrationOverrides, write_calibration_lines}; +use super::predicates::{any_epss_at_or_above, budget_tripped, tripped}; + +use crate::cli::FailOn; +use crate::diff::ChangeSet; +use crate::enrich::typosquat::TyposquatFinding; +use crate::enrich::version_jump::VersionJumpFinding; +use crate::enrich::{Enrichment, LicenseViolation, Severity, VulnRef}; +use crate::model::{Component, Ecosystem, Relationship}; + +fn comp(name: &str) -> Component { + Component { + name: name.to_string(), + version: "1.0.0".to_string(), + ecosystem: Ecosystem::Npm, + purl: Some(format!("pkg:npm/{name}@1.0.0")), + licenses: Vec::new(), + supplier: None, + hashes: Vec::new(), + relationship: Relationship::Unknown, + source_url: None, + bom_ref: None, + } +} + +fn enrichment_with_cve_at(severity: Severity) -> Enrichment { + let mut vulns: HashMap> = HashMap::new(); + vulns.insert( + "pkg:npm/foo@1.0.0".into(), + vec![VulnRef { + id: "CVE-2025-1".into(), + severity, + aliases: Vec::new(), + epss_score: None, + kev: false, + }], + ); + Enrichment { + vulns, + ..Default::default() + } +} + +fn enrichment_with_cve() -> Enrichment { + // Severity::None is what every v0.2-era test implicitly assumed — the + // pre-severity world. Tests that don't care about the bucket use this. + enrichment_with_cve_at(Severity::None) +} + +fn enrichment_with_typosquat() -> Enrichment { + Enrichment { + typosquats: vec![TyposquatFinding { + component: comp("plain-crypto-js"), + closest: "crypto-js".to_string(), + score: 0.95, + }], + ..Default::default() + } +} + +fn enrichment_with_version_jump() -> Enrichment { + Enrichment { + version_jumps: vec![VersionJumpFinding { + before: comp("foo"), + after: comp("foo"), + before_major: 1, + after_major: 4, + }], + ..Default::default() + } +} + +fn cs_with_license_change() -> ChangeSet { + let mut before = comp("foo"); + before.licenses = vec!["MIT".into()]; + let mut after = comp("foo"); + after.licenses = vec!["GPL-3.0".into()]; + ChangeSet { + license_changed: vec![(before, after)], + ..Default::default() + } +} + +#[test] +fn fail_on_none_never_trips() { + assert!(!tripped( + &ChangeSet::default(), + &Enrichment::default(), + FailOn::None + )); + assert!(!tripped( + &cs_with_license_change(), + &enrichment_with_cve(), + FailOn::None + )); +} + +#[test] +fn fail_on_cve_trips_only_on_cve_findings() { + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_cve(), + FailOn::Cve + )); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_typosquat(), + FailOn::Cve + )); + assert!(!tripped( + &ChangeSet::default(), + &Enrichment::default(), + FailOn::Cve + )); +} + +#[test] +fn fail_on_critical_cve_filters_on_severity_high_or_above() { + // Critical and High advisories trip; Medium / Low / None don't. The + // doc on `tripped()` explains why High is included in the + // "critical-cve" bucket. + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_cve_at(Severity::Critical), + FailOn::CriticalCve + )); + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_cve_at(Severity::High), + FailOn::CriticalCve + )); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_cve_at(Severity::Medium), + FailOn::CriticalCve + )); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_cve_at(Severity::None), + FailOn::CriticalCve + )); +} + +#[test] +fn fail_on_cve_still_trips_on_severity_none_advisories() { + // --fail-on cve is the broad "any advisory" bucket; severity threading + // doesn't change its semantics. An advisory with unresolved severity + // still trips it (the alternative — silent suppression — would be the + // real footgun). + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_cve_at(Severity::None), + FailOn::Cve + )); +} + +#[test] +fn fail_on_typosquat_trips_only_on_typosquat_findings() { + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_typosquat(), + FailOn::Typosquat + )); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_cve(), + FailOn::Typosquat + )); +} + +#[test] +fn fail_on_any_trips_on_each_finding_kind_and_license_changes() { + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_cve(), + FailOn::Any + )); + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_typosquat(), + FailOn::Any + )); + assert!(tripped( + &ChangeSet::default(), + &enrichment_with_version_jump(), + FailOn::Any + )); + // license-changed-without-version-bump alone trips Any (the suspicious + // case lives on the ChangeSet, not the enrichment). + assert!(tripped( + &cs_with_license_change(), + &Enrichment::default(), + FailOn::Any + )); + assert!(!tripped( + &ChangeSet::default(), + &Enrichment::default(), + FailOn::Any + )); +} + +#[test] +fn fail_on_license_change_trips_only_on_license_changes() { + assert!(tripped( + &cs_with_license_change(), + &Enrichment::default(), + FailOn::LicenseChange + )); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_cve(), + FailOn::LicenseChange + )); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_typosquat(), + FailOn::LicenseChange + )); +} + +#[test] +fn fail_on_typosquat_ignores_license_change() { + // license_changed is a ChangeSet field, not an enrichment. The + // typosquat threshold is strictly about typosquat findings — license + // drift must NOT trip it (otherwise consumers using --fail-on=typosquat + // get unexpected exit-2's on every license correction). + assert!(!tripped( + &cs_with_license_change(), + &Enrichment::default(), + FailOn::Typosquat + )); +} + +#[test] +fn budget_trips_when_counts_exceed_limits() { + let cs = ChangeSet { + added: vec![comp("a"), comp("b")], + removed: vec![comp("c")], + version_changed: vec![(comp("d"), comp("d"))], + ..Default::default() + }; + assert!(budget_tripped(&cs, Some(1), None, None)); + assert!(budget_tripped(&cs, None, Some(0), None)); + assert!(budget_tripped(&cs, None, None, Some(0))); + assert!(!budget_tripped(&cs, Some(2), Some(1), Some(1))); +} + +#[test] +fn calibration_pipe_format_matches_v0_7_layout() { + let e = enrichment_with_typosquat(); + let mut buf = Vec::new(); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Pipe, + CalibrationOverrides::default(), + ); + let s = String::from_utf8(buf).unwrap(); + assert!(s.starts_with("typosquat|"), "got: {s}"); + assert_eq!( + s.matches('|').count(), + 3, + "pipe row has 4 fields → 3 separators; got: {s}" + ); +} + +#[test] +fn calibration_jsonl_format_emits_one_object_per_line() { + let e = enrichment_with_typosquat(); + let mut buf = Vec::new(); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Jsonl, + CalibrationOverrides::default(), + ); + let s = String::from_utf8(buf).unwrap(); + let lines: Vec<&str> = s.lines().collect(); + assert_eq!(lines.len(), 1); + let v: serde_json::Value = serde_json::from_str(lines[0]).expect("valid jsonl"); + assert_eq!(v["kind"], "typosquat"); + assert!(v["score"].is_number(), "numeric score in jsonl"); + assert!(v["threshold"].is_number()); + assert!(v["key"].is_string()); +} + +#[test] +fn calibration_jsonl_keeps_severity_label_as_string() { + let e = enrichment_with_cve_at(Severity::High); + let mut buf = Vec::new(); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Jsonl, + CalibrationOverrides::default(), + ); + let s = String::from_utf8(buf).unwrap(); + let v: serde_json::Value = serde_json::from_str(s.trim()).unwrap(); + assert_eq!(v["kind"], "cve"); + assert_eq!(v["score"], "HIGH"); + assert_eq!(v["threshold"], "high+"); +} + +#[test] +fn fail_on_kev_trips_when_any_advisory_kev_set() { + let mut e = enrichment_with_cve_at(Severity::Medium); + // Flip the kev flag on the single advisory. + for refs in e.vulns.values_mut() { + refs[0].kev = true; + } + assert!(tripped(&ChangeSet::default(), &e, FailOn::Kev)); + assert!(!tripped( + &ChangeSet::default(), + &enrichment_with_cve_at(Severity::Medium), + FailOn::Kev + )); +} + +#[test] +fn any_epss_threshold_gating() { + let mut e = enrichment_with_cve_at(Severity::Medium); + for refs in e.vulns.values_mut() { + refs[0].epss_score = Some(0.6); + } + assert!(any_epss_at_or_above(&e, 0.5)); + assert!(any_epss_at_or_above(&e, 0.6)); + assert!(!any_epss_at_or_above(&e, 0.7)); +} + +#[test] +fn calibration_emits_epss_and_kev_rows_when_set() { + let mut e = enrichment_with_cve_at(Severity::High); + for refs in e.vulns.values_mut() { + refs[0].epss_score = Some(0.87); + refs[0].kev = true; + } + let mut buf = Vec::new(); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Pipe, + CalibrationOverrides::default(), + ); + let s = String::from_utf8(buf).unwrap(); + assert!(s.contains("epss|"), "missing epss row: {s}"); + assert!(s.contains("kev|"), "missing kev row: {s}"); +} + +#[test] +fn calibration_license_row_includes_exception_detail() { + // v0.9.5: matched_rule on an exception-driven license violation + // must surface the exception identifier in the calibration tap + // so operators tuning policy see why a row fired. + let mut e = Enrichment::default(); + let component = crate::model::Component { + name: "llvm-sys".into(), + version: "1.0.0".into(), + ecosystem: crate::model::Ecosystem::Cargo, + purl: Some("pkg:cargo/llvm-sys@1.0.0".into()), + licenses: vec!["Apache-2.0 WITH LLVM-exception".into()], + supplier: None, + hashes: Vec::new(), + relationship: crate::model::Relationship::Unknown, + source_url: None, + bom_ref: None, + }; + e.license_violations.push(LicenseViolation { + component, + license: "Apache-2.0 WITH LLVM-exception".into(), + matched_rule: "exception:LLVM-exception denied".into(), + kind: crate::enrich::LicenseViolationKind::Deny, + }); + let mut buf = Vec::new(); + write_calibration_lines( + &e, + &mut buf, + crate::cli::DebugFormat::Pipe, + CalibrationOverrides::default(), + ); + let s = String::from_utf8(buf).unwrap(); + assert!( + s.contains("license|"), + "missing license calibration row: {s}" + ); + assert!( + s.contains("exception:LLVM-exception denied"), + "row must surface matched_rule with exception detail: {s}" + ); +} + +#[test] +fn fail_on_license_violation_trips() { + use crate::enrich::{LicenseViolation, LicenseViolationKind}; + let mut e = Enrichment::default(); + e.license_violations.push(LicenseViolation { + component: comp("foo"), + license: "GPL-3.0-only".into(), + matched_rule: "deny: GPL-3.0-only".into(), + kind: LicenseViolationKind::Deny, + }); + assert!(tripped(&ChangeSet::default(), &e, FailOn::LicenseViolation)); + assert!(tripped(&ChangeSet::default(), &e, FailOn::Any)); + assert!(!tripped( + &ChangeSet::default(), + &Enrichment::default(), + FailOn::LicenseViolation + )); +}