From e4122c313992b0ac0034098205c81a1d40307ff3 Mon Sep 17 00:00:00 2001
From: "Chris (ChrisJr404)" <11917633+ChrisJr404@users.noreply.github.com>
Date: Mon, 4 May 2026 12:25:26 -0400
Subject: [PATCH] report: add SARIF 2.1.0 output (-f sarif)

Lets `openant report -f sarif results_verified.json` emit a SARIF log
that GitHub Code Scanning and GitLab SAST can ingest without a
converter, matching what every other SAST in this category supports.

Renders Go-side via the same flow as -f html: Python's `report-data`
subcommand returns pre-computed JSON, and BuildSARIF turns ReportData
into a SARIF map. Findings become results, verdicts get synthesized
into a `rules` array (vulnerable+bypassable as level `error`,
inconclusive/unclear as `warning`, everything else as `note`). File
paths land as artifactLocation.uri without a startLine, since the
current Finding struct doesn't carry line numbers and emitting a
synthetic 1 would anchor alerts to the wrong row in Code Scanning.

Each result carries a partialFingerprints entry keyed
"openant/file/function/verdict/v1" so re-runs dedupe cleanly, and
versionControlProvenance is populated when ReportData.RepoURL is set
(including revisionId from CommitSHA when available).

15 unit tests cover envelope shape, rule dedup by verdict, level
mapping, path normalization, logical location, dynamic test
property propagation, fingerprint stability, VCS provenance
gating, the empty-AttackVector fallback that keeps message.text
non-empty per spec, the 4 KiB message cap, and an end-to-end
round-trip through json.Unmarshal.
---
 apps/openant-cli/cmd/report.go                |  71 ++++-
 apps/openant-cli/internal/report/sarif.go     | 297 ++++++++++++++++++
 .../openant-cli/internal/report/sarif_test.go | 269 ++++++++++++++++
 3 files changed, 636 insertions(+), 1 deletion(-)
 create mode 100644 apps/openant-cli/internal/report/sarif.go
 create mode 100644 apps/openant-cli/internal/report/sarif_test.go

diff --git a/apps/openant-cli/cmd/report.go b/apps/openant-cli/cmd/report.go
index d2b34b7..5060c19 100644
--- a/apps/openant-cli/cmd/report.go
+++ b/apps/openant-cli/cmd/report.go
@@ -26,6 +26,7 @@ Formats:
   summary      Narrative security overview (uses LLM)
   html         Interactive HTML report with charts and filters
   csv          Spreadsheet export of all findings
+  sarif        SARIF 2.1.0 log for GitHub Code Scanning / GitLab SAST upload
 
 If no results path is given, the active project's results_verified.json is used.
 Python owns default output paths — you only need -o to override.
@@ -50,7 +51,7 @@ var (
 func init() {
 	reportCmd.Flags().StringVarP(&reportOutput, "output", "o", "", "Output path (default: derived from format)")
 	reportCmd.Flags().StringVar(&reportDataset, "dataset", "", "Path to dataset JSON (for html/csv)")
-	reportCmd.Flags().StringVarP(&reportFormat, "format", "f", "", "Report format: disclosure, summary, html, csv")
+	reportCmd.Flags().StringVarP(&reportFormat, "format", "f", "", "Report format: disclosure, summary, html, csv, sarif")
 	reportCmd.Flags().StringVar(&reportPipelineOutput, "pipeline-output", "", "Path to pipeline_output.json (for summary/disclosure)")
 	reportCmd.Flags().StringVar(&reportRepoName, "repo-name", "", "Repository name (used when auto-building pipeline_output)")
 	reportCmd.Flags().StringVar(&reportExtraDest, "copy-to", "", "Copy reports to an additional location")
@@ -213,6 +214,31 @@ func runReport(cmd *cobra.Command, args []string) {
 				output.PrintReportSummary(data)
 			}
 			allResults = append(allResults, data)
+		} else if fmt == "sarif" {
+			// SARIF reports use the Go renderer for the same reason HTML
+			// does: it's a deterministic data transformation, not an
+			// LLM-generated narrative, so Python only supplies the
+			// pre-computed report data and Go does the rendering.
+			outputPath := reportOutput
+			if outputPath == "" {
+				resultsDir := filepath.Dir(resultsPath)
+				outputPath = filepath.Join(resultsDir, "final-reports", "report.sarif")
+			}
+
+			if err := runSARIFReport(rt, resultsPath, outputPath); err != nil {
+				output.PrintError("sarif: " + err.Error())
+				exitCode = 2
+				continue
+			}
+
+			data := map[string]any{
+				"output_path": outputPath,
+				"format":      "sarif",
+			}
+			if !jsonOutput {
+				output.PrintReportSummary(data)
+			}
+			allResults = append(allResults, data)
 		} else {
 			// Other formats delegate to Python
 			pyArgs := buildReportArgs(resultsPath, fmt)
@@ -262,6 +288,7 @@ func promptFormats() ([]string, error) {
 					huh.NewOption("Summary — narrative security overview written by AI ($)", "summary"),
 					huh.NewOption("HTML — interactive report with charts and filters", "html"),
 					huh.NewOption("CSV — spreadsheet export of all findings", "csv"),
+					huh.NewOption("SARIF — GitHub Code Scanning / GitLab SAST upload", "sarif"),
 				).
 				Value(&selected),
 		),
@@ -349,6 +376,48 @@ func runHTMLReport(rt *python.RuntimeInfo, resultsPath string, outputPath string
 	return nil
 }
 
+// runSARIFReport writes a SARIF 2.1.0 log to outputPath via the Go renderer.
+// It invokes the Python `report-data` subcommand on resultsPath (adding
+// --dataset when reportDataset is set), decodes the envelope payload into
+// report.ReportData, and renders it with the CLI `version` as tool version.
+func runSARIFReport(rt *python.RuntimeInfo, resultsPath string, outputPath string) error {
+	args := []string{"report-data", resultsPath}
+	if reportDataset != "" {
+		args = append(args, "--dataset", reportDataset)
+	}
+
+	res, err := python.Invoke(rt.Path, args, "", quiet, resolvedAPIKey())
+	if err != nil {
+		return fmt.Errorf("report-data failed: %w", err)
+	}
+	if res.Envelope.Status != "success" {
+		// Prefer Python's first reported error over the generic text.
+		if errs := res.Envelope.Errors; len(errs) > 0 {
+			return fmt.Errorf("%s", errs[0])
+		}
+		return fmt.Errorf("%s", "report-data returned error")
+	}
+
+	// Re-encode the generic payload so it can be decoded into the typed struct.
+	raw, err := json.Marshal(res.Envelope.Data)
+	if err != nil {
+		return fmt.Errorf("failed to marshal report data: %w", err)
+	}
+	var parsed report.ReportData
+	if err = json.Unmarshal(raw, &parsed); err != nil {
+		return fmt.Errorf("failed to parse report data: %w", err)
+	}
+
+	err = report.GenerateSARIF(parsed, outputPath, report.SARIFOptions{
+		ToolVersion:    version,
+		InformationURI: "https://github.com/knostic/OpenAnt",
+	})
+	if err != nil {
+		return fmt.Errorf("failed to render SARIF: %w", err)
+	}
+	return nil
+}
+
 // buildReportArgs constructs the Python CLI arguments for a single format.
 func buildReportArgs(resultsPath string, format string) []string {
 	pyArgs := []string{"report", resultsPath, "--format", format}
diff --git a/apps/openant-cli/internal/report/sarif.go b/apps/openant-cli/internal/report/sarif.go
new file mode 100644
index 0000000..88c903b
--- /dev/null
+++ b/apps/openant-cli/internal/report/sarif.go
@@ -0,0 +1,297 @@
+package report
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+// SARIF envelope constants shared by every emitted log.
+const (
+	// sarifVersion is the SARIF spec version we emit (the top-level `version`).
+	sarifVersion = "2.1.0"
+	// sarifSchema is the OASIS-published JSON schema URL for SARIF 2.1.0,
+	// emitted as the top-level `$schema`.
+	sarifSchema = "https://docs.oasis-open.org/sarif/sarif/v2.1.0/errata01/os/schemas/sarif-schema-2.1.0.json"
+)
+
+// SARIFOptions controls extra metadata baked into the emitted log. All fields
+// are optional; ToolName and ToolVersion fall back to defaults when empty.
+type SARIFOptions struct {
+	// ToolVersion is `tool.driver.version`. Defaults to "dev" when empty.
+	ToolVersion string
+	// InformationURI is `tool.driver.informationUri`. Omitted from the log
+	// when empty.
+	InformationURI string
+	// ToolName overrides `tool.driver.name`. Defaults to "OpenAnt".
+	ToolName string
+}
+
+// BuildSARIF converts a ReportData into a SARIF 2.1.0 log represented as a
+// plain map[string]any, ready for json.Marshal. The log holds exactly one
+// run: the tool driver (name, version, synthesized rules), one result per
+// finding, and — only when data.RepoURL is set — versionControlProvenance.
+func BuildSARIF(data ReportData, opts SARIFOptions) map[string]any {
+	toolName, toolVersion := opts.ToolName, opts.ToolVersion
+	if toolName == "" {
+		toolName = "OpenAnt"
+	}
+	if toolVersion == "" {
+		toolVersion = "dev"
+	}
+
+	rules, ruleIndex := sarifRulesFor(data)
+
+	// Pre-sized and non-nil so an empty findings list still encodes as [].
+	results := make([]map[string]any, 0, len(data.Findings))
+	for i := range data.Findings {
+		results = append(results, sarifResultFor(data.Findings[i], ruleIndex))
+	}
+
+	driver := map[string]any{
+		"name":            toolName,
+		"version":         toolVersion,
+		"semanticVersion": toolVersion,
+		"rules":           rules,
+	}
+	if uri := opts.InformationURI; uri != "" {
+		driver["informationUri"] = uri
+	}
+
+	run := map[string]any{
+		"tool":    map[string]any{"driver": driver},
+		"results": results,
+	}
+
+	// versionControlProvenance ties the log to a repository (and commit when
+	// known). It is left out entirely when no repository URL is available
+	// instead of emitting an empty object.
+	if data.RepoURL != "" {
+		provenance := map[string]any{"repositoryUri": data.RepoURL}
+		if data.CommitSHA != "" {
+			provenance["revisionId"] = data.CommitSHA
+		}
+		run["versionControlProvenance"] = []any{provenance}
+	}
+
+	// Top-level envelope.
+	envelope := map[string]any{"runs": []any{run}}
+	envelope["$schema"] = sarifSchema
+	envelope["version"] = sarifVersion
+	return envelope
+}
+
+// sarifRulesFor returns the SARIF `rules` array plus a map from normalized
+// verdict to its index in that array. One rule is synthesized per distinct
+// verdict, in order of first appearance in data.Findings. Rule descriptions
+// come from data.Categories; category verdicts are normalized the same way
+// as finding verdicts so casing/whitespace drift cannot drop a description.
+func sarifRulesFor(data ReportData) ([]map[string]any, map[string]int) {
+	descByVerdict := make(map[string]string, len(data.Categories))
+	for _, c := range data.Categories {
+		descByVerdict[normalizedVerdict(c.Verdict)] = c.Description
+	}
+
+	seen := make(map[string]int)
+	rules := make([]map[string]any, 0)
+	for _, f := range data.Findings {
+		v := normalizedVerdict(f.Verdict)
+		if _, ok := seen[v]; ok {
+			continue
+		}
+		seen[v] = len(rules)
+
+		desc := descByVerdict[v]
+		if desc == "" {
+			desc = fmt.Sprintf("Finding with verdict %q.", v)
+		}
+
+		rules = append(rules, map[string]any{
+			"id":   "openant.verdict." + v,
+			"name": "OpenAntVerdict_" + strings.ReplaceAll(v, "-", "_"),
+			"shortDescription": map[string]any{
+				"text": fmt.Sprintf("OpenAnt %s finding", v),
+			},
+			"fullDescription": map[string]any{
+				"text": desc,
+			},
+			"defaultConfiguration": map[string]any{
+				"level": sarifLevelForVerdict(v),
+			},
+			"properties": map[string]any{
+				"verdict": v,
+				"tags":    []string{"security", "openant"},
+			},
+		})
+	}
+
+	return rules, seen
+}
+
+// sarifResultFor converts one Finding into a SARIF result object.
+//
+// The location is file-scoped with no region/startLine: the Finding struct
+// carries no line numbers, and a synthetic startLine would make GitHub Code
+// Scanning anchor the alert to the wrong row, which is worse than having no
+// anchor. ruleIndex maps a normalized verdict to its position in the rules
+// array; "ruleIndex" is only set on the result when the verdict is present
+// in that map.
+func sarifResultFor(f Finding, ruleIndex map[string]int) map[string]any {
+	verdict := normalizedVerdict(f.Verdict)
+
+	properties := map[string]any{
+		"verdict":  verdict,
+		"function": f.Function,
+	}
+	// Dynamic-test fields are optional and only emitted when populated.
+	if status := f.DynamicTestStatus; status != "" {
+		properties["dynamicTestStatus"] = status
+	}
+	if details := f.DynamicTestDetails; details != "" {
+		properties["dynamicTestDetails"] = details
+	}
+
+	// partialFingerprints give consumers a stable key for matching the same
+	// finding across successive scans, so re-runs do not show up as fresh
+	// alerts each time.
+	fingerprints := map[string]any{
+		"openant/file/function/verdict/v1": fingerprintFor(f, verdict),
+	}
+
+	// Assemble the result itself.
+	out := map[string]any{
+		"ruleId":              "openant.verdict." + verdict,
+		"level":               sarifLevelForVerdict(verdict),
+		"message":             map[string]any{"text": findingMessage(f)},
+		"locations":           []any{sarifLocationFor(f)},
+		"properties":          properties,
+		"partialFingerprints": fingerprints,
+	}
+
+	// Link back to the rule entry when one was synthesized for this verdict.
+	if idx, found := ruleIndex[verdict]; found {
+		out["ruleIndex"] = idx
+	}
+
+	return out
+}
+
+// sarifLocationFor builds the SARIF `location` object for a finding: a
+// physicalLocation holding only an artifactLocation (normalized URI relative
+// to %SRCROOT%), plus — when the finding names a function — a sibling
+// logicalLocations entry of kind "function".
+func sarifLocationFor(f Finding) map[string]any {
+	artifact := map[string]any{
+		"uri":       sarifURI(f.File),
+		"uriBaseId": "%SRCROOT%",
+	}
+	location := map[string]any{
+		"physicalLocation": map[string]any{"artifactLocation": artifact},
+	}
+	if f.Function == "" {
+		// No function recorded: the physical location is all we can offer.
+		return location
+	}
+
+	// Logical scope for consumers that care about it.
+	location["logicalLocations"] = []any{
+		map[string]any{"name": f.Function, "kind": "function"},
+	}
+	return location
+}
+
+// findingMessage condenses the Finding's narrative fields (AttackVector, then
+// Analysis, separated by a blank line) into a single `message.text` value.
+// Whitespace-only fields are ignored. The result is capped at 4096 bytes and
+// is always cut on a UTF-8 rune boundary so no multi-byte character is split.
+func findingMessage(f Finding) string {
+	// Trimming the joined string drops the separator when either side is empty.
+	attack, analysis := strings.TrimSpace(f.AttackVector), strings.TrimSpace(f.Analysis)
+	msg := strings.TrimSpace(attack + "\n\n" + analysis)
+	if msg == "" {
+		// Fall back to a synthetic one-liner so message.text is never empty;
+		// the verdict is normalized so a blank one reads as "unknown".
+		return fmt.Sprintf("OpenAnt %s finding in %s", normalizedVerdict(f.Verdict), f.File)
+	}
+	const maxBytes, ellipsis = 4096, "…"
+	if len(msg) > maxBytes {
+		cut := maxBytes - len(ellipsis)
+		for cut > 0 && msg[cut]&0xC0 == 0x80 { // continuation byte: back up to the rune start
+			cut--
+		}
+		msg = msg[:cut] + ellipsis
+	}
+	return msg
+}
+
+// fingerprintFor returns the stable string used as the SARIF result's
+// `partialFingerprints` value: "<file>|<function>|<verdict>". The file is
+// passed through sarifURI so "./a.py" and "a.py" (or backslash variants)
+// fingerprint identically, matching the emitted artifactLocation.uri.
+func fingerprintFor(f Finding, verdict string) string {
+	return strings.Join([]string{sarifURI(f.File), f.Function, verdict}, "|")
+}
+
+// sarifLevelForVerdict maps an OpenAnt verdict to a SARIF result.level:
+// "error" for vulnerable and bypassable, "warning" for inconclusive and
+// unclear, and "note" for every other verdict (safe, protected, etc.) so
+// those don't pollute Code-Scanning alert lists.
+func sarifLevelForVerdict(v string) string {
+	if v == "vulnerable" || v == "bypassable" {
+		return "error"
+	}
+	if v == "inconclusive" || v == "unclear" {
+		return "warning"
+	}
+	// Anything not explicitly flagged above is informational.
+	return "note"
+}
+
+// normalizedVerdict lowercases the verdict and strips surrounding whitespace
+// so upstream formatting drift cannot split one verdict into several rules.
+// An empty (or all-whitespace) verdict becomes "unknown".
+func normalizedVerdict(v string) string {
+	if cleaned := strings.TrimSpace(strings.ToLower(v)); cleaned != "" {
+		return cleaned
+	}
+	return "unknown"
+}
+
+// sarifURI converts a recorded file path into the artifactLocation.uri form:
+// backslashes become forward slashes and a single leading "./" is removed.
+// The path is otherwise left exactly as recorded so consumers can match it
+// against the working tree.
+func sarifURI(path string) string {
+	// Normalize separators first so a Windows-style ".\" prefix is caught too.
+	forward := strings.ReplaceAll(path, `\`, "/")
+	return strings.TrimPrefix(forward, "./")
+}
+
+// GenerateSARIF renders a SARIF log to outputPath, creating parent
+// directories as needed and overwriting any existing file. The log is
+// rendered in memory first, so a render failure leaves an existing file
+// untouched, and os.WriteFile reports write/close errors that a deferred
+// Close would silently drop.
+func GenerateSARIF(data ReportData, outputPath string, opts SARIFOptions) error {
+	if err := os.MkdirAll(filepath.Dir(outputPath), 0o755); err != nil {
+		return err
+	}
+	var buf strings.Builder
+	if err := RenderSARIF(data, &buf, opts); err != nil {
+		return err
+	}
+	return os.WriteFile(outputPath, []byte(buf.String()), 0o666)
+}
+
+// RenderSARIF encodes the SARIF log for data onto w as two-space-indented
+// JSON with HTML escaping disabled. Pipe the output through `jq -c` when a
+// compact form is needed.
+func RenderSARIF(data ReportData, w io.Writer, opts SARIFOptions) error {
+	encoder := json.NewEncoder(w)
+	encoder.SetEscapeHTML(false)
+	encoder.SetIndent("", "  ")
+	return encoder.Encode(BuildSARIF(data, opts))
+}
diff --git a/apps/openant-cli/internal/report/sarif_test.go b/apps/openant-cli/internal/report/sarif_test.go
new file mode 100644
index 0000000..896fc7c
--- /dev/null
+++ b/apps/openant-cli/internal/report/sarif_test.go
@@ -0,0 +1,269 @@
+package report
+
+import (
+	"bytes"
+	"encoding/json"
+	"strings"
+	"testing"
+)
+
+// sarifFixtureData returns the shared fixture: three findings (vulnerable,
+// upper-cased BYPASSABLE with dynamic-test data and a "./" path, and safe)
+// plus one category description per verdict and full repo provenance.
+func sarifFixtureData() ReportData {
+	findings := []Finding{
+		{
+			Number:       1,
+			Verdict:      "vulnerable",
+			File:         "src/auth/login.py",
+			Function:     "do_login",
+			AttackVector: "Unsanitized input flows into eval().",
+			Analysis:     "User-controlled username is passed to eval, allowing RCE.",
+		},
+		{
+			Number:             2,
+			Verdict:            "BYPASSABLE", // exercises normalizedVerdict
+			File:               "./src/api/handler.py",
+			Function:           "handle_get",
+			AttackVector:       "Auth bypass via header injection.",
+			DynamicTestStatus:  "CONFIRMED",
+			DynamicTestDetails: "PoC succeeded.",
+		},
+		{Number: 3, Verdict: "safe", File: "src/util/helpers.py", Function: "noop"},
+	}
+	categories := []Category{
+		{Verdict: "vulnerable", Description: "Confirmed exploitable code path."},
+		{Verdict: "bypassable", Description: "Has guard but reachable around it."},
+		{Verdict: "safe", Description: "No exploitable path identified."},
+	}
+	return ReportData{
+		Title:      "demo",
+		RepoName:   "knostic/demo",
+		CommitSHA:  "deadbeefcafebabe1234567890abcdefdeadbeef",
+		RepoURL:    "https://github.com/knostic/demo",
+		Language:   "python",
+		Findings:   findings,
+		Categories: categories,
+	}
+}
+
+// The envelope must carry version, $schema, and exactly one run.
+func TestBuildSARIF_TopLevelEnvelope(t *testing.T) {
+	sarif := BuildSARIF(sarifFixtureData(), SARIFOptions{ToolVersion: "1.2.3"})
+	if v := sarif["version"]; v != sarifVersion {
+		t.Fatalf("version: got %v, want %s", v, sarifVersion)
+	}
+	if s := sarif["$schema"]; s != sarifSchema {
+		t.Fatalf("$schema: got %v, want %s", s, sarifSchema)
+	}
+	runs, isSlice := sarif["runs"].([]any)
+	if !isSlice || len(runs) != 1 {
+		t.Fatalf("runs: expected one run, got %v", sarif["runs"])
+	}
+}
+
+// The driver block must reflect the default tool name and the given options.
+func TestBuildSARIF_DriverNameAndVersion(t *testing.T) {
+	const infoURI = "https://github.com/knostic/OpenAnt"
+	sarif := BuildSARIF(sarifFixtureData(), SARIFOptions{ToolVersion: "1.2.3", InformationURI: infoURI})
+	run := sarif["runs"].([]any)[0].(map[string]any)
+	driver := run["tool"].(map[string]any)["driver"].(map[string]any)
+	if name := driver["name"]; name != "OpenAnt" {
+		t.Errorf("driver.name: got %v, want OpenAnt", name)
+	}
+	if ver := driver["version"]; ver != "1.2.3" {
+		t.Errorf("driver.version: got %v, want 1.2.3", ver)
+	}
+	if uri := driver["informationUri"]; uri != infoURI {
+		t.Errorf("driver.informationUri: got %v", uri)
+	}
+}
+
+// Three distinct verdicts in the fixture must yield exactly three rules.
+func TestBuildSARIF_RulesDeduplicatedByVerdict(t *testing.T) {
+	sarif := BuildSARIF(sarifFixtureData(), SARIFOptions{})
+	rules := sarif["runs"].([]any)[0].(map[string]any)["tool"].(map[string]any)["driver"].(map[string]any)["rules"].([]map[string]any)
+	if len(rules) != 3 {
+		t.Fatalf("expected 3 rules (one per verdict), got %d", len(rules))
+	}
+	present := make(map[string]bool, len(rules))
+	for _, rule := range rules {
+		if id, ok := rule["id"].(string); ok {
+			present[id] = true
+		}
+	}
+	expected := []string{
+		"openant.verdict.vulnerable",
+		"openant.verdict.bypassable",
+		"openant.verdict.safe",
+	}
+	for _, id := range expected {
+		if !present[id] {
+			t.Errorf("rule %s missing", id)
+		}
+	}
+}
+
+// Fixture verdicts vulnerable, bypassable, safe map to error, error, note.
+func TestBuildSARIF_ResultLevelMapping(t *testing.T) {
+	sarif := BuildSARIF(sarifFixtureData(), SARIFOptions{})
+	results := sarif["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any)
+	if len(results) != 3 {
+		t.Fatalf("expected 3 results, got %d", len(results))
+	}
+
+	for i, want := range []string{"error", "error", "note"} {
+		if level := results[i]["level"]; level != want {
+			t.Errorf("result[%d].level: got %v, want %s", i, level, want)
+		}
+	}
+}
+
+func TestBuildSARIF_FilePathsNormalized(t *testing.T) {
+	sarif := BuildSARIF(sarifFixtureData(), SARIFOptions{})
+	second := sarif["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any)[1]
+	artifact := second["locations"].([]any)[0].(map[string]any)["physicalLocation"].(map[string]any)["artifactLocation"].(map[string]any)
+	if uri := artifact["uri"]; uri != "src/api/handler.py" {
+		t.Errorf("artifactLocation.uri: got %q, want %q (./ should be stripped)", uri, "src/api/handler.py")
+	}
+}
+
+func TestBuildSARIF_LogicalLocationCarriesFunction(t *testing.T) {
+	sarif := BuildSARIF(sarifFixtureData(), SARIFOptions{})
+	location := sarif["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any)[0]["locations"].([]any)[0].(map[string]any)
+	logicals, ok := location["logicalLocations"].([]any)
+	if !ok || len(logicals) != 1 {
+		t.Fatalf("logicalLocations missing on result[0]")
+	}
+	entry := logicals[0].(map[string]any)
+	if !(entry["name"] == "do_login" && entry["kind"] == "function") {
+		t.Errorf("logicalLocations[0]: got %v", entry)
+	}
+}
+
+func TestBuildSARIF_DynamicTestPropertiesPropagate(t *testing.T) {
+	sarif := BuildSARIF(sarifFixtureData(), SARIFOptions{})
+	second := sarif["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any)[1]
+	properties := second["properties"].(map[string]any)
+	if status := properties["dynamicTestStatus"]; status != "CONFIRMED" {
+		t.Errorf("dynamicTestStatus: got %v", status)
+	}
+	if details := properties["dynamicTestDetails"]; details != "PoC succeeded." {
+		t.Errorf("dynamicTestDetails: got %v", details)
+	}
+}
+
+func TestBuildSARIF_PartialFingerprintsStable(t *testing.T) {
+	const key = "openant/file/function/verdict/v1"
+	sarif := BuildSARIF(sarifFixtureData(), SARIFOptions{})
+	for _, res := range sarif["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any) {
+		prints, ok := res["partialFingerprints"].(map[string]any)
+		if !ok {
+			t.Fatalf("partialFingerprints missing on %v", res["ruleId"])
+		}
+		if _, isString := prints[key].(string); !isString {
+			t.Fatalf("expected v1 fingerprint key on every result")
+		}
+	}
+}
+
+// The fixture sets both RepoURL and CommitSHA, so both must be emitted.
+func TestBuildSARIF_VersionControlProvenanceWhenRepoURLPresent(t *testing.T) {
+	run := BuildSARIF(sarifFixtureData(), SARIFOptions{})["runs"].([]any)[0].(map[string]any)
+	provenance, ok := run["versionControlProvenance"].([]any)
+	if !ok || len(provenance) != 1 {
+		t.Fatalf("expected versionControlProvenance with one entry")
+	}
+	entry := provenance[0].(map[string]any)
+	if uri := entry["repositoryUri"]; uri != "https://github.com/knostic/demo" {
+		t.Errorf("repositoryUri: got %v", uri)
+	}
+	if rev := entry["revisionId"]; rev != "deadbeefcafebabe1234567890abcdefdeadbeef" {
+		t.Errorf("revisionId: got %v", rev)
+	}
+}
+
+func TestBuildSARIF_NoVCSWhenRepoURLEmpty(t *testing.T) {
+	fixture := sarifFixtureData()
+	fixture.RepoURL = ""
+
+	sarif := BuildSARIF(fixture, SARIFOptions{})
+	if _, present := sarif["runs"].([]any)[0].(map[string]any)["versionControlProvenance"]; present {
+		t.Fatalf("versionControlProvenance must be omitted when RepoURL is empty")
+	}
+}
+
+func TestBuildSARIF_MessageFallbackWhenAttackVectorEmpty(t *testing.T) {
+	// Neither AttackVector nor Analysis is set, forcing the fallback text.
+	bare := Finding{Verdict: "vulnerable", File: "src/x.py"}
+	sarif := BuildSARIF(ReportData{Findings: []Finding{bare}}, SARIFOptions{})
+
+	results := sarif["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any)
+	message := results[0]["message"].(map[string]any)
+	text := message["text"].(string)
+
+	switch {
+	case text == "":
+		t.Fatalf("message.text must never be empty per SARIF schema")
+	case !strings.Contains(text, "src/x.py"):
+		t.Errorf("expected fallback message to reference file path, got %q", text)
+	}
+}
+
+// An 8000-byte attack vector must come back shorter than it went in.
+func TestBuildSARIF_MessageTruncationCap(t *testing.T) {
+	const inputLen = 8000
+	oversized := Finding{Verdict: "vulnerable", File: "src/x.py", AttackVector: strings.Repeat("a", inputLen)}
+	sarif := BuildSARIF(ReportData{Findings: []Finding{oversized}}, SARIFOptions{})
+
+	results := sarif["runs"].([]any)[0].(map[string]any)["results"].([]map[string]any)
+	text := results[0]["message"].(map[string]any)["text"].(string)
+	if got := len(text); got >= inputLen {
+		t.Errorf("message must be truncated, got %d bytes", got)
+	}
+}
+
+func TestRenderSARIF_RoundTripsThroughJSONUnmarshal(t *testing.T) {
+	rendered := new(bytes.Buffer)
+	err := RenderSARIF(sarifFixtureData(), rendered, SARIFOptions{ToolVersion: "1.0.0"})
+	if err != nil {
+		t.Fatalf("RenderSARIF: %v", err)
+	}
+	decoded := map[string]any{}
+	if err = json.Unmarshal(rendered.Bytes(), &decoded); err != nil {
+		t.Fatalf("emitted SARIF must be valid JSON: %v", err)
+	}
+	if v := decoded["version"]; v != sarifVersion {
+		t.Errorf("round-trip version drift: %v", v)
+	}
+}
+
+func TestNormalizedVerdict_HandlesEdgeCases(t *testing.T) {
+	// input -> expected normalized form
+	table := [][2]string{
+		{"VULNERABLE", "vulnerable"},
+		{"  Bypassable  ", "bypassable"},
+		{"", "unknown"},
+	}
+	for _, row := range table {
+		in, want := row[0], row[1]
+		if got := normalizedVerdict(in); got != want {
+			t.Errorf("normalizedVerdict(%q): got %q, want %q", in, got, want)
+		}
+	}
+}
+
+func TestSARIFURI_StripsLeadingDotSlashAndNormalizesBackslashes(t *testing.T) {
+	// input -> expected URI
+	table := [][2]string{
+		{"./src/x.py", "src/x.py"},
+		{`src\nested\file.py`, "src/nested/file.py"},
+		{"src/x.py", "src/x.py"},
+	}
+	for _, row := range table {
+		in, want := row[0], row[1]
+		if got := sarifURI(in); got != want {
+			t.Errorf("sarifURI(%q): got %q, want %q", in, got, want)
+		}
+	}
+}