From e2219c2a3dc8271cf6f7500859355f7fa147852a Mon Sep 17 00:00:00 2001
From: Stephen Benjamin <stephen@redhat.com>
Date: Wed, 18 Mar 2026 11:38:22 -0400
Subject: [PATCH] Add script to re-evaluate job run classifications from DB

Reads job runs and their tests from the database, runs them through
the Go classification code, and reports what would change. Useful for
validating the new infrastructure detection logic against historical
data.

Usage: go run ./scripts/reevaluate-job-status/ --job=<name> --days=14

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 scripts/reevaluate-job-status/main.go | 221 ++++++++++++++++++++++++++
 1 file changed, 221 insertions(+)
 create mode 100644 scripts/reevaluate-job-status/main.go
diff --git a/scripts/reevaluate-job-status/main.go b/scripts/reevaluate-job-status/main.go
new file mode 100644
index 0000000000..46d0ac45bb
--- /dev/null
+++ b/scripts/reevaluate-job-status/main.go
@@ -0,0 +1,221 @@
+package main
+
+import (
+	"fmt"
+	"os"
+	"sort"
+	"strings"
+
+	log "github.com/sirupsen/logrus"
+	"github.com/spf13/pflag"
+	"gorm.io/gorm"
+
+	v1 "github.com/openshift/sippy/pkg/apis/sippyprocessing/v1"
+	"github.com/openshift/sippy/pkg/dataloader/prowloader/testconversion"
+	"github.com/openshift/sippy/pkg/db/models"
+	"github.com/openshift/sippy/pkg/flags"
+	"github.com/openshift/sippy/pkg/synthetictests"
+
+	"github.com/openshift/sippy/pkg/apis/prow"
+)
+
+func main() {
+	dbFlags := flags.NewPostgresDatabaseFlags()
+	var release, job string
+	var days, samples int
+	pflag.StringVar(&release, "release", "", "Filter by release (e.g. 4.22)")
+	pflag.StringVar(&job, "job", "", "Filter by job name (exact match)")
+	pflag.IntVar(&days, "days", 0, "Only evaluate job runs from the last N days")
+	pflag.IntVar(&samples, "samples", 10, "Number of sample URLs to show per transition")
+	dbFlags.BindFlags(pflag.CommandLine)
+	pflag.Parse()
+
+	if release == "" && job == "" {
+		fmt.Fprintln(os.Stderr, "At least one of --release or --job is required")
+		os.Exit(1)
+	}
+
+	dbc, err := dbFlags.GetDBClient()
+	if err != nil {
+		log.WithError(err).Fatal("could not connect to db")
+	}
+
+	query := dbc.DB.
+		Table("prow_job_runs").
+		Select("prow_job_runs.id, prow_job_runs.url, prow_job_runs.overall_result, prow_job_runs.succeeded, prow_job_runs.failed, prow_jobs.name AS job_name").
+		Joins("JOIN prow_jobs ON prow_jobs.id = prow_job_runs.prow_job_id").
+		Where("prow_job_runs.deleted_at IS NULL")
+
+	if job != "" {
+		query = query.Where("prow_jobs.name = ?", job)
+	}
+	if release != "" {
+		query = query.Where("prow_jobs.release = ?", release)
+	}
+	if days > 0 {
+		query = query.Where("prow_job_runs.timestamp >= NOW() - INTERVAL '1 day' * ?", days)
+	}
+
+	type jobRunRow struct {
+		ID            uint
+		URL           string
+		OverallResult v1.JobOverallResult
+		Succeeded     bool
+		Failed        bool
+		JobName       string
+	}
+
+	var runs []jobRunRow
+	if err := query.Find(&runs).Error; err != nil {
+		log.WithError(err).Fatal("could not query job runs")
+	}
+
+	manager := synthetictests.NewOpenshiftSyntheticTestManager()
+
+	// Track transitions: "old -> new" counts and sample URLs
+	transitions := make(map[string]int)
+	transitionSamples := make(map[string][]string) // up to 10 sample URLs per transition
+	oldCounts := make(map[v1.JobOverallResult]int)
+	newCounts := make(map[v1.JobOverallResult]int)
+	var totalChanged int
+
+	for _, run := range runs {
+		var newResult v1.JobOverallResult
+
+		if run.Succeeded {
+			newResult = v1.JobSucceeded
+		} else {
+			tests, err := loadTests(dbc.DB, run.ID)
+			if err != nil {
+				log.WithError(err).Warnf("could not load tests for run %d", run.ID)
+				continue
+			}
+
+			var prowState prow.ProwJobState
+			switch run.OverallResult {
+			case v1.JobAborted:
+				prowState = prow.AbortedState
+			case v1.JobInternalInfrastructureFailure:
+				prowState = prow.ErrorState
+			default:
+				prowState = prow.FailureState
+			}
+
+			pj := prow.ProwJob{
+				Spec:   prow.ProwJobSpec{Job: run.JobName},
+				Status: prow.ProwJobStatus{State: prowState},
+			}
+
+			_, newResult = testconversion.ConvertProwJobRunToSyntheticTests(pj, tests, manager)
+		}
+
+		oldCounts[run.OverallResult]++
+		newCounts[newResult]++
+
+		if run.OverallResult != newResult {
+			totalChanged++
+			key := fmt.Sprintf("%s -> %s", run.OverallResult, newResult)
+			transitions[key]++
+			if samples > 0 && len(transitionSamples[key]) < samples {
+				transitionSamples[key] = append(transitionSamples[key], run.URL)
+			}
+		}
+	}
+
+	total := len(runs)
+
+	// Print summary table
+	allStatuses := []v1.JobOverallResult{
+		v1.JobSucceeded,
+		v1.JobTestFailure,
+		v1.JobInstallFailure,
+		v1.JobUpgradeFailure,
+		v1.JobExternalInfrastructureFailure,
+		v1.JobInternalInfrastructureFailure,
+		v1.JobAborted,
+		v1.JobUnknown,
+	}
+
+	fmt.Printf("\nJob runs evaluated: %d\n", total)
+	fmt.Printf("Would change: %d\n\n", totalChanged)
+
+	fmt.Printf("%-5s %-40s %10s %8s %10s %8s\n", "Code", "Status", "Current", "%", "After", "%")
+	fmt.Println(strings.Repeat("-", 85))
+	for _, s := range allStatuses {
+		old := oldCounts[s]
+		new_ := newCounts[s]
+		if old == 0 && new_ == 0 {
+			continue
+		}
+		oldPct := float64(old) / float64(total) * 100
+		newPct := float64(new_) / float64(total) * 100
+		fmt.Printf("%-5s %-40s %10d %7.1f%% %10d %7.1f%%\n",
+			string(s), s.String(), old, oldPct, new_, newPct)
+	}
+	fmt.Println(strings.Repeat("-", 85))
+	fmt.Printf("%-5s %-40s %10d %7.1f%% %10d %7.1f%%\n",
+		"", "Total", total, 100.0, total, 100.0)
+
+	// Print transitions
+	if totalChanged > 0 {
+		fmt.Printf("\nTransitions:\n")
+		fmt.Printf("%-10s %10s %8s\n", "Change", "Count", "%")
+		fmt.Println(strings.Repeat("-", 30))
+
+		sorted := make([]string, 0, len(transitions))
+		for k := range transitions {
+			sorted = append(sorted, k)
+		}
+		sort.Slice(sorted, func(i, j int) bool {
+			return transitions[sorted[i]] > transitions[sorted[j]]
+		})
+		for _, k := range sorted {
+			count := transitions[k]
+			pct := float64(count) / float64(total) * 100
+			fmt.Printf("%-10s %10d %7.1f%%\n", k, count, pct)
+		}
+
+		if samples == 0 {
+			return
+		}
+		fmt.Printf("\nSamples (up to %d per transition):\n", samples)
+		for _, k := range sorted {
+			fmt.Printf("\n  %s:\n", k)
+			for _, url := range transitionSamples[k] {
+				fmt.Printf("    %s\n", url)
+			}
+		}
+	}
+}
+
+func loadTests(db *gorm.DB, prowJobRunID uint) (map[string]*models.ProwJobRunTest, error) {
+	var tests []struct {
+		models.ProwJobRunTest
+		SuiteName string
+		TestName  string
+	}
+
+	err := db.
+		Table("prow_job_run_tests").
+		Select("prow_job_run_tests.*, suites.name AS suite_name, tests.name AS test_name").
+		Joins("LEFT JOIN suites ON suites.id = prow_job_run_tests.suite_id").
+		Joins("JOIN tests ON tests.id = prow_job_run_tests.test_id").
+		Where("prow_job_run_tests.prow_job_run_id = ?", prowJobRunID).
+		Find(&tests).Error
+	if err != nil {
+		return nil, err
+	}
+
+	result := make(map[string]*models.ProwJobRunTest, len(tests))
+	for i := range tests {
+		// Skip synthetic tests that sippy itself created — they weren't
+		// part of the original test results and would pollute reclassification.
+		if tests[i].SuiteName == "sippy" {
+			continue
+		}
+		key := fmt.Sprintf("%s.%s", tests[i].SuiteName, tests[i].TestName)
+		t := tests[i].ProwJobRunTest
+		result[key] = &t
+	}
+	return result, nil
+}