Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
571 changes: 571 additions & 0 deletions docs/plans/trt-2633-triage-symptoms.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,9 @@ func buildTestDetailsQuery(
withClause, commonParams := buildCRQueryCTEs(client.Dataset, junitTable, jobNameQueryPortion, jobRunAnnotationToIgnore, c.AdvancedOption.KeyTestNames)

jobLabelsJoin := fmt.Sprintf(`LEFT JOIN (
SELECT prowjob_build_id, STRING_AGG(DISTINCT label, ',' ORDER BY label) AS job_labels
SELECT prowjob_build_id,
STRING_AGG(DISTINCT label, ',' ORDER BY label) AS job_labels,
STRING_AGG(DISTINCT CASE WHEN symptom_id != '' THEN symptom_id END, ',') AS job_symptoms
FROM %s.job_labels
WHERE prowjob_start >= DATETIME(@From)
AND prowjob_start < DATETIME(@To)
Expand Down Expand Up @@ -597,6 +599,7 @@ func buildTestDetailsQuery(
SUM(adjusted_success_val) AS success_count,
SUM(adjusted_flake_count) AS flake_count,
ANY_VALUE(agg_labels.job_labels) AS job_labels,
ANY_VALUE(agg_labels.job_symptoms) AS job_symptoms,
ANY_VALUE(agg_failures.job_run_test_failure_count) AS job_run_test_failure_count,
COALESCE(NULLIF(ANY_VALUE(lifecycle), ''), 'blocking') AS lifecycle,
FROM deduped_testcases junit
Expand Down Expand Up @@ -1094,6 +1097,10 @@ func deserializeRowToJobRunTestReportStatus(row []bigquery.Value, schema bigquer
if row[i] != nil {
cts.JobLabels = strings.Split(row[i].(string), ",")
}
case col == "job_symptoms":
if row[i] != nil {
cts.JobSymptoms = strings.Split(row[i].(string), ",")
}
case col == "job_run_test_failure_count":
if row[i] != nil {
cts.TestFailures = int(row[i].(int64))
Expand Down
62 changes: 62 additions & 0 deletions pkg/api/componentreadiness/regressiontracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/openshift/sippy/pkg/db"
"github.com/openshift/sippy/pkg/db/models"
log "github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/util/sets"
)

const (
Expand All @@ -41,6 +42,8 @@ type RegressionStore interface {
UpsertRegressionView(regressionID uint, viewName string) error
// DeactivateRolledOffViews sets active=false on regression_views rows for regressions that have rolled off a view.
DeactivateRolledOffViews(regressionIDs []uint, activeViewMap map[uint][]string) error
// SyncTriageSymptoms upserts symptom associations for triages based on regression job run data.
SyncTriageSymptoms(regressions []*models.TestRegression) error
}

type PostgresRegressionStore struct {
Expand Down Expand Up @@ -109,6 +112,10 @@ func (prs *PostgresRegressionStore) MergeJobRuns(regressionID uint, jobRuns []mo
jobRuns[i].RegressionID = regressionID
res := prs.dbc.DB.
Where("regression_id = ? AND prow_job_run_id = ?", regressionID, jobRuns[i].ProwJobRunID).
Assign(models.RegressionJobRun{
JobLabels: jobRuns[i].JobLabels,
JobSymptoms: jobRuns[i].JobSymptoms,
}).
FirstOrCreate(&jobRuns[i])
Comment thread
smg247 marked this conversation as resolved.
if res.Error != nil {
return fmt.Errorf("error merging job run %s for regression %d: %w",
Expand All @@ -118,6 +125,60 @@ func (prs *PostgresRegressionStore) MergeJobRuns(regressionID uint, jobRuns []mo
return nil
}

// SyncTriageSymptoms upserts triage_symptoms junction rows by doing a full recount of
// symptoms across each regression's job runs. The resulting job_run_count is replaced
// (not incremented), making the operation idempotent and safe to call on every loader run.
func (prs *PostgresRegressionStore) SyncTriageSymptoms(regressions []*models.TestRegression) error {
if len(regressions) == 0 {
return nil
}

regIDs := make([]uint, len(regressions))
for i, r := range regressions {
regIDs[i] = r.ID
}

var regs []models.TestRegression
res := prs.dbc.DB.
Preload("Triages").
Preload("JobRuns").
Where("id IN ?", regIDs).
Find(&regs)
if res.Error != nil {
return fmt.Errorf("error loading regressions for symptom sync: %w", res.Error)
}

for _, reg := range regs {
if len(reg.Triages) == 0 {
continue
}
symptomCounts := map[string]int{}
for _, jr := range reg.JobRuns {
seen := sets.New[string]()
for _, symptom := range jr.JobSymptoms {
if symptom != "" && !seen.Has(symptom) {
seen.Insert(symptom)
symptomCounts[symptom]++
}
}
}
for _, triage := range reg.Triages {
for symptomID, count := range symptomCounts {
if err := prs.dbc.DB.Exec(
`INSERT INTO triage_symptoms (triage_id, symptom_id, regression_id, job_run_count)
VALUES (?, ?, ?, ?)
ON CONFLICT (triage_id, symptom_id, regression_id) DO UPDATE
SET job_run_count = EXCLUDED.job_run_count`,
triage.ID, symptomID, reg.ID, count).Error; err != nil {
return fmt.Errorf("error syncing symptom %s to triage %d regression %d: %w",
symptomID, triage.ID, reg.ID, err)
}
}
}
Comment thread
smg247 marked this conversation as resolved.
}
return nil
}

func (prs *PostgresRegressionStore) UpsertRegressionView(regressionID uint, viewName string) error {
res := prs.dbc.DB.Exec(
`INSERT INTO regression_views (test_regression_id, view_name, active, opened_at)
Expand Down Expand Up @@ -359,6 +420,7 @@ func FailedJobRunsFromTestDetails(report testdetails.Report) []models.Regression
StartTime: run.StartTime.In(time.UTC),
TestFailures: run.TestFailures,
JobLabels: pq.StringArray(run.JobLabels),
JobSymptoms: pq.StringArray(run.JobSymptoms),
}
jobRuns = append(jobRuns, jobRun)
}
Expand Down
86 changes: 86 additions & 0 deletions pkg/api/componentreadiness/regressiontracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,92 @@ func TestFailedJobRunsFromTestDetails(t *testing.T) {
},
expectedCount: 0,
},
{
name: "preserves JobSymptoms",
report: testdetails.Report{
Analyses: []testdetails.Analysis{
{
JobStats: []testdetails.JobStats{
{
SampleJobName: "job-a",
SampleJobRunStats: []testdetails.JobRunStats{
{
JobRunID: "run-1",
StartTime: startTime1,
TestStats: crtest.Stats{FailureCount: 1},
JobSymptoms: []string{"SymA", "SymB"},
},
},
},
},
},
},
},
expectedCount: 1,
expectedRunIDs: []string{"run-1"},
checkFunc: func(t *testing.T, runs []models.RegressionJobRun) {
assert.Equal(t, []string{"SymA", "SymB"}, []string(runs[0].JobSymptoms))
},
},
{
name: "empty JobSymptoms results in nil",
report: testdetails.Report{
Analyses: []testdetails.Analysis{
{
JobStats: []testdetails.JobStats{
{
SampleJobName: "job-a",
SampleJobRunStats: []testdetails.JobRunStats{
{
JobRunID: "run-1",
StartTime: startTime1,
TestStats: crtest.Stats{FailureCount: 1},
},
},
},
},
},
},
},
expectedCount: 1,
expectedRunIDs: []string{"run-1"},
checkFunc: func(t *testing.T, runs []models.RegressionJobRun) {
assert.Nil(t, runs[0].JobSymptoms)
},
},
{
name: "mixed runs: only symptomatic run carries symptoms",
report: testdetails.Report{
Analyses: []testdetails.Analysis{
{
JobStats: []testdetails.JobStats{
{
SampleJobName: "job-a",
SampleJobRunStats: []testdetails.JobRunStats{
{
JobRunID: "run-1",
StartTime: startTime1,
TestStats: crtest.Stats{FailureCount: 1},
JobSymptoms: []string{"SymA"},
},
{
JobRunID: "run-2",
StartTime: startTime2,
TestStats: crtest.Stats{FailureCount: 1},
},
},
},
},
},
},
},
expectedCount: 2,
expectedRunIDs: []string{"run-1", "run-2"},
checkFunc: func(t *testing.T, runs []models.RegressionJobRun) {
assert.Equal(t, []string{"SymA"}, []string(runs[0].JobSymptoms))
assert.Nil(t, runs[1].JobSymptoms)
},
},
}

for _, tt := range tests {
Expand Down
1 change: 1 addition & 0 deletions pkg/api/componentreadiness/test_details.go
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,7 @@ func (c *ComponentReportGenerator) getJobRunStats(stats crstatus.TestJobRunRows)
JobRunID: stats.ProwJobRunID,
StartTime: stats.StartTime,
JobLabels: stats.JobLabels,
JobSymptoms: stats.JobSymptoms,
TestFailures: stats.TestFailures,
}
return jobRunStats
Expand Down
86 changes: 86 additions & 0 deletions pkg/api/componentreadiness/triage.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"net/url"
"path"
"slices"
"sort"
"strings"
"time"

Expand All @@ -21,6 +22,7 @@ import (
v1 "github.com/openshift/sippy/pkg/apis/sippy/v1"
"github.com/openshift/sippy/pkg/db"
"github.com/openshift/sippy/pkg/db/models"
"github.com/openshift/sippy/pkg/db/models/jobrunscan"
"github.com/openshift/sippy/pkg/db/query"
log "github.com/sirupsen/logrus"
"gorm.io/gorm"
Expand Down Expand Up @@ -860,6 +862,90 @@ func generateTestDetailsURLFromRegression(regression *models.TestRegression, vie
)
}

// TriageSymptomSummary represents a symptom found across a triage's regressions,
// with counts and percentages for the triage detail view.
// Instances are built by GetTriageSymptomSummaries from the triage_symptoms table.
type TriageSymptomSummary struct {
// Symptom identifies the symptom; ID and Summary are copied from the matching
// jobrunscan.Symptom record.
Symptom struct {
ID string `json:"id"`
Summary string `json:"summary"`
} `json:"symptom"`
// RegressionCount is the number of distinct regressions in the triage that have this symptom.
RegressionCount int `json:"regression_count"`
// TotalCount is the caller-supplied total number of regressions (the Percentage denominator).
TotalCount int `json:"total_count"`
// Percentage is RegressionCount / TotalCount * 100.
Percentage float64 `json:"percentage"`
// JobRunCount is the sum of job_run_count over the triage's rows for this symptom.
JobRunCount int `json:"job_run_count"`
// RegressionIDs lists the regression IDs of the triage's rows for this symptom.
RegressionIDs []uint `json:"regression_ids"`
}

// GetTriageSymptomSummaries queries the triage_symptoms junction table to build
// per-symptom summaries for a triage detail response.
//
// totalRegressions is the denominator for each summary's Percentage and is echoed back
// as TotalCount. When it is not positive, or when the triage has no triage_symptoms
// rows, the function returns (nil, nil).
//
// Symptoms whose ID cannot be found in the symptoms table are omitted from the result.
// Summaries are ordered by RegressionCount descending, with ties broken by symptom ID
// ascending, and each summary's RegressionIDs are in ascending order, so repeated calls
// over unchanged data return identical output.
func GetTriageSymptomSummaries(dbc *db.DB, triageID uint, totalRegressions int) ([]TriageSymptomSummary, error) {
	// Guard the division below; a non-positive total cannot yield a meaningful percentage.
	if totalRegressions <= 0 {
		return nil, nil
	}

	// Aggregate per symptom: how many distinct regressions and how many job runs.
	type symptomCount struct {
		SymptomID       string `gorm:"column:symptom_id"`
		RegressionCount int    `gorm:"column:regression_count"`
		JobRunCount     int    `gorm:"column:job_run_count"`
	}
	var counts []symptomCount
	if err := dbc.DB.Model(&models.TriageSymptom{}).
		Select("symptom_id, COUNT(DISTINCT regression_id) AS regression_count, SUM(job_run_count) AS job_run_count").
		Where("triage_id = ?", triageID).
		Group("symptom_id").
		Order("regression_count DESC").
		Scan(&counts).Error; err != nil {
		return nil, fmt.Errorf("error querying triage symptom counts: %w", err)
	}
	if len(counts) == 0 {
		return nil, nil
	}

	// Resolve symptom IDs to their records so the summary text can be included.
	symptomIDs := make([]string, len(counts))
	for i, c := range counts {
		symptomIDs[i] = c.SymptomID
	}
	var symptoms []jobrunscan.Symptom
	if err := dbc.DB.Where("id IN ?", symptomIDs).Find(&symptoms).Error; err != nil {
		return nil, fmt.Errorf("error loading symptoms: %w", err)
	}
	symptomMap := make(map[string]jobrunscan.Symptom, len(symptoms))
	for _, s := range symptoms {
		symptomMap[s.ID] = s
	}

	// Collect the regression IDs behind each symptom; the explicit ORDER BY keeps the
	// per-symptom lists in a defined (ascending) order.
	var tsRows []models.TriageSymptom
	if err := dbc.DB.Where("triage_id = ?", triageID).Order("regression_id").Find(&tsRows).Error; err != nil {
		return nil, fmt.Errorf("error loading triage symptom regressions: %w", err)
	}
	regIDsBySymptom := make(map[string][]uint)
	for _, row := range tsRows {
		regIDsBySymptom[row.SymptomID] = append(regIDsBySymptom[row.SymptomID], row.RegressionID)
	}

	var summaries []TriageSymptomSummary
	for _, c := range counts {
		s, ok := symptomMap[c.SymptomID]
		if !ok {
			// The junction row references a symptom that no longer resolves; skip it.
			continue
		}
		summary := TriageSymptomSummary{
			RegressionCount: c.RegressionCount,
			TotalCount:      totalRegressions,
			Percentage:      float64(c.RegressionCount) / float64(totalRegressions) * 100,
			JobRunCount:     c.JobRunCount,
			RegressionIDs:   regIDsBySymptom[c.SymptomID],
		}
		summary.Symptom.ID = s.ID
		summary.Symptom.Summary = s.Summary
		summaries = append(summaries, summary)
	}

	// sort.Slice is not stable, so break ties on the unique symptom ID to make the
	// ordering total and the response deterministic.
	sort.Slice(summaries, func(i, j int) bool {
		if summaries[i].RegressionCount != summaries[j].RegressionCount {
			return summaries[i].RegressionCount > summaries[j].RegressionCount
		}
		return summaries[i].Symptom.ID < summaries[j].Symptom.ID
	})
	return summaries, nil
}

// GetViewsForTriage returns the names of all active views associated with the triage's regressions.
func GetViewsForTriage(triage *models.Triage) []string {
if triage == nil {
Expand Down
1 change: 1 addition & 0 deletions pkg/apis/api/componentreport/crstatus/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ type TestJobRunRows struct {
JiraComponent string `bigquery:"jira_component"`
JiraComponentID *big.Rat `bigquery:"jira_component_id"`
JobLabels []string `bigquery:"-" json:"job_labels,omitempty"`
JobSymptoms []string `bigquery:"-" json:"job_symptoms,omitempty"`
TestFailures int `bigquery:"-" json:"test_failures"`
Lifecycle string `bigquery:"lifecycle"`
}
Expand Down
1 change: 1 addition & 0 deletions pkg/apis/api/componentreport/testdetails/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,5 +114,6 @@ type JobRunStats struct {
// there are cases multiple junits are generated for the same test.
TestStats crtest.Stats `json:"test_stats"`
JobLabels []string `json:"job_labels,omitempty"`
JobSymptoms []string `json:"job_symptoms,omitempty"`
TestFailures int `json:"test_failures"`
}
17 changes: 15 additions & 2 deletions pkg/dataloader/regressioncacheloader/regressioncacheloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,7 @@ func (l *RegressionCacheLoader) Load() {
}
}

// Close regressions per-release (only if no errors for that release),
// then resolve triages globally once all releases are processed.
// Close regressions per-release (only if no errors for that release)
anyErrors := false
for release, result := range releaseResults {
if result.hadErrors {
Expand All @@ -178,6 +177,20 @@ func (l *RegressionCacheLoader) Load() {
}
}

var allActiveRegs []*models.TestRegression
for _, result := range releaseResults {
for _, id := range result.activeIDs.UnsortedList() {
allActiveRegs = append(allActiveRegs, &models.TestRegression{ID: id})
}
}
if len(allActiveRegs) > 0 {
l.logger.Infof("syncing triage symptoms for %d active regressions", len(allActiveRegs))
if err := l.regressionStore.SyncTriageSymptoms(allActiveRegs); err != nil {
l.logger.WithError(err).Error("error syncing triage symptoms")
l.errs = append(l.errs, err)
}
}

// ResolveTriages is a global operation (not per-release), so we only run it
// once after all releases have been processed, and only if no releases had errors.
if !anyErrors {
Expand Down
Loading