Skip to content

Commit fc0a23f

Browse files
committed
enhance Playwright Test Health Report: improve input descriptions, refactor classification logic, and add infrastructure error handling
1 parent b7346fa commit fc0a23f

7 files changed

Lines changed: 535 additions & 213 deletions

File tree

.github/actions/playwright-test-health-report/action.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,10 @@ inputs:
2727
required: false
2828
default: main
2929
lookback-days:
30-
description: Number of days to look back for workflow runs
30+
description: >-
31+
Number of days to look back for workflow runs. Use 1 for twice-daily
32+
snapshots and 7 for a weekly summary. The same classification logic
33+
applies regardless of window length.
3134
required: false
3235
default: '1'
3336
artifact-name-prefix:
@@ -44,9 +47,9 @@ inputs:
4447
required: false
4548
default: playwright-report
4649
top-n:
47-
description: Maximum number of tests to include in Slack report
50+
description: Maximum number of tests to include in the Slack report
4851
required: false
49-
default: '10'
52+
default: '15'
5053
report-title:
5154
description: Slack header title override
5255
required: false

.github/actions/playwright-test-health-report/create-playwright-test-health-report.mjs

Lines changed: 53 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { Octokit } from '@octokit/rest';
44
import { downloadArtifactZip, findFilesInZip } from './lib/artifact-download.mjs';
55
import { parsePlaywrightJsonReport } from './lib/parse-playwright-json.mjs';
66
import { createSlackBlocks, sendSlackBatched } from './lib/slack-test-health-blocks.mjs';
7+
import { partitionSummary } from './lib/classify-report-buckets.mjs';
78
import { summarizeTestHealth } from './lib/summarize-test-health.mjs';
89
import { getDateRange, getWorkflowRuns } from './lib/workflow-runs.mjs';
910

@@ -12,15 +13,24 @@ if (!githubToken) {
1213
throw new Error('Missing GITHUB_TOKEN env var');
1314
}
1415

16+
/**
 * Read a strictly positive integer from a (possibly missing) string.
 *
 * @param {string | undefined | null} value - Raw text, e.g. an environment variable.
 * @param {number} fallback - Returned when `value` is missing, blank, not
 *   numeric, or parses to zero or a negative number.
 * @returns {number} The parsed base-10 integer, or `fallback`.
 */
const parsePositiveInt = (value, fallback) => {
  const text = value?.trim();
  // A missing or blank value never reaches the parser; NaN fails the check below.
  const candidate = text ? Number.parseInt(text, 10) : Number.NaN;

  if (Number.isFinite(candidate) && candidate > 0) {
    return candidate;
  }
  return fallback;
};
24+
1525
const env = {
1626
OWNER: process.env.OWNER || 'MetaMask',
1727
REPOSITORY: process.env.REPOSITORY,
1828
WORKFLOW_IDS: process.env.WORKFLOW_IDS,
1929
BRANCH: process.env.BRANCH || 'main',
20-
LOOKBACK_DAYS: parseInt(process.env.LOOKBACK_DAYS ?? '1'),
30+
LOOKBACK_DAYS: parsePositiveInt(process.env.LOOKBACK_DAYS, 1),
2131
ARTIFACT_NAME_PREFIX: process.env.ARTIFACT_NAME_PREFIX || 'playwright-json-report',
2232
RESULTS_FILE_PATTERN: process.env.RESULTS_FILE_PATTERN || 'playwright-report',
23-
TOP_N: parseInt(process.env.TOP_N ?? '10'),
33+
TOP_N: parsePositiveInt(process.env.TOP_N, 15),
2434
REPORT_TITLE: process.env.REPORT_TITLE || 'Playwright Test Health Report',
2535
SLACK_WEBHOOK: process.env.SLACK_WEBHOOK || '',
2636
GITHUB_TOKEN: githubToken,
@@ -39,6 +49,14 @@ function getWorkflowIds() {
3949
.filter(Boolean);
4050
}
4151

52+
/**
 * Tell whether a finding's classification is one of the failure kinds
 * ('broken', 'flaky' or 'infra').
 *
 * @param {{ classification?: string }} finding - A parsed report finding.
 * @returns {boolean} True for the three failure classifications, false otherwise.
 */
function isTestFailureFinding(finding) {
  const failureKinds = ['broken', 'flaky', 'infra'];
  return failureKinds.includes(finding.classification);
}
55+
56+
/**
 * Count the distinct workflow runs that produced at least one failure finding.
 *
 * @param {Array<{ classification?: string, runId: unknown }>} findings - All collected findings.
 * @returns {number} Number of unique `runId` values among findings accepted
 *   by `isTestFailureFinding`.
 */
function countTestFailureRuns(findings) {
  const failingRunIds = new Set();

  for (const finding of findings) {
    if (isTestFailureFinding(finding)) {
      failingRunIds.add(finding.runId);
    }
  }

  return failingRunIds.size;
}
59+
4260
async function getMergedWorkflowRuns(github, dateRange) {
4361
const workflowIds = getWorkflowIds();
4462
const runs = [];
@@ -107,6 +125,7 @@ async function collectFindings(github, runs) {
107125
runId: run.id,
108126
runUrl: run.html_url || `https://github.com/${env.OWNER}/${env.REPOSITORY}/actions/runs/${run.id}`,
109127
date: run.created_at,
128+
artifactName: artifact.name,
110129
}),
111130
);
112131
} catch (error) {
@@ -135,35 +154,37 @@ async function sendSlackReport(summary, dateDisplay, metadata) {
135154
reportTitle: env.REPORT_TITLE,
136155
topN: env.TOP_N,
137156
workflowsScanned: metadata.workflowsScanned,
138-
failedRunCount: metadata.failedRunCount,
139157
workflowCount: metadata.workflowCount,
158+
testFailureRunCount: metadata.testFailureRunCount,
159+
otherFailedRunCount: metadata.otherFailedRunCount,
160+
lookbackDays: env.LOOKBACK_DAYS,
140161
});
141162
await sendSlackBatched(env.SLACK_WEBHOOK, blocks);
142163
console.log('✅ Report sent to Slack successfully');
143164
}
144165

145-
function logClassificationDiagnostics(summary) {
146-
const totalUniqueTests = summary.length;
147-
const currentlyBroken = summary.filter(test => test.brokenCount > 0);
148-
const currentlyFlaky = summary.filter(test => test.brokenCount === 0 && test.flakyCount > 0);
149-
const latestPassed = summary.filter(test => test.latestClassification === 'passed');
150-
const resolvedFromFailure = summary.filter(
151-
test =>
152-
test.latestClassification === 'passed' &&
153-
(test.historicalBrokenCount ?? 0) > 0,
154-
);
166+
function logClassificationDiagnostics(summary, metadata) {
167+
const { brokenItems, flakyItems, watchItems, infraItems } = partitionSummary(summary);
155168

156169
console.log('\n🧾 Classification diagnostics');
157-
console.log(` Unique tests observed: ${totalUniqueTests}`);
158-
console.log(` Latest state -> broken: ${currentlyBroken.length}, flaky: ${currentlyFlaky.length}, passed: ${latestPassed.length}`);
159-
console.log(` Resolved since earlier runs (had broken history, latest passed): ${resolvedFromFailure.length}`);
170+
console.log(` Lookback: ${env.LOOKBACK_DAYS} day(s)`);
171+
console.log(` Unique tests observed: ${summary.length}`);
172+
console.log(
173+
` Buckets -> broken: ${brokenItems.length}, flaky: ${flakyItems.length}, watch: ${watchItems.length}, infra: ${infraItems.length}`,
174+
);
175+
console.log(` CI runs: ${metadata.workflowCount} | Test-failure runs: ${metadata.testFailureRunCount}`);
176+
console.log(` Other CI failures: ${metadata.otherFailedRunCount}`);
160177

161-
if (resolvedFromFailure.length > 0) {
162-
const preview = resolvedFromFailure
178+
if (watchItems.length > 0) {
179+
const preview = watchItems
163180
.slice(0, 5)
164-
.map(test => `${test.name} (${test.projectName})`)
181+
.map(test => {
182+
const broken = test.historicalBrokenCount ?? 0;
183+
const flaky = test.historicalFlakyCount ?? 0;
184+
return `${test.name} (${test.projectName}, broken ${broken}, flaky ${flaky})`;
185+
})
165186
.join('; ');
166-
console.log(` Sample resolved (broken→passed): ${preview}`);
187+
console.log(` Sample watch: ${preview}`);
167188
}
168189
}
169190

@@ -173,6 +194,7 @@ async function main() {
173194
const workflowsScanned = getWorkflowIds();
174195

175196
console.log('🧪 Playwright Test Health Report\n');
197+
console.log(`Lookback: ${env.LOOKBACK_DAYS} day(s)`);
176198
console.log(`Time range: ${dateRange.from} to ${dateRange.to}`);
177199
console.log(`Workflows: ${workflowsScanned.join(', ')}\n`);
178200

@@ -192,11 +214,20 @@ async function main() {
192214
return;
193215
}
194216

217+
const testFailureRunCount = countTestFailureRuns(findings);
218+
const otherFailedRunCount = Math.max(0, failedRunCount - testFailureRunCount);
195219
const summary = summarizeTestHealth(findings);
196-
logClassificationDiagnostics(summary);
220+
221+
logClassificationDiagnostics(summary, {
222+
workflowCount: workflowRuns.length,
223+
testFailureRunCount,
224+
otherFailedRunCount,
225+
});
226+
197227
await sendSlackReport(summary, dateRange.display, {
198228
workflowCount: workflowRuns.length,
199-
failedRunCount,
229+
testFailureRunCount,
230+
otherFailedRunCount,
200231
workflowsScanned,
201232
});
202233
} catch (error) {
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
/**
 * Broken count for a summarized test: `historicalBrokenCount` when present,
 * otherwise `brokenCount`, otherwise 0.
 *
 * @param {{ historicalBrokenCount?: number, brokenCount?: number }} test - Summary entry.
 * @returns {number} The first non-nullish count, or 0.
 */
function historicalBroken(test) {
  const recorded = test.historicalBrokenCount;
  if (recorded !== undefined && recorded !== null) {
    return recorded;
  }
  return test.brokenCount ?? 0;
}
4+
5+
/**
 * Flaky count for a summarized test: `historicalFlakyCount` when present,
 * otherwise `flakyCount`, otherwise 0.
 *
 * @param {{ historicalFlakyCount?: number, flakyCount?: number }} test - Summary entry.
 * @returns {number} The first non-nullish count, or 0.
 */
function historicalFlaky(test) {
  const recorded = test.historicalFlakyCount;
  if (recorded !== undefined && recorded !== null) {
    return recorded;
  }
  return test.flakyCount ?? 0;
}
8+
9+
/**
 * Combined instability of a summarized test: its broken count plus its
 * flaky count, as resolved by `historicalBroken` and `historicalFlaky`.
 *
 * @param {object} test - Summary entry.
 * @returns {number} Sum of the two counts.
 */
export function instabilityScore(test) {
  const brokenRuns = historicalBroken(test);
  const flakyRuns = historicalFlaky(test);
  return brokenRuns + flakyRuns;
}
12+
13+
/**
 * Split a test-health summary into report buckets keyed on each test's
 * `latestClassification`, each bucket sorted most-severe first.
 *
 * - infra:  latest is 'infra', ordered by `infraCount` descending.
 * - broken: latest is 'broken', ordered by broken count descending.
 * - flaky:  latest is 'flaky', ordered by flaky count descending.
 * - watch:  latest is 'passed' but the test has a non-zero broken or flaky
 *           count; ordered by instability rate (score / totalRuns), ties
 *           broken by the raw instability score.
 *
 * The input array is not mutated: every bucket is a filtered copy.
 *
 * @param {Array<object>} summary - Summary entries.
 * @returns {{ brokenItems: object[], flakyItems: object[], watchItems: object[], infraItems: object[] }}
 */
export function partitionSummary(summary) {
  const withLatest = classification => summary.filter(test => test.latestClassification === classification);
  const descendingBy = metric => (a, b) => metric(b) - metric(a);

  // A missing or sub-1 totalRuns is treated as 1 so the rate never divides by zero.
  const instabilityRate = test => instabilityScore(test) / Math.max(test.totalRuns ?? 1, 1);

  const infraItems = withLatest('infra').sort(descendingBy(test => test.infraCount ?? 0));
  const brokenItems = withLatest('broken').sort(descendingBy(historicalBroken));
  const flakyItems = withLatest('flaky').sort(descendingBy(historicalFlaky));

  const watchItems = withLatest('passed')
    .filter(test => historicalBroken(test) > 0 || historicalFlaky(test) > 0)
    .sort((a, b) => {
      const rateA = instabilityRate(a);
      const rateB = instabilityRate(b);
      return rateA === rateB ? instabilityScore(b) - instabilityScore(a) : rateB - rateA;
    });

  return { brokenItems, flakyItems, watchItems, infraItems };
}
43+
44+
/**
 * Decide how many entries of each bucket fit into a report limited to
 * `topN` rows.
 *
 * Initial shares: broken gets ceil(40% of topN) (at least 2 when any exist),
 * flaky ceil(25%) (at least 2), infra ceil(10%) (at least 1), each capped by
 * the number of items actually available. Watch receives whatever is left.
 * Unused slots are then handed to the buckets with the most unplaced items.
 *
 * For very small `topN` the floor guarantees alone can exceed `topN`; in
 * that case watch gets 0 slots (never a negative number) and the floors are
 * still honored.
 *
 * @param {number} topN - Maximum number of rows in the report.
 * @param {{ broken?: number, flaky?: number, watch?: number, infra?: number }} counts
 *   Number of available items per bucket; missing keys default to 0.
 * @returns {{ maxBroken: number, maxFlaky: number, maxWatch: number, maxInfra: number }}
 *   Non-negative slot counts per bucket.
 */
export function allocateBucketSlots(topN, counts) {
  const { broken = 0, flaky = 0, watch = 0, infra = 0 } = counts;

  const maxBroken = Math.min(broken, Math.max(Math.ceil(topN * 0.4), broken > 0 ? 2 : 0));
  const maxFlaky = Math.min(flaky, Math.max(Math.ceil(topN * 0.25), flaky > 0 ? 2 : 0));
  const maxInfra = Math.min(infra, Math.max(Math.ceil(topN * 0.1), infra > 0 ? 1 : 0));
  // The floors above may already exceed topN; clamp so watch never goes
  // negative (a negative limit would be misread by callers, e.g. in slice()).
  const maxWatch = Math.max(0, Math.min(watch, topN - maxBroken - maxFlaky - maxInfra));

  let remaining = topN - (maxBroken + maxFlaky + maxWatch + maxInfra);

  // Buckets with the most unplaced items get first pick of leftover slots.
  const buckets = [
    { key: 'watch', available: watch - maxWatch, max: maxWatch },
    { key: 'broken', available: broken - maxBroken, max: maxBroken },
    { key: 'flaky', available: flaky - maxFlaky, max: maxFlaky },
    { key: 'infra', available: infra - maxInfra, max: maxInfra },
  ].sort((a, b) => b.available - a.available);

  for (const bucket of buckets) {
    if (remaining <= 0) {
      break;
    }
    const extra = Math.min(remaining, bucket.available);
    bucket.max += extra;
    remaining -= extra;
  }

  const byKey = Object.fromEntries(buckets.map(bucket => [bucket.key, bucket.max]));

  return {
    maxBroken: byKey.broken,
    maxFlaky: byKey.flaky,
    maxWatch: byKey.watch,
    maxInfra: byKey.infra,
  };
}
79+
80+
/**
 * Render an occurrence count, as a fraction of runs when the run total is known.
 *
 * @param {number} count - Number of occurrences.
 * @param {number | undefined} totalRuns - Total runs observed.
 * @returns {string} `"<count>/<totalRuns> runs"` when `totalRuns` is a
 *   positive number, otherwise `"<count>x"`.
 */
export function formatRunRate(count, totalRuns) {
  const hasRunTotal = Boolean(totalRuns) && totalRuns > 0;
  return hasRunTotal ? `${count}/${totalRuns} runs` : `${count}x`;
}
86+
87+
/**
 * Describe a test's broken/flaky history as a short comma-separated string,
 * e.g. `"broken 2/10 runs, flaky 1/10 runs"`. Kinds with a zero count are
 * omitted, so the result is an empty string when both counts are zero.
 *
 * @param {object} test - Summary entry; `totalRuns` is passed to `formatRunRate`.
 * @returns {string} The formatted history.
 */
export function formatWatchHistory(test) {
  const history = [
    ['broken', historicalBroken(test)],
    ['flaky', historicalFlaky(test)],
  ];

  return history
    .filter(([, occurrences]) => occurrences > 0)
    .map(([label, occurrences]) => `${label} ${formatRunRate(occurrences, test.totalRuns)}`)
    .join(', ');
}

.github/actions/playwright-test-health-report/lib/parse-playwright-json.mjs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ function extractFirstFailureError(test) {
3939
return firstFailure?.error?.message ?? firstFailure?.errors?.[0]?.message ?? 'No error details';
4040
}
4141

42+
/**
 * Pick a human-readable description from a top-level report error.
 *
 * Tries `message`, then `stack`, then `value` (which Playwright sets when
 * something other than an Error was thrown), skipping fields that are
 * missing or empty so a blank message does not hide a usable stack or value.
 *
 * @param {{ message?: string, stack?: string, value?: string } | null | undefined} error
 *   One entry of the report's `errors` array.
 * @returns {string} The first non-empty field, or 'Unknown setup error'.
 */
function extractInfraError(error) {
  return error?.message || error?.stack || error?.value || 'Unknown setup error';
}
45+
4246
function walkSuites(suites, currentFile, findings, metadata) {
4347
for (const suite of suites ?? []) {
4448
const suiteFile = suite.file || currentFile;
@@ -67,6 +71,7 @@ function walkSuites(suites, currentFile, findings, metadata) {
6771
runId: metadata.runId,
6872
runUrl: metadata.runUrl,
6973
date: new Date(metadata.date),
74+
artifactName: metadata.artifactName,
7075
});
7176
}
7277
}
@@ -75,8 +80,38 @@ function walkSuites(suites, currentFile, findings, metadata) {
7580
}
7681
}
7782

83+
/**
 * Turn top-level report errors into 'infra' findings.
 *
 * Only acts when the report has at least one entry in `errors` and no
 * suites at all; otherwise it leaves `findings` untouched. One finding is
 * appended per error, keyed by artifact name and error index.
 *
 * @param {object} report - Parsed Playwright JSON report.
 * @param {{ runId: unknown, runUrl: string, date: string, artifactName?: string }} metadata
 *   Run/artifact details copied onto each finding.
 * @param {Array<object>} findings - Output array; mutated in place.
 * @returns {void}
 */
function parseInfraErrors(report, metadata, findings) {
  const reportErrors = Array.isArray(report?.errors) ? report.errors : [];
  const reportSuites = report?.suites ?? [];

  const hasOnlyErrors = reportErrors.length > 0 && !(reportSuites.length > 0);
  if (!hasOnlyErrors) {
    return;
  }

  const artifactLabel = metadata.artifactName ?? 'unknown-artifact';

  reportErrors.forEach((error, index) => {
    findings.push({
      key: `infra::${artifactLabel}::${index}`,
      name: `Setup failure (${artifactLabel})`,
      path: error?.location?.file ?? 'unknown-file',
      projectName: 'infra',
      classification: 'infra',
      retries: 0,
      error: extractInfraError(error),
      runId: metadata.runId,
      runUrl: metadata.runUrl,
      date: new Date(metadata.date),
      artifactName: metadata.artifactName,
    });
  });
}
111+
78112
/**
 * Collect findings from one parsed Playwright JSON report.
 *
 * Runs `walkSuites` over the report's suites first, then `parseInfraErrors`
 * over the same report; both append to the same array.
 *
 * @param {object} report - Parsed Playwright JSON report.
 * @param {object} metadata - Run/artifact details forwarded to both passes.
 * @returns {Array<object>} The findings gathered by both passes.
 */
export function parsePlaywrightJsonReport(report, metadata) {
  const collected = [];
  const rootSuites = report?.suites ?? [];

  walkSuites(rootSuites, undefined, collected, metadata);
  parseInfraErrors(report, metadata, collected);

  return collected;
}

0 commit comments

Comments
 (0)