|
| 1 | +/** |
| 2 | + * Scoring primitives for the evaluation engine. |
| 3 | + * |
| 4 | + * Scoring model: |
| 5 | + * score ∈ [0, 1] — continuous quality signal |
| 6 | + * verdict — binary classification derived from score via PASS_THRESHOLD |
| 7 | + * |
| 8 | + * score >= PASS_THRESHOLD → 'pass' |
| 9 | + * score < PASS_THRESHOLD → 'fail' |
| 10 | + * (infrastructure skip) → 'skip' |
| 11 | + * |
| 12 | + * To change the pass/fail boundary, update PASS_THRESHOLD. |
| 13 | + * Score-based verdicts flow through scoreToVerdict(), which returns only 'pass' or 'fail' (never 'skip'). |
| 14 | + */ |
| 15 | + |
1 | 16 | import type { EvaluationVerdict } from '../types.js'; |
2 | 17 | import type { EvaluationScore } from './types.js'; |
3 | 18 |
|
| 19 | +/** Minimum score (inclusive) for a 'pass' verdict; any score below this is 'fail'. */ |
| 20 | +export const PASS_THRESHOLD = 0.8; |
| 21 | + |
4 | 22 | export function scoreToVerdict(score: number): EvaluationVerdict { |
5 | | - if (score >= 0.8) { |
6 | | - return 'pass'; |
7 | | - } |
8 | | - return 'fail'; |
| 23 | + return score >= PASS_THRESHOLD ? 'pass' : 'fail'; |
9 | 24 | } |
10 | 25 |
|
11 | 26 | export function clampScore(value: number): number { |
@@ -81,18 +96,22 @@ export function deepEqual(a: unknown, b: unknown): boolean { |
81 | 96 | return aKeys.every((key) => Object.hasOwn(bObj, key) && deepEqual(aObj[key], bObj[key])); |
82 | 97 | } |
83 | 98 |
|
| 99 | +/** Verdict inversion map: pass↔fail, skip stays skip. */ |
| 100 | +const NEGATED_VERDICT: Record<EvaluationVerdict, EvaluationVerdict> = { |
| 101 | + pass: 'fail', |
| 102 | + fail: 'pass', |
| 103 | + skip: 'skip', |
| 104 | +}; |
| 105 | + |
84 | 106 | /** |
85 | 107 | * Negate an evaluation score: inverts score (1 - score), swaps pass/fail verdict |
86 | 108 | * (skip stays skip), and flips passed on each assertion. |
87 | 109 | */ |
88 | 110 | export function negateScore(score: EvaluationScore): EvaluationScore { |
89 | | - const negatedScore = clampScore(1 - score.score); |
90 | | - const negatedVerdict: EvaluationVerdict = |
91 | | - score.verdict === 'pass' ? 'fail' : score.verdict === 'fail' ? 'pass' : 'skip'; |
92 | 111 | return { |
93 | 112 | ...score, |
94 | | - score: negatedScore, |
95 | | - verdict: negatedVerdict, |
| 113 | + score: clampScore(1 - score.score), |
| 114 | + verdict: NEGATED_VERDICT[score.verdict], |
96 | 115 | assertions: score.assertions.map((a) => ({ |
97 | 116 | ...a, |
98 | 117 | passed: !a.passed, |
|
0 commit comments