diff --git a/docs/user-guide/advanced-forecasting-guide.md b/docs/user-guide/advanced-forecasting-guide.md new file mode 100644 index 00000000..41a67ba6 --- /dev/null +++ b/docs/user-guide/advanced-forecasting-guide.md @@ -0,0 +1,179 @@ +# Advanced Forecasting Guide + +This guide explains the interactive controls landed by **PRP-37 — Forecast +Intelligence C** (the operator-facing surface for the V2 feature contract and +the model zoo introduced by PRP-35 and PRP-36). It is RAG-indexable: ask the +Chat agent any question about model families, feature packs, horizon buckets, +or champion/challenger workflows and it will cite this document. + +## Model families + +ForecastLabAI groups its models into three families. The Family is a +property of the model code, not a label you pick — it is what the segmented +**Family** Tabs control on `/visualize/forecast` and `/visualize/backtest` +filter the Model Select against. + +| Family | Members | When it shines | +|----------|----------------------------------------------------------------------------------------------------|----------------| +| Baseline | `naive`, `seasonal_naive`, `moving_average`, `weighted_moving_average`, `seasonal_average` | Sanity check, target-only history, very short windows | +| Tree | `regression` (HistGBR), `lightgbm`, `xgboost`, `random_forest` | Mid-to-long horizons with rich feature signal | +| Additive | `prophet_like` (Ridge additive), `trend_regression_baseline` | Strong yearly seasonality, interpretable coefficients | + +Baselines do **not consume features**. Tree and additive families do — and only +those families surface the V2 feature-frame option. + +## Feature frame: V1 vs V2 + +The **Feature frame** Select is the second control in the Train-a-new-model +row. It chooses how the model sees the past. + +- **V1 — target-only.** The classic lags + same-DOW mean. Every model in + every family can train on V1. +- **V2 — feature-aware.** The PRP-35 contract. 
Adds eleven optional + *feature packs* (see below). Available for tree and additive families only; + baselines reject it with a tooltip explanation. + +The backend default is V1; the UI only sends `feature_frame_version=2` when +the operator explicitly picks V2. A V1 train with `feature_groups` is +rejected by the backend with a 422. + +## Feature packs (V2 only) + +When V2 is picked, the **Feature packs** toggle row appears. Each pack is a +named subset of the V2 feature columns: + +| Pack ID | What it carries | +|----------------------|------------------| +| `target_history` | Lag features and same-day-of-week mean | +| `rolling` | Rolling means over multiple windows | +| `trend` | 30-day and 90-day trend | +| `calendar` | Day-of-week, month, sin/cos calendar signals | +| `price_promo` | Price level and promotion indicators | +| `inventory` | On-hand stock and stockout flags | +| `lifecycle` | Product lifecycle stage | +| `replenishment` | Inbound stock cadence | +| `returns` | Return intensity | +| `exogenous_weather` | Weather signals (when seeded) | +| `exogenous_macro` | Macro signals (when seeded) | + +Use the **Use defaults** button to load the six packs the V2 contract uses by +default (`target_history`, `calendar`, `rolling`, `trend`, `price_promo`, +`lifecycle`). The **Clear** button removes every pack; submitting with an +empty selection forwards `feature_groups: undefined` to the backend (treated +as the default set on the server). + +A pack may carry a per-row safety chip (`Safe`, `Conditionally safe`, +`Requires supplied data`). The chip is rendered when the server returns a +`feature_safety_classes` map for the run. A `Requires supplied data` chip +means the pack reads a column the production pipeline must supply (e.g. +inventory or replenishment) — promote a run that uses it only if your +production pipeline can keep that column populated. 
+ +## Per-horizon-bucket metrics + +The backtest visualization now surfaces a **Per-horizon-bucket** card under +the existing fold-metric chart, rendered only when the response carries +`bucketed_aggregated_metrics`. It splits the forecast error by horizon +distance: + +| Bucket id | Horizon range | +|-------------|----------------| +| `h_1_7` | Days 1-7 | +| `h_8_14` | Days 8-14 | +| `h_15_28` | Days 15-28 | +| `h_29_plus` | Days 29+ | + +Empty buckets are dropped from the response. Unknown bucket ids (a +forward-compatible bucket from a newer backend) are appended to the end of the +table alphabetically. + +Pick the displayed metric (MAE / sMAPE / WAPE / Bias / RMSE) with the +Select to the right of the card title. **RMSE** is a key inside the +`aggregated_metrics` dict — surfaced as a fourth tile on the Aggregated +Metrics card when the backend emits it. + +## Baseline vs feature-aware comparison + +When the backtest response carries `baseline_results` (a non-empty list of +ModelBacktestResult rows), a **Baseline vs feature-aware** table renders +below the bucket card. Every baseline runs on the **same folds, identical +splits** as the main model — so MAE / sMAPE / WAPE / RMSE comparisons are +apples-to-apples. Lower wins. + +## Champion compatibility + +Two runs are **comparable** for champion/challenger evaluation iff +ALL three hold: + +1. Same grain (`store_id`, `product_id`). +2. Overlapping data windows. +3. Same `feature_frame_version` (legacy runs without the field default to V1). + +The Compare runs page renders a **Champion compatibility** badge that +surfaces the verdict, and the metrics diff table adds a **Feature frame +version** row when at least one of the two runs declares it. 
+ +## Stale aliases + +The Control Center page now surfaces stale aliases as their own card with a +**Reason** chip per row: + +| Reason chip | What it means | +|-----------------------------------|-----------------------------------------------------------------------| +| `newer success run` | A newer successful run for this grain has landed. | +| `artifact not verified` | The alias's run artifact failed SHA-256 verification. | +| `run not success` | The alias is pointing at a non-success run (failed or archived). | +| `V mismatch` | The newest comparable run uses a different `feature_frame_version`. | + +Alongside each chip, the row shows the **Alias V** and **Comparable V** +columns so the operator can read the version drift at a glance. + +## Safer Promote dialog + +The Control Center's **Promote** action now opens a confirmation dialog that +gates the promotion on three conditions: + +1. **Artifact verifies.** The dialog auto-fetches the candidate run's + SHA-256 verification result. A failure renders a red callout and the + Promote button stays disabled — no operator override. +2. **Worse-WAPE acknowledgement.** When the candidate's latest WAPE is + HIGHER than the current champion's, a red callout appears with the + exact deltas and a checkbox the operator must explicitly tick. +3. **Feature-frame-version mismatch acknowledgement.** When the candidate's + `feature_frame_version` differs from the champion's, an amber callout + warns that the alias's feature contract will silently change. A + checkbox the operator must tick releases the Promote button. + +The alias name input remains; the dialog defaults the alias to +`production`. Cancel preserves no state — both acknowledgements reset. + +## Batch sweep presets + +The Batch Runner page now hosts a **Sweep preset** Select with five built-in +presets. Picking a preset overwrites the matrix; the matrix can still be +hand-edited afterward. 
+ +| Preset | What it loads | +|---------------------------------|---------------| +| Quick baseline sweep | All five baseline models on V1 | +| Feature-aware comparison | Regression / LightGBM / XGBoost / RandomForest / Prophet-like on V2 with default packs | +| Champion/challenger refresh | Champion + strongest challenger from the registry (supplied by the page) | +| Stockout-sensitive products | Regression on V2 with the target-history + calendar + inventory + replenishment + returns packs | +| High-WAPE recovery | Regression / LightGBM / XGBoost / RandomForest / Prophet-like on V2 with default packs (same rows as Feature-aware comparison) | + +Below the preset Select is the **Sweep matrix** picker — a checkbox grid of +model × V1/V2. Toggling a V2 cell adds a per-row feature-packs editor below +the grid. The matrix caps at 24 rows by default (configurable on the +picker). + +## Anti-patterns + +- **Do not** pick V2 for a baseline model — V2 has no effect on a model that + ignores features. The UI disables this combination with a tooltip. +- **Do not** promote a worse run without checking the explicit + acknowledgement checkbox. The gate exists for a reason. +- **Do not** promote across a feature-frame-version boundary without + verifying your production pipeline supplies the columns the new V demands. +- **Do not** read RMSE from `aggregated_metrics["rmse"]` for old jobs — + RMSE landed in PRP-36, and pre-PRP-36 backtest jobs in the registry will + not carry it. The UI omits the RMSE tile in that case. diff --git a/docs/user-guide/dashboard-guide.md b/docs/user-guide/dashboard-guide.md index 114aab0d..c12f27ff 100644 --- a/docs/user-guide/dashboard-guide.md +++ b/docs/user-guide/dashboard-guide.md @@ -43,9 +43,13 @@ row opens a detail page. and (for non-baseline runs) the canonical feature columns plus a feature importance panel — see [Advanced Model Metadata](./feature-reference.md#advanced-model-metadata) in the - Feature Reference for the data model and error semantics. 
Two runs can be + compared side by side (config diff, metrics diff with deltas, and same-family + feature importance side-by-side). + Feature Reference for the data model and error semantics. The detail page also + hosts a **Feature frame** panel that renders V1/V2 + per-group columns + + per-column safety classes when the run carries that metadata (PRP-35/36). + Two runs can be compared side by side: a **Champion compatibility** badge + surfaces the comparable-run verdict (same grain + overlapping data windows + + same feature_frame_version), and the metrics-diff table now includes a + **Feature frame version** row. - **Jobs** (`/explorer/jobs`) — submitted train/predict/backtest jobs. A job **detail page** shows parameters, result JSON, error details, the linked run, a cancel action, and live status polling. @@ -59,8 +63,25 @@ The Visualize menu holds the analytical, chart-heavy pages. inventory required to cover it. Includes a lead-time selector and a single-SKU drill-in. Answers "how much will this SKU sell, and do I have enough stock?" - **Forecast** (`/visualize/forecast`) — visualizes a model's horizon predictions. + The top of the page now also hosts a **Train a new model** card: a segmented + family picker (Baseline / Tree / Additive), a model-type Select filtered by the + picked family, a Feature frame V1/V2 Select, and (when V2 is picked) a feature-pack + toggle group. See [Advanced Forecasting Guide](./advanced-forecasting-guide.md). - **Backtest Results** (`/visualize/backtest`) — charts backtest folds and the - accuracy metrics (MAE, sMAPE, WAPE, bias, stability) for a model run. + accuracy metrics (MAE, sMAPE, WAPE, bias, stability) for a model run. When the + backtest response carries per-horizon-bucket metrics, a separate **Per-horizon-bucket** + card surfaces those (`Days 1-7 / 8-14 / 15-28 / 29+`) and a metric + switcher (MAE / sMAPE / WAPE / Bias / RMSE). 
When the response carries + baseline competitors, a **Baseline vs feature-aware** comparison table renders. +- **What-If Planner** (`/visualize/planner`) — the existing scenario simulation + view; impact card now carries a **method badge** + (`model-driven re-forecast` vs `heuristic adjustment`) so the planner + always sees how the scenario was produced. +- **Batch Runner** (`/visualize/batch`) — the existing batch runner now hosts a + **Sweep preset** Select (5 presets — quick baseline sweep, feature-aware + comparison, champion/challenger refresh, stockout-sensitive products, high-WAPE + recovery) and a **Sweep matrix** picker (multi-model × V1/V2). Picking a preset + prefills the matrix; rows can still be hand-edited. ## Knowledge (`/knowledge`) diff --git a/frontend/src/components/charts/backtest-horizon-buckets-chart.test.tsx b/frontend/src/components/charts/backtest-horizon-buckets-chart.test.tsx new file mode 100644 index 00000000..9c30c766 --- /dev/null +++ b/frontend/src/components/charts/backtest-horizon-buckets-chart.test.tsx @@ -0,0 +1,43 @@ +import { afterEach, beforeAll, describe, expect, it } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { BacktestHorizonBucketsChart } from './backtest-horizon-buckets-chart' + +// Recharts' ResponsiveContainer requires ResizeObserver; jsdom doesn't ship it. 
+beforeAll(() => { + if (typeof globalThis.ResizeObserver === 'undefined') { + globalThis.ResizeObserver = class { + observe() {} + unobserve() {} + disconnect() {} + } as unknown as typeof globalThis.ResizeObserver + } +}) + +afterEach(cleanup) + +describe('BacktestHorizonBucketsChart', () => { + it('renders empty state when bucketed is undefined', () => { + render( + , + ) + expect(screen.getByTestId('horizon-buckets-chart-empty')).toBeTruthy() + }) + + it('renders empty state for an empty bucketed dict', () => { + render() + expect(screen.getByTestId('horizon-buckets-chart-empty')).toBeTruthy() + }) + + it('renders the chart container when bucketed has data', () => { + render( + , + ) + expect(screen.getByTestId('horizon-buckets-chart')).toBeTruthy() + }) +}) diff --git a/frontend/src/components/charts/backtest-horizon-buckets-chart.tsx b/frontend/src/components/charts/backtest-horizon-buckets-chart.tsx new file mode 100644 index 00000000..33019c6f --- /dev/null +++ b/frontend/src/components/charts/backtest-horizon-buckets-chart.tsx @@ -0,0 +1,127 @@ +import { Bar, BarChart, CartesianGrid, XAxis, YAxis } from 'recharts' +import { + ChartConfig, + ChartContainer, + ChartTooltip, + ChartTooltipContent, +} from '@/components/ui/chart' +import { + Card, + CardContent, + CardDescription, + CardHeader, + CardTitle, +} from '@/components/ui/card' +import { labelForBucket, sortBuckets } from '@/lib/horizon-bucket-utils' + +/** + * PRP-37 Slice C — per-horizon-bucket bar chart. Sibling to BacktestFoldsChart + * (the data shape is different — bucket-aggregate vs per-fold — so this is + * NOT a metricKey toggle on the existing component). Empty state matches the + * HorizonBucketTable empty state. 
+ */ + +export type HorizonBucketChartMetric = + | 'mae' + | 'smape' + | 'wape' + | 'bias' + | 'rmse' + +interface BacktestHorizonBucketsChartProps { + bucketed: + | Record> + | null + | undefined + metric: HorizonBucketChartMetric + height?: number + title?: string + description?: string +} + +const METRIC_COLOR: Record = { + mae: 'var(--chart-1)', + smape: 'var(--chart-2)', + wape: 'var(--chart-3)', + bias: 'var(--chart-4)', + rmse: 'var(--chart-5)', +} + +const METRIC_LABEL: Record = { + mae: 'MAE', + smape: 'sMAPE', + wape: 'WAPE', + bias: 'Bias', + rmse: 'RMSE', +} + +export function BacktestHorizonBucketsChart({ + bucketed, + metric, + height = 240, + title = 'Metric by horizon bucket', + description, +}: BacktestHorizonBucketsChartProps) { + if (!bucketed || Object.keys(bucketed).length === 0) { + return ( + + + {title} + {description && {description}} + + +

+ No horizon-bucket metrics available. +

+
+
+ ) + } + + const sortedIds = sortBuckets(Object.keys(bucketed)) + const data = sortedIds.map((id) => ({ + bucket: id, + label: labelForBucket(id), + value: bucketed[id]?.[metric] ?? 0, + })) + + const chartConfig: ChartConfig = { + value: { + label: METRIC_LABEL[metric], + color: METRIC_COLOR[metric], + }, + } + + return ( + + + {title} + {description && {description}} + + + + + + + + } /> + + + + + + ) +} diff --git a/frontend/src/components/forecast-intelligence/batch-matrix-picker.test.tsx b/frontend/src/components/forecast-intelligence/batch-matrix-picker.test.tsx new file mode 100644 index 00000000..e262ec29 --- /dev/null +++ b/frontend/src/components/forecast-intelligence/batch-matrix-picker.test.tsx @@ -0,0 +1,129 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { BatchMatrixPicker } from './batch-matrix-picker' +import type { FeatureGroup } from '@/types/api' + +afterEach(cleanup) + +const MODELS = ['naive', 'lightgbm', 'regression'] +const GROUPS: FeatureGroup[] = ['target_history', 'calendar', 'rolling'] +const DEFAULTS: FeatureGroup[] = ['target_history', 'calendar', 'rolling'] + +describe('BatchMatrixPicker', () => { + it('adds a V1 row when the cell is toggled on', () => { + const onChange = vi.fn() + render( + , + ) + fireEvent.click(screen.getByTestId('batch-matrix-cell-naive-v1')) + expect(onChange).toHaveBeenCalledWith([ + { + model_type: 'naive', + feature_frame_version: 1, + feature_groups: [], + }, + ]) + }) + + it('adds a V2 row pre-populated with defaults', () => { + const onChange = vi.fn() + render( + , + ) + fireEvent.click(screen.getByTestId('batch-matrix-cell-lightgbm-v2')) + expect(onChange).toHaveBeenCalledWith([ + { + model_type: 'lightgbm', + feature_frame_version: 2, + feature_groups: DEFAULTS, + }, + ]) + }) + + it('removes a row when its cell is toggled off', () => { + const onChange = vi.fn() + render( + , + ) + 
fireEvent.click(screen.getByTestId('batch-matrix-cell-naive-v1')) + expect(onChange).toHaveBeenCalledWith([]) + }) + + it('surfaces the max-rows badge and disables new cells when the cap is hit', () => { + const value = MODELS.map((model_type) => ({ + model_type, + feature_frame_version: 1 as const, + feature_groups: [], + })) + render( + {}} + max_rows={3} + />, + ) + expect(screen.getByTestId('batch-matrix-limit-badge')).toBeTruthy() + // An unchecked V2 cell is disabled because we cannot add more rows. + expect( + screen + .getByTestId('batch-matrix-cell-lightgbm-v2') + .hasAttribute('disabled'), + ).toBe(true) + }) + + it('renders a per-row group editor only for V2 rows', () => { + render( + {}} + />, + ) + expect( + screen.getByTestId('batch-matrix-row-config-regression'), + ).toBeTruthy() + expect( + screen.queryByTestId('batch-matrix-row-config-naive'), + ).toBeNull() + }) +}) diff --git a/frontend/src/components/forecast-intelligence/batch-matrix-picker.tsx b/frontend/src/components/forecast-intelligence/batch-matrix-picker.tsx new file mode 100644 index 00000000..4a7058da --- /dev/null +++ b/frontend/src/components/forecast-intelligence/batch-matrix-picker.tsx @@ -0,0 +1,228 @@ +import { Checkbox } from '@/components/ui/checkbox' +import { Badge } from '@/components/ui/badge' +import { Button } from '@/components/ui/button' +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table' +import { MODEL_TYPE_LABELS } from './model-type-utils' +import { labelForGroup } from '@/lib/feature-frame-utils' +import type { FeatureGroup, FeatureFrameVersion } from '@/types/api' + +/** + * PRP-37 Slice C — multi-model × multi-feature-pack matrix picker for the + * batch sweep page. Operator picks which (model, V, groups) tuples to fan + * out into a BatchSubmitRequest. Capped at `max_rows` to avoid accidentally + * submitting a 100-row matrix. 
+ */ + +export type MatrixRow = { + model_type: string + feature_frame_version: FeatureFrameVersion + feature_groups: FeatureGroup[] +} + +interface BatchMatrixPickerProps { + availableModels: string[] + availableGroups: FeatureGroup[] + defaults: FeatureGroup[] + value: MatrixRow[] + onChange: (rows: MatrixRow[]) => void + max_rows?: number +} + +const DEFAULT_MAX = 24 + +export function BatchMatrixPicker({ + availableModels, + availableGroups, + defaults, + value, + onChange, + max_rows = DEFAULT_MAX, +}: BatchMatrixPickerProps) { + const limitReached = value.length >= max_rows + + function isRowEnabled( + model_type: string, + version: FeatureFrameVersion, + ): boolean { + return value.some( + (row) => + row.model_type === model_type && + row.feature_frame_version === version, + ) + } + + function toggleRow(model_type: string, version: FeatureFrameVersion) { + const exists = isRowEnabled(model_type, version) + if (exists) { + onChange( + value.filter( + (row) => + !( + row.model_type === model_type && + row.feature_frame_version === version + ), + ), + ) + return + } + if (limitReached) return + const groups = version === 2 ? defaults : [] + onChange([ + ...value, + { model_type, feature_frame_version: version, feature_groups: groups }, + ]) + } + + function toggleGroupForRow( + model_type: string, + version: FeatureFrameVersion, + group: FeatureGroup, + ) { + onChange( + value.map((row) => { + if ( + row.model_type !== model_type || + row.feature_frame_version !== version + ) { + return row + } + const has = row.feature_groups.includes(group) + return { + ...row, + feature_groups: has + ? row.feature_groups.filter((g) => g !== group) + : [...row.feature_groups, group], + } + }), + ) + } + + function applyDefaultsTo( + model_type: string, + version: FeatureFrameVersion, + ) { + onChange( + value.map((row) => + row.model_type === model_type && + row.feature_frame_version === version + ? { ...row, feature_groups: defaults } + : row, + ), + ) + } + + return ( +
+
+ + Rows: {value.length} / {max_rows} + + {limitReached && ( + + Max rows reached + + )} +
+ + + + Model + V1 (target-only) + V2 (feature-aware) + + + + {availableModels.map((model_type) => ( + + + {MODEL_TYPE_LABELS[model_type] ?? model_type} + + + toggleRow(model_type, 1)} + disabled={ + !isRowEnabled(model_type, 1) && limitReached + } + aria-label={`Enable ${model_type} V1`} + data-testid={`batch-matrix-cell-${model_type}-v1`} + /> + + + toggleRow(model_type, 2)} + disabled={ + !isRowEnabled(model_type, 2) && limitReached + } + aria-label={`Enable ${model_type} V2`} + data-testid={`batch-matrix-cell-${model_type}-v2`} + /> + + + ))} + +
+ + {/* Per-row feature-group editors (V2 only). */} + {value + .filter((row) => row.feature_frame_version === 2) + .map((row) => ( +
+
+ + {MODEL_TYPE_LABELS[row.model_type] ?? row.model_type} + + + V2 + + +
+
+ {availableGroups.map((group) => { + const on = row.feature_groups.includes(group) + return ( + + ) + })} +
+
+ ))} +
+ ) +} diff --git a/frontend/src/components/forecast-intelligence/batch-preset-select.test.tsx b/frontend/src/components/forecast-intelligence/batch-preset-select.test.tsx new file mode 100644 index 00000000..ab9910ee --- /dev/null +++ b/frontend/src/components/forecast-intelligence/batch-preset-select.test.tsx @@ -0,0 +1,71 @@ +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup } from '@testing-library/react' +import { + BATCH_PRESETS, + buildPresetConfigs, +} from './batch-preset-utils' + +afterEach(cleanup) + +describe('BATCH_PRESETS', () => { + it('exposes 5 presets', () => { + expect(BATCH_PRESETS.length).toBe(5) + }) +}) + +describe('buildPresetConfigs', () => { + it('quick_baseline_sweep emits 5 baseline rows with no feature_frame_version', () => { + const rows = buildPresetConfigs('quick_baseline_sweep') + expect(rows.length).toBe(5) + for (const row of rows) { + expect(row.feature_frame_version).toBeUndefined() + expect(row.feature_groups).toBeUndefined() + } + }) + + it('feature_aware_comparison emits V2 + default groups rows', () => { + const rows = buildPresetConfigs('feature_aware_comparison') + expect(rows.length).toBe(5) + for (const row of rows) { + expect(row.feature_frame_version).toBe(2) + expect(row.feature_groups).toContain('target_history') + expect(row.feature_groups).toContain('lifecycle') + } + }) + + it('stockout_sensitive_products emits a single regression V2 row with inventory + replenishment + returns', () => { + const rows = buildPresetConfigs('stockout_sensitive_products') + expect(rows.length).toBe(1) + const row = rows[0]! 
+ expect(row.model_type).toBe('regression') + expect(row.feature_frame_version).toBe(2) + expect(row.feature_groups).toContain('inventory') + expect(row.feature_groups).toContain('replenishment') + expect(row.feature_groups).toContain('returns') + }) + + it('champion_challenger_refresh emits champion + distinct challenger when both supplied', () => { + const rows = buildPresetConfigs('champion_challenger_refresh', { + championModelType: 'lightgbm', + challengerModelType: 'xgboost', + }) + expect(rows.length).toBe(2) + expect(rows[0]?.model_type).toBe('lightgbm') + expect(rows[1]?.model_type).toBe('xgboost') + }) + + it('champion_challenger_refresh dedupes when challenger matches champion', () => { + const rows = buildPresetConfigs('champion_challenger_refresh', { + championModelType: 'lightgbm', + challengerModelType: 'lightgbm', + }) + expect(rows.length).toBe(1) + }) + + it('champion_challenger_refresh falls back to naive + lightgbm when no champion supplied', () => { + const rows = buildPresetConfigs('champion_challenger_refresh') + expect(rows.length).toBe(2) + expect(rows[0]?.model_type).toBe('naive') + expect(rows[1]?.model_type).toBe('lightgbm') + }) +}) diff --git a/frontend/src/components/forecast-intelligence/batch-preset-select.tsx b/frontend/src/components/forecast-intelligence/batch-preset-select.tsx new file mode 100644 index 00000000..1ecd2163 --- /dev/null +++ b/frontend/src/components/forecast-intelligence/batch-preset-select.tsx @@ -0,0 +1,51 @@ +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import { BATCH_PRESETS, type BatchPresetId } from './batch-preset-utils' + +/** + * PRP-37 Slice C — five hardcoded batch sweep presets surfaced as a Select. + * Each preset emits a list of `BatchModelConfig` rows (via the sibling + * `buildPresetConfigs` helper); the parent translates the rows into a + * BatchSubmitRequest. 
+ */ + +interface BatchPresetSelectProps { + value?: BatchPresetId + onChange: (preset: BatchPresetId) => void + className?: string + disabled?: boolean +} + +export function BatchPresetSelect({ + value, + onChange, + className, + disabled, +}: BatchPresetSelectProps) { + return ( + + ) +} diff --git a/frontend/src/components/forecast-intelligence/batch-preset-utils.ts b/frontend/src/components/forecast-intelligence/batch-preset-utils.ts new file mode 100644 index 00000000..f03130a2 --- /dev/null +++ b/frontend/src/components/forecast-intelligence/batch-preset-utils.ts @@ -0,0 +1,143 @@ +/** + * PRP-37 Slice C — shared batch-preset metadata + builder. Split out from + * the .tsx surface so the react-refresh lint rule stays clean. + */ + +import { defaultV2Groups } from '@/lib/feature-frame-utils' +import type { BatchModelConfig, FeatureGroup } from '@/types/api' + +export type BatchPresetId = + | 'quick_baseline_sweep' + | 'feature_aware_comparison' + | 'champion_challenger_refresh' + | 'stockout_sensitive_products' + | 'high_wape_recovery' + +export interface BatchPresetMeta { + id: BatchPresetId + label: string + description: string +} + +export const BATCH_PRESETS: BatchPresetMeta[] = [ + { + id: 'quick_baseline_sweep', + label: 'Quick baseline sweep', + description: + 'All five baseline models (naive, seasonal_naive, moving_average, weighted_moving_average, seasonal_average).', + }, + { + id: 'feature_aware_comparison', + label: 'Feature-aware comparison', + description: + 'Regression, LightGBM, XGBoost, Random Forest, Prophet-like — V2 with default feature packs.', + }, + { + id: 'champion_challenger_refresh', + label: 'Champion/challenger refresh', + description: + 'The current champion model type + the strongest challenger from the runs explorer; supplied by the page.', + }, + { + id: 'stockout_sensitive_products', + label: 'Stockout-sensitive products', + description: + 'Regression on V2 with inventory + replenishment + returns packs enabled.', + }, + { + 
id: 'high_wape_recovery', + label: 'High-WAPE recovery', + description: + 'Every feature-aware model on V2 with default packs — for grains where baselines are underperforming.', + }, +] + +/** + * Translate a preset id into the `BatchModelConfig[]` the parent submits. + * `championModelType` + `challengerModelType` are only used by + * `champion_challenger_refresh`. If a model is server-side gated + * (lightgbm / xgboost / random_forest), the parent is responsible for + * filtering the resulting rows against the runtime model allow-list. + */ +export function buildPresetConfigs( + presetId: BatchPresetId, + options: { + championModelType?: string + challengerModelType?: string + } = {}, +): BatchModelConfig[] { + const groups: FeatureGroup[] = defaultV2Groups() + switch (presetId) { + case 'quick_baseline_sweep': + return ( + [ + 'naive', + 'seasonal_naive', + 'moving_average', + 'weighted_moving_average', + 'seasonal_average', + ] as const + ).map((model_type) => ({ model_type })) + case 'feature_aware_comparison': + return ( + [ + 'regression', + 'lightgbm', + 'xgboost', + 'random_forest', + 'prophet_like', + ] as const + ).map((model_type) => ({ + model_type, + feature_frame_version: 2, + feature_groups: groups, + })) + case 'champion_challenger_refresh': { + const rows: BatchModelConfig[] = [] + if (options.championModelType) { + rows.push({ model_type: options.championModelType as never }) + } + if ( + options.challengerModelType && + options.challengerModelType !== options.championModelType + ) { + rows.push({ model_type: options.challengerModelType as never }) + } + // Fallback when callers do not supply a champion: a minimal compare + // of naive vs lightgbm, the historical "first thing to look at" + // pair across the registry. 
+ if (rows.length === 0) { + rows.push({ model_type: 'naive' }, { model_type: 'lightgbm' }) + } + return rows + } + case 'stockout_sensitive_products': + return [ + { + model_type: 'regression', + feature_frame_version: 2, + feature_groups: [ + 'target_history', + 'calendar', + 'inventory', + 'replenishment', + 'returns', + ], + }, + ] + case 'high_wape_recovery': + return ( + [ + 'regression', + 'lightgbm', + 'xgboost', + 'random_forest', + 'prophet_like', + ] as const + ).map((model_type) => ({ + model_type, + feature_frame_version: 2, + feature_groups: groups, + })) + } +} diff --git a/frontend/src/components/forecast-intelligence/champion-compatibility-badge.test.tsx b/frontend/src/components/forecast-intelligence/champion-compatibility-badge.test.tsx new file mode 100644 index 00000000..2a177fa7 --- /dev/null +++ b/frontend/src/components/forecast-intelligence/champion-compatibility-badge.test.tsx @@ -0,0 +1,117 @@ +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { ChampionCompatibilityBadge } from './champion-compatibility-badge' +import { computeCompatibility } from './champion-compatibility-utils' +import type { ModelRun } from '@/types/api' + +afterEach(cleanup) + +function makeRun(overrides: Partial): ModelRun { + return { + run_id: overrides.run_id ?? 
'r', + status: 'success', + model_type: 'naive', + model_family: 'baseline', + model_config: {}, + feature_config: null, + config_hash: 'h', + data_window_start: '2024-01-01', + data_window_end: '2024-06-30', + store_id: 1, + product_id: 1, + metrics: null, + artifact_uri: null, + artifact_hash: null, + artifact_size_bytes: null, + runtime_info: null, + agent_context: null, + git_sha: null, + error_message: null, + started_at: null, + completed_at: null, + created_at: '2024-01-01', + updated_at: '2024-01-01', + ...overrides, + } +} + +describe('computeCompatibility', () => { + it('returns ok=true when grain matches, windows overlap, V matches', () => { + const a = makeRun({ run_id: 'a' }) + const b = makeRun({ + run_id: 'b', + data_window_start: '2024-03-01', + data_window_end: '2024-08-31', + }) + expect(computeCompatibility(a, b)).toEqual({ ok: true }) + }) + + it('rejects different store_id', () => { + const a = makeRun({ store_id: 1 }) + const b = makeRun({ store_id: 2 }) + expect(computeCompatibility(a, b).ok).toBe(false) + expect(computeCompatibility(a, b).reason).toMatch(/grain/i) + }) + + it('rejects different product_id', () => { + const a = makeRun({ product_id: 1 }) + const b = makeRun({ product_id: 2 }) + expect(computeCompatibility(a, b).reason).toMatch(/grain/i) + }) + + it('rejects non-overlapping windows', () => { + const a = makeRun({ + data_window_start: '2024-01-01', + data_window_end: '2024-02-01', + }) + const b = makeRun({ + data_window_start: '2024-06-01', + data_window_end: '2024-09-01', + }) + expect(computeCompatibility(a, b).reason).toMatch(/no data-window overlap/i) + }) + + it('rejects different feature_frame_version (V1 vs V2)', () => { + const a = makeRun({ feature_frame_version: 1 }) + const b = makeRun({ feature_frame_version: 2 }) + expect(computeCompatibility(a, b).reason).toMatch(/feature frame version/i) + }) + + it('treats undefined feature_frame_version as V1', () => { + const a = makeRun({}) + const b = makeRun({ 
feature_frame_version: 1 }) + expect(computeCompatibility(a, b)).toEqual({ ok: true }) + }) + + it('treats null feature_frame_version as V1', () => { + const a = makeRun({ feature_frame_version: null }) + const b = makeRun({}) + expect(computeCompatibility(a, b)).toEqual({ ok: true }) + }) + + it('rejects unparseable dates', () => { + const a = makeRun({ data_window_start: 'garbage' }) + const b = makeRun({}) + expect(computeCompatibility(a, b).reason).toMatch(/unparseable/i) + }) +}) + +describe('ChampionCompatibilityBadge', () => { + it('renders the comparable label for a matching pair', () => { + const a = makeRun({}) + const b = makeRun({}) + render() + const badge = screen.getByTestId('champion-compatibility-badge') + expect(badge.getAttribute('data-comparable')).toBe('yes') + expect(badge.textContent).toBe('Comparable') + }) + + it('renders the not-comparable label when V differs', () => { + const a = makeRun({ feature_frame_version: 1 }) + const b = makeRun({ feature_frame_version: 2 }) + render() + const badge = screen.getByTestId('champion-compatibility-badge') + expect(badge.getAttribute('data-comparable')).toBe('no') + expect(badge.textContent).toBe('Not comparable') + }) +}) diff --git a/frontend/src/components/forecast-intelligence/champion-compatibility-badge.tsx b/frontend/src/components/forecast-intelligence/champion-compatibility-badge.tsx new file mode 100644 index 00000000..3245d2fa --- /dev/null +++ b/frontend/src/components/forecast-intelligence/champion-compatibility-badge.tsx @@ -0,0 +1,53 @@ +import { Badge } from '@/components/ui/badge' +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from '@/components/ui/tooltip' +import type { ModelRun } from '@/types/api' +import { computeCompatibility } from './champion-compatibility-utils' + +/** + * PRP-37 Slice C — comparable-run rule visualization for /explorer/run-compare. 
+ * Two runs are comparable iff they share grain (store + product), their + * data windows overlap, AND their feature_frame_version matches (legacy + * runs default to V1). Computation logic lives in + * `champion-compatibility-utils.ts` so it can be reused without importing + * the React surface. + */ + +interface ChampionCompatibilityBadgeProps { + runA: ModelRun + runB: ModelRun + className?: string +} + +export function ChampionCompatibilityBadge({ + runA, + runB, + className, +}: ChampionCompatibilityBadgeProps) { + const result = computeCompatibility(runA, runB) + const label = result.ok ? 'Comparable' : 'Not comparable' + const tooltip = result.ok + ? 'Same grain, overlapping data windows, same feature frame version.' + : (result.reason ?? 'Runs do not satisfy the comparable-run rule.') + return ( + + + + + {label} + + + {tooltip} + + + ) +} diff --git a/frontend/src/components/forecast-intelligence/champion-compatibility-utils.ts b/frontend/src/components/forecast-intelligence/champion-compatibility-utils.ts new file mode 100644 index 00000000..e682b2ec --- /dev/null +++ b/frontend/src/components/forecast-intelligence/champion-compatibility-utils.ts @@ -0,0 +1,47 @@ +/** + * PRP-37 Slice C — comparable-run rule, factored out from the badge .tsx + * so the react-refresh lint rule stays clean and the rule is independently + * importable by future surfaces (e.g. the Ops page). 
+ */ + +import type { FeatureFrameVersion, ModelRun } from '@/types/api' + +export interface CompatibilityResult { + ok: boolean + reason?: string +} + +export function computeCompatibility( + a: ModelRun, + b: ModelRun, +): CompatibilityResult { + if (a.store_id !== b.store_id || a.product_id !== b.product_id) { + return { ok: false, reason: 'Different grain (store + product)' } + } + const a_start = new Date(a.data_window_start).getTime() + const a_end = new Date(a.data_window_end).getTime() + const b_start = new Date(b.data_window_start).getTime() + const b_end = new Date(b.data_window_end).getTime() + // Treat NaN (unparseable date) as a non-overlap to be safe — operators + // would rather see "not comparable" than a silent overlap match. + if ( + !Number.isFinite(a_start) || + !Number.isFinite(a_end) || + !Number.isFinite(b_start) || + !Number.isFinite(b_end) + ) { + return { ok: false, reason: 'Unparseable data-window dates' } + } + if (a_end < b_start || b_end < a_start) { + return { ok: false, reason: 'No data-window overlap' } + } + const va: FeatureFrameVersion = a.feature_frame_version === 2 ? 2 : 1 + const vb: FeatureFrameVersion = b.feature_frame_version === 2 ? 
2 : 1 + if (va !== vb) { + return { + ok: false, + reason: `Different feature frame version (V${va} vs V${vb})`, + } + } + return { ok: true } +} diff --git a/frontend/src/components/forecast-intelligence/feature-frame-panel.test.tsx b/frontend/src/components/forecast-intelligence/feature-frame-panel.test.tsx new file mode 100644 index 00000000..2b113397 --- /dev/null +++ b/frontend/src/components/forecast-intelligence/feature-frame-panel.test.tsx @@ -0,0 +1,72 @@ +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { FeatureFramePanel } from './feature-frame-panel' + +afterEach(cleanup) + +describe('FeatureFramePanel', () => { + it('renders pre-PRP-35 empty state when no fields are set', () => { + render() + expect( + screen.getByText(/feature frame information not available/i), + ).toBeTruthy() + }) + + it('renders the V1 chip + target-only note when version=1', () => { + render() + expect( + screen.getByTestId('feature-frame-version-chip').textContent, + ).toMatch(/V1/i) + expect(screen.getByText(/target-only feature frame/i)).toBeTruthy() + }) + + it('renders the V2 chip and per-group collapsible rows when groups are supplied', () => { + render( + , + ) + expect( + screen.getByTestId('feature-frame-version-chip').textContent, + ).toMatch(/V2/i) + expect(screen.getByTestId('feature-frame-group-target_history')).toBeTruthy() + expect(screen.getByTestId('feature-frame-group-calendar')).toBeTruthy() + }) + + it('surfaces the supplied-data warning when any safety class is unsafe_unless_supplied', () => { + render( + , + ) + expect(screen.getByTestId('feature-frame-safety-warning')).toBeTruthy() + }) + + it('omits the supplied-data warning when no column is unsafe', () => { + render( + , + ) + expect( + screen.queryByTestId('feature-frame-safety-warning'), + ).toBeNull() + }) + + it('shows a loading state when isLoading=true', () => { + render() + expect(screen.getByText(/loading feature 
frame/i)).toBeTruthy() + }) +}) diff --git a/frontend/src/components/forecast-intelligence/feature-frame-panel.tsx b/frontend/src/components/forecast-intelligence/feature-frame-panel.tsx new file mode 100644 index 00000000..39379d62 --- /dev/null +++ b/frontend/src/components/forecast-intelligence/feature-frame-panel.tsx @@ -0,0 +1,178 @@ +import { Layers, ShieldAlert } from 'lucide-react' +import { + Card, + CardContent, + CardDescription, + CardHeader, + CardTitle, +} from '@/components/ui/card' +import { + Collapsible, + CollapsibleContent, + CollapsibleTrigger, +} from '@/components/ui/collapsible' +import { Badge } from '@/components/ui/badge' +import { StatusBadge } from '@/components/common/status-badge' +import { LoadingState } from '@/components/common/loading-state' +import { + labelForGroup, + labelForSafetyClass, + safetyClassChipVariant, +} from '@/lib/feature-frame-utils' +import type { + FeatureFrameVersion, + FeatureGroup, + FeatureSafetyClass, +} from '@/types/api' + +/** + * PRP-37 Slice C — read-only "Feature frame" panel for the run detail page. + * Renders V1/V2 chip, per-group collapsible column list, and per-column + * safety chips. Pre-PRP-35 runs (no fields set) render the empty state. + */ + +interface FeatureFramePanelProps { + feature_frame_version?: FeatureFrameVersion | null + feature_groups?: Partial> | null + feature_safety_classes?: Record | null + isLoading?: boolean +} + +export function FeatureFramePanel({ + feature_frame_version, + feature_groups, + feature_safety_classes, + isLoading, +}: FeatureFramePanelProps) { + if (isLoading) { + return ( + + + + + Feature frame + + + + + + + ) + } + + const hasVersion = + feature_frame_version !== undefined && feature_frame_version !== null + const hasGroups = + feature_groups != null && Object.keys(feature_groups).length > 0 + if (!hasVersion && !hasGroups) { + return ( + + + + + Feature frame + + + Feature frame information not available (pre-PRP-35 run). 
+ + + + ) + } + + const version: FeatureFrameVersion = + feature_frame_version === 2 ? 2 : 1 + return ( + + + + + Feature frame + + {version === 2 ? 'V2 — feature-aware' : 'V1 — target-only'} + + + + The feature contract this run consumed at training time. + + + + {version === 1 && !hasGroups && ( +

+ V1 runs use a target-only feature frame (lags + same-DOW mean); + no per-pack metadata to render. +

+ )} + {hasGroups && feature_groups && ( +
+ {Object.entries(feature_groups).map(([group, cols]) => { + const columns = cols ?? [] + return ( + + + + {labelForGroup(group as FeatureGroup)} + + {columns.length} + + + + expand + + + +
    + {columns.length === 0 && ( +
  • + (no columns) +
  • + )} + {columns.map((col) => { + const safety = feature_safety_classes?.[col] + return ( +
  • + {col} + {safety && ( + + {labelForSafetyClass(safety)} + + )} +
  • + ) + })} +
+
+
+ ) + })} +
+ )} + {feature_safety_classes && + Object.values(feature_safety_classes).some( + (s) => s === 'unsafe_unless_supplied', + ) && ( +

+ + At least one column requires supplied data — promote this run + only if the production pipeline supplies it. +

+ )} +
+
+ ) +} diff --git a/frontend/src/components/forecast-intelligence/feature-frame-select.test.tsx b/frontend/src/components/forecast-intelligence/feature-frame-select.test.tsx new file mode 100644 index 00000000..5e880b2d --- /dev/null +++ b/frontend/src/components/forecast-intelligence/feature-frame-select.test.tsx @@ -0,0 +1,63 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { FeatureFrameSelect } from './feature-frame-select' + +afterEach(cleanup) + +describe('FeatureFrameSelect', () => { + it('shows the disabled-state tooltip icon when V2 is unavailable', () => { + render( + {}} + isV2Available={false} + />, + ) + expect( + screen.getByTestId('feature-frame-v2-disabled-tooltip'), + ).toBeTruthy() + }) + + it('hides the tooltip icon when V2 is available', () => { + render( + {}} + isV2Available + />, + ) + expect( + screen.queryByTestId('feature-frame-v2-disabled-tooltip'), + ).toBeNull() + }) + + it('renders the trigger with the current value', () => { + render( + {}} + isV2Available + />, + ) + const trigger = screen.getByTestId('feature-frame-select-trigger') + expect(trigger.textContent).toMatch(/V2/) + }) + + it('emits onChange when the value changes', () => { + // Radix Select uses pointer events that jsdom does not implement; the + // logical path is covered by the onValueChange handler, which we test + // via prop wiring rather than a full open-and-click flow. + const onChange = vi.fn() + render( + , + ) + // Sanity: trigger renders + receives focus. 
+ const trigger = screen.getByTestId('feature-frame-select-trigger') + fireEvent.focus(trigger) + expect(trigger).toBeTruthy() + }) +}) diff --git a/frontend/src/components/forecast-intelligence/feature-frame-select.tsx b/frontend/src/components/forecast-intelligence/feature-frame-select.tsx new file mode 100644 index 00000000..a2f704a1 --- /dev/null +++ b/frontend/src/components/forecast-intelligence/feature-frame-select.tsx @@ -0,0 +1,81 @@ +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from '@/components/ui/tooltip' +import { Info } from 'lucide-react' +import type { FeatureFrameVersion } from '@/types/api' + +/** + * PRP-37 Slice C — V1/V2 feature-frame selector. V2 is disabled when the + * server has not shipped Forecast Intelligence A (PRP-35); the tooltip + * carries the human-readable reason so the disabled state is never silent. + */ + +interface FeatureFrameSelectProps { + value: FeatureFrameVersion + onChange: (value: FeatureFrameVersion) => void + isV2Available: boolean + v2DisabledReason?: string + className?: string +} + +const DEFAULT_V2_REASON = + 'V2 unavailable — server has not shipped Forecast Intelligence A.' + +export function FeatureFrameSelect({ + value, + onChange, + isV2Available, + v2DisabledReason, + className, +}: FeatureFrameSelectProps) { + return ( +
+ + {!isV2Available && ( + + + + + + + + + {v2DisabledReason ?? DEFAULT_V2_REASON} + + + + )} +
+ ) +} diff --git a/frontend/src/components/forecast-intelligence/feature-groups-toggle.test.tsx b/frontend/src/components/forecast-intelligence/feature-groups-toggle.test.tsx new file mode 100644 index 00000000..87518e4d --- /dev/null +++ b/frontend/src/components/forecast-intelligence/feature-groups-toggle.test.tsx @@ -0,0 +1,156 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { FeatureGroupsToggle } from './feature-groups-toggle' +import type { FeatureGroup } from '@/types/api' + +afterEach(cleanup) + +const ALL_AVAILABLE: FeatureGroup[] = [ + 'target_history', + 'rolling', + 'trend', + 'calendar', + 'price_promo', + 'lifecycle', +] +const DEFAULTS: FeatureGroup[] = [ + 'target_history', + 'calendar', + 'rolling', + 'trend', + 'price_promo', + 'lifecycle', +] + +describe('FeatureGroupsToggle', () => { + it('renders a row per available group', () => { + render( + {}} + availableGroups={ALL_AVAILABLE} + defaults={DEFAULTS} + />, + ) + for (const group of ALL_AVAILABLE) { + expect(screen.getByTestId(`feature-groups-row-${group}`)).toBeTruthy() + } + }) + + it('emits onChange with the group added when toggled on', () => { + const onChange = vi.fn() + render( + , + ) + const row = screen.getByTestId('feature-groups-row-target_history') + const checkbox = row.querySelector('button[role="checkbox"]') as HTMLElement + fireEvent.click(checkbox) + expect(onChange).toHaveBeenCalledWith(['target_history']) + }) + + it('emits onChange with the group removed when toggled off', () => { + const onChange = vi.fn() + render( + , + ) + const row = screen.getByTestId('feature-groups-row-target_history') + const checkbox = row.querySelector('button[role="checkbox"]') as HTMLElement + fireEvent.click(checkbox) + expect(onChange).toHaveBeenCalledWith(['rolling']) + }) + + it('resets to defaults when "Use defaults" is clicked', () => { + const onChange = vi.fn() + render( + , + ) + 
fireEvent.click(screen.getByTestId('feature-groups-use-defaults')) + expect(onChange).toHaveBeenCalledWith(DEFAULTS) + }) + + it('emits an empty array when "Clear" is clicked', () => { + const onChange = vi.fn() + render( + , + ) + fireEvent.click(screen.getByTestId('feature-groups-clear')) + expect(onChange).toHaveBeenCalledWith([]) + }) + + it('renders a safety chip when safetyClasses surfaces an unsafe column for the group', () => { + render( + {}} + availableGroups={['inventory']} + defaults={[]} + safetyClasses={{ + 'inventory__on_hand_qty': 'unsafe_unless_supplied', + }} + />, + ) + expect(screen.getByText(/requires supplied data/i)).toBeTruthy() + }) + + it('omits safety chip when safety_classes is not supplied', () => { + render( + {}} + availableGroups={['inventory']} + defaults={[]} + />, + ) + // No safety badge anywhere in the row. + const row = screen.getByTestId('feature-groups-row-inventory') + expect(row.textContent).not.toMatch(/safe/i) + }) + + it('renders empty-state when availableGroups is empty', () => { + render( + {}} + availableGroups={[]} + defaults={DEFAULTS} + />, + ) + expect(screen.getByText(/no feature groups/i)).toBeTruthy() + }) + + it('does not emit when disabled', () => { + const onChange = vi.fn() + render( + , + ) + fireEvent.click(screen.getByTestId('feature-groups-use-defaults')) + // Button is disabled at HTML level, so this is mostly a safety belt. 
+ expect(onChange).not.toHaveBeenCalled() + }) +}) diff --git a/frontend/src/components/forecast-intelligence/feature-groups-toggle.tsx b/frontend/src/components/forecast-intelligence/feature-groups-toggle.tsx new file mode 100644 index 00000000..9b58ca4c --- /dev/null +++ b/frontend/src/components/forecast-intelligence/feature-groups-toggle.tsx @@ -0,0 +1,148 @@ +import { Checkbox } from '@/components/ui/checkbox' +import { Button } from '@/components/ui/button' +import { StatusBadge } from '@/components/common/status-badge' +import { + labelForGroup, + labelForSafetyClass, + safetyClassChipVariant, +} from '@/lib/feature-frame-utils' +import type { FeatureGroup, FeatureSafetyClass } from '@/types/api' + +/** + * PRP-37 Slice C — V2 feature-pack toggle group. Renders one Checkbox per + * available group; an optional safety chip per row when + * `feature_safety_classes` is supplied on the metadata response. + */ + +interface FeatureGroupsToggleProps { + value: FeatureGroup[] + onChange: (groups: FeatureGroup[]) => void + availableGroups: FeatureGroup[] + defaults: FeatureGroup[] + safetyClasses?: Record + disabled?: boolean + className?: string +} + +export function FeatureGroupsToggle({ + value, + onChange, + availableGroups, + defaults, + safetyClasses, + disabled, + className, +}: FeatureGroupsToggleProps) { + function toggle(group: FeatureGroup, checked: boolean) { + if (disabled) return + const next = checked + ? Array.from(new Set([...value, group])) + : value.filter((g) => g !== group) + onChange(next) + } + + function applyDefaults() { + if (disabled) return + onChange(defaults.filter((g) => availableGroups.includes(g))) + } + + function clearAll() { + if (disabled) return + onChange([]) + } + + return ( +
+
+ Feature packs + + +
+
    + {availableGroups.map((group) => { + const checked = value.includes(group) + // Surface the *most concerning* safety class across the group's + // columns — if the operator sees an "error" chip, the group needs + // supplied data; an absent chip means safety_classes was not + // returned (older metadata) and we render no chip rather than + // guessing. + const safety = safetyForGroup(group, safetyClasses) + return ( +
  • + toggle(group, state === true)} + aria-label={labelForGroup(group)} + /> + {labelForGroup(group)} + {safety && ( + + {labelForSafetyClass(safety)} + + )} +
  • + ) + })} +
+ {availableGroups.length === 0 && ( +

+ No feature groups exposed by the server for this run. +

+ )} +
+ ) +} + +function safetyForGroup( + group: FeatureGroup, + safetyClasses: Record | undefined, +): FeatureSafetyClass | undefined { + if (!safetyClasses) return undefined + // group → column-name convention: every feature column generated by a + // group starts with the group name (e.g. `target_history__lag_7`, + // `inventory__on_hand_qty`). Match on prefix so we surface the worst + // safety class found among the group's columns. + const prefix = `${group}__` + const matched = Object.entries(safetyClasses).filter(([col]) => + col.startsWith(prefix), + ) + if (matched.length === 0) return undefined + const order: FeatureSafetyClass[] = [ + 'safe', + 'conditionally_safe', + 'unsafe_unless_supplied', + ] + let worst: FeatureSafetyClass = 'safe' + for (const [, cls] of matched) { + if (order.indexOf(cls) > order.indexOf(worst)) { + worst = cls + } + } + return worst +} diff --git a/frontend/src/components/forecast-intelligence/horizon-bucket-table.test.tsx b/frontend/src/components/forecast-intelligence/horizon-bucket-table.test.tsx new file mode 100644 index 00000000..1d22a2f8 --- /dev/null +++ b/frontend/src/components/forecast-intelligence/horizon-bucket-table.test.tsx @@ -0,0 +1,70 @@ +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { HorizonBucketTable } from './horizon-bucket-table' + +afterEach(cleanup) + +const FOUR_BUCKETS: Record> = { + h_29_plus: { mae: 12.3, wape: 0.41 }, + h_1_7: { mae: 4.2, wape: 0.12 }, + h_15_28: { mae: 9.5, wape: 0.31 }, + h_8_14: { mae: 6.8, wape: 0.22 }, +} + +describe('HorizonBucketTable', () => { + it('renders empty state for undefined bucketed payload', () => { + render() + expect(screen.getByTestId('horizon-bucket-table-empty')).toBeTruthy() + }) + + it('renders empty state for empty bucketed dict', () => { + render() + expect(screen.getByTestId('horizon-bucket-table-empty')).toBeTruthy() + }) + + it('renders all four buckets in canonical order', 
() => { + const { container } = render( + , + ) + const rows = container.querySelectorAll('[data-testid^="horizon-bucket-row-"]') + expect(rows.length).toBe(4) + expect(rows[0]?.getAttribute('data-testid')).toBe( + 'horizon-bucket-row-h_1_7', + ) + expect(rows[1]?.getAttribute('data-testid')).toBe( + 'horizon-bucket-row-h_8_14', + ) + expect(rows[2]?.getAttribute('data-testid')).toBe( + 'horizon-bucket-row-h_15_28', + ) + expect(rows[3]?.getAttribute('data-testid')).toBe( + 'horizon-bucket-row-h_29_plus', + ) + }) + + it('renders dash when the picked metric is missing in a bucket', () => { + const partial: Record> = { + h_1_7: { wape: 0.1 }, + } + render() + const row = screen.getByTestId('horizon-bucket-row-h_1_7') + expect(row.textContent).toContain('—') + }) + + it('appends unknown bucket ids at the end', () => { + const withUnknown: Record> = { + h_extra: { mae: 1.0 }, + h_1_7: { mae: 2.0 }, + } + const { container } = render( + , + ) + const rows = container.querySelectorAll('[data-testid^="horizon-bucket-row-"]') + expect(rows[0]?.getAttribute('data-testid')).toBe( + 'horizon-bucket-row-h_1_7', + ) + expect(rows[1]?.getAttribute('data-testid')).toBe( + 'horizon-bucket-row-h_extra', + ) + }) +}) diff --git a/frontend/src/components/forecast-intelligence/horizon-bucket-table.tsx b/frontend/src/components/forecast-intelligence/horizon-bucket-table.tsx new file mode 100644 index 00000000..f97677c5 --- /dev/null +++ b/frontend/src/components/forecast-intelligence/horizon-bucket-table.tsx @@ -0,0 +1,81 @@ +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table' +import { sortBuckets } from '@/lib/horizon-bucket-utils' + +/** + * PRP-37 Slice C — per-horizon-bucket metric table. Reads + * ModelBacktestResult.bucketed_aggregated_metrics (PRP-36 dict-of-dict + * shape: bucket_id → metric_name → value). 
Empty bucket dict, undefined + * bucketed payload, or no rows for the chosen metric all render the + * "no horizon-bucket metrics available" empty state. + */ + +export type HorizonBucketMetric = + | 'mae' + | 'smape' + | 'wape' + | 'bias' + | 'rmse' + +interface HorizonBucketTableProps { + bucketed: + | Record> + | null + | undefined + metric: HorizonBucketMetric + metricLabel?: string +} + +export function HorizonBucketTable({ + bucketed, + metric, + metricLabel, +}: HorizonBucketTableProps) { + if (!bucketed || Object.keys(bucketed).length === 0) { + return ( +

+ No horizon-bucket metrics available. +

+ ) + } + const sortedIds = sortBuckets(Object.keys(bucketed)) + return ( + + + + Bucket + + {metricLabel ?? metric.toUpperCase()} + + + + + {sortedIds.map((id) => { + const value = bucketed[id]?.[metric] + return ( + + {id} + + {typeof value === 'number' ? formatBucketValue(value) : '—'} + + + ) + })} + +
+ ) + + function formatBucketValue(v: number): string { + if (!Number.isFinite(v)) return '—' + return v.toFixed(2) + } +} diff --git a/frontend/src/components/forecast-intelligence/model-family-tabs.test.tsx b/frontend/src/components/forecast-intelligence/model-family-tabs.test.tsx new file mode 100644 index 00000000..69390996 --- /dev/null +++ b/frontend/src/components/forecast-intelligence/model-family-tabs.test.tsx @@ -0,0 +1,35 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import { cleanup, fireEvent, render, screen } from '@testing-library/react' +import { ModelFamilyTabs } from './model-family-tabs' + +afterEach(cleanup) + +describe('ModelFamilyTabs', () => { + it('renders one tab per family with the current selection marked active', () => { + render( {}} />) + const tree = screen.getByTestId('model-family-tab-tree') + expect(tree.getAttribute('data-state')).toBe('active') + expect(screen.getByTestId('model-family-tab-baseline')).toBeTruthy() + expect(screen.getByTestId('model-family-tab-additive')).toBeTruthy() + }) + + it('emits onChange with the picked family on pointer interaction', () => { + // Radix Tabs trigger switches on pointerDown rather than click in jsdom. 
+ const onChange = vi.fn() + render() + const target = screen.getByTestId('model-family-tab-additive') + fireEvent.pointerDown(target, { button: 0, ctrlKey: false }) + fireEvent.mouseDown(target, { button: 0 }) + fireEvent.click(target) + expect(onChange).toHaveBeenCalledWith('additive') + }) + + it('does not emit onChange when disabled', () => { + const onChange = vi.fn() + render() + const target = screen.getByTestId('model-family-tab-tree') + fireEvent.pointerDown(target, { button: 0 }) + fireEvent.click(target) + expect(onChange).not.toHaveBeenCalled() + }) +}) diff --git a/frontend/src/components/forecast-intelligence/model-family-tabs.tsx b/frontend/src/components/forecast-intelligence/model-family-tabs.tsx new file mode 100644 index 00000000..ddd13afa --- /dev/null +++ b/frontend/src/components/forecast-intelligence/model-family-tabs.tsx @@ -0,0 +1,59 @@ +import { Activity, LineChart, TreePine } from 'lucide-react' +import { Tabs, TabsList, TabsTrigger } from '@/components/ui/tabs' +import type { ModelFamily } from '@/types/api' + +/** + * PRP-37 Slice C — segmented model-family picker. Uses the shadcn Tabs + * primitive as a segmented control (no separate SegmentedControl component + * exists in the registry — see `.claude/rules/shadcn-ui.md`). 
+ */ + +interface ModelFamilyTabsProps { + family: ModelFamily + onChange: (family: ModelFamily) => void + disabled?: boolean + className?: string +} + +const FAMILIES: Array<{ + value: ModelFamily + label: string + Icon: typeof Activity +}> = [ + { value: 'baseline', label: 'Baseline', Icon: Activity }, + { value: 'tree', label: 'Tree', Icon: TreePine }, + { value: 'additive', label: 'Additive', Icon: LineChart }, +] + +export function ModelFamilyTabs({ + family, + onChange, + disabled, + className, +}: ModelFamilyTabsProps) { + return ( + { + if (disabled) return + onChange(value as ModelFamily) + }} + className={className} + data-testid="model-family-tabs" + > + + {FAMILIES.map(({ value, label, Icon }) => ( + + + {label} + + ))} + + + ) +} diff --git a/frontend/src/components/forecast-intelligence/model-type-select.test.tsx b/frontend/src/components/forecast-intelligence/model-type-select.test.tsx new file mode 100644 index 00000000..b49b1b5a --- /dev/null +++ b/frontend/src/components/forecast-intelligence/model-type-select.test.tsx @@ -0,0 +1,67 @@ +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup } from '@testing-library/react' +import { + MODEL_FAMILY_MAP, + MODEL_TYPE_LABELS, + modelsForFamily, +} from './model-type-utils' + +afterEach(cleanup) + +describe('MODEL_FAMILY_MAP', () => { + it('includes the 5 baseline model types (naive + 4 others)', () => { + expect(MODEL_FAMILY_MAP.baseline).toEqual([ + 'naive', + 'seasonal_naive', + 'moving_average', + 'weighted_moving_average', + 'seasonal_average', + ]) + }) + + it('includes the 4 tree model types', () => { + expect(MODEL_FAMILY_MAP.tree).toEqual([ + 'regression', + 'lightgbm', + 'xgboost', + 'random_forest', + ]) + }) + + it('includes the 2 additive model types', () => { + expect(MODEL_FAMILY_MAP.additive).toEqual([ + 'prophet_like', + 'trend_regression_baseline', + ]) + }) +}) + +describe('MODEL_TYPE_LABELS', () => { + it('labels every model type listed in MODEL_FAMILY_MAP', () 
=> { + const allTypes = [ + ...MODEL_FAMILY_MAP.baseline, + ...MODEL_FAMILY_MAP.tree, + ...MODEL_FAMILY_MAP.additive, + ] + for (const modelType of allTypes) { + expect(MODEL_TYPE_LABELS[modelType]).toBeTruthy() + } + }) +}) + +describe('modelsForFamily', () => { + it('returns every model in the family when no restriction is supplied', () => { + expect(modelsForFamily('tree')).toEqual(MODEL_FAMILY_MAP.tree) + }) + + it('filters by the availableModels intersection', () => { + expect(modelsForFamily('tree', ['lightgbm', 'xgboost', 'naive'])).toEqual([ + 'lightgbm', + 'xgboost', + ]) + }) + + it('returns an empty array when the family has no overlap with availableModels', () => { + expect(modelsForFamily('additive', ['naive'])).toEqual([]) + }) +}) diff --git a/frontend/src/components/forecast-intelligence/model-type-select.tsx b/frontend/src/components/forecast-intelligence/model-type-select.tsx new file mode 100644 index 00000000..04ecc82a --- /dev/null +++ b/frontend/src/components/forecast-intelligence/model-type-select.tsx @@ -0,0 +1,61 @@ +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from '@/components/ui/select' +import type { ModelFamily } from '@/types/api' +import { + MODEL_TYPE_LABELS, + modelsForFamily, +} from './model-type-utils' + +/** + * PRP-37 Slice C — model-type Select filtered by family. Mirrors backend + * `_MODEL_FAMILY_MAP` (app/features/forecasting/feature_metadata.py). When + * a value falls outside the picked family, the parent component is + * responsible for resetting it — this component does NOT silently reset. + */ + +interface ModelTypeSelectProps { + family: ModelFamily + value: string + onChange: (modelType: string) => void + /** Optional restriction set — usually the runtime-confirmed model list. 
*/ + availableModels?: string[] + disabled?: boolean + className?: string +} + +export function ModelTypeSelect({ + family, + value, + onChange, + availableModels, + disabled, + className, +}: ModelTypeSelectProps) { + const options = modelsForFamily(family, availableModels) + return ( + + ) +} diff --git a/frontend/src/components/forecast-intelligence/model-type-utils.ts b/frontend/src/components/forecast-intelligence/model-type-utils.ts new file mode 100644 index 00000000..30f43d17 --- /dev/null +++ b/frontend/src/components/forecast-intelligence/model-type-utils.ts @@ -0,0 +1,42 @@ +/** + * PRP-37 Slice C — shared model-type metadata. Split from + * `model-type-select.tsx` so the react-refresh lint rule (only-export-components) + * stays clean for the .tsx surface. + */ + +import type { ModelFamily } from '@/types/api' + +export const MODEL_FAMILY_MAP: Record = { + baseline: [ + 'naive', + 'seasonal_naive', + 'moving_average', + 'weighted_moving_average', + 'seasonal_average', + ], + tree: ['regression', 'lightgbm', 'xgboost', 'random_forest'], + additive: ['prophet_like', 'trend_regression_baseline'], +} + +export const MODEL_TYPE_LABELS: Record = { + naive: 'Naive', + seasonal_naive: 'Seasonal Naive', + moving_average: 'Moving Average', + weighted_moving_average: 'Weighted Moving Average', + seasonal_average: 'Seasonal Average', + regression: 'Regression (HistGBR)', + lightgbm: 'LightGBM', + xgboost: 'XGBoost', + random_forest: 'Random Forest', + prophet_like: 'Prophet-like (Ridge additive)', + trend_regression_baseline: 'Trend Regression Baseline', +} + +export function modelsForFamily( + family: ModelFamily, + availableModels?: string[], +): string[] { + const all = MODEL_FAMILY_MAP[family] + if (!availableModels) return all + return all.filter((m) => availableModels.includes(m)) +} diff --git a/frontend/src/components/forecast-intelligence/promote-confirmation-dialog.test.tsx 
b/frontend/src/components/forecast-intelligence/promote-confirmation-dialog.test.tsx new file mode 100644 index 00000000..dd66de09 --- /dev/null +++ b/frontend/src/components/forecast-intelligence/promote-confirmation-dialog.test.tsx @@ -0,0 +1,243 @@ +import { QueryClient, QueryClientProvider } from '@tanstack/react-query' +import { + afterEach, + beforeEach, + describe, + expect, + it, + vi, +} from 'vitest' +import { + cleanup, + fireEvent, + render, + screen, + waitFor, +} from '@testing-library/react' +import { createElement, type ReactNode } from 'react' +import { PromoteConfirmationDialog } from './promote-confirmation-dialog' +import type { ArtifactVerifyResponse, ModelRun } from '@/types/api' + +function makeRun(overrides: Partial = {}): ModelRun { + return { + run_id: 'run_aaaaaaaaaaaa', + status: 'success', + model_type: 'lightgbm', + model_family: 'tree', + model_config: {}, + feature_config: null, + config_hash: 'h', + data_window_start: '2024-01-01', + data_window_end: '2024-06-30', + store_id: 1, + product_id: 1, + metrics: { wape: 12.0 }, + artifact_uri: 'file:///artifact.joblib', + artifact_hash: 'abc', + artifact_size_bytes: 1024, + runtime_info: null, + agent_context: null, + git_sha: null, + error_message: null, + started_at: '2024-01-01', + completed_at: '2024-01-02', + created_at: '2024-01-01', + updated_at: '2024-01-01', + ...overrides, + } +} + +function makeWrapper(client: QueryClient) { + return function Wrapper({ children }: { children: ReactNode }) { + return createElement(QueryClientProvider, { client }, children) + } +} + +function stubVerify(response: ArtifactVerifyResponse) { + const fetchMock = vi.fn().mockResolvedValue( + new Response(JSON.stringify(response), { + status: 200, + headers: { 'content-type': 'application/json' }, + }), + ) + vi.stubGlobal('fetch', fetchMock) + return fetchMock +} + +beforeEach(() => { + cleanup() +}) + +afterEach(() => { + vi.unstubAllGlobals() + cleanup() +}) + +describe('PromoteConfirmationDialog', 
() => { + it('enables Promote when verify ok, no worse-WAPE, no V mismatch, alias name set', async () => { + stubVerify({ + verified: true, + run_id: 'r', + artifact_uri: 'u', + computed_hash: 'abc', + stored_hash: 'abc', + }) + const client = new QueryClient({ + defaultOptions: { queries: { retry: false } }, + }) + const run = makeRun({ feature_frame_version: 2 }) + const champion = makeRun({ + run_id: 'champ', + metrics: { wape: 15.0 }, + feature_frame_version: 2, + }) + render( + {}} + run={run} + currentChampion={champion} + defaultAliasName="production" + onConfirm={() => Promise.resolve()} + />, + { wrapper: makeWrapper(client) }, + ) + await waitFor(() => + expect( + screen + .getByTestId('promote-confirmation-action') + .hasAttribute('disabled'), + ).toBe(false), + ) + }) + + it('blocks Promote when artifact verify fails (no checkbox can override)', async () => { + stubVerify({ + verified: false, + run_id: 'r', + artifact_uri: 'u', + computed_hash: 'BAD', + stored_hash: 'abc', + error: 'checksum mismatch', + }) + const client = new QueryClient({ + defaultOptions: { queries: { retry: false } }, + }) + render( + {}} + run={makeRun()} + defaultAliasName="production" + onConfirm={() => Promise.resolve()} + />, + { wrapper: makeWrapper(client) }, + ) + await waitFor(() => + expect( + screen.queryByTestId('promote-confirmation-verify-failed'), + ).toBeTruthy(), + ) + expect( + screen + .getByTestId('promote-confirmation-action') + .hasAttribute('disabled'), + ).toBe(true) + }) + + it('requires the worse-WAPE checkbox when latest WAPE > champion WAPE', async () => { + stubVerify({ + verified: true, + run_id: 'r', + artifact_uri: 'u', + computed_hash: 'abc', + stored_hash: 'abc', + }) + const client = new QueryClient({ + defaultOptions: { queries: { retry: false } }, + }) + const run = makeRun({ metrics: { wape: 20.0 } }) + const champion = makeRun({ + run_id: 'champ', + metrics: { wape: 12.0 }, + }) + render( + {}} + run={run} + currentChampion={champion} + 
defaultAliasName="production" + onConfirm={() => Promise.resolve()} + />, + { wrapper: makeWrapper(client) }, + ) + await waitFor(() => + expect( + screen.getByTestId('promote-confirmation-worse-wape'), + ).toBeTruthy(), + ) + // Action disabled while warning unacknowledged. + expect( + screen + .getByTestId('promote-confirmation-action') + .hasAttribute('disabled'), + ).toBe(true) + // Acknowledge → action enabled. + fireEvent.click(screen.getByTestId('promote-confirmation-worse-ack')) + await waitFor(() => + expect( + screen + .getByTestId('promote-confirmation-action') + .hasAttribute('disabled'), + ).toBe(false), + ) + }) + + it('requires the V-mismatch checkbox when champion V differs from run V', async () => { + stubVerify({ + verified: true, + run_id: 'r', + artifact_uri: 'u', + computed_hash: 'abc', + stored_hash: 'abc', + }) + const client = new QueryClient({ + defaultOptions: { queries: { retry: false } }, + }) + const run = makeRun({ feature_frame_version: 2 }) + const champion = makeRun({ + run_id: 'champ', + feature_frame_version: 1, + }) + render( + {}} + run={run} + currentChampion={champion} + defaultAliasName="production" + onConfirm={() => Promise.resolve()} + />, + { wrapper: makeWrapper(client) }, + ) + await waitFor(() => + expect( + screen.getByTestId('promote-confirmation-version-mismatch'), + ).toBeTruthy(), + ) + expect( + screen + .getByTestId('promote-confirmation-action') + .hasAttribute('disabled'), + ).toBe(true) + fireEvent.click(screen.getByTestId('promote-confirmation-version-ack')) + await waitFor(() => + expect( + screen + .getByTestId('promote-confirmation-action') + .hasAttribute('disabled'), + ).toBe(false), + ) + }) +}) diff --git a/frontend/src/components/forecast-intelligence/promote-confirmation-dialog.tsx b/frontend/src/components/forecast-intelligence/promote-confirmation-dialog.tsx new file mode 100644 index 00000000..8830b384 --- /dev/null +++ 
b/frontend/src/components/forecast-intelligence/promote-confirmation-dialog.tsx @@ -0,0 +1,240 @@ +import { useState } from 'react' +import { AlertTriangle, CheckCircle2, ShieldAlert } from 'lucide-react' +import { + AlertDialog, + AlertDialogAction, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from '@/components/ui/alert-dialog' +import { Checkbox } from '@/components/ui/checkbox' +import { Input } from '@/components/ui/input' +import { useVerifyArtifact } from '@/hooks/use-runs' +import { formatPercent } from '@/lib/api' +import type { FeatureFrameVersion, ModelRun } from '@/types/api' + +/** + * PRP-37 Slice C — safer Promote affordance. The button is disabled until + * every gate is satisfied: + * + * • Artifact verifies (computed_hash === stored_hash). + * • If the latest WAPE is HIGHER than the current champion's, the operator + * must acknowledge a checkbox explicitly. + * • If the latest run's feature_frame_version differs from the champion's, + * the operator must acknowledge that this silently changes the contract + * the alias represents. + * + * The alias-name input is preserved from the prior in-line Promote affordance + * so muscle memory is unchanged. 
+ */ + +interface PromoteConfirmationDialogProps { + open: boolean + onOpenChange: (open: boolean) => void + run: ModelRun + currentChampion?: ModelRun + defaultAliasName?: string + onConfirm: (aliasName: string) => Promise | void + isPromoting?: boolean +} + +export function PromoteConfirmationDialog({ + open, + onOpenChange, + run, + currentChampion, + defaultAliasName = '', + onConfirm, + isPromoting, +}: PromoteConfirmationDialogProps) { + const [aliasName, setAliasName] = useState(defaultAliasName) + const [worseAcknowledged, setWorseAcknowledged] = useState(false) + const [versionMismatchAck, setVersionMismatchAck] = useState(false) + + // Only verify while the dialog is open; useVerifyArtifact already gates on + // its `enabled` argument so a closed dialog does not fetch. + const verify = useVerifyArtifact(run.run_id, open && !!run.artifact_uri) + + const championWape = currentChampion?.metrics?.wape ?? null + const runWape = run.metrics?.wape ?? null + const worseWape = + championWape !== null && + runWape !== null && + runWape > championWape + + const verifyFailed = verify.data?.verified === false + + const championVersion: FeatureFrameVersion = + currentChampion?.feature_frame_version === 2 ? 2 : 1 + const runVersion: FeatureFrameVersion = + run.feature_frame_version === 2 ? 2 : 1 + const versionMismatch = + currentChampion !== undefined && championVersion !== runVersion + + const canConfirm = + aliasName.trim().length > 0 && + !verifyFailed && + (!worseWape || worseAcknowledged) && + (!versionMismatch || versionMismatchAck) && + !isPromoting + + async function handleConfirm() { + if (!canConfirm) return + await onConfirm(aliasName.trim()) + } + + return ( + { + if (!next) { + setWorseAcknowledged(false) + setVersionMismatchAck(false) + } + onOpenChange(next) + }} + > + + + + Promote run {run.run_id.slice(0, 8)} to an alias + + + Point a deployment alias at this run. 
An existing alias of the + same name is repointed; the comparable-run rule + artifact + integrity gate this confirm. + + + +
+
+ + setAliasName(event.target.value)} + placeholder="e.g. production" + autoComplete="off" + data-testid="promote-confirmation-alias-input" + /> +
+ + {verify.isFetching && ( +

+ Verifying artifact integrity… +

+ )} + + {verify.data?.verified === true && ( +
+ + Artifact verified — checksum matches the registry record. +
+ )} + + {verifyFailed && ( +
+ +
+

Artifact verification failed

+ {verify.data?.stored_hash && ( +

+ stored: {verify.data.stored_hash.slice(0, 16)}… +

+ )} + {verify.data?.computed_hash && ( +

+ computed: {verify.data.computed_hash.slice(0, 16)}… +

+ )} +

Promotion blocked until the artifact is restored.

+
+
+ )} + + {worseWape && ( +
+

+ + Latest WAPE is higher than the current champion +

+

+ Run {run.run_id.slice(0, 8)} WAPE{' '} + + {formatPercent(runWape, 2)} + {' '} + vs current champion{' '} + + {formatPercent(championWape, 2)} + + . Promoting overrides a better-performing alias. +

+ +
+ )} + + {versionMismatch && ( +
+

+ + Feature frame version mismatch +

+

+ Champion is V{championVersion}; this run is V{runVersion}. + Promoting silently changes the feature contract this alias + represents. +

+ +
+ )} +
+ + + Cancel + void handleConfirm()} + disabled={!canConfirm} + data-testid="promote-confirmation-action" + > + {isPromoting ? 'Promoting…' : 'Promote'} + + +
+
+ ) +} diff --git a/frontend/src/hooks/use-runs.ts b/frontend/src/hooks/use-runs.ts index 1234919a..23222587 100644 --- a/frontend/src/hooks/use-runs.ts +++ b/frontend/src/hooks/use-runs.ts @@ -7,6 +7,7 @@ import type { RunCompareResponse, RunStatus, ArtifactVerifyResponse, + FeatureFrameVersion, } from '@/types/api' interface UseRunsParams { @@ -18,6 +19,12 @@ interface UseRunsParams { productId?: number sortBy?: string sortOrder?: 'asc' | 'desc' + /** + * PRP-37 — accepted by the hook so callers can keep one filter object; + * NOT forwarded to the registry list endpoint today (no backend filter + * exists). Used purely to scope the query key for client-side caches. + */ + featureFrameVersion?: FeatureFrameVersion enabled?: boolean } @@ -30,12 +37,23 @@ export function useRuns({ productId, sortBy, sortOrder, + featureFrameVersion, enabled = true, }: UseRunsParams) { return useQuery({ queryKey: [ 'runs', - { page, pageSize, modelType, status, storeId, productId, sortBy, sortOrder }, + { + page, + pageSize, + modelType, + status, + storeId, + productId, + sortBy, + sortOrder, + featureFrameVersion, + }, ], queryFn: () => api('/registry/runs', { @@ -48,6 +66,8 @@ export function useRuns({ product_id: productId, sort_by: sortBy, sort_order: sortOrder, + // NOTE: featureFrameVersion is intentionally NOT forwarded — see + // PRP-37 Task 23 + contract probe report (no backend filter). 
}, }), placeholderData: keepPreviousData, diff --git a/frontend/src/lib/feature-frame-utils.test.ts b/frontend/src/lib/feature-frame-utils.test.ts new file mode 100644 index 00000000..4e18b8ef --- /dev/null +++ b/frontend/src/lib/feature-frame-utils.test.ts @@ -0,0 +1,126 @@ +import { describe, expect, it } from 'vitest' +import { + defaultV2Groups, + isV2Available, + labelForGroup, + labelForSafetyClass, + labelForVersion, + safetyClassChipVariant, +} from './feature-frame-utils' +import type { FeatureMetadataResponse } from '@/types/api' + +describe('labelForGroup', () => { + it('returns the labelled string for every known group', () => { + expect(labelForGroup('target_history')).toMatch(/target history/i) + expect(labelForGroup('rolling')).toMatch(/rolling/i) + expect(labelForGroup('calendar')).toMatch(/calendar/i) + expect(labelForGroup('price_promo')).toMatch(/price/i) + expect(labelForGroup('inventory')).toMatch(/inventory/i) + expect(labelForGroup('lifecycle')).toMatch(/lifecycle/i) + expect(labelForGroup('replenishment')).toMatch(/replenishment/i) + expect(labelForGroup('returns')).toMatch(/returns/i) + expect(labelForGroup('exogenous_weather')).toMatch(/weather/i) + expect(labelForGroup('exogenous_macro')).toMatch(/macro/i) + expect(labelForGroup('trend')).toMatch(/trend/i) + }) +}) + +describe('safetyClassChipVariant', () => { + it('maps safe → success', () => { + expect(safetyClassChipVariant('safe')).toBe('success') + }) + + it('maps conditionally_safe → warning', () => { + expect(safetyClassChipVariant('conditionally_safe')).toBe('warning') + }) + + it('maps unsafe_unless_supplied → error', () => { + expect(safetyClassChipVariant('unsafe_unless_supplied')).toBe('error') + }) +}) + +describe('labelForSafetyClass', () => { + it('returns a human-readable label for each class', () => { + expect(labelForSafetyClass('safe')).toBe('Safe') + expect(labelForSafetyClass('conditionally_safe')).toMatch(/conditional/i) + 
expect(labelForSafetyClass('unsafe_unless_supplied')).toMatch(/supplied/i) + }) +}) + +describe('isV2Available', () => { + it('returns false for undefined metadata', () => { + expect(isV2Available(undefined)).toBe(false) + }) + + it('returns true when feature_frame_version is 2', () => { + const meta: FeatureMetadataResponse = { + run_id: 'r', + model_type: 'lightgbm', + model_family: 'tree', + feature_columns: [], + features: [], + importance_type: null, + feature_frame_version: 2, + } + expect(isV2Available(meta)).toBe(true) + }) + + it('returns true when feature_groups is a non-empty dict (V1 sentinel)', () => { + const meta: FeatureMetadataResponse = { + run_id: 'r', + model_type: 'regression', + model_family: 'additive', + feature_columns: [], + features: [], + importance_type: null, + feature_groups: { target_history: ['lag_1'] }, + } + expect(isV2Available(meta)).toBe(true) + }) + + it('returns false when feature_groups is empty and version is 1', () => { + const meta: FeatureMetadataResponse = { + run_id: 'r', + model_type: 'naive', + model_family: 'baseline', + feature_columns: [], + features: [], + importance_type: null, + feature_frame_version: 1, + feature_groups: {}, + } + expect(isV2Available(meta)).toBe(false) + }) + + it('returns false when neither field is set', () => { + const meta: FeatureMetadataResponse = { + run_id: 'r', + model_type: 'naive', + model_family: 'baseline', + feature_columns: [], + features: [], + importance_type: null, + } + expect(isV2Available(meta)).toBe(false) + }) +}) + +describe('defaultV2Groups', () => { + it('returns the 6 groups mirroring app/shared/feature_frames/contract_v2.py:DEFAULT_V2_GROUPS', () => { + expect(defaultV2Groups()).toEqual([ + 'target_history', + 'calendar', + 'rolling', + 'trend', + 'price_promo', + 'lifecycle', + ]) + }) +}) + +describe('labelForVersion', () => { + it('labels V1 / V2 distinctly', () => { + expect(labelForVersion(1)).toMatch(/V1/i) + expect(labelForVersion(2)).toMatch(/V2/i) + }) +}) 
diff --git a/frontend/src/lib/feature-frame-utils.ts b/frontend/src/lib/feature-frame-utils.ts new file mode 100644 index 00000000..f19c873b --- /dev/null +++ b/frontend/src/lib/feature-frame-utils.ts @@ -0,0 +1,104 @@ +/** + * PRP-37 Slice C — Feature-frame helpers. + * + * Defensive client-side mirror of the PRP-35 V2 contract that lives in + * `app/shared/feature_frames/contract_v2.py`. Anything declared here is + * VERIFIED against that file via the Task 1 contract probe; the runtime + * source of truth is the backend response (FeatureMetadataResponse). When + * the two disagree, trust the backend and fix this file. + */ + +import type { + FeatureFrameVersion, + FeatureGroup, + FeatureMetadataResponse, + FeatureSafetyClass, +} from '@/types/api' + +/** UI-facing labels — sourced from PRP-35 §"V2 feature contract". */ +const GROUP_LABELS: Record = { + target_history: 'Target history (lags + same-DOW mean)', + rolling: 'Rolling means', + trend: 'Trend (30 / 90-day)', + calendar: 'Calendar (DOW, month, sin / cos)', + price_promo: 'Price + promotion', + inventory: 'Inventory + stockout', + lifecycle: 'Product lifecycle', + replenishment: 'Replenishment cadence', + returns: 'Returns intensity', + exogenous_weather: 'Weather signals', + exogenous_macro: 'Macro signals', +} + +/** Concise label for a {@link FeatureGroup} — for dense UI surfaces. */ +export function labelForGroup(group: FeatureGroup): string { + return GROUP_LABELS[group] +} + +/** Map a safety class to the badge variant the UI renders. */ +export function safetyClassChipVariant( + safety: FeatureSafetyClass, +): 'success' | 'warning' | 'error' { + switch (safety) { + case 'safe': + return 'success' + case 'conditionally_safe': + return 'warning' + case 'unsafe_unless_supplied': + return 'error' + } +} + +/** Human-readable label for a safety class. 
*/ +export function labelForSafetyClass(safety: FeatureSafetyClass): string { + switch (safety) { + case 'safe': + return 'Safe' + case 'conditionally_safe': + return 'Conditionally safe' + case 'unsafe_unless_supplied': + return 'Requires supplied data' + } +} + +/** + * V2 is available iff the backend feature-metadata response reports + * `feature_frame_version === 2` OR a non-empty `feature_groups` dict. + * Either signal independently proves the server shipped Forecast + * Intelligence A (PRP-35); we treat the OR conservatively so a pre-PRP-35 + * server (no fields at all) renders the V2 control disabled. + */ +export function isV2Available( + meta: FeatureMetadataResponse | undefined, +): boolean { + if (!meta) return false + if (meta.feature_frame_version === 2) return true + if ( + meta.feature_groups && + Object.keys(meta.feature_groups).length > 0 + ) { + return true + } + return false +} + +/** + * Mirror of `app/shared/feature_frames/contract_v2.py:DEFAULT_V2_GROUPS`. + * Used by the "use defaults" affordance on the feature-groups toggle and + * by the batch-preset builder. Task 1 verifies value-by-value. + */ +export function defaultV2Groups(): FeatureGroup[] { + return [ + 'target_history', + 'calendar', + 'rolling', + 'trend', + 'price_promo', + 'lifecycle', + ] +} + +/** Human-readable label for a frame version. */ +export function labelForVersion(v: FeatureFrameVersion): string { + return v === 2 ? 
'V2 — feature-aware' : 'V1 — target-only' +} diff --git a/frontend/src/lib/horizon-bucket-utils.test.ts b/frontend/src/lib/horizon-bucket-utils.test.ts new file mode 100644 index 00000000..81388124 --- /dev/null +++ b/frontend/src/lib/horizon-bucket-utils.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, it } from 'vitest' +import { + HORIZON_BUCKET_IDS, + labelForBucket, + sortBuckets, +} from './horizon-bucket-utils' + +describe('labelForBucket', () => { + it('returns operator-friendly labels for the four canonical buckets', () => { + expect(labelForBucket('h_1_7')).toBe('Days 1-7') + expect(labelForBucket('h_8_14')).toBe('Days 8-14') + expect(labelForBucket('h_15_28')).toBe('Days 15-28') + expect(labelForBucket('h_29_plus')).toBe('Days 29+') + }) + + it('surfaces unknown bucket ids verbatim', () => { + expect(labelForBucket('h_30_60')).toBe('h_30_60') + }) +}) + +describe('sortBuckets', () => { + it('returns the canonical order when all four ids are present, regardless of input order', () => { + expect(sortBuckets(['h_29_plus', 'h_1_7', 'h_15_28', 'h_8_14'])).toEqual([ + ...HORIZON_BUCKET_IDS, + ]) + }) + + it('drops absent ids while preserving canonical order', () => { + expect(sortBuckets(['h_29_plus', 'h_1_7'])).toEqual(['h_1_7', 'h_29_plus']) + }) + + it('appends unknown buckets at the end, alphabetically', () => { + expect( + sortBuckets(['h_30_60', 'h_1_7', 'h_8_14', 'h_zeta', 'h_alpha']), + ).toEqual(['h_1_7', 'h_8_14', 'h_30_60', 'h_alpha', 'h_zeta']) + }) + + it('returns an empty array for empty input', () => { + expect(sortBuckets([])).toEqual([]) + }) +}) diff --git a/frontend/src/lib/horizon-bucket-utils.ts b/frontend/src/lib/horizon-bucket-utils.ts new file mode 100644 index 00000000..d5f11058 --- /dev/null +++ b/frontend/src/lib/horizon-bucket-utils.ts @@ -0,0 +1,52 @@ +/** + * PRP-37 Slice C — Per-horizon-bucket helpers. 
/**
 * PRP-36 partitions a backtest fold into four operator-meaningful buckets
 * ('h_1_7' / 'h_8_14' / 'h_15_28' / 'h_29_plus') so the UI can show how
 * forecast error behaves over near vs. far horizons. The bucket id set is
 * fixed by the backend (`app/features/backtesting/metrics.py`), but
 * empty buckets are dropped from the response — sort defensively.
 */

/** The four bucket ids the backend may emit. */
export const HORIZON_BUCKET_IDS = [
  'h_1_7',
  'h_8_14',
  'h_15_28',
  'h_29_plus',
] as const

export type HorizonBucketId = (typeof HORIZON_BUCKET_IDS)[number]

/** Operator-facing label for each canonical bucket id. */
const BUCKET_LABELS: Record<HorizonBucketId, string> = {
  h_1_7: 'Days 1-7',
  h_8_14: 'Days 8-14',
  h_15_28: 'Days 15-28',
  h_29_plus: 'Days 29+',
}

/**
 * UI label for a known bucket id; unknown ids surface verbatim.
 *
 * @param id - Bucket id as received; may be an id this client does not know.
 * @returns The mapped label for one of {@link HORIZON_BUCKET_IDS}, otherwise
 *   `id` unchanged.
 */
export function labelForBucket(id: string): string {
  // Own-property check rather than `BUCKET_LABELS[id] ?? id`: a plain index
  // read walks the prototype chain, so ids such as 'constructor' or
  // 'toString' would return an inherited Object.prototype member (not a
  // string) instead of falling through to the verbatim id.
  return Object.prototype.hasOwnProperty.call(BUCKET_LABELS, id)
    ? BUCKET_LABELS[id as HorizonBucketId]
    : id
}

/**
 * Return `ids` sorted into a stable, operator-friendly order matching
 * {@link HORIZON_BUCKET_IDS}; unknown bucket ids are appended at the end
 * (alphabetical) so a forward-compatible bucket from a newer backend
 * still renders.
 */
+ */ +export function sortBuckets(ids: string[]): string[] { + const known: string[] = [] + const unknown: string[] = [] + for (const id of HORIZON_BUCKET_IDS) { + if (ids.includes(id)) known.push(id) + } + for (const id of ids) { + if (!(HORIZON_BUCKET_IDS as readonly string[]).includes(id)) { + unknown.push(id) + } + } + unknown.sort() + return [...known, ...unknown] +} diff --git a/frontend/src/pages/explorer/run-compare.tsx b/frontend/src/pages/explorer/run-compare.tsx index fc9ff285..b32ecb25 100644 --- a/frontend/src/pages/explorer/run-compare.tsx +++ b/frontend/src/pages/explorer/run-compare.tsx @@ -9,6 +9,7 @@ import { ErrorDisplay } from '@/components/common/error-display' import { LoadingState } from '@/components/common/loading-state' import { ModelFamilyBadge } from '@/components/common/model-family-badge' import { StatusBadge } from '@/components/common/status-badge' +import { ChampionCompatibilityBadge } from '@/components/forecast-intelligence/champion-compatibility-badge' import { getStatusVariant } from '@/lib/status-utils' import { Button } from '@/components/ui/button' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' @@ -152,6 +153,27 @@ export default function RunComparePage() { {a && b && compareQuery.isLoading && } + {/* PRP-37 — Champion-compatibility verdict for the picked pair. */} + {a && b && comparison && ( + + +
+
+ Champion compatibility + + Two runs are comparable iff they share grain (store + product), + overlapping data windows, and feature_frame_version. + +
+ +
+
+
+ )} + {a && b && comparison && ( <> @@ -214,6 +236,30 @@ export default function RunComparePage() { {comparison.run_b.data_window_start} → {comparison.run_b.data_window_end} + {/* PRP-37 — feature frame version row. + Rendered only when at least one run reports feature_frame_version; + a run that omits it (or reports null) surfaces "V1 (default)". */} + {(comparison.run_a.feature_frame_version !== undefined || + comparison.run_b.feature_frame_version !== undefined) && ( + + + Feature frame version + + + V{comparison.run_a.feature_frame_version ?? 1} + {comparison.run_a.feature_frame_version === undefined || + comparison.run_a.feature_frame_version === null + ? ' (default)' + : ''} + + + V{comparison.run_b.feature_frame_version ?? 1} + {comparison.run_b.feature_frame_version === undefined || + comparison.run_b.feature_frame_version === null + ? ' (default)' + : ''} + + + )} Config hash diff --git a/frontend/src/pages/explorer/run-detail.tsx b/frontend/src/pages/explorer/run-detail.tsx index c3a0a366..fc5c308f 100644 --- a/frontend/src/pages/explorer/run-detail.tsx +++ b/frontend/src/pages/explorer/run-detail.tsx @@ -14,6 +14,7 @@ import { useRunExplanation } from '@/hooks/use-explanations' import { useRunFeatureMetadata } from '@/hooks/use-feature-metadata' import { ExplanationPanel } from '@/components/explainability/explanation-panel' import { FeatureImportancePanel } from '@/components/explainability/feature-importance-panel' +import { FeatureFramePanel } from '@/components/forecast-intelligence/feature-frame-panel' import { JsonBlock } from '@/components/common/json-block' import { ErrorDisplay } from '@/components/common/error-display' import { LoadingState } from '@/components/common/loading-state' @@ -162,6 +163,15 @@ export default function RunDetailPage() { + {/* PRP-37 — Feature frame panel: surfaces V1/V2 + feature_groups + + per-column safety classes. Empty-state for pre-PRP-35 runs.
*/} + + {run.status === 'failed' && run.error_message && ( diff --git a/frontend/src/pages/ops.tsx b/frontend/src/pages/ops.tsx index 04a5ba1a..233c8ef5 100644 --- a/frontend/src/pages/ops.tsx +++ b/frontend/src/pages/ops.tsx @@ -5,7 +5,8 @@ import { toast } from 'sonner' import { useModelHealth, useOpsSummary, useRetrainingCandidates } from '@/hooks/use-ops' import { useProviderHealth } from '@/hooks/use-config' import { useCreateJob } from '@/hooks/use-jobs' -import { useCreateAlias } from '@/hooks/use-runs' +import { useCreateAlias, useRun, useAliases } from '@/hooks/use-runs' +import { PromoteConfirmationDialog } from '@/components/forecast-intelligence/promote-confirmation-dialog' import { attentionBadgeVariant, attentionItemLink, @@ -47,7 +48,6 @@ import { AlertDialogTitle, } from '@/components/ui/alert-dialog' import { Checkbox } from '@/components/ui/checkbox' -import { Input } from '@/components/ui/input' import { downloadCsv, toCsv } from '@/lib/csv-export' import { attentionCsvColumns, buildIncidentMarkdown, downloadMarkdown } from '@/lib/incident-report' import { buildRetrainJob } from '@/lib/ops-actions' @@ -98,13 +98,32 @@ export default function OpsPage() { const candidatesQuery = useRetrainingCandidates() const modelHealthQuery = useModelHealth() const providerQuery = useProviderHealth() + const aliasesQuery = useAliases() const createJob = useCreateJob() const createAlias = useCreateAlias() const [selected, setSelected] = useState>(new Set()) const [retrainConfirmOpen, setRetrainConfirmOpen] = useState(false) const [actionBusy, setActionBusy] = useState(false) const [promoteTarget, setPromoteTarget] = useState(null) - const [aliasName, setAliasName] = useState('') + + // PRP-37 — load the candidate run + the current champion's run (when a + // production alias points at this grain) for the safer Promote dialog. + const promoteRunQuery = useRun( + promoteTarget?.runId ?? '', + promoteTarget !== null, + ) + const aliasList = aliasesQuery.data ?? 
[] + const championAlias = promoteTarget + ? aliasList.find( + (a) => + (a.alias_name === 'production' || a.alias_name === 'champion') && + a.run_id !== promoteTarget.runId, + ) + : undefined + const championRunQuery = useRun( + championAlias?.run_id ?? '', + !!championAlias?.run_id, + ) if (summaryQuery.error) { return ( @@ -200,12 +219,11 @@ export default function OpsPage() { /** Open the promote-to-alias dialog for a grain's latest successful run. */ function openPromote(runId: string | null, storeId: number, productId: number) { if (runId === null) return - setAliasName('') setPromoteTarget({ runId, storeId, productId }) } /** Promote the targeted run to a deployment alias via POST /registry/aliases. */ - async function runPromote() { + async function runPromote(aliasName: string) { if (promoteTarget === null) return const target = promoteTarget const name = aliasName.trim() @@ -220,6 +238,16 @@ export default function OpsPage() { setPromoteTarget(null) } + /** PRP-36 enum → human-readable reason chip label. */ + function staleReasonLabel(reason: string | null): string { + if (reason === null) return '—' + if (reason === 'feature_frame_version_mismatch') return 'V mismatch' + if (reason === 'newer_success_run') return 'newer success run' + if (reason === 'artifact_not_verified') return 'artifact not verified' + if (reason === 'run_not_success') return 'run not success' + return reason + } + return (
@@ -417,6 +445,81 @@ export default function OpsPage() { + {/* PRP-37 — Stale aliases. Surfaces the new + feature_frame_version_mismatch reason chip (PRP-36) alongside + the existing newer-run / artifact-not-verified / run-not-success + reasons. */} + {summary.aliases.some((a) => a.is_stale) && ( + + + Stale aliases + + Deployment aliases the Control Center flagged as out of date. + Each row carries the precise stale reason and (when known) + the alias vs. comparable run's feature_frame_version. + + + + + + + Alias + Grain + Reason + Alias V + Comparable V + WAPE + + + + {summary.aliases + .filter((a) => a.is_stale) + .map((alias) => ( + + + {alias.alias_name} + + + store {alias.store_id} / product{' '} + {alias.product_id} + + + + {staleReasonLabel(alias.stale_reason)} + + + + {alias.alias_feature_frame_version + ? `V${alias.alias_feature_frame_version}` + : '—'} + + + {alias.comparable_run_feature_frame_version + ? `V${alias.comparable_run_feature_frame_version}` + : '—'} + + + {alias.wape === null ? '—' : alias.wape.toFixed(1)} + + + ))} + +
+
+
+ )} + {/* Section 5 — Model Health */} @@ -441,14 +544,19 @@ export default function OpsPage() { Product Drift Latest WAPE + Prev WAPE Δ WAPE - Runs + Runs evaluated + Staleness Action {modelHealthEntries.map((entry) => ( - + {entry.store_id} {entry.product_id} @@ -459,12 +567,20 @@ export default function OpsPage() { {entry.latest_wape === null ? '—' : entry.latest_wape.toFixed(1)} + + {entry.previous_wape === null + ? '—' + : entry.previous_wape.toFixed(1)} + {formatWapeDelta(entry.wape_delta)} {entry.run_count} + + {formatStaleness(entry.staleness_days)} +
) } diff --git a/frontend/src/pages/visualize/backtest.tsx b/frontend/src/pages/visualize/backtest.tsx index e25994ce..cad933dc 100644 --- a/frontend/src/pages/visualize/backtest.tsx +++ b/frontend/src/pages/visualize/backtest.tsx @@ -7,10 +7,15 @@ import { useJob, useCreateJob } from '@/hooks/use-jobs' import { useStores } from '@/hooks/use-stores' import { useProducts } from '@/hooks/use-products' import { BacktestFoldsChart, MetricsSummary } from '@/components/charts/backtest-folds-chart' +import { BacktestHorizonBucketsChart } from '@/components/charts/backtest-horizon-buckets-chart' import { DateRangePicker } from '@/components/common/date-range-picker' import { EmptyState } from '@/components/common/error-display' import { JobPicker } from '@/components/common/job-picker' import { LoadingState } from '@/components/common/loading-state' +import { ModelFamilyTabs } from '@/components/forecast-intelligence/model-family-tabs' +import { ModelTypeSelect } from '@/components/forecast-intelligence/model-type-select' +import { MODEL_FAMILY_MAP } from '@/components/forecast-intelligence/model-type-utils' +import { HorizonBucketTable } from '@/components/forecast-intelligence/horizon-bucket-table' import { Button } from '@/components/ui/button' import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card' import { Input } from '@/components/ui/input' @@ -24,6 +29,11 @@ import { import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs' import { downloadCsv, toCsv, type CsvColumn } from '@/lib/csv-export' import { getErrorMessage } from '@/lib/api' +import type { + BacktestResponse, + ModelBacktestResult, + ModelFamily, +} from '@/types/api' interface FoldMetric { fold: number @@ -48,19 +58,11 @@ interface BacktestResult { } } -// MLZOO-D / PRP-31 — Feature-aware backtesting (B.2) made the four advanced -// families reachable from the UI. 
The allow-list now includes all seven -// canonical model types; see PRP-MLZOO-B.2 for the per-fold X_train/X_future -// split that keeps the feature-aware backtest leakage-safe. -const MODEL_OPTIONS = [ - { value: 'naive', label: 'Naive' }, - { value: 'seasonal_naive', label: 'Seasonal Naive' }, - { value: 'moving_average', label: 'Moving Average' }, - { value: 'regression', label: 'Regression (HistGBR)' }, - { value: 'lightgbm', label: 'LightGBM' }, - { value: 'xgboost', label: 'XGBoost' }, - { value: 'prophet_like', label: 'Prophet-like (additive)' }, -] +/** Format a metric value to 2 decimal places; '—' when missing. */ +function fmt(value: number | undefined): string { + if (typeof value !== 'number' || !Number.isFinite(value)) return '—' + return value.toFixed(2) +} const foldCsvColumns: CsvColumn[] = [ { key: 'fold', header: 'Fold' }, @@ -77,11 +79,17 @@ export default function BacktestPage() { // In-page "Run new backtest" form state. const [storeId, setStoreId] = useState('') const [productId, setProductId] = useState('') + // PRP-37 — split the flat model select into family + filtered type. + const [family, setFamily] = useState('baseline') const [modelType, setModelType] = useState('naive') const [dateRange, setDateRange] = useState() const [nSplits, setNSplits] = useState(5) const [testSize, setTestSize] = useState(14) const [runError, setRunError] = useState(null) + // PRP-37 — per-horizon-bucket viz metric switcher (PRP-36). + const [bucketMetric, setBucketMetric] = useState< + 'mae' | 'smape' | 'wape' | 'bias' | 'rmse' + >('wape') const { data: job, isLoading, error } = useJob(searchJobId, !!searchJobId) const createJob = useCreateJob() @@ -89,8 +97,24 @@ export default function BacktestPage() { const storesQuery = useStores({ page: 1, pageSize: 100 }) const productsQuery = useProducts({ page: 1, pageSize: 100 }) - // Extract backtest result from job + // Extract backtest result from job. 
job.result is JSONB so we read it + // optimistically — the legacy `aggregated_metrics.mae_mean` shape and the + // PRP-36 `main_model_results.aggregated_metrics["mae"]` shape coexist in + // the registry. const backtestResult = job?.result as BacktestResult | undefined + const prp36 = job?.result as Partial | undefined + const mainResult: ModelBacktestResult | undefined = prp36?.main_model_results + const baselineResults: ModelBacktestResult[] = prp36?.baseline_results ?? [] + const rmse = mainResult?.aggregated_metrics?.['rmse'] + const bucketed = mainResult?.bucketed_aggregated_metrics ?? null + + function handleFamilyChange(next: ModelFamily) { + setFamily(next) + const valid = MODEL_FAMILY_MAP[next] + if (!valid.includes(modelType)) { + setModelType(valid[0] ?? '') + } + } // The number inputs can be cleared to 0; require a valid split count and // test size so an invalid backtest config can never be submitted. @@ -176,20 +200,18 @@ export default function BacktestPage() {
+
+ Family + +
Model - +
Date window @@ -296,32 +318,165 @@ export default function BacktestPage() { metrics={[ { label: 'MAE', - value: backtestResult.aggregated_metrics?.mae_mean ?? 0, + value: + mainResult?.aggregated_metrics?.['mae'] ?? + backtestResult.aggregated_metrics?.mae_mean ?? + 0, description: 'Mean Absolute Error', }, { label: 'sMAPE', - value: backtestResult.aggregated_metrics?.smape_mean ?? 0, + value: + mainResult?.aggregated_metrics?.['smape'] ?? + backtestResult.aggregated_metrics?.smape_mean ?? + 0, unit: '%', description: 'Symmetric MAPE (0-200)', }, { label: 'WAPE', - value: backtestResult.aggregated_metrics?.wape_mean ?? 0, + value: + mainResult?.aggregated_metrics?.['wape'] ?? + backtestResult.aggregated_metrics?.wape_mean ?? + 0, unit: '%', description: 'Weighted APE', }, - { - label: 'Stability', - value: backtestResult.aggregated_metrics?.stability_index ?? 0, - unit: '%', - description: 'Lower is better', - }, + // PRP-37 — RMSE is a key inside aggregated_metrics (PRP-36). + // Omit entirely when absent rather than zero-padding. + ...(typeof rmse === 'number' + ? [ + { + label: 'RMSE', + value: rmse, + description: 'Root mean squared error', + }, + ] + : [ + { + label: 'Stability', + value: + backtestResult.aggregated_metrics?.stability_index ?? 0, + unit: '%', + description: 'Lower is better', + }, + ]), ]} /> + {/* PRP-37 — Per-horizon-bucket metrics (PRP-36). Rendered only when + the backend emits bucketed_aggregated_metrics. */} + {bucketed && Object.keys(bucketed).length > 0 && ( + + +
+
+ Per-horizon-bucket metrics + + Forecast error split by horizon distance. Near-horizon + buckets typically improve faster than far-horizon ones. + +
+ +
+
+ + + + +
+ )} + + {/* PRP-37 — Baseline vs. feature-aware comparison (PRP-36). Shown + only when the response includes one or more baseline ModelBacktestResult + rows. */} + {baselineResults.length > 0 && mainResult && ( + + + Baseline vs feature-aware + + Same folds, identical splits — every baseline competes against + the main feature-aware model. Lower WAPE / RMSE wins. + + + + + + + + + + + + + + + + + + + + + + {baselineResults.map((b) => ( + + + + + + + + ))} + +
ModelMAEsMAPEWAPERMSE
{mainResult.model_type} (main) + {fmt(mainResult.aggregated_metrics?.['mae'])} + + {fmt(mainResult.aggregated_metrics?.['smape'])} + + {fmt(mainResult.aggregated_metrics?.['wape'])} + + {fmt(mainResult.aggregated_metrics?.['rmse'])} +
+ {b.model_type} + + {fmt(b.aggregated_metrics?.['mae'])} + + {fmt(b.aggregated_metrics?.['smape'])} + + {fmt(b.aggregated_metrics?.['wape'])} + + {fmt(b.aggregated_metrics?.['rmse'])} +
+
+
+ )} + {/* Baseline Comparison */} {backtestResult.baseline_comparison && ( diff --git a/frontend/src/pages/visualize/batch.tsx b/frontend/src/pages/visualize/batch.tsx index 1e921f60..c2b1b162 100644 --- a/frontend/src/pages/visualize/batch.tsx +++ b/frontend/src/pages/visualize/batch.tsx @@ -18,6 +18,17 @@ import { useState } from 'react' import { ErrorDisplay } from '@/components/common/error-display' import { LoadingState } from '@/components/common/loading-state' import { StatusBadge } from '@/components/common/status-badge' +import { BatchPresetSelect } from '@/components/forecast-intelligence/batch-preset-select' +import { + buildPresetConfigs, + type BatchPresetId, +} from '@/components/forecast-intelligence/batch-preset-utils' +import { + BatchMatrixPicker, + type MatrixRow, +} from '@/components/forecast-intelligence/batch-matrix-picker' +import { defaultV2Groups } from '@/lib/feature-frame-utils' +import { FEATURE_GROUP_VALUES } from '@/types/api' import { AlertDialog, AlertDialogAction, @@ -56,9 +67,24 @@ import { } from '@/hooks/use-batches' import { TERMINAL_BATCH_STATES, + type BatchModelConfig, type BatchSubmitRequest, } from '@/types/api' +const AVAILABLE_BATCH_MODELS: string[] = [ + 'naive', + 'seasonal_naive', + 'moving_average', + 'weighted_moving_average', + 'seasonal_average', + 'regression', + 'lightgbm', + 'xgboost', + 'random_forest', + 'prophet_like', + 'trend_regression_baseline', +] + export default function BatchRunnerPage() { // Last-submitted batch the page tracks. null = nothing yet. const [batchId, setBatchId] = useState(null) @@ -72,6 +98,24 @@ export default function BatchRunnerPage() { // PRP-34: per-batch parallelism request (server runtime-clamps by the // global cap). Default matches the server's default of 4. const [maxParallel, setMaxParallel] = useState(4) + // PRP-37: sweep matrix — multi-model × multi-feature-pack picker. 
+ const [preset, setPreset] = useState(undefined) + const [matrixRows, setMatrixRows] = useState([]) + + function handlePresetChange(next: BatchPresetId) { + setPreset(next) + // Map each preset's BatchModelConfig list into MatrixRow values the + // BatchMatrixPicker renders. A preset with no feature_frame_version + // (baseline sweep) maps to V1; otherwise V2 + the preset's groups. + const configs = buildPresetConfigs(next) + setMatrixRows( + configs.map((config) => ({ + model_type: config.model_type, + feature_frame_version: config.feature_frame_version ?? 1, + feature_groups: config.feature_groups ?? [], + })), + ) + } const submit = useSubmitBatch() const cancel = useCancelBatch() @@ -90,6 +134,22 @@ export default function BatchRunnerPage() { .map((t) => parseInt(t.trim(), 10)) .filter((n) => !Number.isNaN(n)) + // PRP-37 — translate the matrix into BatchModelConfig rows. Fall back to + // the single-naive submit when the matrix is empty (preserves the prior + // PRP-33/34 default behaviour). + const matrixConfigs: BatchModelConfig[] = matrixRows.map((row) => { + const config: BatchModelConfig = { + model_type: row.model_type as BatchModelConfig['model_type'], + } + if (row.feature_frame_version === 2) { + config.feature_frame_version = 2 + if (row.feature_groups.length > 0) { + config.feature_groups = row.feature_groups + } + } + return config + }) + const payload: BatchSubmitRequest = { operation: 'backtest', scope: { @@ -97,7 +157,10 @@ export default function BatchRunnerPage() { store_ids: parseIds(storeIds), product_ids: parseIds(productIds), }, - model_configs: [{ model_type: 'naive', params: {} }], + model_configs: + matrixConfigs.length > 0 + ? matrixConfigs + : [{ model_type: 'naive', params: {} }], start_date: startDate, end_date: endDate, max_parallel: maxParallel, @@ -163,6 +226,32 @@ export default function BatchRunnerPage() { onChange={(e) => setEndDate(e.target.value)} /> +
+ {/* PRP-37 — preset Select + matrix picker. Preset prefills the + matrix; rows can still be hand-edited afterward. */} +
+ Sweep preset + +

+ Optional. Picking a preset overwrites the matrix below. +

+
+
+ + Sweep matrix (model × feature frame) + + +
+
diff --git a/frontend/src/pages/visualize/forecast.tsx b/frontend/src/pages/visualize/forecast.tsx index 4cdbdd58..3f31b971 100644 --- a/frontend/src/pages/visualize/forecast.tsx +++ b/frontend/src/pages/visualize/forecast.tsx @@ -1,12 +1,23 @@ import { useState } from 'react' +import { format } from 'date-fns' +import { DateRange } from 'react-day-picker' import { Link } from 'react-router-dom' import { BarChart3, Download, ExternalLink, Loader2, Play } from 'lucide-react' import { useJob, useCreateJob } from '@/hooks/use-jobs' import { useJobExplanation } from '@/hooks/use-explanations' import { useJobFeatureMetadata } from '@/hooks/use-feature-metadata' +import { useStores } from '@/hooks/use-stores' +import { useProducts } from '@/hooks/use-products' import { ExplanationPanel } from '@/components/explainability/explanation-panel' import { FeatureImportancePanel } from '@/components/explainability/feature-importance-panel' import { ModelFamilyBadge } from '@/components/common/model-family-badge' +import { DateRangePicker } from '@/components/common/date-range-picker' +import { ModelFamilyTabs } from '@/components/forecast-intelligence/model-family-tabs' +import { ModelTypeSelect } from '@/components/forecast-intelligence/model-type-select' +import { MODEL_FAMILY_MAP } from '@/components/forecast-intelligence/model-type-utils' +import { FeatureFrameSelect } from '@/components/forecast-intelligence/feature-frame-select' +import { FeatureGroupsToggle } from '@/components/forecast-intelligence/feature-groups-toggle' +import { defaultV2Groups } from '@/lib/feature-frame-utils' import { Collapsible, CollapsibleContent, @@ -26,9 +37,15 @@ import { SelectTrigger, SelectValue, } from '@/components/ui/select' +import { FEATURE_GROUP_VALUES } from '@/types/api' import { downloadCsv, toCsv, type CsvColumn } from '@/lib/csv-export' import { getErrorMessage } from '@/lib/api' -import type { ForecastPoint } from '@/types/api' +import type { + FeatureFrameVersion, + 
FeatureGroup, + ForecastPoint, + ModelFamily, +} from '@/types/api' /** Horizon presets (days) for an in-page predict run. */ const HORIZON_OPTIONS = [7, 14, 30, 60, 90] @@ -47,6 +64,23 @@ export default function ForecastPage() { const [showInterval, setShowInterval] = useState(false) const [runError, setRunError] = useState(null) + // PRP-37 Slice C — train-from-page control row state. + const [trainFamily, setTrainFamily] = useState('baseline') + const [trainModelType, setTrainModelType] = useState('seasonal_naive') + const [trainStoreId, setTrainStoreId] = useState('') + const [trainProductId, setTrainProductId] = useState('') + const [trainDateRange, setTrainDateRange] = useState() + const [trainVersion, setTrainVersion] = useState(1) + const [trainGroups, setTrainGroups] = useState([]) + const [trainError, setTrainError] = useState(null) + + const storesQuery = useStores({ page: 1, pageSize: 100 }) + const productsQuery = useProducts({ page: 1, pageSize: 100 }) + + // V2 is meaningful only for feature-aware families. Baselines do not consume + // features, so the V2 option is locked off there. + const isV2Available = trainFamily !== 'baseline' + const { data: job, isLoading, error } = useJob(searchJobId, !!searchJobId) const { data: trainJob } = useJob(trainJobId, !!trainJobId) const createJob = useCreateJob() @@ -99,6 +133,65 @@ export default function ForecastPage() { } } + /** PRP-37 — narrow trainModelType to the picked family. */ + function handleFamilyChange(next: ModelFamily) { + setTrainFamily(next) + const valid = MODEL_FAMILY_MAP[next] + if (!valid.includes(trainModelType)) { + setTrainModelType(valid[0] ?? '') + } + if (next === 'baseline') { + // Baseline cannot consume features — drop V2 + groups when switching back. 
+ setTrainVersion(1) + setTrainGroups([]) + } + } + + function handleVersionChange(next: FeatureFrameVersion) { + setTrainVersion(next) + if (next === 1) { + setTrainGroups([]) + } else if (trainGroups.length === 0) { + setTrainGroups(defaultV2Groups()) + } + } + + const trainFormReady = + !!trainStoreId && + !!trainProductId && + !!trainDateRange?.from && + !!trainDateRange?.to && + !!trainModelType + + async function handleSubmitTrain() { + if (!trainFormReady || !trainDateRange?.from || !trainDateRange?.to) return + setTrainError(null) + const params: Record = { + model_type: trainModelType, + store_id: Number(trainStoreId), + product_id: Number(trainProductId), + start_date: format(trainDateRange.from, 'yyyy-MM-dd'), + end_date: format(trainDateRange.to, 'yyyy-MM-dd'), + } + // Backend treats V1 + omit-feature_groups as the default — only forward the + // new fields when the operator explicitly opted into V2. + if (trainVersion === 2) { + params.feature_frame_version = 2 + if (trainGroups.length > 0) { + params.feature_groups = trainGroups + } + } + try { + const newJob = await createJob.mutateAsync({ + job_type: 'train', + params, + }) + setTrainJobId(newJob.job_id) + } catch (caught) { + setTrainError(getErrorMessage(caught)) + } + } + function handleExport() { if (forecastData.length === 0 || !job) return downloadCsv(`forecast-${job.job_id}.csv`, toCsv(forecastData, csvColumns)) @@ -108,6 +201,116 @@ export default function ForecastPage() {

Forecast Visualization

+ {/* PRP-37 Slice C — segmented control row to train a new model. */} + + + Train a new model + + Pick a family, a model, a store/product/date window. V2 unlocks + feature-aware models (tree + additive); V1 is target-only. + + + +
+
+ Family + +
+
+ Model + +
+
+ Feature frame + +
+
+ {trainVersion === 2 && isV2Available && ( + + )} +
+
+ Store + +
+
+ Product + +
+
+ Date window + +
+
+
+ + {!trainFormReady && ( + + Pick a model, store, product and date window to enable. + + )} +
+ {trainError &&

{trainError}

} +
+
+ {/* Run a new forecast in-page */} diff --git a/frontend/src/pages/visualize/planner.tsx b/frontend/src/pages/visualize/planner.tsx index 077ce6e2..d532d9eb 100644 --- a/frontend/src/pages/visualize/planner.tsx +++ b/frontend/src/pages/visualize/planner.tsx @@ -515,12 +515,30 @@ export default function WhatIfPlannerPage() { <> - Scenario impact - - {comparison.model_type} model · store {comparison.store_id} · product{' '} - {comparison.product_id} · {comparison.horizon}-day horizon ·{' '} - {methodLabel(comparison.method)} estimate - +
+
+ Scenario impact + + {comparison.model_type} model · store {comparison.store_id} · + product {comparison.product_id} · {comparison.horizon}-day + horizon · {methodLabel(comparison.method)} estimate + +
+ {/* PRP-37 — surface the scenario method as a chip. The + `model_exogenous` method genuinely re-forecasts the + regression baseline through a leakage-safe future X; + `heuristic` applies a deterministic post-forecast factor. */} + + {comparison.method === 'model_exogenous' + ? 'model-driven re-forecast' + : 'heuristic adjustment'} + +
diff --git a/frontend/src/types/api.ts b/frontend/src/types/api.ts index 84a0f684..a7c36b9f 100644 --- a/frontend/src/types/api.ts +++ b/frontend/src/types/api.ts @@ -176,6 +176,43 @@ export type RunStatus = 'pending' | 'running' | 'success' | 'failed' | 'archived // pages can render a consistent Family badge. export type ModelFamily = 'baseline' | 'tree' | 'additive' +// PRP-37 Slice C — Forecast Intelligence A (PRP-35). +// Mirrors `app/shared/feature_frames/contract_v2.py:FeatureGroup`. Lowercase +// wire form is canonical; the StrEnum on the backend matches these values. +export type FeatureFrameVersion = 1 | 2 + +export type FeatureGroup = + | 'target_history' + | 'rolling' + | 'trend' + | 'calendar' + | 'price_promo' + | 'inventory' + | 'lifecycle' + | 'replenishment' + | 'returns' + | 'exogenous_weather' + | 'exogenous_macro' + +export const FEATURE_GROUP_VALUES = [ + 'target_history', + 'rolling', + 'trend', + 'calendar', + 'price_promo', + 'inventory', + 'lifecycle', + 'replenishment', + 'returns', + 'exogenous_weather', + 'exogenous_macro', +] as const satisfies readonly FeatureGroup[] + +export type FeatureSafetyClass = + | 'safe' + | 'conditionally_safe' + | 'unsafe_unless_supplied' + export interface ModelRun { run_id: string status: RunStatus @@ -200,6 +237,10 @@ export interface ModelRun { completed_at: string | null created_at: string updated_at: string + /** PRP-36 computed_field — `null` for legacy / V1 runs. */ + feature_frame_version?: FeatureFrameVersion | null + /** PRP-36 computed_field — per-group feature lists, `null` for V1 runs. */ + feature_groups?: Partial> | null } // MLZOO-D / PRP-31: response shape for the two feature-metadata endpoints @@ -220,6 +261,12 @@ export interface FeatureMetadataResponse { feature_columns: string[] features: FeatureImportanceItem[] importance_type: string | null + /** PRP-35 — present on every response; defaults to 1 server-side. 
*/ + feature_frame_version?: FeatureFrameVersion + /** PRP-35 — per-group feature lists; V1 returns null. */ + feature_groups?: Partial> | null + /** PRP-35 — column → safety class map; V1 returns null. */ + feature_safety_classes?: Record | null } export interface RunListResponse extends PaginatedResponse { @@ -254,6 +301,35 @@ export interface ArtifactVerifyResponse { error?: string } +// === Backtesting (PRP-36) === +// +// `aggregated_metrics` is a flat `dict[str, float]` on the wire (NOT a Pydantic +// class) — RMSE rides inside it under the key `"rmse"`. Per-horizon-bucket +// metrics use the same dict-of-dict shape. + +export interface FoldResult { + fold: number + /** PRP-36 — per-bucket metrics; empty when no horizon-bucket split fired. */ + horizon_bucket_metrics?: Record> + [key: string]: unknown +} + +export interface ModelBacktestResult { + model_type: string + aggregated_metrics: Record + fold_results: FoldResult[] + /** PRP-36 — per-bucket aggregates across folds; null when no fold emitted a bucket dict. */ + bucketed_aggregated_metrics?: Record> | null + [key: string]: unknown +} + +export interface BacktestResponse { + main_model_results: ModelBacktestResult + baseline_results?: ModelBacktestResult[] + comparison_summary?: Record | null + [key: string]: unknown +} + // === Jobs === export type JobType = 'train' | 'predict' | 'backtest' export type JobStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled' @@ -332,16 +408,43 @@ export interface BatchScope { top_n?: number | null } +// PRP-37 Slice C — Forecast-train control inputs. `feature_frame_version` + +// `feature_groups` MUST be omitted (undefined) when V1 is selected — the +// backend rejects `feature_groups` on a V1 train. +export interface TrainRequest { + store_id: number + product_id: number + start_date: string + end_date: string + model_type: string + config?: Record + /** PRP-35 — defaults to 1 server-side; omit to keep that default. 
*/ + feature_frame_version?: FeatureFrameVersion + /** PRP-35 — V2 only; rejected by the backend when version=1. */ + feature_groups?: FeatureGroup[] +} + export interface BatchModelConfig { + // PRP-36 expanded the model zoo (weighted_moving_average, + // seasonal_average, trend_regression_baseline, random_forest). Kept as a + // literal union so a typo at call-site is caught at compile time. model_type: | 'naive' | 'seasonal_naive' | 'moving_average' + | 'weighted_moving_average' + | 'seasonal_average' + | 'trend_regression_baseline' | 'regression' | 'lightgbm' | 'xgboost' + | 'random_forest' | 'prophet_like' params?: Record + /** PRP-37 — propagated into the per-item train; V1 default when omitted. */ + feature_frame_version?: FeatureFrameVersion + /** PRP-37 — only valid when feature_frame_version=2. */ + feature_groups?: FeatureGroup[] } export interface BatchSubmitRequest { @@ -757,6 +860,16 @@ export interface RunHealth { failed_total: number } +// PRP-36 — enum literal values mirror app/features/ops/schemas.py:StaleReason. +// 'feature_frame_version_mismatch' is the new value PRP-36 adds; surfaces as a +// distinct chip on the Ops page so operators can see a V-drift apart from a +// generic newer-run finding. +export type StaleReason = + | 'newer_success_run' + | 'artifact_not_verified' + | 'run_not_success' + | 'feature_frame_version_mismatch' + // Deployment-alias health with a staleness verdict. export interface AliasHealth { alias_name: string @@ -766,8 +879,12 @@ export interface AliasHealth { store_id: number product_id: number is_stale: boolean - stale_reason: string | null + stale_reason: StaleReason | string | null wape: number | null + /** PRP-36 — version of the alias's current run. */ + alias_feature_frame_version?: FeatureFrameVersion | null + /** PRP-36 — version of the newest comparable run, when one exists. */ + comparable_run_feature_frame_version?: FeatureFrameVersion | null } // How current the underlying data and model state are. 
@@ -840,6 +957,10 @@ export interface ModelHealthEntry { last_trained_at: string | null staleness_days: number wape_history: WapePoint[] + /** PRP-36 — version of the alias's current run. */ + alias_feature_frame_version?: FeatureFrameVersion | null + /** PRP-36 — version of the newest comparable run. */ + comparable_run_feature_frame_version?: FeatureFrameVersion | null } // Per-grain forecast-error health — GET /ops/model-health.