From 14041cad2efb66a350afce6c3461a711416812dc Mon Sep 17 00:00:00 2001 From: Gabor Szabo Date: Tue, 26 May 2026 15:51:19 +0200 Subject: [PATCH] =?UTF-8?q?feat(api,ui):=20showcase=20pipeline=20=E2=80=94?= =?UTF-8?q?=20decision=20+=20portfolio=20lifecycle=20(#316)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PRP-39 — extend the showcase_rich demo pipeline with three new decision-phase steps (champion_compat_compare, stale_alias_trigger, safer_promote_flow) and a new portfolio phase (batch_preset). The decision lifecycle now demonstrates V1-vs-V2 champion-compat verdicts, the stale-alias V-mismatch chip on /ops, and the safer-Promote dialog gates. The portfolio phase runs the quick_baseline_sweep preset (3 stores x 2 products x 3 baselines = 18 items) via /batch/forecasting. Backend: - app/features/demo/pipeline.py — 4 new step functions, PHASE_PORTFOLIO constant, BATCH_PRESET_QUICK_BASELINE_SWEEP_MODELS module constant, DemoContext additive fields (compat_compare_result, stale_alias_run_id, original_demo_alias_run_id, batch_id, batch_status), step_cleanup extension that restores the demo-production alias to its pre-swap target (R15). - app/features/demo/tests/test_pipeline.py — 8 new unit tests (4 step functions, 2 skip paths, 2 cleanup scenarios) + extended canned responses for /ops/summary, /batch/forecasting, /registry/runs?..., /registry/aliases/{name}, /registry/compare/{a}/{b}; lockstep test_phase_table_showcase_rich_adds_v2_steps expanded to 18 rows. - tests/test_e2e_demo.py — new test_run_demo_showcase_rich_decision_portfolio integration test asserting the four new step events fire and R15 alias restoration completes. Frontend: - PHASE_DEFS.ts — appends 3 decision-phase rows + portfolio phase row; PHASE_ORDER + PHASE_LABEL extend with 'portfolio'. - showcase.tsx — resolveInspectHref gains 4 new case arms targeting /explorer/runs/compare, /ops, and /visualize/batch/{batch_id}.
- demo-step-card.tsx — 4 new mini-summary chip-line components. - demo-step-card.test.tsx (new) — 6 render tests covering chip-lines and Inspect button behaviour. - PHASE_DEFS.test.ts + use-demo-pipeline.test.ts — extended to assert the new 18-step showcase_rich layout. Docs: - docs/_base/RUNBOOKS.md — 8 new failure-mode entries under the /showcase pipeline section covering the 4 new steps (skip / fail diagnostics, R15 cleanup recovery). Drift resolutions (per PRPs/ai_docs/prp-39-contract-probe-report.md): - D1 (compare envelope): champion_compat_compare derives compatible + comparable_reason client-side; mirrors the frontend computeCompatibility predicate. - D2 (quick_baseline_sweep): preset expansion stays in the demo slice (Option A); no preset_id on BatchSubmitRequest. - D3 (sync settle): /batch/forecasting normally returns terminal status on submit; the 90 s poll loop is a safety net. WebSocket schema additive only — no StepEvent / DemoRunRequest field changes. Relative-anchor phase insertion (PHASE_PORTFOLIO between PHASE_DECISION and PHASE_VERIFY) keeps the slice merge-order independent of PRP-40. --- app/features/demo/pipeline.py | 539 +++++++++++++++++- app/features/demo/tests/test_pipeline.py | 275 ++++++++- docs/_base/RUNBOOKS.md | 10 +- .../src/components/demo/PHASE_DEFS.test.ts | 20 +- frontend/src/components/demo/PHASE_DEFS.ts | 17 +- .../components/demo/demo-step-card.test.tsx | 126 ++++ .../src/components/demo/demo-step-card.tsx | 84 +++ frontend/src/hooks/use-demo-pipeline.test.ts | 12 +- frontend/src/pages/showcase.tsx | 26 +- tests/test_e2e_demo.py | 115 ++++ 10 files changed, 1196 insertions(+), 28 deletions(-) create mode 100644 frontend/src/components/demo/demo-step-card.test.tsx diff --git a/app/features/demo/pipeline.py b/app/features/demo/pipeline.py index d5b60df9..09ced393 100644 --- a/app/features/demo/pipeline.py +++ b/app/features/demo/pipeline.py @@ -72,6 +72,24 @@ # coefficients), NOT regression (HGBR has no feature_importances_). 
SHOWCASE_V2_MODEL_TYPE = "prophet_like" +# PRP-39 — quick_baseline_sweep portfolio preset. +# SOURCE: frontend/src/components/forecast-intelligence/batch-preset-utils.ts:22-28 +# First 3 of the 5 quick_baseline_sweep baselines — gives 3 stores x 2 products +# x 3 models = 18 items, matching INITIAL-39 § Scope. Keep this list in sync +# with the frontend preset definition; the demo slice cannot import frontend +# code (vertical-slice rule), so a comment is the only drift signal. +BATCH_PRESET_QUICK_BASELINE_SWEEP_MODELS: tuple[str, ...] = ( + "naive", + "seasonal_naive", + "moving_average", +) + +# PRP-39 — per probe report § D3, /batch/forecasting settles synchronously in +# most cases. The poll loop is a safety net guarding against a future +# async-runner mode. +_BATCH_POLL_INTERVAL_SECONDS = 2.0 +_BATCH_POLL_TIMEOUT_SECONDS = 90.0 + # Per-step HTTP timeout. /seeder/generate on demo_minimal is slow; 120 s leaves # margin. connect=5 s because the ASGI transport connects instantly. _HTTP_TIMEOUT = httpx.Timeout(120.0, connect=5.0) @@ -193,6 +211,13 @@ class DemoContext: v2_run_id: str | None = None v2_model_path: str | None = None bucketed_aggregated_metrics: dict[str, dict[str, float]] | None = None + # PRP-39 — additive Optional fields populated only on SHOWCASE_RICH runs + # AND only by their respective step functions. + compat_compare_result: dict[str, Any] | None = None + stale_alias_run_id: str | None = None + original_demo_alias_run_id: str | None = None + batch_id: str | None = None + batch_status: str | None = None # ============================================================================= @@ -1085,16 +1110,498 @@ async def step_agent(ctx: DemoContext, client: _Client) -> StepResult: ) +async def step_champion_compat_compare(ctx: DemoContext, client: _Client) -> StepResult: + """PRP-39 — Compare V1 baseline vs V2 prophet_like (champion-compat). 
+ + Derives ``compatible`` + ``comparable_reason`` client-side per probe + report § D1 (the compare endpoint envelope has only ``run_a``, + ``run_b``, ``config_diff``, ``metrics_diff`` — no top-level + compatibility flags). Mirrors the predicate at + ``frontend/src/components/forecast-intelligence/champion-compatibility-utils.ts:14-47`` + so the same reason key works for both the compare card and the ops + chip. + """ + if ctx.v2_run_id is None or ctx.winning_run_id is None: + # R14 — no V2 run on the showcase grain (user ran scenario=demo_minimal). + return ( + "skip", + "no V2 run on the showcase grain — run with scenario=showcase_rich", + {}, + ) + + # Discover a V1 baseline run on the same grain. Use the registry's + # status filter to narrow to SUCCESS runs, then pick the first one + # whose feature_frame_version is None-or-1 and that isn't the V2 run. + runs_body = await client.request( + "champion_compat_compare[runs]", + "GET", + ( + f"/registry/runs?store_id={ctx.store_id}&product_id={ctx.product_id}" + "&status=success&page_size=20" + ), + ) + runs_raw = runs_body.get("runs", []) + runs = runs_raw if isinstance(runs_raw, list) else [] + v1_run_id: str | None = None + for run in runs: + if not isinstance(run, dict): + continue + ffv = run.get("feature_frame_version") + run_id_raw = run.get("run_id") + if ( + (ffv is None or ffv == 1) + and isinstance(run_id_raw, str) + and run_id_raw != ctx.v2_run_id + ): + v1_run_id = run_id_raw + break + if v1_run_id is None: + return ("skip", "no V1 baseline run on the showcase grain", {}) + + # GET the compare envelope. Per D1, derive compatible + reason client-side. 
+ compare_body = await client.request( + "champion_compat_compare[compare]", + "GET", + f"/registry/compare/{v1_run_id}/{ctx.v2_run_id}", + ) + run_a_raw = compare_body.get("run_a", {}) + run_b_raw = compare_body.get("run_b", {}) + run_a = run_a_raw if isinstance(run_a_raw, dict) else {} + run_b = run_b_raw if isinstance(run_b_raw, dict) else {} + v_a = run_a.get("feature_frame_version") # None for legacy V1 + v_b = run_b.get("feature_frame_version") # 2 for PRP-38's V2 run + # Coerce legacy V1 (None) to V=1 for the compat predicate, matching the + # frontend computeCompatibility logic AND OpsService._run_feature_frame_version. + v_a_norm = 1 if v_a is None else v_a + v_b_norm = 1 if v_b is None else v_b + compatible = v_a_norm == v_b_norm # grain + window equal by construction + reason: str | None = None if compatible else "feature_frame_version_mismatch" + + ctx.compat_compare_result = { + "v1_run_id": v1_run_id, + "v2_run_id": ctx.v2_run_id, + "compatible": compatible, + "comparable_reason": reason, + } + + return ( + "pass", + f"V_a={v_a_norm} V_b={v_b_norm} compatible={compatible}", + { + "v1_run_id": v1_run_id, + "v2_run_id": ctx.v2_run_id, + "feature_frame_version_a": v_a, + "feature_frame_version_b": v_b, + "compatible": compatible, + "comparable_reason": reason, + }, + ) + + +async def step_stale_alias_trigger(ctx: DemoContext, client: _Client) -> StepResult: + """PRP-39 — trigger feature_frame_version_mismatch stale-alias verdict. + + Registers a SECOND prophet_like run on the SAME grain as PRP-38's V2 run, + with ``runtime_info_extras.feature_frame_version`` set to a value + DIFFERENT from PRP-38's V2 (which is V=2). The integer JSONB key is + opaque to the ops service, so V=3 is a valid "synthetic" value that + fires the V-mismatch branch (see probe report § (b)). 
+ """ + if ctx.v2_run_id is None or ctx.date_start is None or ctx.date_end is None: + return ( + "skip", + "no V2 run / date range — run with scenario=showcase_rich", + {}, + ) + + # Register the V=3 run. Mirror step_v2_train's create+running+success chain. + create_body = await client.request( + "stale_alias_trigger[create]", + "POST", + "/registry/runs", + json_body={ + "model_type": "prophet_like", + "model_config": _model_config_payload("prophet_like"), + "feature_config": None, + "data_window_start": ctx.date_start.isoformat(), + "data_window_end": ctx.date_end.isoformat(), + "store_id": ctx.store_id, + "product_id": ctx.product_id, + # The whole point of this step — controlled V different from V=2. + "runtime_info_extras": {"feature_frame_version": 3}, + }, + ) + second_run_id_raw = create_body.get("run_id") + if not isinstance(second_run_id_raw, str): + return ("fail", "POST /registry/runs returned no run_id", {}) + ctx.stale_alias_run_id = second_run_id_raw + + # PATCH pending → running → success. + await client.request( + "stale_alias_trigger[running]", + "PATCH", + f"/registry/runs/{second_run_id_raw}", + json_body={"status": "running"}, + ) + await client.request( + "stale_alias_trigger[success]", + "PATCH", + f"/registry/runs/{second_run_id_raw}", + json_body={ + "status": "success", + "metrics": {"wape": 999.0}, + "artifact_uri": "demo/stale-alias-placeholder.joblib", + "artifact_hash": "0" * 64, + "artifact_size_bytes": 1, + }, + ) + + # Hit /ops/summary to confirm the stale-alias verdict surfaces. 
+ ops_body = await client.request("stale_alias_trigger[ops]", "GET", "/ops/summary") + aliases_raw = ops_body.get("aliases", []) + aliases = aliases_raw if isinstance(aliases_raw, list) else [] + target: dict[str, Any] | None = None + for alias in aliases: + if isinstance(alias, dict) and alias.get("alias_name") == DEMO_ALIAS: + target = alias + break + if target is None: + return ("fail", f"alias {DEMO_ALIAS} missing from /ops/summary", {}) + + stale_reason = target.get("stale_reason") + if stale_reason != "feature_frame_version_mismatch": + return ( + "fail", + (f"expected stale_reason=feature_frame_version_mismatch, got {stale_reason}"), + {}, + ) + + alias_v = target.get("alias_feature_frame_version") + comparable_v = target.get("comparable_run_feature_frame_version") + return ( + "pass", + ( + f"alias={DEMO_ALIAS} stale_reason={stale_reason} " + f"V_alias={alias_v}→V_comparable={comparable_v}" + ), + { + "alias_name": DEMO_ALIAS, + "stale_reason": stale_reason, + "alias_feature_frame_version": alias_v, + "comparable_run_feature_frame_version": comparable_v, + "second_v2_run_id": second_run_id_raw, + }, + ) + + +async def step_safer_promote_flow(ctx: DemoContext, client: _Client) -> StepResult: + """PRP-39 — swap ``demo-production`` to a worse-WAPE run. + + Mirrors step_register's create+running+success+alias chain at + ``pipeline.py``. Deliberately registers a worse-WAPE run so the + safer-Promote dialog gates fire when a human visits /ops. The + original alias target is captured BEFORE the swap so step_cleanup can + restore it (R15). + """ + if ctx.winning_run_id is None or ctx.date_start is None or ctx.date_end is None: + return ( + "skip", + "no winning run / date range — run with scenario=showcase_rich", + {}, + ) + + # Capture the current alias target BEFORE the swap (R15). 
+ alias_body = await client.request( + "safer_promote[alias_pre]", + "GET", + f"/registry/aliases/{DEMO_ALIAS}", + ) + pre_run_id_raw = alias_body.get("run_id") + if not isinstance(pre_run_id_raw, str): + return ("fail", f"GET /registry/aliases/{DEMO_ALIAS} returned no run_id", {}) + ctx.original_demo_alias_run_id = pre_run_id_raw + + # Register a fresh baseline run with a tweaked config so config_hash differs + # from the prior register step's run. Use seasonal_naive season_length=14 + # (the default register uses 7). + create_body = await client.request( + "safer_promote[create]", + "POST", + "/registry/runs", + json_body={ + "model_type": "seasonal_naive", + "model_config": { + "model_type": "seasonal_naive", + "season_length": 14, + }, + "feature_config": None, + "data_window_start": ctx.date_start.isoformat(), + "data_window_end": ctx.date_end.isoformat(), + "store_id": ctx.store_id, + "product_id": ctx.product_id, + # V=1 deliberately to additionally fire the V-mismatch-ack gate + # in the dialog (V2 winner → V1 challenger). + "runtime_info_extras": {"feature_frame_version": 1}, + }, + ) + worse_run_id_raw = create_body.get("run_id") + if not isinstance(worse_run_id_raw, str): + return ("fail", "POST /registry/runs returned no run_id", {}) + + # pending → running → success + await client.request( + "safer_promote[running]", + "PATCH", + f"/registry/runs/{worse_run_id_raw}", + json_body={"status": "running"}, + ) + await client.request( + "safer_promote[success]", + "PATCH", + f"/registry/runs/{worse_run_id_raw}", + json_body={ + "status": "success", + "metrics": {"wape": 99.0}, + "artifact_uri": "demo/safer-promote-placeholder.joblib", + "artifact_hash": "0" * 64, + "artifact_size_bytes": 1, + }, + ) + + # Swap the alias. 
+ await client.request( + "safer_promote[alias_swap]", + "POST", + "/registry/aliases", + json_body={ + "alias_name": DEMO_ALIAS, + "run_id": worse_run_id_raw, + "description": ("PRP-39 safer-Promote walkthrough — deliberate worse-WAPE swap."), + }, + ) + + return ( + "pass", + (f"alias={DEMO_ALIAS} before={pre_run_id_raw[:8]}→after={worse_run_id_raw[:8]}"), + { + "alias_name": DEMO_ALIAS, + "before_run_id": pre_run_id_raw, + "after_run_id": worse_run_id_raw, + "swap_intent": "demo_safer_promote_walkthrough", + }, + ) + + +async def step_batch_preset(ctx: DemoContext, client: _Client) -> StepResult: + """PRP-39 — run the quick_baseline_sweep portfolio preset (Option A). + + Per probe report § D2, the preset is frontend-only — the backend + ``BatchSubmitRequest`` does not accept ``preset_id``. The demo slice + expands the preset client-side using + ``BATCH_PRESET_QUICK_BASELINE_SWEEP_MODELS``. + """ + if ctx.date_start is None or ctx.date_end is None: + return ("skip", "no date range — run with scenario=showcase_rich", {}) + + # Discover 3 stores + 2 products via the dimensions endpoints (mirrors + # step_status pattern). Never hardcode ids — seeder doesn't reset IDs. 
+ stores_body = await client.request( + "batch_preset[stores]", + "GET", + "/dimensions/stores?page=1&page_size=5", + ) + products_body = await client.request( + "batch_preset[products]", + "GET", + "/dimensions/products?page=1&page_size=5", + ) + stores_raw = stores_body.get("stores", []) + products_raw = products_body.get("products", []) + stores = stores_raw if isinstance(stores_raw, list) else [] + products = products_raw if isinstance(products_raw, list) else [] + store_ids: list[int] = [] + for s in stores: + if isinstance(s, dict): + sid = s.get("id") + if isinstance(sid, int): + store_ids.append(sid) + if len(store_ids) >= 3: + break + product_ids: list[int] = [] + for p in products: + if isinstance(p, dict): + pid = p.get("id") + if isinstance(pid, int): + product_ids.append(pid) + if len(product_ids) >= 2: + break + if len(store_ids) < 3 or len(product_ids) < 2: + return ("skip", "insufficient stores/products in the seeded grain", {}) + + # POST /batch/forecasting — Option A expansion. 
+ submit_body = await client.request( + "batch_preset[submit]", + "POST", + "/batch/forecasting", + json_body={ + "operation": "train", + "scope": { + "kind": "manual", + "store_ids": store_ids, + "product_ids": product_ids, + }, + "model_configs": [{"model_type": m} for m in BATCH_PRESET_QUICK_BASELINE_SWEEP_MODELS], + "start_date": ctx.date_start.isoformat(), + "end_date": ctx.date_end.isoformat(), + }, + ) + batch_id_raw = submit_body.get("batch_id") + if not isinstance(batch_id_raw, str): + return ("fail", "POST /batch/forecasting returned no batch_id", {}) + ctx.batch_id = batch_id_raw + + terminal_statuses = {"completed", "failed", "partial", "cancelled"} + status_raw = submit_body.get("status") + status: str = status_raw if isinstance(status_raw, str) else "unknown" + body: dict[str, Any] = submit_body + if status not in terminal_statuses: + t0 = time.monotonic() + timed_out = True + while time.monotonic() - t0 < _BATCH_POLL_TIMEOUT_SECONDS: + await asyncio.sleep(_BATCH_POLL_INTERVAL_SECONDS) + body = await client.request( + "batch_preset[poll]", + "GET", + f"/batch/{batch_id_raw}", + ) + status_raw = body.get("status") + status = status_raw if isinstance(status_raw, str) else "unknown" + if status in terminal_statuses: + timed_out = False + break + if timed_out: + ctx.batch_status = status + return ( + "warn", + ( + f"batch poll timed out at {_BATCH_POLL_TIMEOUT_SECONDS:.0f}s; " + f"visit /visualize/batch/{batch_id_raw} to follow up" + ), + { + "batch_id": batch_id_raw, + "kind": "manual", + "preset_source": "quick_baseline_sweep", + "model_types": list(BATCH_PRESET_QUICK_BASELINE_SWEEP_MODELS), + "status": status, + "total_items": body.get("total_items"), + "completed_items": body.get("completed_items"), + "failed_items": body.get("failed_items"), + }, + ) + + ctx.batch_status = status + step_status: StepStatus + if status == "completed": + step_status = "pass" + elif status == "partial": + step_status = "warn" + else: # failed or cancelled + step_status = 
"fail" + + completed = body.get("completed_items") + total = body.get("total_items") + return ( + step_status, + (f"preset=quick_baseline_sweep {completed}/{total} done status={status}"), + { + "batch_id": batch_id_raw, + "kind": "manual", + "preset_source": "quick_baseline_sweep", + "model_types": list(BATCH_PRESET_QUICK_BASELINE_SWEEP_MODELS), + "status": status, + "total_items": total, + "completed_items": completed, + "failed_items": body.get("failed_items"), + }, + ) + + async def step_cleanup(ctx: DemoContext, client: _Client) -> StepResult: - """Close the agent session (no-op if no session was opened).""" - if ctx.session_id is None: - return ("skip", "no agent session to close", {}) - try: - await client.request("cleanup", "DELETE", f"/agents/sessions/{ctx.session_id}") - except _StepError as exc: - # Cleanup failure is non-fatal -- warn so the run still goes green. - return ("warn", f"DELETE failed but ignored: {exc}", {}) - return ("pass", "agent session closed", {}) + """Close the agent session + restore the demo-production alias (PRP-39 R15). + + PRP-39 extends the original PRP-15 cleanup to ALSO restore the + ``demo-production`` alias when ``safer_promote_flow`` swapped it to a + worse-WAPE run. Failure to restore is a ``warn``, never a fail. + """ + alias_restored = False + restored_run_id: str | None = None + + # PRP-39 — R15 restore. Failure is `warn`, not `fail`. + if ctx.original_demo_alias_run_id is not None: + try: + await client.request( + "cleanup[restore_alias]", + "POST", + "/registry/aliases", + json_body={ + "alias_name": DEMO_ALIAS, + "run_id": ctx.original_demo_alias_run_id, + "description": "Restored by demo cleanup (PRP-39).", + }, + ) + alias_restored = True + restored_run_id = ctx.original_demo_alias_run_id + except _StepError as exc: + logger.warning( + "demo.cleanup.alias_restore_failed", + run_id=ctx.original_demo_alias_run_id, + status_code=exc.status_code, + ) + + # PRESERVED — existing agent-session-close. 
+ agent_closed = False + if ctx.session_id is not None: + try: + await client.request("cleanup", "DELETE", f"/agents/sessions/{ctx.session_id}") + agent_closed = True + except _StepError as exc: + return ( + "warn", + f"DELETE agent failed but ignored: {exc}", + { + "agent_session_closed": False, + "alias_restored": alias_restored, + "restored_run_id": restored_run_id, + }, + ) + + detail_parts: list[str] = [] + if agent_closed: + detail_parts.append("agent closed") + if alias_restored and restored_run_id is not None: + detail_parts.append(f"alias restored to {restored_run_id[:8]}...") + + # Preserve PRP-15 skip-semantics: when neither an agent session was + # closed NOR an alias was restored, the step is a no-op. + if not detail_parts: + return ( + "skip", + "no agent session to close", + { + "agent_session_closed": False, + "alias_restored": False, + "restored_run_id": None, + }, + ) + return ( + "pass", + " · ".join(detail_parts), + { + "agent_session_closed": agent_closed, + "alias_restored": alias_restored, + "restored_run_id": restored_run_id, + }, + ) # ============================================================================= @@ -1110,6 +1617,8 @@ async def step_cleanup(ctx: DemoContext, client: _Client) -> StepResult: PHASE_DATA = "data" PHASE_MODELING = "modeling" PHASE_DECISION = "decision" +# PRP-39 — new portfolio phase, inserted between decision and verify. +PHASE_PORTFOLIO = "portfolio" PHASE_VERIFY = "verify" PHASE_AGENT = "agent" PHASE_CLEANUP = "cleanup" @@ -1139,6 +1648,8 @@ def _phase_table(scenario: ScenarioPreset) -> list[PhaseStep]: ("backtest", step_backtest), ("register", step_register), ] + # PRP-39 — new portfolio phase, empty under demo_minimal/sparse. 
+ portfolio_steps: list[tuple[str, StepFn]] = [] verify_steps: list[tuple[str, StepFn]] = [("verify", step_verify)] agent_steps: list[tuple[str, StepFn]] = [("agent", step_agent)] cleanup_steps: list[tuple[str, StepFn]] = [("cleanup", step_cleanup)] @@ -1148,10 +1659,20 @@ def _phase_table(scenario: ScenarioPreset) -> list[PhaseStep]: ("historical_backfill", step_historical_backfill), ] modeling_steps += [("v2_train", step_v2_train)] + # PRP-39 — extend decision phase (AFTER register) with 3 new steps. + decision_steps += [ + ("champion_compat_compare", step_champion_compat_compare), + ("stale_alias_trigger", step_stale_alias_trigger), + ("safer_promote_flow", step_safer_promote_flow), + ] + # PRP-39 — new portfolio phase has its one step under showcase_rich. + portfolio_steps = [("batch_preset", step_batch_preset)] rows: list[PhaseStep] = [] rows += [(PHASE_DATA, name, fn) for name, fn in data_steps] rows += [(PHASE_MODELING, name, fn) for name, fn in modeling_steps] rows += [(PHASE_DECISION, name, fn) for name, fn in decision_steps] + # PRP-39 — INSERT portfolio BEFORE verify (relative anchor). + rows += [(PHASE_PORTFOLIO, name, fn) for name, fn in portfolio_steps] rows += [(PHASE_VERIFY, name, fn) for name, fn in verify_steps] rows += [(PHASE_AGENT, name, fn) for name, fn in agent_steps] rows += [(PHASE_CLEANUP, name, fn) for name, fn in cleanup_steps] diff --git a/app/features/demo/tests/test_pipeline.py b/app/features/demo/tests/test_pipeline.py index a82ccc9c..782e2157 100644 --- a/app/features/demo/tests/test_pipeline.py +++ b/app/features/demo/tests/test_pipeline.py @@ -46,8 +46,15 @@ def _canned_response( "sales": 500, } if path.startswith("/dimensions/stores"): + # page_size=5 is the PRP-39 batch_preset discovery call; return 3 stores + # so the step doesn't skip. Other callers ask for page_size=1; either + # way the first item is the showcase grain (id=7). 
+ if "page_size=5" in path: + return {"stores": [{"id": 7}, {"id": 8}, {"id": 9}]} return {"stores": [{"id": 7}]} if path.startswith("/dimensions/products"): + if "page_size=5" in path: + return {"products": [{"id": 3}, {"id": 4}]} return {"products": [{"id": 3}]} if path == "/featuresets/compute": return {"row_count": 80, "feature_columns": ["lag_1", "roll_7", "dow"]} @@ -113,10 +120,70 @@ def _canned_response( "feature_groups": {"target_history": ["lag_1", "lag_7"], "calendar": ["dow", "month"]}, "feature_safety_classes": {"lag_1": "leak_safe"}, } + if path.startswith("/registry/runs?"): + # PRP-39 — champion_compat_compare lists SUCCESS runs on the grain. + return { + "runs": [ + {"run_id": "v1-baseline-run-id-aaaa", "feature_frame_version": None}, + {"run_id": "demo-run-abc123def456", "feature_frame_version": 2}, + ], + } + if path.startswith("/registry/compare/"): + # PRP-39 — champion_compat_compare GETs the compare envelope. + return { + "run_a": { + "run_id": "v1-baseline-run-id-aaaa", + "feature_frame_version": None, + }, + "run_b": { + "run_id": "demo-run-abc123def456", + "feature_frame_version": 2, + }, + "config_diff": {}, + "metrics_diff": {}, + } if path.startswith("/registry/runs/"): # PATCH pending->running->success return {} if path == "/registry/aliases": return {} + if path.startswith("/registry/aliases/"): + # PRP-39 — safer_promote_flow GETs the current alias target before swap. + return { + "alias_name": "demo-production", + "run_id": "demo-run-abc123def456", + "description": "current target", + } + if path == "/ops/summary": + # PRP-39 — stale_alias_trigger GETs after registering a V=3 run. + return { + "aliases": [ + { + "alias_name": "demo-production", + "stale_reason": "feature_frame_version_mismatch", + "alias_feature_frame_version": 2, + "comparable_run_feature_frame_version": 3, + } + ] + } + if path == "/batch/forecasting": + # PRP-39 — batch_preset POSTs the preset expansion. 
Return terminal + # COMPLETED status (per D3, settles synchronously in most cases). + return { + "batch_id": "batch-demo-abcdef0123", + "status": "completed", + "total_items": 18, + "completed_items": 18, + "failed_items": 0, + } + if path.startswith("/batch/"): + # Safety-net poll path (rare in canned fast tests). + return { + "batch_id": path.split("/")[-1], + "status": "completed", + "total_items": 18, + "completed_items": 18, + "failed_items": 0, + } raise AssertionError(f"unexpected request path: {path}") @@ -379,7 +446,13 @@ def test_phase_table_demo_minimal_matches_legacy_11_steps(): def test_phase_table_showcase_rich_adds_v2_steps(): - """PRP-38 — phase_table for SHOWCASE_RICH adds 3 steps; phase order stable.""" + """PRP-38/39 — phase_table for SHOWCASE_RICH adds 3+4 steps; phase order stable. + + PRP-38 shipped 3 (phase2_enrichment, historical_backfill, v2_train). + PRP-39 adds 4 more (champion_compat_compare, stale_alias_trigger, + safer_promote_flow, batch_preset) AND a new ``portfolio`` phase between + ``decision`` and ``verify``. Total: 18 rows across 7 phases. + """ rows = pipeline._phase_table(ScenarioPreset.SHOWCASE_RICH) by_phase_step = [(p, s) for p, s, _fn in rows] assert by_phase_step == [ @@ -394,6 +467,12 @@ def test_phase_table_showcase_rich_adds_v2_steps(): ("modeling", "v2_train"), ("decision", "backtest"), ("decision", "register"), + # PRP-39 — three decision-phase extensions after register. + ("decision", "champion_compat_compare"), + ("decision", "stale_alias_trigger"), + ("decision", "safer_promote_flow"), + # PRP-39 — new portfolio phase between decision and verify. 
+ ("portfolio", "batch_preset"), ("verify", "verify"), ("agent", "agent"), ("cleanup", "cleanup"), @@ -502,8 +581,13 @@ async def test_run_pipeline_showcase_rich_runs_v2_and_buckets(monkeypatch, tmp_p assert final.data["v2_run_id"] == "demo-run-abc123def456" -async def test_run_pipeline_showcase_rich_emits_14_steps(monkeypatch, tmp_path): - """PRP-38 — SHOWCASE_RICH adds 3 new steps (11 -> 14 total).""" +async def test_run_pipeline_showcase_rich_emits_18_steps(monkeypatch, tmp_path): + """PRP-38/39 — SHOWCASE_RICH adds 3+4 new steps (11 -> 18 total). + + PRP-38 shipped 14 (11 + phase2_enrichment + historical_backfill + v2_train). + PRP-39 adds 4 more (champion_compat_compare + stale_alias_trigger + + safer_promote_flow + batch_preset). + """ artifact = tmp_path / "artifacts" / "models" / "model_x.joblib" artifact.parent.mkdir(parents=True, exist_ok=True) artifact.write_bytes(b"x") @@ -514,7 +598,186 @@ async def test_run_pipeline_showcase_rich_emits_14_steps(monkeypatch, tmp_path): req = DemoRunRequest(scenario=ScenarioPreset.SHOWCASE_RICH) events = [e async for e in pipeline.run_pipeline(app=_FAKE_APP, req=req)] completes = [e for e in events if e.event_type == "step_complete"] - assert len(completes) == 14 - # Every event reports total_steps=14 + assert len(completes) == 18 + # Every event reports total_steps=18 for ev in completes: - assert ev.total_steps == 14 + assert ev.total_steps == 18 + + +# ============================================================================= +# PRP-39 — per-step unit tests (canned ASGI HTTP) +# ============================================================================= + + +def _make_ctx_showcase_ready() -> pipeline.DemoContext: + """Build a DemoContext with the fields PRP-39 steps consume already set.""" + from datetime import date + + ctx = pipeline.DemoContext( + seed=42, + skip_seed=True, + reset=False, + scenario=ScenarioPreset.SHOWCASE_RICH, + ) + ctx.store_id = 7 + ctx.product_id = 3 + ctx.date_start = date(2024, 10, 1) 
+ ctx.date_end = date(2024, 12, 31) + ctx.winner_model_type = "prophet_like" + ctx.winner_wape = 0.08 + ctx.winning_run_id = "demo-run-abc123def456" + ctx.v2_run_id = "demo-run-abc123def456" + return ctx + + +def _bind_fake_client(artifact_path: str, wapes: dict[str, float]) -> Any: + """Construct a fake-client instance for direct step-function invocation.""" + fake_class = _build_fake_client(artifact_path, wapes) + return fake_class(_FAKE_APP) + + +async def test_champion_compat_compare_step_marks_v_mismatch_incompatible(monkeypatch, tmp_path): + """PRP-39 — champion_compat_compare derives compatible=False on V mismatch.""" + artifact = tmp_path / "m.joblib" + artifact.write_bytes(b"x") + monkeypatch.setattr(pipeline, "get_settings", lambda: _fake_settings(str(tmp_path / "reg"))) + client = _bind_fake_client(str(artifact), {"prophet_like": 0.08}) + + ctx = _make_ctx_showcase_ready() + status, detail, data = await pipeline.step_champion_compat_compare(ctx, client) + + assert status == "pass" + assert data["compatible"] is False + assert data["comparable_reason"] == "feature_frame_version_mismatch" + assert data["v1_run_id"] == "v1-baseline-run-id-aaaa" + assert data["v2_run_id"] == "demo-run-abc123def456" + assert data["feature_frame_version_a"] is None + assert data["feature_frame_version_b"] == 2 + assert "V_a=1" in detail and "V_b=2" in detail + + +async def test_champion_compat_compare_step_skips_without_v2_run(monkeypatch, tmp_path): + """PRP-39 — champion_compat_compare skips when no V2 run exists (R14).""" + artifact = tmp_path / "m.joblib" + artifact.write_bytes(b"x") + monkeypatch.setattr(pipeline, "get_settings", lambda: _fake_settings(str(tmp_path / "reg"))) + client = _bind_fake_client(str(artifact), {}) + + ctx = _make_ctx_showcase_ready() + ctx.v2_run_id = None + status, detail, _ = await pipeline.step_champion_compat_compare(ctx, client) + + assert status == "skip" + assert "showcase_rich" in detail + + +async def 
test_stale_alias_trigger_step_surfaces_v_mismatch(monkeypatch, tmp_path): + """PRP-39 — stale_alias_trigger registers V=3 run and confirms ops verdict.""" + artifact = tmp_path / "m.joblib" + artifact.write_bytes(b"x") + monkeypatch.setattr(pipeline, "get_settings", lambda: _fake_settings(str(tmp_path / "reg"))) + client = _bind_fake_client(str(artifact), {"prophet_like": 0.08}) + + ctx = _make_ctx_showcase_ready() + status, _detail, data = await pipeline.step_stale_alias_trigger(ctx, client) + + assert status == "pass" + assert data["alias_name"] == "demo-production" + assert data["stale_reason"] == "feature_frame_version_mismatch" + assert data["alias_feature_frame_version"] == 2 + assert data["comparable_run_feature_frame_version"] == 3 + assert ctx.stale_alias_run_id == "demo-run-abc123def456" + + +async def test_safer_promote_flow_step_captures_original_alias(monkeypatch, tmp_path): + """PRP-39 — safer_promote_flow records original alias for R15 restore.""" + artifact = tmp_path / "m.joblib" + artifact.write_bytes(b"x") + monkeypatch.setattr(pipeline, "get_settings", lambda: _fake_settings(str(tmp_path / "reg"))) + client = _bind_fake_client(str(artifact), {"seasonal_naive": 99.0}) + + ctx = _make_ctx_showcase_ready() + status, _detail, data = await pipeline.step_safer_promote_flow(ctx, client) + + assert status == "pass" + assert data["alias_name"] == "demo-production" + assert data["before_run_id"] == "demo-run-abc123def456" # canned GET response + assert data["after_run_id"] == "demo-run-abc123def456" # canned POST returns same id + assert data["swap_intent"] == "demo_safer_promote_walkthrough" + # R15 — original alias captured before swap. 
+ assert ctx.original_demo_alias_run_id == "demo-run-abc123def456" + + +async def test_batch_preset_step_emits_terminal_completed(monkeypatch, tmp_path): + """PRP-39 — batch_preset returns pass on terminal completed status (D2/D3).""" + artifact = tmp_path / "m.joblib" + artifact.write_bytes(b"x") + monkeypatch.setattr(pipeline, "get_settings", lambda: _fake_settings(str(tmp_path / "reg"))) + client = _bind_fake_client(str(artifact), {}) + + ctx = _make_ctx_showcase_ready() + status, detail, data = await pipeline.step_batch_preset(ctx, client) + + assert status == "pass" + assert data["batch_id"] == "batch-demo-abcdef0123" + assert data["kind"] == "manual" + assert data["preset_source"] == "quick_baseline_sweep" + assert data["total_items"] == 18 + assert data["completed_items"] == 18 + assert data["status"] == "completed" + assert "preset=quick_baseline_sweep" in detail + assert ctx.batch_id == "batch-demo-abcdef0123" + + +async def test_batch_preset_step_skips_without_date_range(monkeypatch, tmp_path): + """PRP-39 — batch_preset skips gracefully when no date range present.""" + artifact = tmp_path / "m.joblib" + artifact.write_bytes(b"x") + monkeypatch.setattr(pipeline, "get_settings", lambda: _fake_settings(str(tmp_path / "reg"))) + client = _bind_fake_client(str(artifact), {}) + + ctx = _make_ctx_showcase_ready() + ctx.date_start = None + ctx.date_end = None + status, detail, _ = await pipeline.step_batch_preset(ctx, client) + + assert status == "skip" + assert "showcase_rich" in detail + + +async def test_cleanup_restores_alias_when_promote_swapped_it(monkeypatch, tmp_path): + """PRP-39 R15 — cleanup restores demo-production alias post-swap.""" + artifact = tmp_path / "m.joblib" + artifact.write_bytes(b"x") + monkeypatch.setattr(pipeline, "get_settings", lambda: _fake_settings(str(tmp_path / "reg"))) + client = _bind_fake_client(str(artifact), {}) + + ctx = _make_ctx_showcase_ready() + ctx.original_demo_alias_run_id = "original-v2-winner-run-id" + # No agent 
session opened + ctx.session_id = None + + status, detail, data = await pipeline.step_cleanup(ctx, client) + + assert status == "pass" + assert data["alias_restored"] is True + assert data["restored_run_id"] == "original-v2-winner-run-id" + assert "alias restored" in detail + + +async def test_cleanup_skips_when_nothing_to_restore_or_close(monkeypatch, tmp_path): + """PRP-39 — cleanup is a no-op skip when no agent + no alias swap occurred.""" + artifact = tmp_path / "m.joblib" + artifact.write_bytes(b"x") + monkeypatch.setattr(pipeline, "get_settings", lambda: _fake_settings(str(tmp_path / "reg"))) + client = _bind_fake_client(str(artifact), {}) + + ctx = _make_ctx_showcase_ready() + ctx.session_id = None + ctx.original_demo_alias_run_id = None # PRP-39 — no swap to restore + + status, _detail, data = await pipeline.step_cleanup(ctx, client) + + assert status == "skip" + assert data["alias_restored"] is False + assert data["agent_session_closed"] is False diff --git a/docs/_base/RUNBOOKS.md b/docs/_base/RUNBOOKS.md index 0f8b47bb..43b8c12d 100644 --- a/docs/_base/RUNBOOKS.md +++ b/docs/_base/RUNBOOKS.md @@ -115,7 +115,15 @@ uv run python scripts/run_demo.py --seed 42 --quiet 2>&1 | tee demo.log 7. **`v2_train` step fails with `model_path does not contain 'artifacts/models/'` (PRP-38, `showcase_rich` only)** — `POST /forecasting/train` returned a relative path that doesn't match the R1 contract. Cause: someone changed `forecast_model_artifacts_dir` and the path no longer lives under `artifacts/models/`. Fix: revert the config change OR update step_v2_train's assertion to match the new convention. 8. **V2 Feature Frame panel on `/explorer/runs/{id}` is empty after a green `showcase_rich` run (PRP-38)** — happens when the `v2_run_id` was registered with `runtime_info_extras={"feature_frame_version": 2}` but the bundle on disk doesn't carry the V2 manifest (e.g. an older bundle copied over). 
Cause: the `/forecasting/runs/{id}/feature-metadata` endpoint loads the bundle and the bundle's `feature_groups` / `feature_safety_classes` are None for V1 bundles. Fix: rebuild the bundle (re-run the showcase with `Re-seed first` ticked so a fresh V2 bundle lands). 9. **`verify` step shows ⏭️ on a `prophet_like` winner (PRP-38, `showcase_rich` only)** — expected. The V2 winner's `artifact_uri` is the full `artifacts/models/...` path so `/forecasting/runs/{id}/feature-metadata` can resolve it. The `/registry/runs/{id}/verify` endpoint resolves under `registry_artifact_root`; the two roots differ, so verify is skipped gracefully for V2 winners. -**Notes:** the `POST /demo/run` body and `WS /demo/stream` events are documented in `docs/_base/API_CONTRACTS.md`. The pipeline mirrors `scripts/run_demo.py`; the per-step diagnosis for `make demo` above applies to the same steps. PRP-38 added the `scenario` field on `DemoRunRequest` (defaults to `demo_minimal`) and the additive `phase_name` / `phase_index` / `phase_total` fields on every `StepEvent`. +10. **`champion_compat_compare` step shows ⏭️ (PRP-39, `showcase_rich` only)** — the V2 run is missing or no V1 baseline exists on the showcase grain. Cause: the scenario was switched to `demo_minimal` mid-flow (no `v2_train` registers a V2 run) OR the DB has only V2 runs on the grain (a re-run with `Re-seed first` not ticked may leave previous-run artefacts but no V1 baseline). Fix: tick **Re-seed first** and run with `scenario=showcase_rich`; the `train` step's V1 baselines + the `v2_train` step's V2 run together give the compare step both endpoints. +11. **`champion_compat_compare` step fails with `HTTP 404 -- Not Found` from `/registry/compare/...` (PRP-39)** — one of the two run_ids the step picked was deleted between the runs-list call and the compare call (an unlikely race). Cause: a concurrent operator-issued DELETE on `/registry/runs/{id}`. Fix: re-run the showcase; the step picks a fresh pair from the runs list. 
The demo pipeline does not delete runs itself. +12. **`stale_alias_trigger` step fails with `RunCreate` 422 / 409 (PRP-39, `showcase_rich` only)** — `POST /registry/runs` was rejected because (a) the data window violates an Alembic-enforced check (window inversion / negative span), or (b) `RegistryService._find_duplicate` matched an existing run with the same config_hash + V on the same grain (likely from a prior `stale_alias_trigger` run that didn't get cleaned up). Cause: a stale V=3 run for the same grain accumulated across showcase runs (per `docs/_base/DOMAIN_MODEL.md` the demo does NOT delete prior runs). Fix: bump the controlled-V value in `step_stale_alias_trigger` (currently 3) or accept the accumulation as a portfolio-noise tradeoff. +13. **`safer_promote_flow` step fails with `RunUpdate` 422 / 409 (PRP-39, `showcase_rich` only)** — the worse-WAPE run never reached SUCCESS (the PATCH chain broke) OR the alias POST was rejected. Cause: the new run's `pending → running → success` transition was attempted out of order, or the alias POST hit a `success` precondition before the final PATCH landed. Fix: confirm the canned chain order matches `step_register` (each PATCH must return 2xx before the next). The R15 restoration (the `cleanup` step pointing the `demo-production` alias back at its pre-swap target) handles a clean partial state. +14. **`safer_promote_flow` step shows ⏭️ (PRP-39, `showcase_rich` only)** — the winning run is unavailable (the `register` step didn't surface a `winning_run_id`) OR the showcase grain date range is missing. Cause: an earlier failure broke the chain before `register` populated the context. Fix: re-run the showcase from a clean state (`Re-seed first` + `Reset database`). +15. **`batch_preset` step shows ⚠️ "batch poll timed out at 90s" (PRP-39, `showcase_rich` only)** — the batch's 18 sub-jobs together exceeded the poll-timeout budget. Cause: a slow-feature-pipeline branch makes each grain×model pair take longer than expected; on a developer laptop with limited CPU, 18 jobs can exceed 90 s under load.
Fix: visit `/visualize/batch/{batch_id}` to follow the run to completion; the step is `warn` (non-fatal), so the pipeline still goes green. +16. **`batch_preset` step fails with `HTTP 422 -- Unprocessable Entity` from `/batch/forecasting` (PRP-39, `showcase_rich` only)** — `BatchSubmitRequest` validation rejected the body. Common causes: (a) `BatchScope.kind` casing drift (must be lowercase `"manual"`); (b) `operation` value drift (must be `"train"` / `"predict"` / `"backtest"` / `"train_backtest_register"`, NOT `"forecasting"`); (c) the discovered `store_ids` / `product_ids` list is empty because `step_status` did not seed the grain. Fix: re-tick `Re-seed first`; verify the discovery returns at least 3 stores + 2 products. +17. **`cleanup` step shows `alias restored=False` in detail (PRP-39 R15, `showcase_rich` only)** — the `POST /registry/aliases` restore call returned non-2xx. Cause: the original alias target was archived between the swap and the cleanup (e.g. an operator approved an agent `archive_run` tool call, gated by `agent_require_approval`, during the demo). Fix: re-create the alias manually pointing at the V2 winner. The cleanup step warns and continues so the run still goes green. +**Notes:** the `POST /demo/run` body and `WS /demo/stream` events are documented in `docs/_base/API_CONTRACTS.md`. The pipeline mirrors `scripts/run_demo.py`; the per-step diagnosis for `make demo` above applies to the same steps. PRP-38 added the `scenario` field on `DemoRunRequest` (defaults to `demo_minimal`) and the additive `phase_name` / `phase_index` / `phase_total` fields on every `StepEvent`. PRP-39 added four new steps (`champion_compat_compare`, `stale_alias_trigger`, `safer_promote_flow`, `batch_preset`) and a new `portfolio` phase between `decision` and `verify`. ### release-please skipped the bump after a dev → main merge **Symptoms:** `dev → main` PR is merged, `CD Release` workflow on `main` completes in ~10s, **no Release PR** is opened.
release-please log shows `No user facing commits found since - skipping`. diff --git a/frontend/src/components/demo/PHASE_DEFS.test.ts b/frontend/src/components/demo/PHASE_DEFS.test.ts index 5f469c52..5a836392 100644 --- a/frontend/src/components/demo/PHASE_DEFS.test.ts +++ b/frontend/src/components/demo/PHASE_DEFS.test.ts @@ -27,7 +27,7 @@ describe('PHASE_DEFS lockstep with backend _phase_table', () => { ]) }) - it('showcase_rich -> the 14-step sequence with phase2_enrichment/historical_backfill/v2_train', () => { + it('showcase_rich -> the 18-step sequence with PRP-38 V2 + PRP-39 decision/portfolio rows', () => { const tuples = phaseDefsForScenario('showcase_rich').map((d) => [d.phase, d.step]) expect(tuples).toEqual([ ['data', 'precheck'], @@ -41,6 +41,12 @@ describe('PHASE_DEFS lockstep with backend _phase_table', () => { ['modeling', 'v2_train'], ['decision', 'backtest'], ['decision', 'register'], + // PRP-39 — three decision-phase extensions after register. + ['decision', 'champion_compat_compare'], + ['decision', 'stale_alias_trigger'], + ['decision', 'safer_promote_flow'], + // PRP-39 — new portfolio phase between decision and verify. 
+ ['portfolio', 'batch_preset'], ['verify', 'verify'], ['agent', 'agent'], ['cleanup', 'cleanup'], @@ -53,8 +59,16 @@ describe('PHASE_DEFS lockstep with backend _phase_table', () => { expect(sparse).toEqual(minimal) }) - it('PHASE_ORDER contains exactly the six canonical phases', () => { - expect(PHASE_ORDER).toEqual(['data', 'modeling', 'decision', 'verify', 'agent', 'cleanup']) + it('PHASE_ORDER contains exactly the seven canonical phases (PRP-39 adds portfolio)', () => { + expect(PHASE_ORDER).toEqual([ + 'data', + 'modeling', + 'decision', + 'portfolio', + 'verify', + 'agent', + 'cleanup', + ]) }) it('PHASE_LABEL has a label per canonical phase', () => { diff --git a/frontend/src/components/demo/PHASE_DEFS.ts b/frontend/src/components/demo/PHASE_DEFS.ts index 0fe87dbc..9307f4ca 100644 --- a/frontend/src/components/demo/PHASE_DEFS.ts +++ b/frontend/src/components/demo/PHASE_DEFS.ts @@ -20,7 +20,7 @@ export interface PhaseDef { /** * The complete set of step definitions used by either DEMO_MINIMAL (legacy - * 11 steps) or SHOWCASE_RICH (11 + 3 = 14 steps). + * 11 steps) or SHOWCASE_RICH (PRP-38 added 3; PRP-39 adds 4 more = 18 steps). * * Order matters: each row's (phase, step) tuple list is what the lockstep * test asserts equals the backend's `_phase_table(scenario)` output for @@ -38,6 +38,12 @@ const ALL_STEPS: ReadonlyArray = [ { phase: 'modeling', step: 'v2_train', label: 'Train feature-aware (V2)' }, { phase: 'decision', step: 'backtest', label: 'Backtest models' }, { phase: 'decision', step: 'register', label: 'Register winner' }, + // PRP-39 — decision-phase extensions. + { phase: 'decision', step: 'champion_compat_compare', label: 'Compare V1 vs V2' }, + { phase: 'decision', step: 'stale_alias_trigger', label: 'Trigger stale-alias V mismatch' }, + { phase: 'decision', step: 'safer_promote_flow', label: 'Safer Promote walkthrough' }, + // PRP-39 — new portfolio phase, between decision and verify. 
+ { phase: 'portfolio', step: 'batch_preset', label: 'Portfolio batch (quick baseline sweep)' }, { phase: 'verify', step: 'verify', label: 'Verify artifact' }, { phase: 'agent', step: 'agent', label: 'Agent chat' }, { phase: 'cleanup', step: 'cleanup', label: 'Cleanup' }, @@ -47,6 +53,11 @@ const SHOWCASE_RICH_STEP_NAMES = new Set([ 'phase2_enrichment', 'historical_backfill', 'v2_train', + // PRP-39 — only render these step rows under scenario=showcase_rich. + 'champion_compat_compare', + 'stale_alias_trigger', + 'safer_promote_flow', + 'batch_preset', ]) /** Return the PhaseDef list for one scenario (lockstep with backend). */ @@ -63,6 +74,8 @@ export const PHASE_LABEL: Record = { data: 'Data', modeling: 'Modeling', decision: 'Decision', + // PRP-39 — new portfolio phase between decision and verify. + portfolio: 'Portfolio', verify: 'Verify', agent: 'Agent', cleanup: 'Cleanup', @@ -73,6 +86,8 @@ export const PHASE_ORDER: readonly string[] = [ 'data', 'modeling', 'decision', + // PRP-39 — new portfolio phase between decision and verify. + 'portfolio', 'verify', 'agent', 'cleanup', diff --git a/frontend/src/components/demo/demo-step-card.test.tsx b/frontend/src/components/demo/demo-step-card.test.tsx new file mode 100644 index 00000000..5776a730 --- /dev/null +++ b/frontend/src/components/demo/demo-step-card.test.tsx @@ -0,0 +1,126 @@ +/** + * PRP-39 — render tests for the 4 new step kinds' mini-summary chip-lines + * and the Inspect deep-link hrefs they expose. 
+ */ + +import { afterEach, describe, expect, it } from 'vitest' +import { cleanup, render, screen } from '@testing-library/react' +import { MemoryRouter } from 'react-router-dom' +import type { DemoStep } from '@/hooks/use-demo-pipeline' +import { DemoStepCard } from './demo-step-card' + +afterEach(cleanup) + +function makeStep( + name: string, + status: DemoStep['status'], + data: Record, + detail = '' +): DemoStep { + return { + name, + label: name, + status, + detail, + durationMs: 0, + data, + phaseName: 'decision', + } +} + +function renderCard(step: DemoStep, inspectHref: string | null = null) { + return render( + + + + ) +} + +describe('DemoStepCard PRP-39 mini-summaries', () => { + it('champion_compat_compare — renders V_a / V_b / compatible chips with reason', () => { + const step = makeStep('champion_compat_compare', 'pass', { + v1_run_id: 'v1-aaaa', + v2_run_id: 'v2-bbbb', + feature_frame_version_a: null, + feature_frame_version_b: 2, + compatible: false, + comparable_reason: 'feature_frame_version_mismatch', + }) + renderCard(step) + expect(screen.getByText(/V_a=1/).textContent).toBeTruthy() + expect(screen.getByText(/V_b=2/).textContent).toBeTruthy() + expect(screen.getByText(/compatible=false/).textContent).toBeTruthy() + expect(screen.getByText(/feature_frame_version_mismatch/).textContent).toBeTruthy() + }) + + it('stale_alias_trigger — renders alias name + stale reason + V mismatch chips', () => { + const step = makeStep('stale_alias_trigger', 'pass', { + alias_name: 'demo-production', + stale_reason: 'feature_frame_version_mismatch', + alias_feature_frame_version: 2, + comparable_run_feature_frame_version: 3, + second_v2_run_id: 'second-v2-cccc', + }) + renderCard(step) + expect(screen.getByText(/alias=demo-production/).textContent).toBeTruthy() + expect(screen.getByText(/stale_reason=feature_frame_version_mismatch/).textContent).toBeTruthy() + expect(screen.getByText(/V_alias=2/).textContent).toBeTruthy() + 
expect(screen.getByText(/V_comparable=3/).textContent).toBeTruthy() + }) + + it('safer_promote_flow — renders alias + before/after short run-id chips', () => { + const step = makeStep('safer_promote_flow', 'pass', { + alias_name: 'demo-production', + before_run_id: 'beforeruna-cafebabe', + after_run_id: 'afterrunb-deadbeef', + swap_intent: 'demo_safer_promote_walkthrough', + }) + renderCard(step) + expect(screen.getByText(/alias=demo-production/).textContent).toBeTruthy() + expect(screen.getByText(/before=beforeru/).textContent).toBeTruthy() + expect(screen.getByText(/after=afterrun/).textContent).toBeTruthy() + }) + + it('batch_preset — renders preset, items, and status chips', () => { + const step = makeStep('batch_preset', 'pass', { + batch_id: 'batch-aaaa', + kind: 'manual', + preset_source: 'quick_baseline_sweep', + model_types: ['naive', 'seasonal_naive', 'moving_average'], + status: 'completed', + total_items: 18, + completed_items: 18, + failed_items: 0, + }) + renderCard(step) + expect(screen.getByText(/preset=quick_baseline_sweep/).textContent).toBeTruthy() + expect(screen.getByText(/18\/18 done/).textContent).toBeTruthy() + expect(screen.getByText(/status=completed/).textContent).toBeTruthy() + }) + + it('shows the Inspect button on terminal pass with a deep-link href', () => { + const step = makeStep('batch_preset', 'pass', { + batch_id: 'batch-aaaa', + kind: 'manual', + preset_source: 'quick_baseline_sweep', + status: 'completed', + total_items: 18, + completed_items: 18, + }) + renderCard(step, '/visualize/batch/batch-aaaa') + const link = screen.getByRole('link', { name: /Inspect/i }) as HTMLAnchorElement + expect(link.getAttribute('href')).toBe('/visualize/batch/batch-aaaa') + }) + + it('suppresses the Inspect button when inspectHref is null', () => { + const step = makeStep('champion_compat_compare', 'pass', { + compatible: false, + feature_frame_version_a: null, + feature_frame_version_b: 2, + comparable_reason: 'feature_frame_version_mismatch', + 
}) + renderCard(step, null) + const links = screen.queryAllByRole('link', { name: /Inspect/i }) + expect(links.length).toBe(0) + }) +}) diff --git a/frontend/src/components/demo/demo-step-card.tsx b/frontend/src/components/demo/demo-step-card.tsx index 93e4f866..7a3a8c34 100644 --- a/frontend/src/components/demo/demo-step-card.tsx +++ b/frontend/src/components/demo/demo-step-card.tsx @@ -86,6 +86,83 @@ function RegisterDetail({ data }: { data: Record }) { ) } +/** PRP-39 — champion-compat compare mini-summary chip-line. */ +function ChampionCompatDetail({ data }: { data: Record }) { + const va = data.feature_frame_version_a + const vb = data.feature_frame_version_b + const compatible = data.compatible + const reason = typeof data.comparable_reason === 'string' ? data.comparable_reason : null + if (typeof compatible !== 'boolean') return null + const vaDisplay = va === null || va === undefined ? '1' : String(va) + const vbDisplay = vb === null || vb === undefined ? '1' : String(vb) + return ( +
+ V_a={vaDisplay} + V_b={vbDisplay} + + compatible={String(compatible)} + + {!compatible && reason && ( + reason={reason} + )} +
+ ) +} + +/** PRP-39 — stale-alias trigger mini-summary chip-line. */ +function StaleAliasDetail({ data }: { data: Record }) { + const aliasName = typeof data.alias_name === 'string' ? data.alias_name : null + const staleReason = typeof data.stale_reason === 'string' ? data.stale_reason : null + const aliasV = data.alias_feature_frame_version + const comparableV = data.comparable_run_feature_frame_version + if (!aliasName || !staleReason) return null + return ( +
+ alias={aliasName} + + stale_reason={staleReason} + + + V_alias={String(aliasV ?? 'null')} → V_comparable={String(comparableV ?? 'null')} + +
+ ) +} + +/** PRP-39 — safer-Promote flow mini-summary chip-line. */ +function SaferPromoteDetail({ data }: { data: Record }) { + const aliasName = typeof data.alias_name === 'string' ? data.alias_name : null + const before = typeof data.before_run_id === 'string' ? data.before_run_id : null + const after = typeof data.after_run_id === 'string' ? data.after_run_id : null + if (!aliasName || !before || !after) return null + return ( +
+ alias={aliasName} + + before={before.slice(0, 8)} → after={after.slice(0, 8)} + +
+ ) +} + +/** PRP-39 — batch preset mini-summary chip-line. */ +function BatchPresetDetail({ data }: { data: Record }) { + const presetSource = typeof data.preset_source === 'string' ? data.preset_source : null + const completed = data.completed_items + const total = data.total_items + const status = typeof data.status === 'string' ? data.status : null + if (!presetSource || !status) return null + return ( +
+ preset={presetSource} + + {String(completed ?? '?')}/{String(total ?? '?')} done + + status={status} +
+ ) +} + interface DemoStepCardProps { step: DemoStep index: number @@ -139,6 +216,13 @@ export function DemoStepCard({ step, index, inspectHref }: DemoStepCardProps) { )} {step.name === 'register' && } + {/* PRP-39 — terminal-pass mini-summaries for the 4 new step kinds. */} + {step.name === 'champion_compat_compare' && ( + + )} + {step.name === 'stale_alias_trigger' && } + {step.name === 'safer_promote_flow' && } + {step.name === 'batch_preset' && } {showInspect && (