From 532910b6270c95737ea2fa50eb7aad450494248b Mon Sep 17 00:00:00 2001 From: hyeokjun32 Date: Mon, 1 Jun 2026 17:42:30 +0900 Subject: [PATCH] feat: add runtime intelligence reviewer focus --- README.md | 6 + .../runtime_intelligence_gitlab_artifacts.md | 2 +- inferedgelab/report/html_generator.py | 34 +++- inferedgelab/report/markdown_generator.py | 20 +- inferedgelab/report/runtime_intelligence.py | 183 ++++++++++++++++++ ...ck_runtime_intelligence_artifact_bundle.py | 14 ++ tests/test_report_generators.py | 15 ++ ...st_runtime_intelligence_bundle_manifest.py | 7 + ...ntime_intelligence_evidence_chain_smoke.py | 18 ++ 9 files changed, 296 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b3c85bb..443ddb6 100644 --- a/README.md +++ b/README.md @@ -518,6 +518,12 @@ AIGuard evidence in the same Lab-owned report. The summary is a reviewer navigation surface: it makes runtime risk evidence easy to find, while Lab remains the final deployment decision owner. +The report starts with a `Reviewer Focus` table using +`Focus / Quick signal / First read` columns so reviewers can quickly scan the +Decision owner, EdgeEnv regression gate, Telemetry/replay quality, +Operation context, and AIGuard warnings before reading the detailed evidence +rows. + | Reviewer question | Where to look | Meaning | |---|---|---| | Did AIGuard preserve EdgeEnv/Orchestrator producer lineage? | `producer_lineage_evidence_type=edgeenv_orchestrator_producer_lineage` and `runtime_history_seed_run_config_traceability` | Confirms the expected deterministic AIGuard evidence reached the Lab-owned report. | diff --git a/docs/ci/runtime_intelligence_gitlab_artifacts.md b/docs/ci/runtime_intelligence_gitlab_artifacts.md index 5ed6272..158f5ad 100644 --- a/docs/ci/runtime_intelligence_gitlab_artifacts.md +++ b/docs/ci/runtime_intelligence_gitlab_artifacts.md @@ -186,7 +186,7 @@ EdgeEnv handoff summary and AIGuard deterministic evidence agree on producer-lineage guard-alignment run IDs. This keeps the cross-repo marker check file-based and does not make AIGuard a deployment decision owner. -The artifact gate is implemented by `scripts/check_runtime_intelligence_artifact_bundle.py`. It checks the generated Markdown / HTML report for the required Runtime Intelligence rows, including Lab ownership, EdgeEnv comparability, telemetry coverage-gap markers, Runtime replay duration scope with `source=entrypoint_requested_frames` traceability, Orchestrator operation feed context, compact queue/deadline/fallback operation markers with `max_total_queue_depth`, AIGuard max queue raw-context traceability, Orchestrator task event rollup, Lab EdgeEnv preservation context, Jetson/device-local preservation identity and detail labels, Orchestrator `operation_risk_summary` navigation context, AIGuard runtime operation anomalies, AIGuard `edgeenv_orchestrator_operation_risk_summary` evidence, AIGuard `edgeenv_orchestrator_task_event_rollup` evidence, remote dispatch starter event summary, `Remote fallback starter evidence`, `edgeenv_orchestrator_producer_lineage`, `runtime_history_seed_run_config_traceability`, `remote_execution_recovered_by_fallback`, and triggered deployment review rules. +The artifact gate is implemented by `scripts/check_runtime_intelligence_artifact_bundle.py`. It checks the generated Markdown / HTML report for the required Runtime Intelligence rows, including the `Reviewer Focus` quick-scan table, Lab ownership, EdgeEnv comparability, telemetry coverage-gap markers, Runtime replay duration scope with `source=entrypoint_requested_frames` traceability, Orchestrator operation feed context, compact queue/deadline/fallback operation markers with `max_total_queue_depth`, AIGuard max queue raw-context traceability, Orchestrator task event rollup, Lab EdgeEnv preservation context, Jetson/device-local preservation identity and detail labels, Orchestrator `operation_risk_summary` navigation context, AIGuard runtime operation anomalies, AIGuard `edgeenv_orchestrator_operation_risk_summary` evidence, AIGuard `edgeenv_orchestrator_task_event_rollup` evidence, remote dispatch starter event summary, `Remote fallback starter evidence`, `edgeenv_orchestrator_producer_lineage`, `runtime_history_seed_run_config_traceability`, `remote_execution_recovered_by_fallback`, and triggered deployment review rules. The bundle manifest gate also checks the external AIGuard artifact before the rendered report stage. In particular, `runtime_queue_overload` must preserve diff --git a/inferedgelab/report/html_generator.py b/inferedgelab/report/html_generator.py index 52b8e61..f59442b 100644 --- a/inferedgelab/report/html_generator.py +++ b/inferedgelab/report/html_generator.py @@ -3,7 +3,10 @@ from html import escape from typing import Any, Dict, Optional -from inferedgelab.report.runtime_intelligence import build_runtime_intelligence_risk_rows +from inferedgelab.report.runtime_intelligence import ( + build_runtime_intelligence_reviewer_focus_rows, + build_runtime_intelligence_risk_rows, +) from inferedgelab.services.guard_analysis import guard_primary_reason, guard_status, guard_verdict @@ -594,6 +597,23 @@ def _runtime_intelligence_risk_summary_to_html( if not rows: return "" + focus_rows = build_runtime_intelligence_reviewer_focus_rows( + guard_analysis=guard_analysis, + deployment_decision=deployment_decision, + edgeenv_regression=edgeenv_regression, + ) + focus_html = [] + for focus, value, first_read in focus_rows: + focus_html.append( + f""" + + {escape(focus)} + {escape(value)} + {escape(first_read)} + + """ + ) + row_html = [] for signal, value, interpretation in rows: row_html.append( @@ -609,6 +629,18 @@ def _runtime_intelligence_risk_summary_to_html( return f"""

Runtime Intelligence Risk Summary

+

Reviewer Focus

+ + + + + + + + + {''.join(focus_html)} +
FocusQuick signalFirst read
+

Detailed Evidence Rows

diff --git a/inferedgelab/report/markdown_generator.py b/inferedgelab/report/markdown_generator.py index 6a25797..e3eeeb9 100644 --- a/inferedgelab/report/markdown_generator.py +++ b/inferedgelab/report/markdown_generator.py @@ -2,7 +2,10 @@ from typing import Any, Dict, Optional -from inferedgelab.report.runtime_intelligence import build_runtime_intelligence_risk_rows +from inferedgelab.report.runtime_intelligence import ( + build_runtime_intelligence_reviewer_focus_rows, + build_runtime_intelligence_risk_rows, +) from inferedgelab.services.guard_analysis import guard_primary_reason, guard_status, guard_verdict @@ -333,8 +336,23 @@ def _append_runtime_intelligence_risk_summary( if not rows: return + focus_rows = build_runtime_intelligence_reviewer_focus_rows( + guard_analysis=guard_analysis, + deployment_decision=deployment_decision, + edgeenv_regression=edgeenv_regression, + ) lines.append("## Runtime Intelligence Risk Summary") lines.append("") + if focus_rows: + lines.append("### Reviewer Focus") + lines.append("") + lines.append("| Focus | Quick signal | First read |") + lines.append("|---|---|---|") + for focus, value, first_read in focus_rows: + lines.append(f"| {focus} | {value} | {first_read} |") + lines.append("") + lines.append("### Detailed Evidence Rows") + lines.append("") lines.append("| Signal | Value | Lab interpretation |") lines.append("|---|---|---|") for signal, value, interpretation in rows: diff --git a/inferedgelab/report/runtime_intelligence.py b/inferedgelab/report/runtime_intelligence.py index 7118cc2..ccd30df 100644 --- a/inferedgelab/report/runtime_intelligence.py +++ b/inferedgelab/report/runtime_intelligence.py @@ -136,6 +136,189 @@ def build_runtime_intelligence_risk_rows( return rows +def build_runtime_intelligence_reviewer_focus_rows( + *, + guard_analysis: dict[str, Any] | None, + deployment_decision: dict[str, Any] | None, + edgeenv_regression: dict[str, Any] | None, +) -> list[tuple[str, str, str]]: + if guard_analysis is None and edgeenv_regression is None: + return [] + + rows: list[tuple[str, str, str]] = [] + if deployment_decision is not None: + triggered_rules = _string_list(deployment_decision.get("triggered_rules")) + rows.append( + ( + "Decision owner", + ( + f"Lab={deployment_decision.get('decision')}; " + f"triggered_rules={_compact_join(triggered_rules)}" + ), + "Start here: Lab is the final policy owner and downstream evidence is context.", + ) + ) + + if edgeenv_regression is not None: + rows.append(_edgeenv_reviewer_focus_row(edgeenv_regression)) + telemetry_row = _telemetry_reviewer_focus_row(edgeenv_regression) + if telemetry_row is not None: + rows.append(telemetry_row) + operation_row = _operation_reviewer_focus_row(edgeenv_regression) + if operation_row is not None: + rows.append(operation_row) + + if guard_analysis is not None: + rows.append(_aiguard_reviewer_focus_row(guard_analysis)) + + return rows + + +def _edgeenv_reviewer_focus_row( + edgeenv_regression: dict[str, Any], +) -> tuple[str, str, str]: + comparability = edgeenv_regression.get("comparability") or {} + evidence = edgeenv_regression.get("evidence") + if not isinstance(evidence, dict): + evidence = {} + metric_parts = [ + _focus_percent("mean", evidence.get("mean_delta_pct")), + _focus_percent("p99", evidence.get("p99_delta_pct")), + _focus_percent("fps", evidence.get("fps_delta_pct")), + _focus_percent("memory", evidence.get("memory_peak_delta_pct")), + ] + metric_label = _compact_join([part for part in metric_parts if part]) + return ( + "EdgeEnv regression gate", + ( + f"comparable={_first_present(comparability.get('comparable'), edgeenv_regression.get('comparable'))}; " + f"mode={edgeenv_regression.get('mode')}; " + f"regression={edgeenv_regression.get('regression_detected')}; " + f"type={edgeenv_regression.get('regression_type')}; " + f"severity={edgeenv_regression.get('severity')}; " + f"deltas={metric_label}" + ), + "Check comparability first, then read latency/resource deltas as deployment risk evidence.", + ) + + +def _telemetry_reviewer_focus_row( + edgeenv_regression: dict[str, Any], +) -> tuple[str, str, str] | None: + telemetry_context = edgeenv_regression.get("runtime_telemetry_context") + if not isinstance(telemetry_context, dict): + return None + + gaps = [ + gap + for gap in telemetry_context.get("evidence_gaps") or [] + if isinstance(gap, dict) + ] + history = telemetry_context.get("history") or {} + history_summary = history.get("summary") if isinstance(history, dict) else {} + if not isinstance(history_summary, dict): + history_summary = {} + coverage_labels = _runtime_telemetry_coverage_labels(telemetry_context) + replay_labels = _runtime_replay_scope_labels(telemetry_context) + + return ( + "Telemetry/replay quality", + ( + f"gaps={len(gaps)}; " + f"history_missing_runs={history_summary.get('missing_telemetry_runs', '-')}; " + f"run_config_seeds={history_summary.get('history_seed_run_config_runs', '-')}; " + f"coverage={_compact_join(coverage_labels)}; " + f"replay={_compact_join(replay_labels, limit=1)}" + ), + "Missing telemetry and replay scope are evidence-quality context, not failure or policy override.", + ) + + +def _operation_reviewer_focus_row( + edgeenv_regression: dict[str, Any], +) -> tuple[str, str, str] | None: + telemetry_context = edgeenv_regression.get("runtime_telemetry_context") + if not isinstance(telemetry_context, dict): + return None + + marker_labels = _orchestrator_queue_deadline_fallback_labels(telemetry_context) + risk_labels = _orchestrator_operation_risk_labels(telemetry_context) + preservation_labels = _edgeenv_preservation_run_labels(telemetry_context) + task_labels = _orchestrator_task_event_rollup_labels(telemetry_context) + if not any((marker_labels, risk_labels, preservation_labels, task_labels)): + return None + + parts = [ + f"queue_deadline_fallback={'present' if marker_labels else 'missing'}", + f"operation_risk={'present' if risk_labels else 'missing'}", + f"device_local_preservation={'present' if preservation_labels else 'missing'}", + f"task_rollup={'present' if task_labels else 'missing'}", + ] + return ( + "Operation context", + "; ".join(parts), + "Use this row to decide whether to scan Orchestrator/EdgeEnv operation evidence next.", + ) + + +def _aiguard_reviewer_focus_row( + guard_analysis: dict[str, Any], +) -> tuple[str, str, str]: + evidence_items = [ + item + for item in (guard_analysis.get("evidence") or []) + if isinstance(item, dict) + ] + warning_items = [ + item + for item in evidence_items + if str(item.get("status")).lower() in {"warning", "failed", "error"} + ] + anomaly_types = sorted( + { + str(item.get("type")) + for item in warning_items + if item.get("type") in RUNTIME_OPERATION_ANOMALY_TYPES + } + ) + remote_dispatch_types = sorted( + { + str(item.get("type")) + for item in evidence_items + if item.get("type") in REMOTE_DISPATCH_EVIDENCE_TYPES + } + ) + return ( + "AIGuard warnings", + ( + f"status={guard_status(guard_analysis)}; " + f"verdict={guard_verdict(guard_analysis)}; " + f"review_items={len(warning_items)}; " + f"anomalies={_compact_join(anomaly_types)}; " + f"remote_dispatch={_compact_join(remote_dispatch_types, limit=1)}" + ), + "AIGuard provides deterministic warning evidence; Lab keeps the final decision.", + ) + + +def _focus_percent(label: str, value: Any) -> str: + if value is None: + return "" + if isinstance(value, (int, float)) and not isinstance(value, bool): + return f"{label}={value:+.1f}%" + return f"{label}={value}" + + +def _compact_join(values: list[str], *, limit: int = 3) -> str: + compact_values = [value for value in values if value] + if not compact_values: + return "none" + if len(compact_values) <= limit: + return ",".join(compact_values) + visible = ",".join(compact_values[:limit]) + return f"{visible},+{len(compact_values) - limit} more" + + def _append_telemetry_context_rows( rows: list[tuple[str, str, str]], edgeenv_regression: dict[str, Any], diff --git a/scripts/check_runtime_intelligence_artifact_bundle.py b/scripts/check_runtime_intelligence_artifact_bundle.py index c8034b7..8a21800 100644 --- a/scripts/check_runtime_intelligence_artifact_bundle.py +++ b/scripts/check_runtime_intelligence_artifact_bundle.py @@ -8,6 +8,13 @@ REQUIRED_MARKDOWN_MARKERS = { "risk_summary_section": "## Runtime Intelligence Risk Summary", + "reviewer_focus_section": "### Reviewer Focus", + "reviewer_focus_table": "| Focus | Quick signal | First read |", + "reviewer_focus_edgeenv_gate": "| EdgeEnv regression gate |", + "reviewer_focus_telemetry_quality": "| Telemetry/replay quality |", + "reviewer_focus_operation_context": "| Operation context |", + "reviewer_focus_aiguard_warnings": "| AIGuard warnings |", + "detailed_evidence_rows": "### Detailed Evidence Rows", "lab_decision_owner": "Lab remains the final deployment decision owner.", "edgeenv_comparability": "| EdgeEnv comparability | Yes / same-condition |", "runtime_regression": "| Runtime regression | True / mixed / high |", @@ -130,6 +137,13 @@ REQUIRED_HTML_MARKERS = { "risk_summary_section": "Runtime Intelligence Risk Summary", + "reviewer_focus_section": "Reviewer Focus", + "reviewer_focus_table": "Quick signal", + "reviewer_focus_edgeenv_gate": "EdgeEnv regression gate", + "reviewer_focus_telemetry_quality": "Telemetry/replay quality", + "reviewer_focus_operation_context": "Operation context", + "reviewer_focus_aiguard_warnings": "AIGuard warnings", + "detailed_evidence_rows": "Detailed Evidence Rows", "lab_decision_owner": "Lab remains the final deployment decision owner.", "runtime_telemetry_coverage": "Runtime telemetry coverage gaps", "aiguard_coverage_field_gap": "runtime_telemetry_field_gap", diff --git a/tests/test_report_generators.py b/tests/test_report_generators.py index f0de8a3..f3d2204 100644 --- a/tests/test_report_generators.py +++ b/tests/test_report_generators.py @@ -846,6 +846,14 @@ def test_generate_compare_markdown_summarizes_orchestrator_context_risk(): ) assert "## Runtime Intelligence Risk Summary" in text + assert "### Reviewer Focus" in text + assert "| Focus | Quick signal | First read |" in text + assert "| EdgeEnv regression gate | comparable=True; mode=same-condition;" in text + assert "deltas=mean=+18.4%,p99=+32.1%,fps=-20.5%,+1 more" in text + assert "| Telemetry/replay quality | gaps=0; history_missing_runs=0;" in text + assert "| Operation context | queue_deadline_fallback=present;" in text + assert "| AIGuard warnings | status=warning; verdict=suspicious;" in text + assert "### Detailed Evidence Rows" in text assert "| Orchestrator operation feed context | 1 |" in text assert "| Orchestrator context attached runs | candidate |" in text assert ( @@ -937,6 +945,13 @@ def test_generate_compare_html_summarizes_operation_risk_summary(): ) assert "Runtime Intelligence Risk Summary" in html + assert "Reviewer Focus" in html + assert "Quick signal" in html + assert "EdgeEnv regression gate" in html + assert "Telemetry/replay quality" in html + assert "Operation context" in html + assert "AIGuard warnings" in html + assert "Detailed Evidence Rows" in html assert "Runtime replay duration scope" in html assert "short 96-frame-class replay (96 frames)" in html assert "entrypoint_requested_frames" in html diff --git a/tests/test_runtime_intelligence_bundle_manifest.py b/tests/test_runtime_intelligence_bundle_manifest.py index 7496657..3f95b67 100644 --- a/tests/test_runtime_intelligence_bundle_manifest.py +++ b/tests/test_runtime_intelligence_bundle_manifest.py @@ -173,6 +173,13 @@ def test_readme_runtime_intelligence_section_stays_scannable(): readme = (REPO_ROOT / "README.md").read_text(encoding="utf-8") assert "| Reviewer question | Where to look | Meaning |" in readme + assert "Reviewer Focus" in readme + assert "`Focus / Quick signal / First read`" in readme + assert "Decision owner" in readme + assert "EdgeEnv regression gate" in readme + assert "Telemetry/replay quality" in readme + assert "Operation context" in readme + assert "AIGuard warnings" in readme for row in [ "Did AIGuard preserve EdgeEnv/Orchestrator producer lineage?", "Is there operation pressure?", diff --git a/tests/test_runtime_intelligence_evidence_chain_smoke.py b/tests/test_runtime_intelligence_evidence_chain_smoke.py index e252f3f..4954696 100644 --- a/tests/test_runtime_intelligence_evidence_chain_smoke.py +++ b/tests/test_runtime_intelligence_evidence_chain_smoke.py @@ -499,6 +499,16 @@ def test_compare_cmd_runtime_intelligence_chain_writes_markdown_and_html( assert "raw_context: preserved in artifact; omitted from console summary" in out assert "'raw_context':" not in out assert "Runtime Intelligence Risk Summary" in markdown + assert "### Reviewer Focus" in markdown + assert "| Focus | Quick signal | First read |" in markdown + assert "| Decision owner | Lab=review_required;" in markdown + assert "| EdgeEnv regression gate | comparable=Yes; mode=same-condition;" in markdown + assert "deltas=mean=+18.0%,p99=+32.0%,fps=-22.0%,+1 more" in markdown + assert "| Telemetry/replay quality | gaps=0; history_missing_runs=1;" in markdown + assert "| Operation context | queue_deadline_fallback=present;" in markdown + assert "| AIGuard warnings | status=warning; verdict=suspicious;" in markdown + assert "remote_dispatch=remote_execution_recovered_by_fallback" in markdown + assert "### Detailed Evidence Rows" in markdown assert "Runtime telemetry coverage gaps" in markdown assert "Runtime telemetry history seed" in markdown assert "Runtime history seed run_config" in markdown @@ -562,6 +572,14 @@ def test_compare_cmd_runtime_intelligence_chain_writes_markdown_and_html( "warmup=1, runs=10" ) in markdown assert "Runtime Intelligence Risk Summary" in html + assert "Reviewer Focus" in html + assert "Quick signal" in html + assert "Decision owner" in html + assert "EdgeEnv regression gate" in html + assert "Telemetry/replay quality" in html + assert "Operation context" in html + assert "AIGuard warnings" in html + assert "Detailed Evidence Rows" in html assert "Orchestrator operation risk summary" in html assert "Orchestrator queue/deadline/fallback markers" in html assert "queue_pressure_reason=queue_backlog_threshold_exceeded" in html