Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,12 @@ AIGuard evidence in the same Lab-owned report. The summary is a reviewer
navigation surface: it makes runtime risk evidence easy to find, while Lab
remains the final deployment decision owner.

The report starts with a `Reviewer Focus` table using
`Focus / Quick signal / First read` columns so reviewers can quickly scan the
Decision owner, EdgeEnv regression gate, Telemetry/replay quality,
Operation context, and AIGuard warnings before reading the detailed evidence
rows.

| Reviewer question | Where to look | Meaning |
|---|---|---|
| Did AIGuard preserve EdgeEnv/Orchestrator producer lineage? | `producer_lineage_evidence_type=edgeenv_orchestrator_producer_lineage` and `runtime_history_seed_run_config_traceability` | Confirms the expected deterministic AIGuard evidence reached the Lab-owned report. |
Expand Down
2 changes: 1 addition & 1 deletion docs/ci/runtime_intelligence_gitlab_artifacts.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ EdgeEnv handoff summary and AIGuard deterministic evidence agree on
producer-lineage guard-alignment run IDs. This keeps the cross-repo marker
check file-based and does not make AIGuard a deployment decision owner.

The artifact gate is implemented by `scripts/check_runtime_intelligence_artifact_bundle.py`. It checks the generated Markdown / HTML report for the required Runtime Intelligence rows, including Lab ownership, EdgeEnv comparability, telemetry coverage-gap markers, Runtime replay duration scope with `source=entrypoint_requested_frames` traceability, Orchestrator operation feed context, compact queue/deadline/fallback operation markers with `max_total_queue_depth`, AIGuard max queue raw-context traceability, Orchestrator task event rollup, Lab EdgeEnv preservation context, Jetson/device-local preservation identity and detail labels, Orchestrator `operation_risk_summary` navigation context, AIGuard runtime operation anomalies, AIGuard `edgeenv_orchestrator_operation_risk_summary` evidence, AIGuard `edgeenv_orchestrator_task_event_rollup` evidence, remote dispatch starter event summary, `Remote fallback starter evidence`, `edgeenv_orchestrator_producer_lineage`, `runtime_history_seed_run_config_traceability`, `remote_execution_recovered_by_fallback`, and triggered deployment review rules.
The artifact gate is implemented by `scripts/check_runtime_intelligence_artifact_bundle.py`. It checks the generated Markdown / HTML report for the required Runtime Intelligence rows, including the `Reviewer Focus` quick-scan table, Lab ownership, EdgeEnv comparability, telemetry coverage-gap markers, Runtime replay duration scope with `source=entrypoint_requested_frames` traceability, Orchestrator operation feed context, compact queue/deadline/fallback operation markers with `max_total_queue_depth`, AIGuard max queue raw-context traceability, Orchestrator task event rollup, Lab EdgeEnv preservation context, Jetson/device-local preservation identity and detail labels, Orchestrator `operation_risk_summary` navigation context, AIGuard runtime operation anomalies, AIGuard `edgeenv_orchestrator_operation_risk_summary` evidence, AIGuard `edgeenv_orchestrator_task_event_rollup` evidence, remote dispatch starter event summary, `Remote fallback starter evidence`, `edgeenv_orchestrator_producer_lineage`, `runtime_history_seed_run_config_traceability`, `remote_execution_recovered_by_fallback`, and triggered deployment review rules.

The bundle manifest gate also checks the external AIGuard artifact before the
rendered report stage. In particular, `runtime_queue_overload` must preserve
Expand Down
34 changes: 33 additions & 1 deletion inferedgelab/report/html_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
from html import escape
from typing import Any, Dict, Optional

from inferedgelab.report.runtime_intelligence import build_runtime_intelligence_risk_rows
from inferedgelab.report.runtime_intelligence import (
build_runtime_intelligence_reviewer_focus_rows,
build_runtime_intelligence_risk_rows,
)
from inferedgelab.services.guard_analysis import guard_primary_reason, guard_status, guard_verdict


Expand Down Expand Up @@ -594,6 +597,23 @@ def _runtime_intelligence_risk_summary_to_html(
if not rows:
return ""

focus_rows = build_runtime_intelligence_reviewer_focus_rows(
guard_analysis=guard_analysis,
deployment_decision=deployment_decision,
edgeenv_regression=edgeenv_regression,
)
focus_html = []
for focus, value, first_read in focus_rows:
focus_html.append(
f"""
<tr>
<td>{escape(focus)}</td>
<td>{escape(value)}</td>
<td>{escape(first_read)}</td>
</tr>
"""
)

row_html = []
for signal, value, interpretation in rows:
row_html.append(
Expand All @@ -609,6 +629,18 @@ def _runtime_intelligence_risk_summary_to_html(
return f"""
<h2>Runtime Intelligence Risk Summary</h2>
<div class="meta">
<h3>Reviewer Focus</h3>
<table>
<thead>
<tr>
<th>Focus</th>
<th>Quick signal</th>
<th>First read</th>
</tr>
</thead>
<tbody>{''.join(focus_html)}</tbody>
</table>
<h3>Detailed Evidence Rows</h3>
<table>
<thead>
<tr>
Expand Down
20 changes: 19 additions & 1 deletion inferedgelab/report/markdown_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

from typing import Any, Dict, Optional

from inferedgelab.report.runtime_intelligence import build_runtime_intelligence_risk_rows
from inferedgelab.report.runtime_intelligence import (
build_runtime_intelligence_reviewer_focus_rows,
build_runtime_intelligence_risk_rows,
)
from inferedgelab.services.guard_analysis import guard_primary_reason, guard_status, guard_verdict


Expand Down Expand Up @@ -333,8 +336,23 @@ def _append_runtime_intelligence_risk_summary(
if not rows:
return

focus_rows = build_runtime_intelligence_reviewer_focus_rows(
guard_analysis=guard_analysis,
deployment_decision=deployment_decision,
edgeenv_regression=edgeenv_regression,
)
lines.append("## Runtime Intelligence Risk Summary")
lines.append("")
if focus_rows:
lines.append("### Reviewer Focus")
lines.append("")
lines.append("| Focus | Quick signal | First read |")
lines.append("|---|---|---|")
for focus, value, first_read in focus_rows:
lines.append(f"| {focus} | {value} | {first_read} |")
lines.append("")
lines.append("### Detailed Evidence Rows")
lines.append("")
lines.append("| Signal | Value | Lab interpretation |")
lines.append("|---|---|---|")
for signal, value, interpretation in rows:
Expand Down
183 changes: 183 additions & 0 deletions inferedgelab/report/runtime_intelligence.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,189 @@ def build_runtime_intelligence_risk_rows(
return rows


def build_runtime_intelligence_reviewer_focus_rows(
*,
guard_analysis: dict[str, Any] | None,
deployment_decision: dict[str, Any] | None,
edgeenv_regression: dict[str, Any] | None,
) -> list[tuple[str, str, str]]:
if guard_analysis is None and edgeenv_regression is None:
return []

rows: list[tuple[str, str, str]] = []
if deployment_decision is not None:
triggered_rules = _string_list(deployment_decision.get("triggered_rules"))
rows.append(
(
"Decision owner",
(
f"Lab={deployment_decision.get('decision')}; "
f"triggered_rules={_compact_join(triggered_rules)}"
),
"Start here: Lab is the final policy owner and downstream evidence is context.",
)
)

if edgeenv_regression is not None:
rows.append(_edgeenv_reviewer_focus_row(edgeenv_regression))
telemetry_row = _telemetry_reviewer_focus_row(edgeenv_regression)
if telemetry_row is not None:
rows.append(telemetry_row)
operation_row = _operation_reviewer_focus_row(edgeenv_regression)
if operation_row is not None:
rows.append(operation_row)

if guard_analysis is not None:
rows.append(_aiguard_reviewer_focus_row(guard_analysis))

return rows


def _edgeenv_reviewer_focus_row(
    edgeenv_regression: dict[str, Any],
) -> tuple[str, str, str]:
    """Build the ``EdgeEnv regression gate`` reviewer-focus row.

    The quick signal lists comparability, mode, regression flag, regression
    type, severity, and a compact list of the metric deltas that are present
    (mean, p99, fps, memory peak).

    Args:
        edgeenv_regression: EdgeEnv regression payload.

    Returns:
        A ``(focus, quick signal, first read)`` tuple.
    """
    # Guard both nested payloads the same way. A truthy non-dict value
    # (e.g. a bare bool or string under "comparability") would otherwise
    # raise AttributeError on ``.get`` and break report rendering.
    comparability = edgeenv_regression.get("comparability")
    if not isinstance(comparability, dict):
        comparability = {}
    evidence = edgeenv_regression.get("evidence")
    if not isinstance(evidence, dict):
        evidence = {}

    metric_parts = [
        _focus_percent("mean", evidence.get("mean_delta_pct")),
        _focus_percent("p99", evidence.get("p99_delta_pct")),
        _focus_percent("fps", evidence.get("fps_delta_pct")),
        _focus_percent("memory", evidence.get("memory_peak_delta_pct")),
    ]
    # _focus_percent yields "" for missing metrics; drop those before joining.
    metric_label = _compact_join([part for part in metric_parts if part])

    # The nested "comparability.comparable" value and the top-level
    # "comparable" key are both handed to _first_present to pick from.
    comparable = _first_present(
        comparability.get("comparable"),
        edgeenv_regression.get("comparable"),
    )
    return (
        "EdgeEnv regression gate",
        (
            f"comparable={comparable}; "
            f"mode={edgeenv_regression.get('mode')}; "
            f"regression={edgeenv_regression.get('regression_detected')}; "
            f"type={edgeenv_regression.get('regression_type')}; "
            f"severity={edgeenv_regression.get('severity')}; "
            f"deltas={metric_label}"
        ),
        "Check comparability first, then read latency/resource deltas as deployment risk evidence.",
    )


def _telemetry_reviewer_focus_row(
edgeenv_regression: dict[str, Any],
) -> tuple[str, str, str] | None:
telemetry_context = edgeenv_regression.get("runtime_telemetry_context")
if not isinstance(telemetry_context, dict):
return None

gaps = [
gap
for gap in telemetry_context.get("evidence_gaps") or []
if isinstance(gap, dict)
]
history = telemetry_context.get("history") or {}
history_summary = history.get("summary") if isinstance(history, dict) else {}
if not isinstance(history_summary, dict):
history_summary = {}
coverage_labels = _runtime_telemetry_coverage_labels(telemetry_context)
replay_labels = _runtime_replay_scope_labels(telemetry_context)

return (
"Telemetry/replay quality",
(
f"gaps={len(gaps)}; "
f"history_missing_runs={history_summary.get('missing_telemetry_runs', '-')}; "
f"run_config_seeds={history_summary.get('history_seed_run_config_runs', '-')}; "
f"coverage={_compact_join(coverage_labels)}; "
f"replay={_compact_join(replay_labels, limit=1)}"
),
"Missing telemetry and replay scope are evidence-quality context, not failure or policy override.",
)


def _operation_reviewer_focus_row(
edgeenv_regression: dict[str, Any],
) -> tuple[str, str, str] | None:
telemetry_context = edgeenv_regression.get("runtime_telemetry_context")
if not isinstance(telemetry_context, dict):
return None

marker_labels = _orchestrator_queue_deadline_fallback_labels(telemetry_context)
risk_labels = _orchestrator_operation_risk_labels(telemetry_context)
preservation_labels = _edgeenv_preservation_run_labels(telemetry_context)
task_labels = _orchestrator_task_event_rollup_labels(telemetry_context)
if not any((marker_labels, risk_labels, preservation_labels, task_labels)):
return None

parts = [
f"queue_deadline_fallback={'present' if marker_labels else 'missing'}",
f"operation_risk={'present' if risk_labels else 'missing'}",
f"device_local_preservation={'present' if preservation_labels else 'missing'}",
f"task_rollup={'present' if task_labels else 'missing'}",
]
return (
"Operation context",
"; ".join(parts),
"Use this row to decide whether to scan Orchestrator/EdgeEnv operation evidence next.",
)


def _aiguard_reviewer_focus_row(
    guard_analysis: dict[str, Any],
) -> tuple[str, str, str]:
    """Build the ``AIGuard warnings`` reviewer-focus row.

    Args:
        guard_analysis: AIGuard analysis payload; its ``evidence`` entries
            that are dicts are inspected.

    Returns:
        A ``(focus, quick signal, first read)`` tuple whose quick signal
        carries the guard status and verdict, the number of evidence items
        with a ``warning``/``failed``/``error`` status, the distinct runtime
        operation anomaly types among those items, and the distinct remote
        dispatch evidence types among all items.
    """
    review_statuses = {"warning", "failed", "error"}

    evidence_dicts = [
        entry
        for entry in (guard_analysis.get("evidence") or [])
        if isinstance(entry, dict)
    ]
    flagged = [
        entry
        for entry in evidence_dicts
        if str(entry.get("status")).lower() in review_statuses
    ]

    # Anomaly types are collected from flagged items only.
    anomaly_names = set()
    for entry in flagged:
        entry_type = entry.get("type")
        if entry_type in RUNTIME_OPERATION_ANOMALY_TYPES:
            anomaly_names.add(str(entry_type))

    # Remote dispatch types are collected from every evidence item,
    # regardless of status.
    dispatch_names = set()
    for entry in evidence_dicts:
        entry_type = entry.get("type")
        if entry_type in REMOTE_DISPATCH_EVIDENCE_TYPES:
            dispatch_names.add(str(entry_type))

    signal_parts = [
        f"status={guard_status(guard_analysis)}",
        f"verdict={guard_verdict(guard_analysis)}",
        f"review_items={len(flagged)}",
        f"anomalies={_compact_join(sorted(anomaly_names))}",
        f"remote_dispatch={_compact_join(sorted(dispatch_names), limit=1)}",
    ]
    return (
        "AIGuard warnings",
        "; ".join(signal_parts),
        "AIGuard provides deterministic warning evidence; Lab keeps the final decision.",
    )


def _focus_percent(label: str, value: Any) -> str:
if value is None:
return ""
if isinstance(value, (int, float)) and not isinstance(value, bool):
return f"{label}={value:+.1f}%"
return f"{label}={value}"


def _compact_join(values: list[str], *, limit: int = 3) -> str:
compact_values = [value for value in values if value]
if not compact_values:
return "none"
if len(compact_values) <= limit:
return ",".join(compact_values)
visible = ",".join(compact_values[:limit])
return f"{visible},+{len(compact_values) - limit} more"


def _append_telemetry_context_rows(
rows: list[tuple[str, str, str]],
edgeenv_regression: dict[str, Any],
Expand Down
14 changes: 14 additions & 0 deletions scripts/check_runtime_intelligence_artifact_bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@

REQUIRED_MARKDOWN_MARKERS = {
"risk_summary_section": "## Runtime Intelligence Risk Summary",
"reviewer_focus_section": "### Reviewer Focus",
"reviewer_focus_table": "| Focus | Quick signal | First read |",
"reviewer_focus_edgeenv_gate": "| EdgeEnv regression gate |",
"reviewer_focus_telemetry_quality": "| Telemetry/replay quality |",
"reviewer_focus_operation_context": "| Operation context |",
"reviewer_focus_aiguard_warnings": "| AIGuard warnings |",
"detailed_evidence_rows": "### Detailed Evidence Rows",
"lab_decision_owner": "Lab remains the final deployment decision owner.",
"edgeenv_comparability": "| EdgeEnv comparability | Yes / same-condition |",
"runtime_regression": "| Runtime regression | True / mixed / high |",
Expand Down Expand Up @@ -130,6 +137,13 @@

REQUIRED_HTML_MARKERS = {
"risk_summary_section": "Runtime Intelligence Risk Summary",
"reviewer_focus_section": "Reviewer Focus",
"reviewer_focus_table": "Quick signal",
"reviewer_focus_edgeenv_gate": "EdgeEnv regression gate",
"reviewer_focus_telemetry_quality": "Telemetry/replay quality",
"reviewer_focus_operation_context": "Operation context",
"reviewer_focus_aiguard_warnings": "AIGuard warnings",
"detailed_evidence_rows": "Detailed Evidence Rows",
"lab_decision_owner": "Lab remains the final deployment decision owner.",
"runtime_telemetry_coverage": "Runtime telemetry coverage gaps",
"aiguard_coverage_field_gap": "runtime_telemetry_field_gap",
Expand Down
15 changes: 15 additions & 0 deletions tests/test_report_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,14 @@ def test_generate_compare_markdown_summarizes_orchestrator_context_risk():
)

assert "## Runtime Intelligence Risk Summary" in text
assert "### Reviewer Focus" in text
assert "| Focus | Quick signal | First read |" in text
assert "| EdgeEnv regression gate | comparable=True; mode=same-condition;" in text
assert "deltas=mean=+18.4%,p99=+32.1%,fps=-20.5%,+1 more" in text
assert "| Telemetry/replay quality | gaps=0; history_missing_runs=0;" in text
assert "| Operation context | queue_deadline_fallback=present;" in text
assert "| AIGuard warnings | status=warning; verdict=suspicious;" in text
assert "### Detailed Evidence Rows" in text
assert "| Orchestrator operation feed context | 1 |" in text
assert "| Orchestrator context attached runs | candidate |" in text
assert (
Expand Down Expand Up @@ -937,6 +945,13 @@ def test_generate_compare_html_summarizes_operation_risk_summary():
)

assert "Runtime Intelligence Risk Summary" in html
assert "Reviewer Focus" in html
assert "Quick signal" in html
assert "EdgeEnv regression gate" in html
assert "Telemetry/replay quality" in html
assert "Operation context" in html
assert "AIGuard warnings" in html
assert "Detailed Evidence Rows" in html
assert "Runtime replay duration scope" in html
assert "short 96-frame-class replay (96 frames)" in html
assert "entrypoint_requested_frames" in html
Expand Down
7 changes: 7 additions & 0 deletions tests/test_runtime_intelligence_bundle_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,13 @@ def test_readme_runtime_intelligence_section_stays_scannable():
readme = (REPO_ROOT / "README.md").read_text(encoding="utf-8")

assert "| Reviewer question | Where to look | Meaning |" in readme
assert "Reviewer Focus" in readme
assert "`Focus / Quick signal / First read`" in readme
assert "Decision owner" in readme
assert "EdgeEnv regression gate" in readme
assert "Telemetry/replay quality" in readme
assert "Operation context" in readme
assert "AIGuard warnings" in readme
for row in [
"Did AIGuard preserve EdgeEnv/Orchestrator producer lineage?",
"Is there operation pressure?",
Expand Down
Loading
Loading