diff --git a/README.md b/README.md index b3f77c9..c3f8380 100644 --- a/README.md +++ b/README.md @@ -191,7 +191,7 @@ Additional report paths: - Optional `--remote-dispatch ` input adds file-based worker selection, retry/fallback plan, and remote execution starter context when an Orchestrator `inferedge-remote-dispatch-result-v1` JSON is available. - Optional `--edgeenv-run-show ` input adds EdgeEnv local run preservation context when an EdgeEnv `runs show` JSON is available. - `scripts/smoke_agent_runtime_remote_paths.sh` reproduces both committed remote dispatch starter paths: plan-only worker selection and fallback-recovered starter execution. -- `scripts/smoke_agent_runtime_edgeenv_preservation.sh` gates the committed EdgeEnv run-show fixture so the `Runtime Intelligence EdgeEnv Preservation` report section cannot disappear silently. +- `scripts/smoke_agent_runtime_edgeenv_preservation.sh` gates the committed EdgeEnv run-show fixture so the `Runtime Intelligence EdgeEnv Preservation` report section, preservation identity label, and preservation details label cannot disappear silently. - Remote dispatch remains starter evidence for local-first review; it does not claim production remote execution. Remote dispatch starter boundary: diff --git a/docs/portfolio/agent_runtime_reliability_report.md b/docs/portfolio/agent_runtime_reliability_report.md index 6a34de9..590f0c4 100644 --- a/docs/portfolio/agent_runtime_reliability_report.md +++ b/docs/portfolio/agent_runtime_reliability_report.md @@ -64,7 +64,10 @@ bash scripts/smoke_agent_runtime_edgeenv_preservation.sh \ The smoke verifies that the Lab-owned Markdown/JSON report keeps the `Runtime Intelligence EdgeEnv Preservation` section, EdgeEnv run ID, Runtime operation summary schema, and `comparability_role=supplemental_evidence_not_gate` -visible from a lightweight `runs show` fixture. This is local registry +visible from a lightweight `runs show` fixture. The same section now exposes a +short preservation identity label and a companion preservation details label so +device-local preservation evidence stays easy to scan without mixing producer, +queue, and resource markers into the identity row. This is local registry preservation evidence, not a production telemetry database or deployment decision override. diff --git a/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md b/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md index 4d305ea..af11a4b 100644 --- a/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md +++ b/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md @@ -190,6 +190,7 @@ Expected Lab behavior: - When EdgeEnv includes preserved Orchestrator feed context, the `Runtime Intelligence Risk Summary` surfaces queue, thermal, throttling, memory, fallback context, and compact `operation_risk_summary` markers as supplemental runtime evidence. - The Jetson/device-local preservation row starts with `identity=jetson_device_local_preservation` and the device-local path marker such as `path=device_local_starter` when available, making the preserved Jetson EdgeEnv run easier to identify before reviewers inspect detailed queue/resource context. - The detailed producer/source/stage/resource markers are rendered in a separate `Jetson/device-local EdgeEnv preservation details` row so the identity row stays short while preserving the same navigation context. +- Agent Runtime Reliability reports now mirror the same reviewer-facing split inside `Runtime Intelligence EdgeEnv Preservation`: `preservation_identity` keeps the run/path identity short, while `preservation_details` carries source, stage, device-local event, resource, and queue markers as navigation context. - `operation_risk_summary` markers are shown as navigation context only: queue-pressure reason, max-pressure task, worker-health reason, and producer/device-local event counts do not become EdgeEnv regression deltas, comparability fields, or a Lab deployment decision override. - If the preserved Orchestrator feed includes `runtime_task_event_summary`, Lab also renders a task-level event rollup for scheduler delay, deadline miss, fallback, policy reason, and drop reason markers. This helps reviewers identify the affected workload without making Orchestrator the decision owner. - When `--guard-analysis` is provided, Lab ingests the precomputed AIGuard artifact as evidence without requiring AIGuard to be installed in the Lab environment. diff --git a/inferedgelab/services/agent_runtime_report.py b/inferedgelab/services/agent_runtime_report.py index ab60b7a..b8a5fc5 100644 --- a/inferedgelab/services/agent_runtime_report.py +++ b/inferedgelab/services/agent_runtime_report.py @@ -137,7 +137,14 @@ def build_agent_runtime_reliability_report( runtime_summary = _agent_runtime_summary(orchestration_summary) runtime_result_context = _runtime_result_operation_context(runtime_result) remote_dispatch_context = _remote_dispatch_context(remote_dispatch) + operation_context = _operation_context(orchestration_summary, metrics) edgeenv_preservation_context = _edgeenv_preservation_context(edgeenv_run_show) + if edgeenv_preservation_context: + edgeenv_preservation_context = _with_edgeenv_preservation_report_labels( + edgeenv_preservation_context, + metrics=metrics, + operation_context=operation_context, + ) runtime_operation_guard_summary = _runtime_operation_guard_summary(guard_analysis) orchestrator_operation_guard_summary = _orchestrator_operation_guard_summary( guard_analysis, @@ -190,7 +197,7 @@ def build_agent_runtime_reliability_report( "totals": _totals(runtime_summary), "metrics": metrics, "timeline_summary": _timeline_summary(orchestration_summary, metrics), - "operation_context": _operation_context(orchestration_summary, metrics), + "operation_context": operation_context, "runtime_result_context": runtime_result_context, "remote_dispatch_context": remote_dispatch_context, "edgeenv_preservation_context": edgeenv_preservation_context, @@ -1061,6 +1068,106 @@ def _edgeenv_preservation_context( } +def _with_edgeenv_preservation_report_labels( + context: dict[str, Any], + *, + metrics: dict[str, Any], + operation_context: dict[str, Any], +) -> dict[str, Any]: + enriched = dict(context) + identity_label = _edgeenv_preservation_identity_label(context, metrics) + details_label = _edgeenv_preservation_details_label(metrics, operation_context) + enriched["preservation_identity_label"] = identity_label + enriched["preservation_details_label"] = details_label + return enriched + + +def _edgeenv_preservation_identity_label( + context: dict[str, Any], + metrics: dict[str, Any], +) -> str: + path = _edgeenv_preservation_path(metrics) + identity = ( + "jetson_device_local_preservation" + if path == "device_local_starter" + else "edgeenv_runtime_operation_preservation" + ) + parts = [f"identity={identity}"] + if path: + parts.append(f"path={path}") + run_id = context.get("run_id") + if run_id: + parts.append(f"run={run_id}") + return ", ".join(parts) + + +def _edgeenv_preservation_path(metrics: dict[str, Any]) -> str: + scenario_mode = str(metrics.get("scenario_mode") or "") + device_local_count = _non_negative_number(metrics.get("device_local_event_count")) + if scenario_mode == "device_local" or device_local_count > 0: + return "device_local_starter" + return "agent_runtime_preservation" + + +def _edgeenv_preservation_details_label( + metrics: dict[str, Any], + operation_context: dict[str, Any], +) -> str: + queue_state = operation_context.get("queue_state_summary") or {} + runtime_events = operation_context.get("runtime_event_summary") or {} + worker_health = operation_context.get("worker_health_snapshot") or {} + workers = worker_health.get("workers") if isinstance(worker_health, dict) else {} + + sources = _unique_strings( + [ + *(_string_list(metrics.get("device_local_producer_sources"))), + *(_string_list(metrics.get("runtime_event_producer_sources"))), + *(_string_list(queue_state.get("device_local_producer_sources"))), + *(_string_list(runtime_events.get("producer_sources"))), + ] + ) + stages: list[str] = [] + if isinstance(workers, dict): + for task_name, worker in workers.items(): + if not isinstance(worker, dict): + continue + stage = worker.get("producer_stage") + if isinstance(stage, str) and stage: + task = worker.get("task") or worker.get("agent_id") or task_name + stages.append(f"{task}:{stage}") + + resource_markers: list[str] = [] + for source in sources: + if source in {"process_resource_snapshot", "resource_snapshot_fixture"}: + resource_markers.append(source) + if "tegrastats" in source: + resource_markers.append(source) + + parts = [ + f"sources={_label_list(sources)}", + f"stages={_label_list(_unique_strings(stages))}", + ( + "device_local_events=" + f"{_fmt_number(metrics.get('device_local_event_count'))}" + ), + f"resource={_label_list(_unique_strings(resource_markers))}", + f"queue={metrics.get('queue_pressure_reason') or 'unknown'}", + ] + return ", ".join(parts) + + +def _unique_strings(values: list[str]) -> list[str]: + result: list[str] = [] + for value in values: + if value and value not in result: + result.append(value) + return result + + +def _label_list(values: list[str]) -> str: + return "+".join(values) if values else "none" + + def _edgeenv_preservation_markdown_lines(context: dict[str, Any]) -> list[str]: if not context: return [] @@ -1069,6 +1176,8 @@ def _edgeenv_preservation_markdown_lines(context: dict[str, Any]) -> list[str]: "", "| Field | Value |", "|---|---|", + f"| preservation_identity | {context.get('preservation_identity_label') or '-'} |", + f"| preservation_details | {context.get('preservation_details_label') or '-'} |", f"| edgeenv_run_id | {context.get('run_id') or '-'} |", f"| created_at | {context.get('created_at') or '-'} |", f"| runtime_operation_schema | {context.get('runtime_operation_schema_version') or '-'} |", diff --git a/scripts/smoke_agent_runtime_edgeenv_preservation.sh b/scripts/smoke_agent_runtime_edgeenv_preservation.sh index c6d49ce..a43487c 100755 --- a/scripts/smoke_agent_runtime_edgeenv_preservation.sh +++ b/scripts/smoke_agent_runtime_edgeenv_preservation.sh @@ -75,7 +75,11 @@ grep -q "edgeenv_preservation_context" "$REPORT_JSON" grep -q "run-fixture-edgeenv-operation-0001" "$REPORT_JSON" grep -q "inferedge-runtime-operation-summary-v1" "$REPORT_JSON" grep -q "supplemental_evidence_not_gate" "$REPORT_JSON" +grep -q "preservation_identity_label" "$REPORT_JSON" +grep -q "preservation_details_label" "$REPORT_JSON" grep -q "Runtime Intelligence EdgeEnv Preservation" "$REPORT_MD" +grep -q "preservation_identity" "$REPORT_MD" +grep -q "preservation_details" "$REPORT_MD" grep -q "edgeenv_run_id" "$REPORT_MD" grep -q "run-fixture-edgeenv-operation-0001" "$REPORT_MD" grep -q "runtime_operation_health_reason" "$REPORT_MD" diff --git a/tests/test_agent_runtime_report.py b/tests/test_agent_runtime_report.py index 00d3470..2079910 100644 --- a/tests/test_agent_runtime_report.py +++ b/tests/test_agent_runtime_report.py @@ -1296,9 +1296,36 @@ def test_agent_runtime_report_surfaces_edgeenv_run_show_preservation(): "latency_budget_exceeded", ] assert context["comparability_role"] == "supplemental_evidence_not_gate" + assert context["preservation_identity_label"] == ( + "identity=jetson_device_local_preservation, " + "path=device_local_starter, " + "run=run-20260529-094714-0955a027" + ) + assert context["preservation_details_label"] == ( + "sources=resource_snapshot_fixture+image_file+fastapi_request_fixture, " + "stages=safety_monitor_agent:device_local_starter+" + "vision_agent:device_local_starter, " + "device_local_events=4, " + "resource=resource_snapshot_fixture, " + "queue=max_total_queue_depth_exceeded_overload_threshold" + ) markdown = build_agent_runtime_reliability_markdown(report) assert "Runtime Intelligence EdgeEnv Preservation" in markdown + assert ( + "| preservation_identity | identity=jetson_device_local_preservation, " + "path=device_local_starter, run=run-20260529-094714-0955a027 |" + in markdown + ) + assert ( + "| preservation_details | " + "sources=resource_snapshot_fixture+image_file+fastapi_request_fixture, " + "stages=safety_monitor_agent:device_local_starter+" + "vision_agent:device_local_starter, device_local_events=4, " + "resource=resource_snapshot_fixture, " + "queue=max_total_queue_depth_exceeded_overload_threshold |" + in markdown + ) assert "| edgeenv_run_id | run-20260529-094714-0955a027 |" in markdown assert "| runtime_operation_health_reason | timeout_threshold_exceeded |" in markdown assert "| runtime_operation_recommended_action | review_latency_budget_or_degrade |" in markdown @@ -1528,6 +1555,11 @@ def test_agent_runtime_report_loads_committed_fixtures(): "inferedge-runtime-operation-summary-v1" ) assert context["comparability_role"] == "supplemental_evidence_not_gate" + assert context["preservation_identity_label"].startswith( + "identity=edgeenv_runtime_operation_preservation, " + "path=agent_runtime_preservation" + ) + assert "device_local_events=0" in context["preservation_details_label"] def test_agent_runtime_report_surfaces_remote_execution_failure():