From 54787265dcaf0d020333dd5d2bfad646d34f1908 Mon Sep 17 00:00:00 2001 From: hyeokjun32 Date: Sun, 31 May 2026 23:45:59 +0900 Subject: [PATCH] feat: surface runtime replay duration source --- README.md | 9 ++++---- .../runtime_intelligence_gitlab_artifacts.md | 2 +- .../edgeenv_runtime_regression_lab_handoff.md | 2 +- ..._regression_with_orchestrator_context.json | 4 ++++ inferedgelab/report/runtime_intelligence.py | 21 ++++++++++++++----- ...ck_runtime_intelligence_artifact_bundle.py | 10 +++++++-- ...check_runtime_intelligence_ci_artifacts.py | 2 ++ tests/test_report_generators.py | 18 ++++++++++++++++ ...test_runtime_intelligence_artifact_gate.py | 2 ++ .../test_runtime_intelligence_ci_template.py | 10 +++++++++ ...ntime_intelligence_evidence_chain_smoke.py | 4 ++++ .../test_runtime_intelligence_smoke_script.py | 2 ++ 12 files changed, 73 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 41f4526..0f21e19 100644 --- a/README.md +++ b/README.md @@ -504,10 +504,11 @@ If EdgeEnv preserves an Orchestrator `operation_risk_summary`, Lab shows the com When EdgeEnv/Orchestrator context includes reviewer-facing duration metadata, Lab renders a `Runtime replay duration scope` row with `duration_label`, -`duration_class`, and frame count. This helps reviewers distinguish short -96-frame replay, 5-minute-class sustained replay, and quick starter smoke -without changing Lab deployment policy or treating replay duration as a -production readiness claim. +`duration_class`, frame count, and optional `duration_source` / +`duration_scope_label` traceability such as `source=entrypoint_requested_frames`. +This helps reviewers distinguish short 96-frame replay, 5-minute-class +sustained replay, and quick starter smoke without changing Lab deployment +policy or treating replay duration as a production readiness claim. When the EdgeEnv preservation path is present, Lab also renders a `Lab EdgeEnv preservation context` row with `lab_report_preservation_context_present=True`, diff --git a/docs/ci/runtime_intelligence_gitlab_artifacts.md b/docs/ci/runtime_intelligence_gitlab_artifacts.md index d2f6b88..eed774c 100644 --- a/docs/ci/runtime_intelligence_gitlab_artifacts.md +++ b/docs/ci/runtime_intelligence_gitlab_artifacts.md @@ -186,7 +186,7 @@ EdgeEnv handoff summary and AIGuard deterministic evidence agree on producer-lineage guard-alignment run IDs. This keeps the cross-repo marker check file-based and does not make AIGuard a deployment decision owner. -The artifact gate is implemented by `scripts/check_runtime_intelligence_artifact_bundle.py`. It checks the generated Markdown / HTML report for the required Runtime Intelligence rows, including Lab ownership, EdgeEnv comparability, telemetry coverage-gap markers, Runtime replay duration scope, Orchestrator operation feed context, Orchestrator task event rollup, Lab EdgeEnv preservation context, Jetson/device-local preservation identity and detail labels, Orchestrator `operation_risk_summary` navigation context, AIGuard runtime operation anomalies, AIGuard `edgeenv_orchestrator_operation_risk_summary` evidence, AIGuard `edgeenv_orchestrator_task_event_rollup` evidence, remote dispatch starter event summary, `Remote fallback starter evidence`, `edgeenv_orchestrator_producer_lineage`, `runtime_history_seed_run_config_traceability`, `remote_execution_recovered_by_fallback`, and triggered deployment review rules. +The artifact gate is implemented by `scripts/check_runtime_intelligence_artifact_bundle.py`. It checks the generated Markdown / HTML report for the required Runtime Intelligence rows, including Lab ownership, EdgeEnv comparability, telemetry coverage-gap markers, Runtime replay duration scope with `source=entrypoint_requested_frames` traceability, Orchestrator operation feed context, Orchestrator task event rollup, Lab EdgeEnv preservation context, Jetson/device-local preservation identity and detail labels, Orchestrator `operation_risk_summary` navigation context, AIGuard runtime operation anomalies, AIGuard `edgeenv_orchestrator_operation_risk_summary` evidence, AIGuard `edgeenv_orchestrator_task_event_rollup` evidence, remote dispatch starter event summary, `Remote fallback starter evidence`, `edgeenv_orchestrator_producer_lineage`, `runtime_history_seed_run_config_traceability`, `remote_execution_recovered_by_fallback`, and triggered deployment review rules. The CI artifact gate is implemented by `scripts/check_runtime_intelligence_ci_artifacts.py`. It runs in the deployment-risk stage and verifies that the collected optional GitLab artifacts include the manifest gate summary, AIGuard handoff alignment artifact, report gate summary, Runtime Intelligence Risk Summary report, portfolio demo status, and the validated contract markers from the bundle manifest gate. This keeps the final CI gate file-based and deterministic without turning GitLab into a runtime control plane. The same CI artifact gate also checks the copied diff --git a/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md b/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md index cc0472c..1d74bb7 100644 --- a/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md +++ b/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md @@ -188,7 +188,7 @@ Expected Lab behavior: - Additional Lab test fixtures under `tests/fixtures/edgeenv_regression/` mirror EdgeEnv replay examples for candidate telemetry gaps and execution sequence inversion. These fixture smokes verify that replay warnings become Lab-owned report context without making Lab recompute EdgeEnv comparability. - Markdown/HTML reports include a `Runtime Intelligence Risk Summary` that summarizes EdgeEnv comparability/regression, telemetry replay gaps, Runtime history seed/run_config traceability, AIGuard deterministic evidence, and the Lab-owned deployment decision in one reviewer-facing table. - When EdgeEnv includes preserved Orchestrator feed context, the `Runtime Intelligence Risk Summary` surfaces queue, thermal, throttling, memory, fallback context, and compact `operation_risk_summary` markers as supplemental runtime evidence. -- When EdgeEnv includes replay duration metadata, the same Risk Summary surfaces `Runtime replay duration scope` with `duration_label`, `duration_class`, and frame count as reviewer navigation context. This does not change EdgeEnv comparability or Lab deployment policy. +- When EdgeEnv includes replay duration metadata, the same Risk Summary surfaces `Runtime replay duration scope` with `duration_label`, `duration_class`, frame count, and optional `duration_source` / `duration_scope_label` traceability as reviewer navigation context. This does not change EdgeEnv comparability or Lab deployment policy. - The Jetson/device-local preservation row starts with `identity=jetson_device_local_preservation` and the device-local path marker such as `path=device_local_starter` when available, making the preserved Jetson EdgeEnv run easier to identify before reviewers inspect detailed queue/resource context. - The detailed producer/source/stage/resource markers are rendered in a separate `Jetson/device-local EdgeEnv preservation details` row so the identity row stays short while preserving the same navigation context. - Agent Runtime Reliability reports now mirror the same reviewer-facing split inside `Runtime Intelligence EdgeEnv Preservation`: `preservation_identity` keeps the run/path identity short, while `preservation_details` carries source, stage, device-local event, resource, and queue markers as navigation context. diff --git a/examples/runtime_intelligence_chain/edgeenv_regression_with_orchestrator_context.json b/examples/runtime_intelligence_chain/edgeenv_regression_with_orchestrator_context.json index 47f027f..6d8f12c 100644 --- a/examples/runtime_intelligence_chain/edgeenv_regression_with_orchestrator_context.json +++ b/examples/runtime_intelligence_chain/edgeenv_regression_with_orchestrator_context.json @@ -126,6 +126,8 @@ "frames": 96, "duration_class": "short_96_frame_class", "duration_label": "short 96-frame-class replay (96 frames)", + "duration_source": "entrypoint_requested_frames", + "duration_scope_label": "source=entrypoint_requested_frames, label=short 96-frame-class replay (96 frames), class=short_96_frame_class, frames=96", "telemetry_coverage": { "schema_version": "inferedge-runtime-telemetry-coverage-v1", "expected_fields": [ @@ -239,6 +241,8 @@ "frames": 96, "duration_class": "short_96_frame_class", "duration_label": "short 96-frame-class replay (96 frames)", + "duration_source": "entrypoint_requested_frames", + "duration_scope_label": "source=entrypoint_requested_frames, label=short 96-frame-class replay (96 frames), class=short_96_frame_class, frames=96", "producer_sources": [ "device_local_cli_override", "orchestration_summary" diff --git a/inferedgelab/report/runtime_intelligence.py b/inferedgelab/report/runtime_intelligence.py index 5ec6e50..8756f66 100644 --- a/inferedgelab/report/runtime_intelligence.py +++ b/inferedgelab/report/runtime_intelligence.py @@ -442,6 +442,8 @@ def _runtime_replay_scope_label(run_label: str, run_context: dict[str, Any]) -> ] duration_label = _first_payload_value(payloads, "duration_label") duration_class = _first_payload_value(payloads, "duration_class") + duration_source = _first_payload_value(payloads, "duration_source") + duration_scope_label = _first_payload_value(payloads, "duration_scope_label") frames = _first_payload_value(payloads, "frames") if frames is None: frames = _first_payload_value(payloads, "requested_frames") @@ -449,12 +451,21 @@ def _runtime_replay_scope_label(run_label: str, run_context: dict[str, Any]) -> frames = _first_payload_value(payloads, "frame_count") parts: list[str] = [] - if duration_label is not None: + if duration_scope_label is not None: + parts.append(f"scope_label={duration_scope_label}") + elif duration_label is not None: parts.append(f"label={duration_label}") - if duration_class is not None: - parts.append(f"class={duration_class}") - if frames is not None: - parts.append(f"frames={_format_compact_value(frames)}") + if duration_class is not None: + parts.append(f"class={duration_class}") + if frames is not None: + parts.append(f"frames={_format_compact_value(frames)}") + else: + if duration_class is not None: + parts.append(f"class={duration_class}") + if frames is not None: + parts.append(f"frames={_format_compact_value(frames)}") + if duration_source is not None and str(duration_source) not in ",".join(parts): + parts.append(f"source={duration_source}") if not parts: return "" return f"{run_label}: " + ", ".join(parts) diff --git a/scripts/check_runtime_intelligence_artifact_bundle.py b/scripts/check_runtime_intelligence_artifact_bundle.py index 0978f70..57b6232 100644 --- a/scripts/check_runtime_intelligence_artifact_bundle.py +++ b/scripts/check_runtime_intelligence_artifact_bundle.py @@ -31,8 +31,10 @@ ), "runtime_replay_duration_scope": ( "| Runtime replay duration scope | candidate: " - "label=short 96-frame-class replay (96 frames), " - "class=short_96_frame_class, frames=96 |" + "scope_label=source=entrypoint_requested_frames" + ), + "runtime_replay_duration_scope_label": ( + "scope_label=source=entrypoint_requested_frames" ), "jetson_edgeenv_preservation_identity": ( "| Jetson/device-local EdgeEnv preservation run | candidate: " @@ -150,6 +152,10 @@ "orchestrator_operation_risk_summary": "Orchestrator operation risk summary", "runtime_replay_duration_scope": "Runtime replay duration scope", "runtime_replay_duration_label": "short 96-frame-class replay (96 frames)", + "runtime_replay_duration_source": "source=entrypoint_requested_frames", + "runtime_replay_duration_scope_label": ( + "scope_label=source=entrypoint_requested_frames" + ), "orchestrator_task_event_rollup": "Orchestrator task event rollup", "lab_edgeenv_preservation_context": "Lab EdgeEnv preservation context", "lab_edgeenv_preservation_context_marker": ( diff --git a/scripts/check_runtime_intelligence_ci_artifacts.py b/scripts/check_runtime_intelligence_ci_artifacts.py index a4d8089..810652f 100644 --- a/scripts/check_runtime_intelligence_ci_artifacts.py +++ b/scripts/check_runtime_intelligence_ci_artifacts.py @@ -153,6 +153,8 @@ def _validate_runtime_report(path: Path, errors: list[str]) -> None: "## Runtime Intelligence Risk Summary", "Runtime replay duration scope", "short 96-frame-class replay (96 frames)", + "source=entrypoint_requested_frames", + "scope_label=source=entrypoint_requested_frames", "Lab remains the final deployment decision owner.", "AIGuard runtime operation anomalies", "runtime_queue_overload, runtime_thermal_instability", diff --git a/tests/test_report_generators.py b/tests/test_report_generators.py index 967a5c2..b4186d9 100644 --- a/tests/test_report_generators.py +++ b/tests/test_report_generators.py @@ -343,6 +343,12 @@ def make_edgeenv_regression_with_orchestrator_context() -> dict: context["candidate"]["frames"] = 96 context["candidate"]["duration_class"] = "short_96_frame_class" context["candidate"]["duration_label"] = "short 96-frame-class replay (96 frames)" + context["candidate"]["duration_source"] = "entrypoint_requested_frames" + context["candidate"]["duration_scope_label"] = ( + "source=entrypoint_requested_frames, " + "label=short 96-frame-class replay (96 frames), " + "class=short_96_frame_class, frames=96" + ) context["candidate"]["orchestrator_operation_context"] = { "schema_version": "inferedge-orchestrator-edgeenv-runtime-telemetry-feed-v1", "role": "orchestrator_operation_context_for_edgeenv", @@ -355,6 +361,15 @@ def make_edgeenv_regression_with_orchestrator_context() -> dict: "candidate_context": { "run_id": "candidate", "queue_depth": 7, + "frames": 96, + "duration_class": "short_96_frame_class", + "duration_label": "short 96-frame-class replay (96 frames)", + "duration_source": "entrypoint_requested_frames", + "duration_scope_label": ( + "source=entrypoint_requested_frames, " + "label=short 96-frame-class replay (96 frames), " + "class=short_96_frame_class, frames=96" + ), "operation": { "queue_depth": 7, "deadline_missed_count": 2, @@ -834,6 +849,7 @@ def test_generate_compare_markdown_summarizes_orchestrator_context_risk(): ) in text assert ( "| Runtime replay duration scope | candidate: " + "scope_label=source=entrypoint_requested_frames, " "label=short 96-frame-class replay (96 frames), " "class=short_96_frame_class, frames=96 |" ) in text @@ -904,6 +920,8 @@ def test_generate_compare_html_summarizes_operation_risk_summary(): assert "Runtime Intelligence Risk Summary" in html assert "Runtime replay duration scope" in html assert "short 96-frame-class replay (96 frames)" in html + assert "entrypoint_requested_frames" in html + assert "scope_label=source=entrypoint_requested_frames" in html assert "Orchestrator operation risk summary" in html assert "Orchestrator task event rollup" in html assert "vision_agent(delay=1,miss=1,max_delay_cycles=3,max_wait_ms=15)" in html diff --git a/tests/test_runtime_intelligence_artifact_gate.py b/tests/test_runtime_intelligence_artifact_gate.py index b49d225..15d6fba 100644 --- a/tests/test_runtime_intelligence_artifact_gate.py +++ b/tests/test_runtime_intelligence_artifact_gate.py @@ -55,6 +55,8 @@ def test_runtime_intelligence_artifact_gate_passes_for_chain_report(tmp_path): markdown = markdown_path.read_text(encoding="utf-8") assert "Runtime replay duration scope" in markdown assert "short 96-frame-class replay (96 frames)" in markdown + assert "source=entrypoint_requested_frames" in markdown + assert "scope_label=source=entrypoint_requested_frames" in markdown def test_runtime_intelligence_artifact_gate_cli_passes_for_chain_report(tmp_path): diff --git a/tests/test_runtime_intelligence_ci_template.py b/tests/test_runtime_intelligence_ci_template.py index d752a5d..e409b41 100644 --- a/tests/test_runtime_intelligence_ci_template.py +++ b/tests/test_runtime_intelligence_ci_template.py @@ -94,6 +94,8 @@ def test_runtime_intelligence_ci_artifact_gate_passes_for_expected_outputs(tmp_p "## Runtime Intelligence Risk Summary", "Runtime replay duration scope", "short 96-frame-class replay (96 frames)", + "source=entrypoint_requested_frames", + "scope_label=source=entrypoint_requested_frames", "Lab remains the final deployment decision owner.", "AIGuard runtime operation anomalies", "runtime_queue_overload, runtime_thermal_instability", @@ -300,6 +302,8 @@ def test_runtime_intelligence_ci_artifact_gate_fails_for_missing_lab_marker_cont "## Runtime Intelligence Risk Summary", "Runtime replay duration scope", "short 96-frame-class replay (96 frames)", + "source=entrypoint_requested_frames", + "scope_label=source=entrypoint_requested_frames", "Lab remains the final deployment decision owner.", "AIGuard runtime operation anomalies", "runtime_queue_overload, runtime_thermal_instability", @@ -443,6 +447,8 @@ def test_runtime_intelligence_ci_artifact_gate_fails_for_missing_contract_marker "## Runtime Intelligence Risk Summary", "Runtime replay duration scope", "short 96-frame-class replay (96 frames)", + "source=entrypoint_requested_frames", + "scope_label=source=entrypoint_requested_frames", "Lab remains the final deployment decision owner.", "AIGuard runtime operation anomalies", "runtime_queue_overload, runtime_thermal_instability", @@ -526,6 +532,8 @@ def test_runtime_intelligence_ci_artifact_gate_fails_for_missing_coverage_gap_ma "## Runtime Intelligence Risk Summary", "Runtime replay duration scope", "short 96-frame-class replay (96 frames)", + "source=entrypoint_requested_frames", + "scope_label=source=entrypoint_requested_frames", "Lab remains the final deployment decision owner.", "AIGuard runtime operation anomalies", "runtime_queue_overload, runtime_thermal_instability", @@ -594,6 +602,8 @@ def test_runtime_intelligence_ci_artifact_gate_fails_for_failed_deployment_risk( "## Runtime Intelligence Risk Summary", "Runtime replay duration scope", "short 96-frame-class replay (96 frames)", + "source=entrypoint_requested_frames", + "scope_label=source=entrypoint_requested_frames", "Lab remains the final deployment decision owner.", "AIGuard runtime operation anomalies", "runtime_queue_overload, runtime_thermal_instability", diff --git a/tests/test_runtime_intelligence_evidence_chain_smoke.py b/tests/test_runtime_intelligence_evidence_chain_smoke.py index 83ad482..aafe115 100644 --- a/tests/test_runtime_intelligence_evidence_chain_smoke.py +++ b/tests/test_runtime_intelligence_evidence_chain_smoke.py @@ -485,6 +485,8 @@ def test_compare_cmd_runtime_intelligence_chain_writes_markdown_and_html( assert "Runtime replay duration scope" in markdown assert "short 96-frame-class replay (96 frames)" in markdown assert "class=short_96_frame_class, frames=96" in markdown + assert "source=entrypoint_requested_frames" in markdown + assert "scope_label=source=entrypoint_requested_frames" in markdown assert "runtime_telemetry_field_gap" in markdown assert "coverage_missing_fields" in markdown assert "queue_depth" in markdown @@ -558,6 +560,8 @@ def test_compare_cmd_runtime_intelligence_chain_writes_markdown_and_html( assert "Runtime history seed run_config" in html assert "Runtime replay duration scope" in html assert "short 96-frame-class replay (96 frames)" in html + assert "source=entrypoint_requested_frames" in html + assert "scope_label=source=entrypoint_requested_frames" in html assert "AIGuard history seed run_config markers" in html assert "AIGuard run_config traceability evidence" in html assert "runtime_history_seed_run_config_traceability" in html diff --git a/tests/test_runtime_intelligence_smoke_script.py b/tests/test_runtime_intelligence_smoke_script.py index debc7fc..23f0f38 100644 --- a/tests/test_runtime_intelligence_smoke_script.py +++ b/tests/test_runtime_intelligence_smoke_script.py @@ -126,6 +126,8 @@ def test_runtime_intelligence_smoke_script_runs_artifact_chain(tmp_path): assert "Runtime replay duration scope" in runtime_summary assert "short 96-frame-class replay (96 frames)" in runtime_summary assert "class=short_96_frame_class, frames=96" in runtime_summary + assert "source=entrypoint_requested_frames" in runtime_summary + assert "scope_label=source=entrypoint_requested_frames" in runtime_summary alignment_summary = ( output_dir / "aiguard_edgeenv_handoff_alignment.md"