From e3722bdde86a5eac58cdd66568e9bb995f7a4502 Mon Sep 17 00:00:00 2001 From: hyeokjun32 Date: Mon, 1 Jun 2026 02:16:22 +0900 Subject: [PATCH] feat: surface operation marker row --- README.md | 2 + .../runtime_intelligence_gitlab_artifacts.md | 2 +- .../edgeenv_runtime_regression_lab_handoff.md | 1 + inferedgelab/report/runtime_intelligence.py | 97 +++++++++++++++++++ ...ck_runtime_intelligence_artifact_bundle.py | 10 ++ ...check_runtime_intelligence_ci_artifacts.py | 2 + tests/test_compare_service.py | 4 + tests/test_report_generators.py | 10 ++ .../test_runtime_intelligence_ci_template.py | 10 ++ ...ntime_intelligence_evidence_chain_smoke.py | 6 ++ 10 files changed, 143 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0f21e19..033f798 100644 --- a/README.md +++ b/README.md @@ -501,6 +501,7 @@ When `--with-guard` is used with EdgeEnv evidence, Lab preserves deterministic A If AIGuard preserves EdgeEnv/Orchestrator `candidate_context.producer` lineage, Lab shows the device-local producer source, task stage, event count, and supplemental role as traceability evidence. Lab also surfaces the Orchestrator-declared downstream guard alignment marker, including `producer_lineage_evidence_type=edgeenv_orchestrator_producer_lineage`, so reviewers can see which deterministic AIGuard evidence type was expected without making Orchestrator or AIGuard the final decision owner. The Runtime Intelligence gates also require `edgeenv_orchestrator_producer_lineage` and `runtime_history_seed_run_config_traceability` so these handoffs cannot disappear silently from the Lab-owned report. If EdgeEnv preserves an Orchestrator `operation_risk_summary`, Lab shows the compact queue-pressure, max-pressure task, worker-health, and producer/device-local event markers as navigation context in the Runtime Intelligence Risk Summary. These markers help reviewers find the relevant operation evidence, but they do not become EdgeEnv regression deltas, comparability fields, or a deployment decision override. +Lab also renders a separate `Orchestrator queue/deadline/fallback markers` row when those compact counters are present. That row keeps `queue_pressure_reason`, queue depth, deadline miss count, and fallback count together so reviewers can spot operation pressure before reading the detailed task rollup. When EdgeEnv/Orchestrator context includes reviewer-facing duration metadata, Lab renders a `Runtime replay duration scope` row with `duration_label`, @@ -537,6 +538,7 @@ Markdown and HTML reports include a Runtime Intelligence Risk Summary that conne - telemetry replay gaps - Runtime history seed and run_config traceability - Orchestrator operation risk summary markers +- compact queue/deadline/fallback operation markers - Lab EdgeEnv preservation context markers - device-local producer lineage handoff - Orchestrator-declared downstream guard alignment diff --git a/docs/ci/runtime_intelligence_gitlab_artifacts.md b/docs/ci/runtime_intelligence_gitlab_artifacts.md index f72e5b4..c65f2c6 100644 --- a/docs/ci/runtime_intelligence_gitlab_artifacts.md +++ b/docs/ci/runtime_intelligence_gitlab_artifacts.md @@ -186,7 +186,7 @@ EdgeEnv handoff summary and AIGuard deterministic evidence agree on producer-lineage guard-alignment run IDs. This keeps the cross-repo marker check file-based and does not make AIGuard a deployment decision owner. -The artifact gate is implemented by `scripts/check_runtime_intelligence_artifact_bundle.py`. It checks the generated Markdown / HTML report for the required Runtime Intelligence rows, including Lab ownership, EdgeEnv comparability, telemetry coverage-gap markers, Runtime replay duration scope with `source=entrypoint_requested_frames` traceability, Orchestrator operation feed context, Orchestrator task event rollup, Lab EdgeEnv preservation context, Jetson/device-local preservation identity and detail labels, Orchestrator `operation_risk_summary` navigation context, AIGuard runtime operation anomalies, AIGuard `edgeenv_orchestrator_operation_risk_summary` evidence, AIGuard `edgeenv_orchestrator_task_event_rollup` evidence, remote dispatch starter event summary, `Remote fallback starter evidence`, `edgeenv_orchestrator_producer_lineage`, `runtime_history_seed_run_config_traceability`, `remote_execution_recovered_by_fallback`, and triggered deployment review rules. +The artifact gate is implemented by `scripts/check_runtime_intelligence_artifact_bundle.py`. It checks the generated Markdown / HTML report for the required Runtime Intelligence rows, including Lab ownership, EdgeEnv comparability, telemetry coverage-gap markers, Runtime replay duration scope with `source=entrypoint_requested_frames` traceability, Orchestrator operation feed context, compact queue/deadline/fallback operation markers, Orchestrator task event rollup, Lab EdgeEnv preservation context, Jetson/device-local preservation identity and detail labels, Orchestrator `operation_risk_summary` navigation context, AIGuard runtime operation anomalies, AIGuard `edgeenv_orchestrator_operation_risk_summary` evidence, AIGuard `edgeenv_orchestrator_task_event_rollup` evidence, remote dispatch starter event summary, `Remote fallback starter evidence`, `edgeenv_orchestrator_producer_lineage`, `runtime_history_seed_run_config_traceability`, `remote_execution_recovered_by_fallback`, and triggered deployment review rules. When that report gate passes, its summary now emits a `Validated Duration Traceability` section with `duration_handoff_alignment`, `duration_source: source=entrypoint_requested_frames`, diff --git a/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md b/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md index c7829bc..d39e3d6 100644 --- a/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md +++ b/docs/portfolio/edgeenv_runtime_regression_lab_handoff.md @@ -188,6 +188,7 @@ Expected Lab behavior: - Additional Lab test fixtures under `tests/fixtures/edgeenv_regression/` mirror EdgeEnv replay examples for candidate telemetry gaps and execution sequence inversion. These fixture smokes verify that replay warnings become Lab-owned report context without making Lab recompute EdgeEnv comparability. - Markdown/HTML reports include a `Runtime Intelligence Risk Summary` that summarizes EdgeEnv comparability/regression, telemetry replay gaps, Runtime history seed/run_config traceability, AIGuard deterministic evidence, and the Lab-owned deployment decision in one reviewer-facing table. - When EdgeEnv includes preserved Orchestrator feed context, the `Runtime Intelligence Risk Summary` surfaces queue, thermal, throttling, memory, fallback context, and compact `operation_risk_summary` markers as supplemental runtime evidence. +- Lab now keeps queue pressure, queue depth, deadline miss count, and fallback count together in an `Orchestrator queue/deadline/fallback markers` row when those compact counters are present. This is reviewer navigation context, not a production scheduler state or deployment decision override. - When EdgeEnv includes replay duration metadata, the same Risk Summary surfaces `Runtime replay duration scope` with `duration_label`, `duration_class`, frame count, and optional `duration_source` / `duration_scope_label` traceability as reviewer navigation context. This does not change EdgeEnv comparability or Lab deployment policy. - The report artifact gate summary also emits a `Validated Duration Traceability` section so reviewers can see `duration_handoff_alignment`, diff --git a/inferedgelab/report/runtime_intelligence.py b/inferedgelab/report/runtime_intelligence.py index 8756f66..272729e 100644 --- a/inferedgelab/report/runtime_intelligence.py +++ b/inferedgelab/report/runtime_intelligence.py @@ -226,6 +226,18 @@ def _append_telemetry_context_rows( ) ) + operation_marker_labels = _orchestrator_queue_deadline_fallback_labels( + telemetry_context + ) + if operation_marker_labels: + rows.append( + ( + "Orchestrator queue/deadline/fallback markers", + "; ".join(operation_marker_labels), + "Compact queue, deadline, and fallback markers point reviewers to operation evidence without changing Lab deployment policy.", + ) + ) + preservation_labels = _edgeenv_preservation_run_labels(telemetry_context) if preservation_labels: preservation_detail_labels = _edgeenv_preservation_detail_labels( @@ -407,6 +419,91 @@ def _operation_risk_summary_parts(summary: dict[str, Any]) -> list[str]: return parts +def _orchestrator_queue_deadline_fallback_labels( + context: dict[str, Any], +) -> list[str]: + labels: list[str] = [] + for run_label in ("baseline", "candidate"): + run_context = context.get(run_label) + if not isinstance(run_context, dict): + continue + operation_context = run_context.get("orchestrator_operation_context") + if not isinstance(operation_context, dict): + continue + + candidate_context = operation_context.get("candidate_context") + if not isinstance(candidate_context, dict): + candidate_context = {} + operation = candidate_context.get("operation") + if not isinstance(operation, dict): + operation = {} + operation_summary = operation_context.get("operation_risk_summary") + if not isinstance(operation_summary, dict): + operation_summary = {} + queue_state = operation_context.get("queue_state_summary") + if not isinstance(queue_state, dict): + queue_state = {} + runtime_event_summary = operation_context.get("runtime_event_summary") + if not isinstance(runtime_event_summary, dict): + runtime_event_summary = {} + + parts: list[str] = [] + queue_pressure = _first_present( + operation_summary.get("queue_pressure_reason"), + queue_state.get("queue_pressure_reason"), + operation.get("queue_pressure_reason"), + candidate_context.get("queue_pressure_reason"), + ) + if queue_pressure is not None: + parts.append(f"queue_pressure_reason={queue_pressure}") + + max_total_queue_depth = _first_present( + operation_summary.get("max_total_queue_depth"), + queue_state.get("max_total_queue_depth"), + operation.get("max_total_queue_depth"), + candidate_context.get("max_total_queue_depth"), + ) + if max_total_queue_depth is not None: + parts.append( + "max_total_queue_depth=" + f"{_format_compact_value(max_total_queue_depth)}" + ) + else: + queue_depth = _first_present( + operation.get("queue_depth"), + candidate_context.get("queue_depth"), + run_context.get("queue_depth"), + ) + if queue_depth is not None: + parts.append(f"queue_depth={_format_compact_value(queue_depth)}") + + deadline_missed_count = _first_present( + operation_summary.get("deadline_missed_count"), + operation.get("deadline_missed_count"), + runtime_event_summary.get("deadline_missed_count"), + candidate_context.get("deadline_missed_count"), + ) + if deadline_missed_count is not None: + parts.append( + "deadline_missed_count=" + f"{_format_compact_value(deadline_missed_count)}" + ) + + fallback_count = _first_present( + operation_summary.get("fallback_count"), + operation.get("fallback_count"), + runtime_event_summary.get("fallback_count"), + runtime_event_summary.get("fallback_decision_count"), + candidate_context.get("fallback_count"), + ) + if fallback_count is not None: + parts.append(f"fallback_count={_format_compact_value(fallback_count)}") + + if parts: + labels.append(f"{run_label}: " + ", ".join(parts)) + return labels + + def _runtime_replay_scope_labels(context: dict[str, Any]) -> list[str]: labels: list[str] = [] for run_label in ("baseline", "candidate"): diff --git a/scripts/check_runtime_intelligence_artifact_bundle.py b/scripts/check_runtime_intelligence_artifact_bundle.py index f4674cc..201514c 100644 --- a/scripts/check_runtime_intelligence_artifact_bundle.py +++ b/scripts/check_runtime_intelligence_artifact_bundle.py @@ -29,6 +29,10 @@ "| Orchestrator operation risk summary | candidate: " "queue=queue_backlog_threshold_exceeded" ), + "orchestrator_queue_deadline_fallback_markers": ( + "| Orchestrator queue/deadline/fallback markers | candidate: " + "queue_pressure_reason=queue_backlog_threshold_exceeded" + ), "runtime_replay_duration_scope": ( "| Runtime replay duration scope | candidate: " "scope_label=source=entrypoint_requested_frames" @@ -150,6 +154,12 @@ "remote_fallback_lab_context_marker": "lab=Remote fallback starter evidence", "aiguard_orchestrator_handoff": "AIGuard Orchestrator context handoff", "orchestrator_operation_risk_summary": "Orchestrator operation risk summary", + "orchestrator_queue_deadline_fallback_markers": ( + "Orchestrator queue/deadline/fallback markers" + ), + "orchestrator_queue_deadline_fallback_values": ( + "queue_pressure_reason=queue_backlog_threshold_exceeded" + ), "runtime_replay_duration_scope": "Runtime replay duration scope", "runtime_replay_duration_label": "short 96-frame-class replay (96 frames)", "runtime_replay_duration_source": "source=entrypoint_requested_frames", diff --git a/scripts/check_runtime_intelligence_ci_artifacts.py b/scripts/check_runtime_intelligence_ci_artifacts.py index 45ca01f..a2c393e 100644 --- a/scripts/check_runtime_intelligence_ci_artifacts.py +++ b/scripts/check_runtime_intelligence_ci_artifacts.py @@ -181,6 +181,8 @@ def _validate_runtime_report(path: Path, errors: list[str]) -> None: "lab_preservation=present", "edgeenv_orchestrator_task_event_rollup", "Runtime telemetry coverage gaps", + "Orchestrator queue/deadline/fallback markers", + "queue_pressure_reason=queue_backlog_threshold_exceeded", "AIGuard producer-lineage guard alignment", "edgeenv_orchestrator_producer_lineage", "AIGuard run_config traceability evidence", diff --git a/tests/test_compare_service.py b/tests/test_compare_service.py index 6623157..f6715b6 100644 --- a/tests/test_compare_service.py +++ b/tests/test_compare_service.py @@ -805,6 +805,10 @@ def fake_analyze_edgeenv_regression_report(report): assert "guard_warning_review" in bundle["deployment_decision"]["triggered_rules"] assert "| Orchestrator operation feed context | 1 |" in bundle["markdown"] assert "| Orchestrator context attached runs | candidate |" in bundle["markdown"] + assert ( + "| Orchestrator queue/deadline/fallback markers | candidate: " + "queue_depth=7, deadline_missed_count=2, fallback_count=1 |" + ) in bundle["markdown"] assert "| Orchestrator task event rollup | candidate: " in bundle["markdown"] assert "vision_agent(delay=1,miss=1,max_delay_cycles=3,max_wait_ms=15)" in bundle[ "markdown" diff --git a/tests/test_report_generators.py b/tests/test_report_generators.py index b4186d9..2d599ca 100644 --- a/tests/test_report_generators.py +++ b/tests/test_report_generators.py @@ -847,6 +847,11 @@ def test_generate_compare_markdown_summarizes_orchestrator_context_risk(): "health=worker_health_degraded, device_local_events=15, " "producer_events=7, degraded_workers=vision_agent |" ) in text + assert ( + "| Orchestrator queue/deadline/fallback markers | candidate: " + "queue_pressure_reason=queue_backlog_threshold_exceeded, " + "queue_depth=7, deadline_missed_count=2, fallback_count=1 |" + ) in text assert ( "| Runtime replay duration scope | candidate: " "scope_label=source=entrypoint_requested_frames, " @@ -923,6 +928,11 @@ def test_generate_compare_html_summarizes_operation_risk_summary(): assert "entrypoint_requested_frames" in html assert "scope_label=source=entrypoint_requested_frames" in html assert "Orchestrator operation risk summary" in html + assert "Orchestrator queue/deadline/fallback markers" in html + assert ( + "queue_pressure_reason=queue_backlog_threshold_exceeded, " + "queue_depth=7, deadline_missed_count=2, fallback_count=1" + ) in html assert "Orchestrator task event rollup" in html assert "vision_agent(delay=1,miss=1,max_delay_cycles=3,max_wait_ms=15)" in html assert "queue=queue_backlog_threshold_exceeded" in html diff --git a/tests/test_runtime_intelligence_ci_template.py b/tests/test_runtime_intelligence_ci_template.py index 9ea51c8..398d5d1 100644 --- a/tests/test_runtime_intelligence_ci_template.py +++ b/tests/test_runtime_intelligence_ci_template.py @@ -120,6 +120,8 @@ def test_runtime_intelligence_ci_artifact_gate_passes_for_expected_outputs(tmp_p "lab_report_preservation_context_present=True", "lab_preservation=present", "Runtime telemetry coverage gaps", + "Orchestrator queue/deadline/fallback markers", + "queue_pressure_reason=queue_backlog_threshold_exceeded", "AIGuard producer-lineage guard alignment", "edgeenv_orchestrator_producer_lineage", "AIGuard run_config traceability evidence", @@ -357,6 +359,8 @@ def test_runtime_intelligence_ci_artifact_gate_fails_for_missing_lab_marker_cont "lab_report_preservation_context_present=True", "lab_preservation=present", "Runtime telemetry coverage gaps", + "Orchestrator queue/deadline/fallback markers", + "queue_pressure_reason=queue_backlog_threshold_exceeded", "AIGuard producer-lineage guard alignment", "edgeenv_orchestrator_producer_lineage", "AIGuard run_config traceability evidence", @@ -502,6 +506,8 @@ def test_runtime_intelligence_ci_artifact_gate_fails_for_missing_contract_marker "lab_report_preservation_context_present=True", "lab_preservation=present", "Runtime telemetry coverage gaps", + "Orchestrator queue/deadline/fallback markers", + "queue_pressure_reason=queue_backlog_threshold_exceeded", "AIGuard producer-lineage guard alignment", "edgeenv_orchestrator_producer_lineage", "AIGuard run_config traceability evidence", @@ -587,6 +593,8 @@ def test_runtime_intelligence_ci_artifact_gate_fails_for_missing_coverage_gap_ma "lab_report_preservation_context_present=True", "lab_preservation=present", "Runtime telemetry coverage gaps", + "Orchestrator queue/deadline/fallback markers", + "queue_pressure_reason=queue_backlog_threshold_exceeded", "AIGuard producer-lineage guard alignment", "edgeenv_orchestrator_producer_lineage", "AIGuard run_config traceability evidence", @@ -657,6 +665,8 @@ def test_runtime_intelligence_ci_artifact_gate_fails_for_failed_deployment_risk( "lab_report_preservation_context_present=True", "lab_preservation=present", "Runtime telemetry coverage gaps", + "Orchestrator queue/deadline/fallback markers", + "queue_pressure_reason=queue_backlog_threshold_exceeded", "AIGuard producer-lineage guard alignment", "edgeenv_orchestrator_producer_lineage", "AIGuard run_config traceability evidence", diff --git a/tests/test_runtime_intelligence_evidence_chain_smoke.py b/tests/test_runtime_intelligence_evidence_chain_smoke.py index aafe115..5035851 100644 --- a/tests/test_runtime_intelligence_evidence_chain_smoke.py +++ b/tests/test_runtime_intelligence_evidence_chain_smoke.py @@ -381,6 +381,10 @@ def test_runtime_intelligence_chain_smoke_ingests_precomputed_guard_artifact(): assert "| Runtime telemetry history seed | 2 |" in bundle["markdown"] assert "| Runtime history seed run_config | 2 |" in bundle["markdown"] assert "| Orchestrator context attached runs | candidate |" in bundle["markdown"] + assert ( + "| Orchestrator queue/deadline/fallback markers | candidate: " + "queue_pressure_reason=queue_backlog_threshold_exceeded" + ) in bundle["markdown"] assert ( "| Jetson/device-local EdgeEnv preservation run | candidate: " "identity=jetson_device_local_preservation, path=device_local_starter, " @@ -537,6 +541,8 @@ def test_compare_cmd_runtime_intelligence_chain_writes_markdown_and_html( ) in markdown assert "Runtime Intelligence Risk Summary" in html assert "Orchestrator operation risk summary" in html + assert "Orchestrator queue/deadline/fallback markers" in html + assert "queue_pressure_reason=queue_backlog_threshold_exceeded" in html assert "Jetson/device-local EdgeEnv preservation run" in html assert "Jetson/device-local EdgeEnv preservation details" in html assert "Lab EdgeEnv preservation context" in html