From c08c559ddd7daf36beb997db9dc4b294aa96dcf5 Mon Sep 17 00:00:00 2001 From: hyeokjun32 Date: Tue, 26 May 2026 03:10:56 +0900 Subject: [PATCH] feat: preserve runtime history seed run config --- README.ko.md | 2 +- README.md | 2 +- docs/agent_runtime_result_contract.md | 2 +- scripts/smoke_default.sh | 13 +++++++++++++ src/result_writer.cpp | 19 +++++++++++++++++++ tests/test_agent_runtime_result_contract.py | 8 ++++++++ tests/test_lab_result_schema.py | 14 ++++++++++++++ 7 files changed, 57 insertions(+), 3 deletions(-) diff --git a/README.ko.md b/README.ko.md index 3372795..7d25f1a 100644 --- a/README.ko.md +++ b/README.ko.md @@ -75,7 +75,7 @@ Runtime은 Forge `agent_manifest.json`을 선택적으로 읽어 기존 Lab-comp 이 기능은 reliable edge agent runtime 방향의 첫 Runtime-side contract입니다. `agent_id`, `task_id`, `agent_type`, priority, latency budget, queue wait, fallback usage, telemetry context를 기록하지만 기존 `result.json`의 top-level compare/report 필드는 변경하지 않습니다. -Runtime result JSON에는 `runtime_health_snapshot`, `runtime_error_classification`, `runtime_events`, `runtime_operation_summary`도 additive evidence로 기록됩니다. 이제 health snapshot은 backend availability, latency budget/deadline observation, tegrastats evidence availability와 `health_reason`을 함께 남기고, runtime events는 sequential `event_index`를 가진 lifecycle trace로 기록됩니다. `runtime_operation_summary`는 Lab/Orchestrator/AIGuard handoff용 compact index로 `risk_labels`, `evidence_gaps`, retryability, conservative `recommended_action`을 남기되 `decision_owner: lab`, `scheduler_owner: orchestrator`, `production_cancellation: false`를 유지합니다. `runtime_telemetry.coverage`는 expected / observed / missing telemetry fields를 기록하고 `comparability_owner: edgeenv`, `missing_telemetry_is_failure: false`를 명시합니다. `runtime_telemetry.history_seed`는 `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`를 유지하며 EdgeEnv telemetry history accumulation으로 넘길 수 있는 single-result replay point를 제공합니다. `--timeout-ms`는 latency timeout 관측 기준을 남기는 옵션이며, production request cancellation을 의미하지 않습니다. 실행이 `skipped`로 끝나면 Runtime은 `runtime_execution_skipped`, `retryable: true`, `retry_hint: check_backend_availability`를 남겨 Lab/Orchestrator가 failure handling evidence로 해석할 수 있게 합니다. +Runtime result JSON에는 `runtime_health_snapshot`, `runtime_error_classification`, `runtime_events`, `runtime_operation_summary`도 additive evidence로 기록됩니다. 이제 health snapshot은 backend availability, latency budget/deadline observation, tegrastats evidence availability와 `health_reason`을 함께 남기고, runtime events는 sequential `event_index`를 가진 lifecycle trace로 기록됩니다. `runtime_operation_summary`는 Lab/Orchestrator/AIGuard handoff용 compact index로 `risk_labels`, `evidence_gaps`, retryability, conservative `recommended_action`을 남기되 `decision_owner: lab`, `scheduler_owner: orchestrator`, `production_cancellation: false`를 유지합니다. `runtime_telemetry.coverage`는 expected / observed / missing telemetry fields를 기록하고 `comparability_owner: edgeenv`, `missing_telemetry_is_failure: false`를 명시합니다. `runtime_telemetry.history_seed`는 `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`를 유지하며 EdgeEnv telemetry history accumulation으로 넘길 수 있는 single-result replay point를 제공합니다. 또한 seed 안에 compact `run_config` snapshot을 함께 담아 EdgeEnv가 Runtime 전체 result를 다시 해석하지 않아도 replay/comparability context를 보존할 수 있게 합니다. `--timeout-ms`는 latency timeout 관측 기준을 남기는 옵션이며, production request cancellation을 의미하지 않습니다. 실행이 `skipped`로 끝나면 Runtime은 `runtime_execution_skipped`, `retryable: true`, `retry_hint: check_backend_availability`를 남겨 Lab/Orchestrator가 failure handling evidence로 해석할 수 있게 합니다. 예시: diff --git a/README.md b/README.md index 6213f6e..d20dd19 100644 --- a/README.md +++ b/README.md @@ -501,7 +501,7 @@ Runtime Intelligence boundary: - `collection_mode` starts as `single_result_export`; EdgeEnv owns telemetry history accumulation and comparability-first regression. - Missing device telemetry remains explicit in `missing_fields` instead of being fabricated. - `runtime_telemetry.coverage` records expected / observed / missing telemetry fields, with `comparability_owner: edgeenv` and `missing_telemetry_is_failure: false`. -- `runtime_telemetry.history_seed` uses `inferedge-runtime-telemetry-history-seed-v1`, keeps `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`, and exposes a `single_result_to_history` replay point that mirrors the Runtime telemetry timestamp, sequence id, latency values, operation flags, power mode, and telemetry source for EdgeEnv accumulation. +- `runtime_telemetry.history_seed` uses `inferedge-runtime-telemetry-history-seed-v1`, keeps `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`, and exposes a `single_result_to_history` replay point that mirrors the Runtime telemetry timestamp, sequence id, latency values, operation flags, power mode, and telemetry source for EdgeEnv accumulation. It also carries a compact `run_config` snapshot so EdgeEnv can keep replay/comparability evidence with the seed without turning Runtime into a registry. - EdgeEnv validates and preserves this seed as `runtime_telemetry_history_seed`; Lab may display the preserved marker in a Runtime Intelligence risk report, but Runtime does not own the registry or deployment decision. - Runtime exports telemetry evidence only. AIGuard may turn it into deterministic anomaly evidence, and Lab remains the deployment decision owner. diff --git a/docs/agent_runtime_result_contract.md b/docs/agent_runtime_result_contract.md index 4b47392..d2b9dd3 100644 --- a/docs/agent_runtime_result_contract.md +++ b/docs/agent_runtime_result_contract.md @@ -257,7 +257,7 @@ When provided, Runtime appends: - `runtime_operation_summary` is an additive handoff index for Lab/Orchestrator/AIGuard. It repeats the health reason, retryability, risk labels, evidence gaps, and a conservative `recommended_action` without making the deployment decision itself. - `runtime_operation_summary.decision_owner` must remain `lab`, and `scheduler_owner` must remain `orchestrator`. - `runtime_operation_summary.production_cancellation` is always `false`; Runtime records observations only. -- `runtime_telemetry.history_seed` is an additive `inferedge-runtime-telemetry-history-seed-v1` block for EdgeEnv telemetry history/replay. It keeps `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`, `replay_scope: single_result_to_history`, and one single-result telemetry point so downstream tools can accumulate history without Runtime becoming a telemetry store. The replay point mirrors Runtime telemetry timestamp, sequence id, latency values, operation flags, power mode, and telemetry source. +- `runtime_telemetry.history_seed` is an additive `inferedge-runtime-telemetry-history-seed-v1` block for EdgeEnv telemetry history/replay. It keeps `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`, `replay_scope: single_result_to_history`, and one single-result telemetry point so downstream tools can accumulate history without Runtime becoming a telemetry store. The replay point mirrors Runtime telemetry timestamp, sequence id, latency values, operation flags, power mode, and telemetry source. The seed also carries a compact `run_config` snapshot for replay/comparability context. - Runtime does not claim production request cancellation. `--timeout-ms` is an observation threshold: if a successful benchmark mean latency exceeds the configured threshold, Runtime records `timeout_observed: true`, `runtime_error_classification.category: runtime_timeout_observed`, and `retryable: true` for downstream reliability reporting. - If execution is skipped because Runtime cannot complete the configured benchmark, Runtime records `runtime_error_classification.category: runtime_execution_skipped`, `severity: warning`, `retryable: true`, and `retry_hint: check_backend_availability`. This is failure-handling evidence for Lab/Orchestrator reporting, not a production worker retry loop. - Without `--timeout-ms`, results record `timeout_policy: not_configured`, `timeout_budget_ms: null`, and `timeout_observed: false`. diff --git a/scripts/smoke_default.sh b/scripts/smoke_default.sh index 6d888db..d12b000 100755 --- a/scripts/smoke_default.sh +++ b/scripts/smoke_default.sh @@ -121,6 +121,14 @@ assert "compare_key" in history_seed["recommended_registry_key_fields"], history assert "latency.mean_ms" in history_seed["time_series_fields"], history_seed assert history_seed["source_result"]["compare_key"] == data["compare_key"], history_seed assert history_seed["source_result"]["backend_key"] == data["backend_key"], history_seed +assert history_seed["run_config"]["batch"] == data["run_config"]["batch"], history_seed +assert history_seed["run_config"]["height"] == data["run_config"]["height"], history_seed +assert history_seed["run_config"]["width"] == data["run_config"]["width"], history_seed +assert history_seed["run_config"]["warmup"] == data["run_config"]["warmup"], history_seed +assert history_seed["run_config"]["runs"] == data["run_config"]["runs"], history_seed +assert history_seed["run_config"]["timeout_ms"] == data["run_config"]["timeout_ms"], history_seed +assert history_seed["run_config"]["input_mode"] == health["input_mode"], history_seed +assert history_seed["run_config"]["power_mode"] == telemetry["power_mode"], history_seed assert history_seed["points"][0]["telemetry_timestamp"] == telemetry["telemetry_timestamp"], history_seed assert history_seed["points"][0]["execution_sequence_id"] == telemetry["execution_sequence_id"], history_seed assert history_seed["points"][0]["mean_ms"] == telemetry["latency"]["mean_ms"], history_seed @@ -206,6 +214,11 @@ assert history_seed["source_result_schema_version"] == telemetry["source_result_ assert history_seed["source_telemetry_schema_version"] == telemetry["schema_version"], history_seed assert history_seed["replay_scope"] == "single_result_to_history", history_seed assert history_seed["source_result"]["compare_key"] == data["compare_key"], history_seed +assert history_seed["run_config"]["batch"] == data["run_config"]["batch"], history_seed +assert history_seed["run_config"]["runs"] == data["run_config"]["runs"], history_seed +assert history_seed["run_config"]["timeout_ms"] == data["run_config"]["timeout_ms"], history_seed +assert history_seed["run_config"]["input_mode"] == health["input_mode"], history_seed +assert history_seed["run_config"]["power_mode"] == telemetry["power_mode"], history_seed assert history_seed["points"][0]["p99_ms"] == telemetry["latency"]["p99_ms"], history_seed assert history_seed["points"][0]["deadline_missed"] == telemetry["operation"]["deadline_missed"], history_seed assert "runtime_telemetry_recorded" in events, events diff --git a/src/result_writer.cpp b/src/result_writer.cpp index 2fe1386..a3ccd63 100644 --- a/src/result_writer.cpp +++ b/src/result_writer.cpp @@ -591,6 +591,25 @@ void write_runtime_telemetry_history_seed_json( << indent << " \"precision\": " << json_string(precision) << ",\n" << indent << " \"power_mode\": " << json_string(config.power_mode) << "\n" << indent << " },\n" + << indent << " \"run_config\": {\n" + << indent << " \"batch\": " << config.batch << ",\n" + << indent << " \"height\": " << config.height << ",\n" + << indent << " \"width\": " << config.width << ",\n" + << indent << " \"warmup\": " << config.warmup << ",\n" + << indent << " \"runs\": " << config.runs << ",\n" + << indent << " \"timeout_ms\": "; + if (config.timeout_ms > 0) { + output << config.timeout_ms; + } else { + output << "null"; + } + output + << ",\n" + << indent << " \"input_mode\": " << json_string(config.input_mode()) << ",\n" + << indent << " \"input_preprocess\": " << json_string(config.input_preprocess()) << ",\n" + << indent << " \"power_mode\": " << json_string(config.power_mode) << ",\n" + << indent << " \"jetson_clocks\": " << json_string(config.jetson_clocks) << "\n" + << indent << " },\n" << indent << " \"recommended_registry_key_fields\": "; write_string_array_json(output, { "compare_key", diff --git a/tests/test_agent_runtime_result_contract.py b/tests/test_agent_runtime_result_contract.py index 6c405eb..b6f6e81 100644 --- a/tests/test_agent_runtime_result_contract.py +++ b/tests/test_agent_runtime_result_contract.py @@ -235,6 +235,14 @@ def test_runtime_output_records_optional_agent_block_when_manifest_is_provided(s ) self.assertEqual(history_seed["source_result"]["precision"], result["precision"]) self.assertEqual(history_seed["source_result"]["power_mode"], result["run_config"]["power_mode"]) + self.assertEqual(history_seed["run_config"]["batch"], result["run_config"]["batch"]) + self.assertEqual(history_seed["run_config"]["height"], result["run_config"]["height"]) + self.assertEqual(history_seed["run_config"]["width"], result["run_config"]["width"]) + self.assertEqual(history_seed["run_config"]["warmup"], result["run_config"]["warmup"]) + self.assertEqual(history_seed["run_config"]["runs"], result["run_config"]["runs"]) + self.assertEqual(history_seed["run_config"]["timeout_ms"], result["run_config"]["timeout_ms"]) + self.assertEqual(history_seed["run_config"]["input_mode"], result["runtime_health_snapshot"]["input_mode"]) + self.assertEqual(history_seed["run_config"]["power_mode"], telemetry["power_mode"]) point = history_seed["points"][0] self.assertEqual(point["execution_sequence_id"], telemetry["execution_sequence_id"]) self.assertEqual(point["telemetry_timestamp"], telemetry["telemetry_timestamp"]) diff --git a/tests/test_lab_result_schema.py b/tests/test_lab_result_schema.py index ee8d50b..de3ed5e 100644 --- a/tests/test_lab_result_schema.py +++ b/tests/test_lab_result_schema.py @@ -440,6 +440,20 @@ def validate_runtime_telemetry_history_seed(history_seed: dict, telemetry: dict) if not isinstance(source_result.get(field), str): raise AssertionError(f"runtime_telemetry.history_seed.source_result.{field} must be a string") + run_config = history_seed.get("run_config") + if not isinstance(run_config, dict): + raise AssertionError("runtime_telemetry.history_seed.run_config must be an object") + for field in ("batch", "height", "width", "warmup", "runs"): + if isinstance(run_config.get(field), bool) or not isinstance(run_config.get(field), int): + raise AssertionError(f"runtime_telemetry.history_seed.run_config.{field} must be an integer") + if run_config.get("timeout_ms") is not None and ( + isinstance(run_config.get("timeout_ms"), bool) or not isinstance(run_config.get("timeout_ms"), int) + ): + raise AssertionError("runtime_telemetry.history_seed.run_config.timeout_ms must be an integer or null") + for field in ("input_mode", "input_preprocess", "power_mode", "jetson_clocks"): + if not isinstance(run_config.get(field), str): + raise AssertionError(f"runtime_telemetry.history_seed.run_config.{field} must be a string") + points = history_seed.get("points") if not isinstance(points, list) or not points: raise AssertionError("runtime_telemetry.history_seed.points must be a non-empty array")