From c08c559ddd7daf36beb997db9dc4b294aa96dcf5 Mon Sep 17 00:00:00 2001
From: hyeokjun32 <ksjm0417@naver.com>
Date: Tue, 26 May 2026 03:10:56 +0900
Subject: [PATCH] feat: preserve runtime history seed run config

---
 README.ko.md                                |  2 +-
 README.md                                   |  2 +-
 docs/agent_runtime_result_contract.md       |  2 +-
 scripts/smoke_default.sh                    | 13 +++++++++++++
 src/result_writer.cpp                       | 19 +++++++++++++++++++
 tests/test_agent_runtime_result_contract.py |  8 ++++++++
 tests/test_lab_result_schema.py             | 14 ++++++++++++++
 7 files changed, 57 insertions(+), 3 deletions(-)

diff --git a/README.ko.md b/README.ko.md
index 3372795..7d25f1a 100644
--- a/README.ko.md
+++ b/README.ko.md
@@ -75,7 +75,7 @@ Runtime은 Forge `agent_manifest.json`을 선택적으로 읽어 기존 Lab-comp
 
 이 기능은 reliable edge agent runtime 방향의 첫 Runtime-side contract입니다. `agent_id`, `task_id`, `agent_type`, priority, latency budget, queue wait, fallback usage, telemetry context를 기록하지만 기존 `result.json`의 top-level compare/report 필드는 변경하지 않습니다.
 
-Runtime result JSON에는 `runtime_health_snapshot`, `runtime_error_classification`, `runtime_events`, `runtime_operation_summary`도 additive evidence로 기록됩니다. 이제 health snapshot은 backend availability, latency budget/deadline observation, tegrastats evidence availability와 `health_reason`을 함께 남기고, runtime events는 sequential `event_index`를 가진 lifecycle trace로 기록됩니다. `runtime_operation_summary`는 Lab/Orchestrator/AIGuard handoff용 compact index로 `risk_labels`, `evidence_gaps`, retryability, conservative `recommended_action`을 남기되 `decision_owner: lab`, `scheduler_owner: orchestrator`, `production_cancellation: false`를 유지합니다. `runtime_telemetry.coverage`는 expected / observed / missing telemetry fields를 기록하고 `comparability_owner: edgeenv`, `missing_telemetry_is_failure: false`를 명시합니다. `runtime_telemetry.history_seed`는 `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`를 유지하며 EdgeEnv telemetry history accumulation으로 넘길 수 있는 single-result replay point를 제공합니다. `--timeout-ms`는 latency timeout 관측 기준을 남기는 옵션이며, production request cancellation을 의미하지 않습니다. 실행이 `skipped`로 끝나면 Runtime은 `runtime_execution_skipped`, `retryable: true`, `retry_hint: check_backend_availability`를 남겨 Lab/Orchestrator가 failure handling evidence로 해석할 수 있게 합니다.
+Runtime result JSON에는 `runtime_health_snapshot`, `runtime_error_classification`, `runtime_events`, `runtime_operation_summary`도 additive evidence로 기록됩니다. 이제 health snapshot은 backend availability, latency budget/deadline observation, tegrastats evidence availability와 `health_reason`을 함께 남기고, runtime events는 sequential `event_index`를 가진 lifecycle trace로 기록됩니다. `runtime_operation_summary`는 Lab/Orchestrator/AIGuard handoff용 compact index로 `risk_labels`, `evidence_gaps`, retryability, conservative `recommended_action`을 남기되 `decision_owner: lab`, `scheduler_owner: orchestrator`, `production_cancellation: false`를 유지합니다. `runtime_telemetry.coverage`는 expected / observed / missing telemetry fields를 기록하고 `comparability_owner: edgeenv`, `missing_telemetry_is_failure: false`를 명시합니다. `runtime_telemetry.history_seed`는 `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`를 유지하며 EdgeEnv telemetry history accumulation으로 넘길 수 있는 single-result replay point를 제공합니다. 또한 seed 안에 compact `run_config` snapshot을 함께 담아 EdgeEnv가 Runtime 전체 result를 다시 해석하지 않아도 replay/comparability context를 보존할 수 있게 합니다. `--timeout-ms`는 latency timeout 관측 기준을 남기는 옵션이며, production request cancellation을 의미하지 않습니다. 실행이 `skipped`로 끝나면 Runtime은 `runtime_execution_skipped`, `retryable: true`, `retry_hint: check_backend_availability`를 남겨 Lab/Orchestrator가 failure handling evidence로 해석할 수 있게 합니다.
 
 예시:
 
diff --git a/README.md b/README.md
index 6213f6e..d20dd19 100644
--- a/README.md
+++ b/README.md
@@ -501,7 +501,7 @@ Runtime Intelligence boundary:
 - `collection_mode` starts as `single_result_export`; EdgeEnv owns telemetry history accumulation and comparability-first regression.
 - Missing device telemetry remains explicit in `missing_fields` instead of being fabricated.
 - `runtime_telemetry.coverage` records expected / observed / missing telemetry fields, with `comparability_owner: edgeenv` and `missing_telemetry_is_failure: false`.
-- `runtime_telemetry.history_seed` uses `inferedge-runtime-telemetry-history-seed-v1`, keeps `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`, and exposes a `single_result_to_history` replay point that mirrors the Runtime telemetry timestamp, sequence id, latency values, operation flags, power mode, and telemetry source for EdgeEnv accumulation.
+- `runtime_telemetry.history_seed` uses `inferedge-runtime-telemetry-history-seed-v1`, keeps `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`, and exposes a `single_result_to_history` replay point that mirrors the Runtime telemetry timestamp, sequence id, latency values, operation flags, power mode, and telemetry source for EdgeEnv accumulation. It also carries a compact `run_config` snapshot so EdgeEnv can keep replay/comparability evidence with the seed without turning Runtime into a registry.
 - EdgeEnv validates and preserves this seed as `runtime_telemetry_history_seed`; Lab may display the preserved marker in a Runtime Intelligence risk report, but Runtime does not own the registry or deployment decision.
 - Runtime exports telemetry evidence only. AIGuard may turn it into deterministic anomaly evidence, and Lab remains the deployment decision owner.
 
diff --git a/docs/agent_runtime_result_contract.md b/docs/agent_runtime_result_contract.md
index 4b47392..d2b9dd3 100644
--- a/docs/agent_runtime_result_contract.md
+++ b/docs/agent_runtime_result_contract.md
@@ -257,7 +257,7 @@ When provided, Runtime appends:
 - `runtime_operation_summary` is an additive handoff index for Lab/Orchestrator/AIGuard. It repeats the health reason, retryability, risk labels, evidence gaps, and a conservative `recommended_action` without making the deployment decision itself.
 - `runtime_operation_summary.decision_owner` must remain `lab`, and `scheduler_owner` must remain `orchestrator`.
 - `runtime_operation_summary.production_cancellation` is always `false`; Runtime records observations only.
-- `runtime_telemetry.history_seed` is an additive `inferedge-runtime-telemetry-history-seed-v1` block for EdgeEnv telemetry history/replay. It keeps `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`, `replay_scope: single_result_to_history`, and one single-result telemetry point so downstream tools can accumulate history without Runtime becoming a telemetry store. The replay point mirrors Runtime telemetry timestamp, sequence id, latency values, operation flags, power mode, and telemetry source.
+- `runtime_telemetry.history_seed` is an additive `inferedge-runtime-telemetry-history-seed-v1` block for EdgeEnv telemetry history/replay. It keeps `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`, `replay_scope: single_result_to_history`, and one single-result telemetry point so downstream tools can accumulate history without Runtime becoming a telemetry store. The replay point mirrors Runtime telemetry timestamp, sequence id, latency values, operation flags, power mode, and telemetry source. The seed also carries a compact `run_config` snapshot (`batch`, `height`, `width`, `warmup`, `runs`, `timeout_ms`, `input_mode`, `input_preprocess`, `power_mode`, `jetson_clocks`) for replay/comparability context; `timeout_ms` is `null` when no positive timeout is configured.
 - Runtime does not claim production request cancellation. `--timeout-ms` is an observation threshold: if a successful benchmark mean latency exceeds the configured threshold, Runtime records `timeout_observed: true`, `runtime_error_classification.category: runtime_timeout_observed`, and `retryable: true` for downstream reliability reporting.
 - If execution is skipped because Runtime cannot complete the configured benchmark, Runtime records `runtime_error_classification.category: runtime_execution_skipped`, `severity: warning`, `retryable: true`, and `retry_hint: check_backend_availability`. This is failure-handling evidence for Lab/Orchestrator reporting, not a production worker retry loop.
 - Without `--timeout-ms`, results record `timeout_policy: not_configured`, `timeout_budget_ms: null`, and `timeout_observed: false`.
diff --git a/scripts/smoke_default.sh b/scripts/smoke_default.sh
index 6d888db..d12b000 100755
--- a/scripts/smoke_default.sh
+++ b/scripts/smoke_default.sh
@@ -121,6 +121,14 @@ assert "compare_key" in history_seed["recommended_registry_key_fields"], history
 assert "latency.mean_ms" in history_seed["time_series_fields"], history_seed
 assert history_seed["source_result"]["compare_key"] == data["compare_key"], history_seed
 assert history_seed["source_result"]["backend_key"] == data["backend_key"], history_seed
+assert history_seed["run_config"]["batch"] == data["run_config"]["batch"], history_seed
+assert history_seed["run_config"]["height"] == data["run_config"]["height"], history_seed
+assert history_seed["run_config"]["width"] == data["run_config"]["width"], history_seed
+assert history_seed["run_config"]["warmup"] == data["run_config"]["warmup"], history_seed
+assert history_seed["run_config"]["runs"] == data["run_config"]["runs"], history_seed
+assert history_seed["run_config"]["timeout_ms"] == data["run_config"]["timeout_ms"], history_seed
+assert history_seed["run_config"]["input_mode"] == health["input_mode"], history_seed
+assert history_seed["run_config"]["power_mode"] == telemetry["power_mode"], history_seed
 assert history_seed["points"][0]["telemetry_timestamp"] == telemetry["telemetry_timestamp"], history_seed
 assert history_seed["points"][0]["execution_sequence_id"] == telemetry["execution_sequence_id"], history_seed
 assert history_seed["points"][0]["mean_ms"] == telemetry["latency"]["mean_ms"], history_seed
@@ -206,6 +214,11 @@ assert history_seed["source_result_schema_version"] == telemetry["source_result_
 assert history_seed["source_telemetry_schema_version"] == telemetry["schema_version"], history_seed
 assert history_seed["replay_scope"] == "single_result_to_history", history_seed
 assert history_seed["source_result"]["compare_key"] == data["compare_key"], history_seed
+assert history_seed["run_config"]["batch"] == data["run_config"]["batch"], history_seed
+assert history_seed["run_config"]["runs"] == data["run_config"]["runs"], history_seed
+assert history_seed["run_config"]["timeout_ms"] == data["run_config"]["timeout_ms"], history_seed
+assert history_seed["run_config"]["input_mode"] == health["input_mode"], history_seed
+assert history_seed["run_config"]["power_mode"] == telemetry["power_mode"], history_seed
 assert history_seed["points"][0]["p99_ms"] == telemetry["latency"]["p99_ms"], history_seed
 assert history_seed["points"][0]["deadline_missed"] == telemetry["operation"]["deadline_missed"], history_seed
 assert "runtime_telemetry_recorded" in events, events
diff --git a/src/result_writer.cpp b/src/result_writer.cpp
index 2fe1386..a3ccd63 100644
--- a/src/result_writer.cpp
+++ b/src/result_writer.cpp
@@ -591,6 +591,25 @@ void write_runtime_telemetry_history_seed_json(
         << indent << "    \"precision\": " << json_string(precision) << ",\n"
         << indent << "    \"power_mode\": " << json_string(config.power_mode) << "\n"
         << indent << "  },\n"
+        << indent << "  \"run_config\": {\n"
+        << indent << "    \"batch\": " << config.batch << ",\n"
+        << indent << "    \"height\": " << config.height << ",\n"
+        << indent << "    \"width\": " << config.width << ",\n"
+        << indent << "    \"warmup\": " << config.warmup << ",\n"
+        << indent << "    \"runs\": " << config.runs << ",\n"
+        << indent << "    \"timeout_ms\": ";
+    if (config.timeout_ms > 0) {
+        output << config.timeout_ms;
+    } else {
+        output << "null";
+    }
+    output
+        << ",\n"
+        << indent << "    \"input_mode\": " << json_string(config.input_mode()) << ",\n"
+        << indent << "    \"input_preprocess\": " << json_string(config.input_preprocess()) << ",\n"
+        << indent << "    \"power_mode\": " << json_string(config.power_mode) << ",\n"
+        << indent << "    \"jetson_clocks\": " << json_string(config.jetson_clocks) << "\n"
+        << indent << "  },\n"
         << indent << "  \"recommended_registry_key_fields\": ";
     write_string_array_json(output, {
         "compare_key",
diff --git a/tests/test_agent_runtime_result_contract.py b/tests/test_agent_runtime_result_contract.py
index 6c405eb..b6f6e81 100644
--- a/tests/test_agent_runtime_result_contract.py
+++ b/tests/test_agent_runtime_result_contract.py
@@ -235,6 +235,14 @@ def test_runtime_output_records_optional_agent_block_when_manifest_is_provided(s
         )
         self.assertEqual(history_seed["source_result"]["precision"], result["precision"])
         self.assertEqual(history_seed["source_result"]["power_mode"], result["run_config"]["power_mode"])
+        self.assertEqual(history_seed["run_config"]["batch"], result["run_config"]["batch"])
+        self.assertEqual(history_seed["run_config"]["height"], result["run_config"]["height"])
+        self.assertEqual(history_seed["run_config"]["width"], result["run_config"]["width"])
+        self.assertEqual(history_seed["run_config"]["warmup"], result["run_config"]["warmup"])
+        self.assertEqual(history_seed["run_config"]["runs"], result["run_config"]["runs"])
+        self.assertEqual(history_seed["run_config"]["timeout_ms"], result["run_config"]["timeout_ms"])
+        self.assertEqual(history_seed["run_config"]["input_mode"], result["runtime_health_snapshot"]["input_mode"])
+        self.assertEqual(history_seed["run_config"]["power_mode"], telemetry["power_mode"])
         point = history_seed["points"][0]
         self.assertEqual(point["execution_sequence_id"], telemetry["execution_sequence_id"])
         self.assertEqual(point["telemetry_timestamp"], telemetry["telemetry_timestamp"])
diff --git a/tests/test_lab_result_schema.py b/tests/test_lab_result_schema.py
index ee8d50b..de3ed5e 100644
--- a/tests/test_lab_result_schema.py
+++ b/tests/test_lab_result_schema.py
@@ -440,6 +440,20 @@ def validate_runtime_telemetry_history_seed(history_seed: dict, telemetry: dict)
         if not isinstance(source_result.get(field), str):
             raise AssertionError(f"runtime_telemetry.history_seed.source_result.{field} must be a string")
 
+    run_config = history_seed.get("run_config")
+    if not isinstance(run_config, dict):
+        raise AssertionError("runtime_telemetry.history_seed.run_config must be an object")
+    for field in ("batch", "height", "width", "warmup", "runs"):
+        if isinstance(run_config.get(field), bool) or not isinstance(run_config.get(field), int):
+            raise AssertionError(f"runtime_telemetry.history_seed.run_config.{field} must be an integer")
+    if run_config.get("timeout_ms") is not None and (
+        isinstance(run_config.get("timeout_ms"), bool) or not isinstance(run_config.get("timeout_ms"), int)
+    ):
+        raise AssertionError("runtime_telemetry.history_seed.run_config.timeout_ms must be an integer or null")
+    for field in ("input_mode", "input_preprocess", "power_mode", "jetson_clocks"):
+        if not isinstance(run_config.get(field), str):
+            raise AssertionError(f"runtime_telemetry.history_seed.run_config.{field} must be a string")
+
     points = history_seed.get("points")
     if not isinstance(points, list) or not points:
         raise AssertionError("runtime_telemetry.history_seed.points must be a non-empty array")