From dbc27335dceba156f37075fab2c1d957b80a6207 Mon Sep 17 00:00:00 2001
From: hyeokjun32 <ksjm0417@naver.com>
Date: Tue, 19 May 2026 21:21:16 +0900
Subject: [PATCH] feat: add runtime operation evidence blocks

---
 README.md                                   |   8 ++
 docs/agent_runtime_result_contract.md       |  55 ++++++++
 src/result_writer.cpp                       | 133 +++++++++++++++++++-
 tests/test_agent_runtime_result_contract.py |  24 ++++
 tests/test_lab_result_schema.py             |  58 +++++++++
 5 files changed, 277 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f1491b0..22f8838 100644
--- a/README.md
+++ b/README.md
@@ -485,6 +485,14 @@ Runtime can optionally read a Forge `agent_manifest.json` and append an additive
 
 This is the first bridge toward the reliable edge agent runtime direction. It records task metadata such as `agent_id`, `task_id`, `agent_type`, priority, latency budget, queue wait, fallback usage, and telemetry context while preserving the base Lab-compatible Runtime result schema.
 
+Runtime result JSON also includes additive operation evidence blocks:
+
+- `runtime_health_snapshot`: execution health, backend/device context, run count, latency/FPS summary, and explicit timeout observation status.
+- `runtime_error_classification`: structured success/error category for downstream report context.
+- `runtime_events`: compact lifecycle event log for configuration, benchmark completion, error classification, optional agent context, and tegrastats parsing.
+
+These fields are evidence for Orchestrator/Lab analysis. Runtime still does not schedule tasks or own deployment decisions.
+
 Example:
 
 ```bash
diff --git a/docs/agent_runtime_result_contract.md b/docs/agent_runtime_result_contract.md
index ca55cb3..feed77f 100644
--- a/docs/agent_runtime_result_contract.md
+++ b/docs/agent_runtime_result_contract.md
@@ -4,6 +4,14 @@ InferEdge-Runtime can attach optional agent task context to the existing Lab-com
 
 This contract is intentionally additive. Existing Runtime results remain valid without an `agent` block, and Lab-compatible top-level fields such as `compare_key`, `backend_key`, `run_config`, `latency_ms`, `jetson_evidence`, and `extra` must not change shape.
 
+Runtime may also append additive operation evidence blocks:
+
+- `runtime_health_snapshot`
+- `runtime_error_classification`
+- `runtime_events`
+
+These blocks support downstream runtime operation reporting without turning Runtime into a scheduler or deployment decision owner.
+
 ## Scope
 
 The agent result block is the Runtime-side bridge from Forge `agent_manifest.json` to later Orchestrator, AIGuard, and Lab agent workflow analysis.
@@ -45,6 +53,51 @@ When provided, Runtime appends:
   "schema_version": "inferedge-runtime-result-v1",
   "compare_key": "yolov8n__b1__h224w224__fp32",
   "backend_key": "onnxruntime__cpu",
+  "runtime_health_snapshot": {
+    "schema_version": "inferedge-runtime-health-v1",
+    "status": "ok",
+    "engine_backend": "onnxruntime",
+    "device": "cpu",
+    "input_mode": "synthetic",
+    "input_preprocess": "synthetic",
+    "warmup": 1,
+    "runs": 1,
+    "run_once": false,
+    "success": true,
+    "latency_mean_ms": 0.0,
+    "latency_p95_ms": 0.0,
+    "latency_p99_ms": 0.0,
+    "fps": 0.0,
+    "power_mode": "unknown",
+    "jetson_clocks": "unknown",
+    "timeout_policy": "not_configured",
+    "timeout_observed": false
+  },
+  "runtime_error_classification": {
+    "schema_version": "inferedge-runtime-error-v1",
+    "status": "none",
+    "category": "none",
+    "message": "",
+    "timeout_observed": false,
+    "retryable": false
+  },
+  "runtime_events": [
+    {
+      "type": "runtime_configured",
+      "status": "ok",
+      "engine_backend": "onnxruntime",
+      "device": "cpu",
+      "input_mode": "synthetic"
+    },
+    {
+      "type": "benchmark_completed",
+      "status": "success",
+      "success": true,
+      "warmup": 1,
+      "runs": 1,
+      "mean_ms": 0.0
+    }
+  ],
   "agent": {
     "schema_version": "inferedge-runtime-agent-task-v1",
     "source_contract": "inferedge-agent-manifest-v1",
@@ -105,6 +158,8 @@ When provided, Runtime appends:
 - `agent.deadline_missed` is computed from mean latency and `latency_budget_ms` when possible, unless explicitly overridden by `--agent-deadline-missed`.
 - `queue_wait_ms` is `null` unless supplied.
 - `execution_status` defaults to the Runtime benchmark status unless overridden.
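+- `runtime_health_snapshot`, `runtime_error_classification`, and `runtime_events` are additive and safe for existing consumers to ignore. `runtime_events` always contains `runtime_configured`, `benchmark_completed`, `runtime_error_classified`, and `tegrastats_summary` entries, plus `agent_context_recorded` when an agent manifest is provided; the example above is abbreviated.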
+- Runtime does not claim timeout detection unless a timeout mechanism is explicitly implemented; current results record `timeout_policy: not_configured` and `timeout_observed: false`.
 
 ## Current Boundary
 
diff --git a/src/result_writer.cpp b/src/result_writer.cpp
index e571cca..42f0ff9 100644
--- a/src/result_writer.cpp
+++ b/src/result_writer.cpp
@@ -327,6 +327,127 @@ void write_agent_task_json(
         << indent << "}";
 }
 
+// Maps a benchmark outcome to the health snapshot status string:
+// success -> "ok", status "skipped" -> "degraded", anything else -> "error".
+std::string runtime_health_status(const BenchmarkResult& benchmark_result) {
+    if (!benchmark_result.success) {
+        const bool was_skipped = benchmark_result.status == "skipped";
+        return was_skipped ? "degraded" : "error";
+    }
+    return "ok";
+}
+
+// Error category for a benchmark outcome: "none" on success, "runtime_execution_skipped"
+// when skipped, "runtime_<sanitized status>" otherwise, "runtime_error" if status is empty.
+std::string runtime_error_category(const BenchmarkResult& benchmark_result) {
+    const auto& status = benchmark_result.status;
+    if (benchmark_result.success) {
+        return "none";
+    }
+    if (status == "skipped") {
+        return "runtime_execution_skipped";
+    }
+    return status.empty() ? std::string("runtime_error") : "runtime_" + sanitize_filename_component(status);
+}
+
+// Emits the `runtime_health_snapshot` JSON object. The opening brace is written inline
+// (no leading pad); every member line and the closing brace are padded by `indent_spaces`.
+void write_runtime_health_snapshot_json(
+    std::ostream& output,
+    const RuntimeConfig& config,
+    const EngineMetadata& engine_metadata,
+    const BenchmarkResult& benchmark_result,
+    int indent_spaces) {
+    const std::string pad(static_cast<std::size_t>(indent_spaces), ' ');
+    output << "{\n" << pad << "  \"schema_version\": \"inferedge-runtime-health-v1\",\n";
+    output << pad << "  \"status\": " << json_string(runtime_health_status(benchmark_result)) << ",\n";
+    output << pad << "  \"engine_backend\": " << json_string(engine_metadata.backend) << ",\n";
+    output << pad << "  \"device\": " << json_string(config.device) << ",\n";
+    output << pad << "  \"input_mode\": " << json_string(config.input_mode()) << ",\n";
+    output << pad << "  \"input_preprocess\": " << json_string(config.input_preprocess()) << ",\n";
+    output << pad << "  \"warmup\": " << config.warmup << ",\n";
+    output << pad << "  \"runs\": " << config.runs << ",\n";
+    output << pad << "  \"run_once\": " << (config.run_once ? "true" : "false") << ",\n";
+    output << pad << "  \"success\": " << (benchmark_result.success ? "true" : "false") << ",\n";
+    output << pad << "  \"latency_mean_ms\": " << benchmark_result.mean_ms << ",\n";
+    output << pad << "  \"latency_p95_ms\": " << benchmark_result.p95_ms << ",\n";
+    output << pad << "  \"latency_p99_ms\": " << benchmark_result.p99_ms << ",\n";
+    output << pad << "  \"fps\": " << benchmark_result.fps << ",\n";
+    output << pad << "  \"power_mode\": " << json_string(config.power_mode) << ",\n";
+    output << pad << "  \"jetson_clocks\": " << json_string(config.jetson_clocks) << ",\n";
+    output << pad << "  \"timeout_policy\": \"not_configured\",\n";
+    output << pad << "  \"timeout_observed\": false\n";
+    output << pad << "}";
+}
+
+// Emits the `runtime_error_classification` JSON object. `message` is blanked on success;
+// `timeout_observed` and `retryable` are always written as false.
+void write_runtime_error_classification_json(
+    std::ostream& output,
+    const BenchmarkResult& benchmark_result,
+    int indent_spaces) {
+    const std::string pad(static_cast<std::size_t>(indent_spaces), ' ');
+    output << "{\n" << pad << "  \"schema_version\": \"inferedge-runtime-error-v1\",\n";
+    output << pad << "  \"status\": " << json_string(benchmark_result.success ? "none" : "classified") << ",\n";
+    output << pad << "  \"category\": " << json_string(runtime_error_category(benchmark_result)) << ",\n";
+    output << pad << "  \"message\": " << json_string(benchmark_result.success ? "" : benchmark_result.message) << ",\n";
+    output << pad << "  \"timeout_observed\": false,\n";
+    output << pad << "  \"retryable\": false\n";
+    output << pad << "}";
+}
+
+// Emits the `runtime_events` JSON array: runtime_configured, benchmark_completed,
+// runtime_error_classified, an optional agent_context_recorded, then tegrastats_summary.
+void write_runtime_events_json(
+    std::ostream& output,
+    const RuntimeConfig& config,
+    const EngineMetadata& engine_metadata,
+    const BenchmarkResult& benchmark_result,
+    const TegrastatsSummary& tegrastats_summary,
+    int indent_spaces) {
+    const std::string array_pad(static_cast<std::size_t>(indent_spaces), ' ');
+    const std::string item_pad(static_cast<std::size_t>(indent_spaces + 2), ' ');
+
+    output << "[\n";
+    output << item_pad << "{\n";
+    output << item_pad << "  \"type\": \"runtime_configured\",\n";
+    output << item_pad << "  \"status\": \"ok\",\n";
+    output << item_pad << "  \"engine_backend\": " << json_string(engine_metadata.backend) << ",\n";
+    output << item_pad << "  \"device\": " << json_string(config.device) << ",\n";
+    output << item_pad << "  \"input_mode\": " << json_string(config.input_mode()) << "\n";
+    output << item_pad << "},\n";
+    output << item_pad << "{\n";
+    output << item_pad << "  \"type\": \"benchmark_completed\",\n";
+    output << item_pad << "  \"status\": " << json_string(benchmark_result.status) << ",\n";
+    output << item_pad << "  \"success\": " << (benchmark_result.success ? "true" : "false") << ",\n";
+    output << item_pad << "  \"warmup\": " << benchmark_result.warmup_runs << ",\n";
+    output << item_pad << "  \"runs\": " << benchmark_result.timed_runs << ",\n";
+    output << item_pad << "  \"mean_ms\": " << benchmark_result.mean_ms << "\n";
+    output << item_pad << "},\n";
+    output << item_pad << "{\n";
+    output << item_pad << "  \"type\": \"runtime_error_classified\",\n";
+    output << item_pad << "  \"status\": " << json_string(benchmark_result.success ? "none" : "classified") << ",\n";
+    output << item_pad << "  \"category\": " << json_string(runtime_error_category(benchmark_result)) << "\n";
+    output << item_pad << "},\n";
+
+    // The agent event is written only when an agent manifest path was configured.
+    if (!config.agent_manifest_path.empty()) {
+        output << item_pad << "{\n";
+        output << item_pad << "  \"type\": \"agent_context_recorded\",\n";
+        output << item_pad << "  \"status\": " << json_string(config.agent_manifest_applied ? "ok" : "provided") << ",\n";
+        output << item_pad << "  \"agent_id\": " << json_string(config.agent_id) << ",\n";
+        output << item_pad << "  \"task_id\": " << json_string(config.agent_task_id) << "\n";
+        output << item_pad << "},\n";
+    }
+
+    output << item_pad << "{\n";
+    output << item_pad << "  \"type\": \"tegrastats_summary\",\n";
+    output << item_pad << "  \"status\": " << json_string(tegrastats_summary.status) << ",\n";
+    output << item_pad << "  \"sample_count\": " << tegrastats_summary.sample_count << "\n";
+    output << item_pad << "}\n";
+    output << array_pad << "]";
+}
+
 void write_shape_json(std::ostream& output, const std::vector<int64_t>& shape) {
     output << '[';
     for (std::size_t i = 0; i < shape.size(); ++i) {
@@ -504,7 +625,17 @@ std::filesystem::path write_result_json(
     write_tegrastats_summary_json(output, tegrastats_summary, 4);
     output
         << "\n"
-        << "  }";
+        << "  },\n"
+        << "  \"runtime_health_snapshot\": ";
+    write_runtime_health_snapshot_json(output, config, engine_metadata, benchmark_result, 2);
+    output
+        << ",\n"
+        << "  \"runtime_error_classification\": ";
+    write_runtime_error_classification_json(output, benchmark_result, 2);
+    output
+        << ",\n"
+        << "  \"runtime_events\": ";
+    write_runtime_events_json(output, config, engine_metadata, benchmark_result, tegrastats_summary, 2);
     if (!config.agent_manifest_path.empty()) {
         output
             << ",\n"
diff --git a/tests/test_agent_runtime_result_contract.py b/tests/test_agent_runtime_result_contract.py
index 3f340d3..6416b98 100644
--- a/tests/test_agent_runtime_result_contract.py
+++ b/tests/test_agent_runtime_result_contract.py
@@ -77,6 +77,30 @@ def test_runtime_output_records_optional_agent_block_when_manifest_is_provided(s
         self.assertEqual(agent["telemetry_contract_version"], "inferedge-agent-telemetry-v1")
         self.assertEqual(agent["telemetry_snapshot"]["power_mode"], "unknown")
 
+        health = result["runtime_health_snapshot"]
+        self.assertEqual(health["schema_version"], "inferedge-runtime-health-v1")
+        self.assertIn(health["status"], {"ok", "degraded", "error"})
+        self.assertEqual(health["engine_backend"], "onnxruntime")
+        self.assertEqual(health["device"], "cpu")
+        self.assertFalse(health["timeout_observed"])
+
+        error = result["runtime_error_classification"]
+        self.assertEqual(error["schema_version"], "inferedge-runtime-error-v1")
+        if result["success"]:
+            self.assertEqual(error["status"], "none")
+            self.assertEqual(error["category"], "none")
+        else:
+            self.assertEqual(error["status"], "classified")
+            self.assertNotEqual(error["category"], "none")
+
+        runtime_events = result["runtime_events"]
+        self.assertIsInstance(runtime_events, list)
+        event_types = {event["type"] for event in runtime_events}
+        self.assertIn("runtime_configured", event_types)
+        self.assertIn("benchmark_completed", event_types)
+        self.assertIn("runtime_error_classified", event_types)
+        self.assertIn("agent_context_recorded", event_types)
+
         extra = result["extra"]
         self.assertTrue(extra["agent_manifest_recorded"])
         self.assertEqual(extra["agent_id"], "vision_detector")
diff --git a/tests/test_lab_result_schema.py b/tests/test_lab_result_schema.py
index 3257aea..61132a7 100644
--- a/tests/test_lab_result_schema.py
+++ b/tests/test_lab_result_schema.py
@@ -126,6 +126,55 @@ def validate_lab_compatible_result(result: dict) -> None:
     if result.get("backend_key") is not None and not isinstance(result["backend_key"], str):
         raise AssertionError("backend_key must be a string when present")
 
+    validate_optional_runtime_operation_evidence(result)
+
+
+def validate_optional_runtime_operation_evidence(result: dict) -> None:
+    """Validate optional runtime operation evidence blocks; absent blocks are skipped."""
+    health = result.get("runtime_health_snapshot")
+    if health is not None:
+        if not isinstance(health, dict):
+            raise AssertionError("runtime_health_snapshot must be an object when present")
+        for field in ("schema_version", "status", "engine_backend", "device", "input_mode"):
+            if field not in health:
+                raise AssertionError(f"runtime_health_snapshot.{field} is required")
+            if not isinstance(health[field], str):
+                raise AssertionError(f"runtime_health_snapshot.{field} must be a string")
+        for field in ("warmup", "runs"):
+            value = health.get(field)
+            if isinstance(value, bool) or not isinstance(value, int):  # bool subclasses int
+                raise AssertionError(f"runtime_health_snapshot.{field} must be an integer")
+        for field in ("success", "run_once", "timeout_observed"):
+            if not isinstance(health.get(field), bool):
+                raise AssertionError(f"runtime_health_snapshot.{field} must be a boolean")
+    error = result.get("runtime_error_classification")
+    if error is not None:
+        if not isinstance(error, dict):
+            raise AssertionError("runtime_error_classification must be an object when present")
+        for field in ("schema_version", "status", "category", "message"):
+            if field not in error:
+                raise AssertionError(f"runtime_error_classification.{field} is required")
+            if not isinstance(error[field], str):
+                raise AssertionError(f"runtime_error_classification.{field} must be a string")
+        for field in ("timeout_observed", "retryable"):
+            if not isinstance(error.get(field), bool):
+                raise AssertionError(f"runtime_error_classification.{field} must be a boolean")
+    events = result.get("runtime_events")
+    if events is not None:
+        if not isinstance(events, list):
+            raise AssertionError("runtime_events must be an array when present")
+        event_types = []
+        for event in events:
+            if not isinstance(event, dict):
+                raise AssertionError("runtime_events items must be objects")
+            event_type = event.get("type")
+            if not isinstance(event_type, str) or not event_type:
+                raise AssertionError("runtime_events[].type must be a non-empty string")
+            event_types.append(event_type)
+        for expected in ("runtime_configured", "benchmark_completed", "runtime_error_classified"):
+            if expected not in event_types:
+                raise AssertionError(f"runtime_events must include {expected}")
+
 
 class JetsonEvidenceContractTest(unittest.TestCase):
     def test_runtime_binary_parses_tegrastats_log_when_available(self):
@@ -167,6 +216,15 @@ def test_runtime_binary_parses_tegrastats_log_when_available(self):
             result = load_json(output_path)
 
         validate_lab_compatible_result(result)
+        self.assertEqual(result["runtime_health_snapshot"]["schema_version"], "inferedge-runtime-health-v1")
+        self.assertIn(result["runtime_health_snapshot"]["status"], {"ok", "degraded", "error"})
+        if result["success"]:
+            self.assertEqual(result["runtime_error_classification"]["category"], "none")
+        else:
+            self.assertNotEqual(result["runtime_error_classification"]["category"], "none")
+        event_types = {event["type"] for event in result["runtime_events"]}
+        self.assertIn("runtime_configured", event_types)
+        self.assertIn("benchmark_completed", event_types)
         self.assertEqual(result["run_config"]["power_mode"], "15W")
         self.assertEqual(result["run_config"]["jetson_clocks"], "on")
         summary = result["jetson_evidence"]["tegrastats_summary"]