From dbc27335dceba156f37075fab2c1d957b80a6207 Mon Sep 17 00:00:00 2001 From: hyeokjun32 Date: Tue, 19 May 2026 21:21:16 +0900 Subject: [PATCH] feat: add runtime operation evidence blocks --- README.md | 8 ++ docs/agent_runtime_result_contract.md | 55 ++++++++ src/result_writer.cpp | 133 +++++++++++++++++++- tests/test_agent_runtime_result_contract.py | 24 ++++ tests/test_lab_result_schema.py | 58 +++++++++ 5 files changed, 277 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f1491b0..22f8838 100644 --- a/README.md +++ b/README.md @@ -485,6 +485,14 @@ Runtime can optionally read a Forge `agent_manifest.json` and append an additive This is the first bridge toward the reliable edge agent runtime direction. It records task metadata such as `agent_id`, `task_id`, `agent_type`, priority, latency budget, queue wait, fallback usage, and telemetry context while preserving the base Lab-compatible Runtime result schema. +Runtime result JSON also includes additive operation evidence blocks: + +- `runtime_health_snapshot`: execution health, backend/device context, run count, latency/FPS summary, and explicit timeout observation status. +- `runtime_error_classification`: structured success/error category for downstream report context. +- `runtime_events`: compact lifecycle event log for configuration, benchmark completion, error classification, optional agent context, and tegrastats parsing. + +These fields are evidence for Orchestrator/Lab analysis. Runtime still does not schedule tasks or own deployment decisions. + Example: ```bash diff --git a/docs/agent_runtime_result_contract.md b/docs/agent_runtime_result_contract.md index ca55cb3..feed77f 100644 --- a/docs/agent_runtime_result_contract.md +++ b/docs/agent_runtime_result_contract.md @@ -4,6 +4,14 @@ InferEdge-Runtime can attach optional agent task context to the existing Lab-com This contract is intentionally additive. Existing Runtime results remain valid without an `agent` block, and Lab-compatible top-level fields such as `compare_key`, `backend_key`, `run_config`, `latency_ms`, `jetson_evidence`, and `extra` must not change shape. +Runtime may also append additive operation evidence blocks: + +- `runtime_health_snapshot` +- `runtime_error_classification` +- `runtime_events` + +These blocks support downstream runtime operation reporting without turning Runtime into a scheduler or deployment decision owner. + ## Scope The agent result block is the Runtime-side bridge from Forge `agent_manifest.json` to later Orchestrator, AIGuard, and Lab agent workflow analysis. @@ -45,6 +53,51 @@ When provided, Runtime appends: "schema_version": "inferedge-runtime-result-v1", "compare_key": "yolov8n__b1__h224w224__fp32", "backend_key": "onnxruntime__cpu", + "runtime_health_snapshot": { + "schema_version": "inferedge-runtime-health-v1", + "status": "ok", + "engine_backend": "onnxruntime", + "device": "cpu", + "input_mode": "synthetic", + "input_preprocess": "synthetic", + "warmup": 1, + "runs": 1, + "run_once": false, + "success": true, + "latency_mean_ms": 0.0, + "latency_p95_ms": 0.0, + "latency_p99_ms": 0.0, + "fps": 0.0, + "power_mode": "unknown", + "jetson_clocks": "unknown", + "timeout_policy": "not_configured", + "timeout_observed": false + }, + "runtime_error_classification": { + "schema_version": "inferedge-runtime-error-v1", + "status": "none", + "category": "none", + "message": "", + "timeout_observed": false, + "retryable": false + }, + "runtime_events": [ + { + "type": "runtime_configured", + "status": "ok", + "engine_backend": "onnxruntime", + "device": "cpu", + "input_mode": "synthetic" + }, + { + "type": "benchmark_completed", + "status": "success", + "success": true, + "warmup": 1, + "runs": 1, + "mean_ms": 0.0 + } + ], "agent": { "schema_version": "inferedge-runtime-agent-task-v1", "source_contract": "inferedge-agent-manifest-v1", @@ -105,6 +158,8 @@ When provided, Runtime appends: - `agent.deadline_missed` is computed from mean latency and `latency_budget_ms` when possible, unless explicitly overridden by `--agent-deadline-missed`. - `queue_wait_ms` is `null` unless supplied. - `execution_status` defaults to the Runtime benchmark status unless overridden. +- `runtime_health_snapshot`, `runtime_error_classification`, and `runtime_events` are additive and safe for existing consumers to ignore. +- Runtime does not claim timeout detection unless a timeout mechanism is explicitly implemented; current results record `timeout_policy: not_configured` and `timeout_observed: false`. ## Current Boundary diff --git a/src/result_writer.cpp b/src/result_writer.cpp index e571cca..42f0ff9 100644 --- a/src/result_writer.cpp +++ b/src/result_writer.cpp @@ -327,6 +327,127 @@ void write_agent_task_json( << indent << "}"; } +std::string runtime_health_status(const BenchmarkResult& benchmark_result) { + if (benchmark_result.success) { + return "ok"; + } + if (benchmark_result.status == "skipped") { + return "degraded"; + } + return "error"; +} + +std::string runtime_error_category(const BenchmarkResult& benchmark_result) { + if (benchmark_result.success) { + return "none"; + } + if (benchmark_result.status == "skipped") { + return "runtime_execution_skipped"; + } + if (!benchmark_result.status.empty()) { + return "runtime_" + sanitize_filename_component(benchmark_result.status); + } + return "runtime_error"; +} + +void write_runtime_health_snapshot_json( + std::ostream& output, + const RuntimeConfig& config, + const EngineMetadata& engine_metadata, + const BenchmarkResult& benchmark_result, + int indent_spaces) { + const std::string indent(static_cast(indent_spaces), ' '); + output + << "{\n" + << indent << " \"schema_version\": \"inferedge-runtime-health-v1\",\n" + << indent << " \"status\": " << json_string(runtime_health_status(benchmark_result)) << ",\n" + << indent << " \"engine_backend\": " << json_string(engine_metadata.backend) << ",\n" + << indent << " \"device\": " << json_string(config.device) << ",\n" + << indent << " \"input_mode\": " << json_string(config.input_mode()) << ",\n" + << indent << " \"input_preprocess\": " << json_string(config.input_preprocess()) << ",\n" + << indent << " \"warmup\": " << config.warmup << ",\n" + << indent << " \"runs\": " << config.runs << ",\n" + << indent << " \"run_once\": " << (config.run_once ? "true" : "false") << ",\n" + << indent << " \"success\": " << (benchmark_result.success ? "true" : "false") << ",\n" + << indent << " \"latency_mean_ms\": " << benchmark_result.mean_ms << ",\n" + << indent << " \"latency_p95_ms\": " << benchmark_result.p95_ms << ",\n" + << indent << " \"latency_p99_ms\": " << benchmark_result.p99_ms << ",\n" + << indent << " \"fps\": " << benchmark_result.fps << ",\n" + << indent << " \"power_mode\": " << json_string(config.power_mode) << ",\n" + << indent << " \"jetson_clocks\": " << json_string(config.jetson_clocks) << ",\n" + << indent << " \"timeout_policy\": \"not_configured\",\n" + << indent << " \"timeout_observed\": false\n" + << indent << "}"; +} + +void write_runtime_error_classification_json( + std::ostream& output, + const BenchmarkResult& benchmark_result, + int indent_spaces) { + const std::string indent(static_cast(indent_spaces), ' '); + output + << "{\n" + << indent << " \"schema_version\": \"inferedge-runtime-error-v1\",\n" + << indent << " \"status\": " << json_string(benchmark_result.success ? "none" : "classified") << ",\n" + << indent << " \"category\": " << json_string(runtime_error_category(benchmark_result)) << ",\n" + << indent << " \"message\": " << json_string(benchmark_result.success ? "" : benchmark_result.message) << ",\n" + << indent << " \"timeout_observed\": false,\n" + << indent << " \"retryable\": false\n" + << indent << "}"; +} + +void write_runtime_events_json( + std::ostream& output, + const RuntimeConfig& config, + const EngineMetadata& engine_metadata, + const BenchmarkResult& benchmark_result, + const TegrastatsSummary& tegrastats_summary, + int indent_spaces) { + const std::string indent(static_cast(indent_spaces), ' '); + const std::string item_indent(static_cast(indent_spaces + 2), ' '); + + output + << "[\n" + << item_indent << "{\n" + << item_indent << " \"type\": \"runtime_configured\",\n" + << item_indent << " \"status\": \"ok\",\n" + << item_indent << " \"engine_backend\": " << json_string(engine_metadata.backend) << ",\n" + << item_indent << " \"device\": " << json_string(config.device) << ",\n" + << item_indent << " \"input_mode\": " << json_string(config.input_mode()) << "\n" + << item_indent << "},\n" + << item_indent << "{\n" + << item_indent << " \"type\": \"benchmark_completed\",\n" + << item_indent << " \"status\": " << json_string(benchmark_result.status) << ",\n" + << item_indent << " \"success\": " << (benchmark_result.success ? "true" : "false") << ",\n" + << item_indent << " \"warmup\": " << benchmark_result.warmup_runs << ",\n" + << item_indent << " \"runs\": " << benchmark_result.timed_runs << ",\n" + << item_indent << " \"mean_ms\": " << benchmark_result.mean_ms << "\n" + << item_indent << "},\n" + << item_indent << "{\n" + << item_indent << " \"type\": \"runtime_error_classified\",\n" + << item_indent << " \"status\": " << json_string(benchmark_result.success ? "none" : "classified") << ",\n" + << item_indent << " \"category\": " << json_string(runtime_error_category(benchmark_result)) << "\n" + << item_indent << "},\n"; + + if (!config.agent_manifest_path.empty()) { + output + << item_indent << "{\n" + << item_indent << " \"type\": \"agent_context_recorded\",\n" + << item_indent << " \"status\": " << json_string(config.agent_manifest_applied ? "ok" : "provided") << ",\n" + << item_indent << " \"agent_id\": " << json_string(config.agent_id) << ",\n" + << item_indent << " \"task_id\": " << json_string(config.agent_task_id) << "\n" + << item_indent << "},\n"; + } + + output + << item_indent << "{\n" + << item_indent << " \"type\": \"tegrastats_summary\",\n" + << item_indent << " \"status\": " << json_string(tegrastats_summary.status) << ",\n" + << item_indent << " \"sample_count\": " << tegrastats_summary.sample_count << "\n" + << item_indent << "}\n" + << indent << "]"; +} + void write_shape_json(std::ostream& output, const std::vector& shape) { output << '['; for (std::size_t i = 0; i < shape.size(); ++i) { @@ -504,7 +625,17 @@ std::filesystem::path write_result_json( write_tegrastats_summary_json(output, tegrastats_summary, 4); output << "\n" - << " }"; + << " },\n" + << " \"runtime_health_snapshot\": "; + write_runtime_health_snapshot_json(output, config, engine_metadata, benchmark_result, 2); + output + << ",\n" + << " \"runtime_error_classification\": "; + write_runtime_error_classification_json(output, benchmark_result, 2); + output + << ",\n" + << " \"runtime_events\": "; + write_runtime_events_json(output, config, engine_metadata, benchmark_result, tegrastats_summary, 2); if (!config.agent_manifest_path.empty()) { output << ",\n" diff --git a/tests/test_agent_runtime_result_contract.py b/tests/test_agent_runtime_result_contract.py index 3f340d3..6416b98 100644 --- a/tests/test_agent_runtime_result_contract.py +++ b/tests/test_agent_runtime_result_contract.py @@ -77,6 +77,30 @@ def test_runtime_output_records_optional_agent_block_when_manifest_is_provided(s self.assertEqual(agent["telemetry_contract_version"], "inferedge-agent-telemetry-v1") self.assertEqual(agent["telemetry_snapshot"]["power_mode"], "unknown") + health = result["runtime_health_snapshot"] + self.assertEqual(health["schema_version"], "inferedge-runtime-health-v1") + self.assertIn(health["status"], {"ok", "degraded", "error"}) + self.assertEqual(health["engine_backend"], "onnxruntime") + self.assertEqual(health["device"], "cpu") + self.assertFalse(health["timeout_observed"]) + + error = result["runtime_error_classification"] + self.assertEqual(error["schema_version"], "inferedge-runtime-error-v1") + if result["success"]: + self.assertEqual(error["status"], "none") + self.assertEqual(error["category"], "none") + else: + self.assertEqual(error["status"], "classified") + self.assertNotEqual(error["category"], "none") + + runtime_events = result["runtime_events"] + self.assertIsInstance(runtime_events, list) + event_types = {event["type"] for event in runtime_events} + self.assertIn("runtime_configured", event_types) + self.assertIn("benchmark_completed", event_types) + self.assertIn("runtime_error_classified", event_types) + self.assertIn("agent_context_recorded", event_types) + extra = result["extra"] self.assertTrue(extra["agent_manifest_recorded"]) self.assertEqual(extra["agent_id"], "vision_detector") diff --git a/tests/test_lab_result_schema.py b/tests/test_lab_result_schema.py index 3257aea..61132a7 100644 --- a/tests/test_lab_result_schema.py +++ b/tests/test_lab_result_schema.py @@ -126,6 +126,55 @@ def validate_lab_compatible_result(result: dict) -> None: if result.get("backend_key") is not None and not isinstance(result["backend_key"], str): raise AssertionError("backend_key must be a string when present") + validate_optional_runtime_operation_evidence(result) + + +def validate_optional_runtime_operation_evidence(result: dict) -> None: + health = result.get("runtime_health_snapshot") + if health is not None: + if not isinstance(health, dict): + raise AssertionError("runtime_health_snapshot must be an object when present") + for field in ("schema_version", "status", "engine_backend", "device", "input_mode"): + if field not in health: + raise AssertionError(f"runtime_health_snapshot.{field} is required") + if not isinstance(health[field], str): + raise AssertionError(f"runtime_health_snapshot.{field} must be a string") + for field in ("warmup", "runs"): + if not isinstance(health.get(field), int): + raise AssertionError(f"runtime_health_snapshot.{field} must be an integer") + for field in ("success", "run_once", "timeout_observed"): + if not isinstance(health.get(field), bool): + raise AssertionError(f"runtime_health_snapshot.{field} must be a boolean") + + error = result.get("runtime_error_classification") + if error is not None: + if not isinstance(error, dict): + raise AssertionError("runtime_error_classification must be an object when present") + for field in ("schema_version", "status", "category", "message"): + if field not in error: + raise AssertionError(f"runtime_error_classification.{field} is required") + if not isinstance(error[field], str): + raise AssertionError(f"runtime_error_classification.{field} must be a string") + for field in ("timeout_observed", "retryable"): + if not isinstance(error.get(field), bool): + raise AssertionError(f"runtime_error_classification.{field} must be a boolean") + + events = result.get("runtime_events") + if events is not None: + if not isinstance(events, list): + raise AssertionError("runtime_events must be an array when present") + event_types = [] + for event in events: + if not isinstance(event, dict): + raise AssertionError("runtime_events items must be objects") + event_type = event.get("type") + if not isinstance(event_type, str) or not event_type: + raise AssertionError("runtime_events[].type must be a non-empty string") + event_types.append(event_type) + for expected in ("runtime_configured", "benchmark_completed", "runtime_error_classified"): + if expected not in event_types: + raise AssertionError(f"runtime_events must include {expected}") + class JetsonEvidenceContractTest(unittest.TestCase): def test_runtime_binary_parses_tegrastats_log_when_available(self): @@ -167,6 +216,15 @@ def test_runtime_binary_parses_tegrastats_log_when_available(self): result = load_json(output_path) validate_lab_compatible_result(result) + self.assertEqual(result["runtime_health_snapshot"]["schema_version"], "inferedge-runtime-health-v1") + self.assertIn(result["runtime_health_snapshot"]["status"], {"ok", "degraded", "error"}) + if result["success"]: + self.assertEqual(result["runtime_error_classification"]["category"], "none") + else: + self.assertNotEqual(result["runtime_error_classification"]["category"], "none") + event_types = {event["type"] for event in result["runtime_events"]} + self.assertIn("runtime_configured", event_types) + self.assertIn("benchmark_completed", event_types) self.assertEqual(result["run_config"]["power_mode"], "15W") self.assertEqual(result["run_config"]["jetson_clocks"], "on") summary = result["jetson_evidence"]["tegrastats_summary"]