diff --git a/README.ko.md b/README.ko.md index 4631a0c..30a8626 100644 --- a/README.ko.md +++ b/README.ko.md @@ -75,7 +75,7 @@ Runtime은 Forge `agent_manifest.json`을 선택적으로 읽어 기존 Lab-comp 이 기능은 reliable edge agent runtime 방향의 첫 Runtime-side contract입니다. `agent_id`, `task_id`, `agent_type`, priority, latency budget, queue wait, fallback usage, telemetry context를 기록하지만 기존 `result.json`의 top-level compare/report 필드는 변경하지 않습니다. -Runtime result JSON에는 `runtime_health_snapshot`, `runtime_error_classification`, `runtime_events`도 additive evidence로 기록됩니다. `--timeout-ms`는 latency timeout 관측 기준을 남기는 옵션이며, production request cancellation을 의미하지 않습니다. +Runtime result JSON에는 `runtime_health_snapshot`, `runtime_error_classification`, `runtime_events`도 additive evidence로 기록됩니다. 이제 health snapshot은 backend availability, latency budget/deadline observation, tegrastats evidence availability를 함께 남기고, runtime events는 sequential `event_index`를 가진 lifecycle trace로 기록됩니다. `--timeout-ms`는 latency timeout 관측 기준을 남기는 옵션이며, production request cancellation을 의미하지 않습니다. 예시: diff --git a/README.md b/README.md index 2dcebb8..11e5b26 100644 --- a/README.md +++ b/README.md @@ -487,9 +487,9 @@ This is the first bridge toward the reliable edge agent runtime direction. It re Runtime result JSON also includes additive operation evidence blocks: -- `runtime_health_snapshot`: execution health, backend/device context, run count, latency/FPS summary, and explicit timeout observation status. `--timeout-ms` records an observation threshold; it does not claim production request cancellation. -- `runtime_error_classification`: structured success/error category for downstream report context. -- `runtime_events`: compact lifecycle event log for configuration, benchmark completion, error classification, optional agent context, and tegrastats parsing. +- `runtime_health_snapshot`: execution health, backend/device context, backend availability, run count, latency/FPS summary, latency-budget/deadline observation, tegrastats evidence availability, and explicit timeout observation status. `--timeout-ms` records an observation threshold; it does not claim production request cancellation. +- `runtime_error_classification`: structured success/error category, severity, retry hint, observed mean latency, and timeout budget for downstream report context. +- `runtime_events`: compact indexed lifecycle event log for configuration, benchmark completion, error classification, optional agent context, and tegrastats parsing. These fields are evidence for Orchestrator/Lab analysis. Runtime still does not schedule tasks or own deployment decisions. diff --git a/docs/agent_runtime_result_contract.md b/docs/agent_runtime_result_contract.md index 17162f7..6f0a161 100644 --- a/docs/agent_runtime_result_contract.md +++ b/docs/agent_runtime_result_contract.md @@ -77,6 +77,8 @@ When provided, Runtime appends: "schema_version": "inferedge-runtime-health-v1", "status": "ok", "engine_backend": "onnxruntime", + "engine_available": true, + "engine_status_message": "", "device": "cpu", "input_mode": "synthetic", "input_preprocess": "synthetic", @@ -88,8 +90,14 @@ When provided, Runtime appends: "latency_p95_ms": 0.0, "latency_p99_ms": 0.0, "fps": 0.0, + "latency_budget_ms": 33, + "latency_budget_exceeded": false, + "deadline_missed": false, "power_mode": "unknown", "jetson_clocks": "unknown", + "tegrastats_status": "not_provided", + "tegrastats_sample_count": 0, + "thermal_memory_evidence_available": false, "timeout_policy": "latency_threshold", "timeout_budget_ms": 1, "timeout_observed": false @@ -98,32 +106,56 @@ When provided, Runtime appends: "schema_version": "inferedge-runtime-error-v1", "status": "none", "category": "none", + "severity": "none", "message": "", + "observed_mean_ms": 0.0, + "timeout_budget_ms": 1, "timeout_observed": false, - "retryable": false + "retryable": false, + "retry_hint": "none" }, "runtime_events": [ { + "schema_version": "inferedge-runtime-event-v1", + "event_index": 0, "type": "runtime_configured", "status": "ok", "engine_backend": "onnxruntime", + "engine_available": true, + "engine_status_message": "", "device": "cpu", - "input_mode": "synthetic" + "input_mode": "synthetic", + "timeout_policy": "latency_threshold" }, { + "schema_version": "inferedge-runtime-event-v1", + "event_index": 1, "type": "benchmark_completed", "status": "success", "success": true, "warmup": 1, "runs": 1, - "mean_ms": 0.0 + "mean_ms": 0.0, + "p95_ms": 0.0, + "p99_ms": 0.0, + "fps": 0.0, + "latency_budget_ms": 33, + "latency_budget_exceeded": false, + "deadline_missed": false }, { + "schema_version": "inferedge-runtime-event-v1", + "event_index": 2, "type": "runtime_error_classified", "status": "none", "category": "none", + "severity": "none", "timeout_policy": "latency_threshold", - "timeout_observed": false + "timeout_budget_ms": 1, + "observed_mean_ms": 0.0, + "timeout_observed": false, + "retryable": false, + "retry_hint": "none" } ], "agent": { @@ -187,6 +219,8 @@ When provided, Runtime appends: - `queue_wait_ms` is `null` unless supplied. - `execution_status` defaults to the Runtime benchmark status unless overridden. - `runtime_health_snapshot`, `runtime_error_classification`, and `runtime_events` are additive and safe for existing consumers to ignore. +- `runtime_health_snapshot` includes backend availability, latency-budget/deadline observation, timeout observation, and tegrastats evidence availability when those values are known. +- `runtime_events` uses additive `inferedge-runtime-event-v1` entries with sequential `event_index` values so Lab/Orchestrator reports can show a compact lifecycle trace. - Runtime does not claim production request cancellation. `--timeout-ms` is an observation threshold: if a successful benchmark mean latency exceeds the configured threshold, Runtime records `timeout_observed: true`, `runtime_error_classification.category: runtime_timeout_observed`, and `retryable: true` for downstream reliability reporting. - Without `--timeout-ms`, results record `timeout_policy: not_configured`, `timeout_budget_ms: null`, and `timeout_observed: false`. diff --git a/scripts/smoke_default.sh b/scripts/smoke_default.sh index 087b6f0..b621717 100755 --- a/scripts/smoke_default.sh +++ b/scripts/smoke_default.sh @@ -94,10 +94,20 @@ health = data["runtime_health_snapshot"] assert health["timeout_policy"] == "latency_threshold" assert health["timeout_budget_ms"] == 1 assert health["timeout_observed"] is False +assert health["latency_budget_ms"] == 33 +assert "latency_budget_exceeded" in health +assert "deadline_missed" in health +assert health["tegrastats_status"] == "not_provided" error = data["runtime_error_classification"] assert error["timeout_observed"] is False +assert error["timeout_budget_ms"] == 1 +assert error["severity"] in {"none", "warning", "error"} +assert "retry_hint" in error events = {event["type"]: event for event in data["runtime_events"]} assert events["runtime_error_classified"]["timeout_policy"] == "latency_threshold" +assert events["runtime_error_classified"]["timeout_budget_ms"] == 1 +assert events["benchmark_completed"]["latency_budget_ms"] == 33 +assert [event["event_index"] for event in data["runtime_events"]] == list(range(len(data["runtime_events"]))) assert data["extra"]["agent_manifest_recorded"] is True PY diff --git a/src/result_writer.cpp b/src/result_writer.cpp index 4f48880..58fa252 100644 --- a/src/result_writer.cpp +++ b/src/result_writer.cpp @@ -362,19 +362,55 @@ std::string runtime_error_category(const RuntimeConfig& config, const BenchmarkR return "runtime_error"; } +std::string runtime_error_severity(const RuntimeConfig& config, const BenchmarkResult& benchmark_result) { + if (timeout_observed(config, benchmark_result)) { + return "warning"; + } + if (benchmark_result.success) { + return "none"; + } + if (benchmark_result.status == "skipped") { + return "warning"; + } + return "error"; +} + +std::string runtime_retry_hint(const RuntimeConfig& config, const BenchmarkResult& benchmark_result) { + if (timeout_observed(config, benchmark_result)) { + return "retry_or_degrade"; + } + if (benchmark_result.success) { + return "none"; + } + if (benchmark_result.status == "skipped") { + return "check_backend_availability"; + } + return "check_runtime_error"; +} + +bool latency_budget_exceeded(const RuntimeConfig& config, const BenchmarkResult& benchmark_result) { + return benchmark_result.success && + config.agent_latency_budget_ms > 0 && + benchmark_result.mean_ms > static_cast(config.agent_latency_budget_ms); +} + void write_runtime_health_snapshot_json( std::ostream& output, const RuntimeConfig& config, const EngineMetadata& engine_metadata, const BenchmarkResult& benchmark_result, + const TegrastatsSummary& tegrastats_summary, int indent_spaces) { const std::string indent(static_cast(indent_spaces), ' '); const bool observed_timeout = timeout_observed(config, benchmark_result); + const bool exceeded_latency_budget = latency_budget_exceeded(config, benchmark_result); output << "{\n" << indent << " \"schema_version\": \"inferedge-runtime-health-v1\",\n" << indent << " \"status\": " << json_string(runtime_health_status(config, benchmark_result)) << ",\n" << indent << " \"engine_backend\": " << json_string(engine_metadata.backend) << ",\n" + << indent << " \"engine_available\": " << (engine_metadata.available ? "true" : "false") << ",\n" + << indent << " \"engine_status_message\": " << json_string(engine_metadata.status_message) << ",\n" << indent << " \"device\": " << json_string(config.device) << ",\n" << indent << " \"input_mode\": " << json_string(config.input_mode()) << ",\n" << indent << " \"input_preprocess\": " << json_string(config.input_preprocess()) << ",\n" @@ -386,8 +422,21 @@ void write_runtime_health_snapshot_json( << indent << " \"latency_p95_ms\": " << benchmark_result.p95_ms << ",\n" << indent << " \"latency_p99_ms\": " << benchmark_result.p99_ms << ",\n" << indent << " \"fps\": " << benchmark_result.fps << ",\n" + << indent << " \"latency_budget_ms\": "; + if (config.agent_latency_budget_ms > 0) { + output << config.agent_latency_budget_ms; + } else { + output << "null"; + } + output + << ",\n" + << indent << " \"latency_budget_exceeded\": " << (exceeded_latency_budget ? "true" : "false") << ",\n" + << indent << " \"deadline_missed\": " << (should_mark_deadline_missed(config, benchmark_result) ? "true" : "false") << ",\n" << indent << " \"power_mode\": " << json_string(config.power_mode) << ",\n" << indent << " \"jetson_clocks\": " << json_string(config.jetson_clocks) << ",\n" + << indent << " \"tegrastats_status\": " << json_string(tegrastats_summary.status) << ",\n" + << indent << " \"tegrastats_sample_count\": " << tegrastats_summary.sample_count << ",\n" + << indent << " \"thermal_memory_evidence_available\": " << ((tegrastats_summary.status == "parsed") ? "true" : "false") << ",\n" << indent << " \"timeout_policy\": " << json_string(config.timeout_ms > 0 ? "latency_threshold" : "not_configured") << ",\n" << indent << " \"timeout_budget_ms\": "; @@ -414,10 +463,21 @@ void write_runtime_error_classification_json( << indent << " \"schema_version\": \"inferedge-runtime-error-v1\",\n" << indent << " \"status\": " << json_string((benchmark_result.success && !observed_timeout) ? "none" : "classified") << ",\n" << indent << " \"category\": " << json_string(runtime_error_category(config, benchmark_result)) << ",\n" + << indent << " \"severity\": " << json_string(runtime_error_severity(config, benchmark_result)) << ",\n" << indent << " \"message\": " << json_string(observed_timeout ? "mean latency exceeded configured timeout threshold" : (benchmark_result.success ? "" : benchmark_result.message)) << ",\n" + << indent << " \"observed_mean_ms\": " << benchmark_result.mean_ms << ",\n" + << indent << " \"timeout_budget_ms\": "; + if (config.timeout_ms > 0) { + output << config.timeout_ms; + } else { + output << "null"; + } + output + << ",\n" << indent << " \"timeout_observed\": " << (observed_timeout ? "true" : "false") << ",\n" - << indent << " \"retryable\": " << (observed_timeout ? "true" : "false") << "\n" + << indent << " \"retryable\": " << (observed_timeout ? "true" : "false") << ",\n" + << indent << " \"retry_hint\": " << json_string(runtime_retry_hint(config, benchmark_result)) << "\n" << indent << "}"; } @@ -431,48 +491,94 @@ void write_runtime_events_json( const std::string indent(static_cast(indent_spaces), ' '); const std::string item_indent(static_cast(indent_spaces + 2), ' '); const bool observed_timeout = timeout_observed(config, benchmark_result); + const bool exceeded_latency_budget = latency_budget_exceeded(config, benchmark_result); + int event_index = 0; output << "[\n" << item_indent << "{\n" + << item_indent << " \"schema_version\": \"inferedge-runtime-event-v1\",\n" + << item_indent << " \"event_index\": " << event_index++ << ",\n" << item_indent << " \"type\": \"runtime_configured\",\n" << item_indent << " \"status\": \"ok\",\n" << item_indent << " \"engine_backend\": " << json_string(engine_metadata.backend) << ",\n" + << item_indent << " \"engine_available\": " << (engine_metadata.available ? "true" : "false") << ",\n" + << item_indent << " \"engine_status_message\": " << json_string(engine_metadata.status_message) << ",\n" << item_indent << " \"device\": " << json_string(config.device) << ",\n" - << item_indent << " \"input_mode\": " << json_string(config.input_mode()) << "\n" + << item_indent << " \"input_mode\": " << json_string(config.input_mode()) << ",\n" + << item_indent << " \"timeout_policy\": " + << json_string(config.timeout_ms > 0 ? "latency_threshold" : "not_configured") << "\n" << item_indent << "},\n" << item_indent << "{\n" + << item_indent << " \"schema_version\": \"inferedge-runtime-event-v1\",\n" + << item_indent << " \"event_index\": " << event_index++ << ",\n" << item_indent << " \"type\": \"benchmark_completed\",\n" << item_indent << " \"status\": " << json_string(benchmark_result.status) << ",\n" << item_indent << " \"success\": " << (benchmark_result.success ? "true" : "false") << ",\n" << item_indent << " \"warmup\": " << benchmark_result.warmup_runs << ",\n" << item_indent << " \"runs\": " << benchmark_result.timed_runs << ",\n" - << item_indent << " \"mean_ms\": " << benchmark_result.mean_ms << "\n" + << item_indent << " \"mean_ms\": " << benchmark_result.mean_ms << ",\n" + << item_indent << " \"p95_ms\": " << benchmark_result.p95_ms << ",\n" + << item_indent << " \"p99_ms\": " << benchmark_result.p99_ms << ",\n" + << item_indent << " \"fps\": " << benchmark_result.fps << ",\n" + << item_indent << " \"latency_budget_ms\": "; + if (config.agent_latency_budget_ms > 0) { + output << config.agent_latency_budget_ms; + } else { + output << "null"; + } + output + << ",\n" + << item_indent << " \"latency_budget_exceeded\": " << (exceeded_latency_budget ? "true" : "false") << ",\n" + << item_indent << " \"deadline_missed\": " << (should_mark_deadline_missed(config, benchmark_result) ? "true" : "false") << "\n" << item_indent << "},\n" << item_indent << "{\n" + << item_indent << " \"schema_version\": \"inferedge-runtime-event-v1\",\n" + << item_indent << " \"event_index\": " << event_index++ << ",\n" << item_indent << " \"type\": \"runtime_error_classified\",\n" << item_indent << " \"status\": " << json_string((benchmark_result.success && !observed_timeout) ? "none" : "classified") << ",\n" << item_indent << " \"category\": " << json_string(runtime_error_category(config, benchmark_result)) << ",\n" + << item_indent << " \"severity\": " << json_string(runtime_error_severity(config, benchmark_result)) << ",\n" << item_indent << " \"timeout_policy\": " << json_string(config.timeout_ms > 0 ? "latency_threshold" : "not_configured") << ",\n" - << item_indent << " \"timeout_observed\": " << (observed_timeout ? "true" : "false") << "\n" + << item_indent << " \"timeout_budget_ms\": "; + if (config.timeout_ms > 0) { + output << config.timeout_ms; + } else { + output << "null"; + } + output + << ",\n" + << item_indent << " \"observed_mean_ms\": " << benchmark_result.mean_ms << ",\n" + << item_indent << " \"timeout_observed\": " << (observed_timeout ? "true" : "false") << ",\n" + << item_indent << " \"retryable\": " << (observed_timeout ? "true" : "false") << ",\n" + << item_indent << " \"retry_hint\": " << json_string(runtime_retry_hint(config, benchmark_result)) << "\n" << item_indent << "},\n"; if (!config.agent_manifest_path.empty()) { output << item_indent << "{\n" + << item_indent << " \"schema_version\": \"inferedge-runtime-event-v1\",\n" + << item_indent << " \"event_index\": " << event_index++ << ",\n" << item_indent << " \"type\": \"agent_context_recorded\",\n" << item_indent << " \"status\": " << json_string(config.agent_manifest_applied ? "ok" : "provided") << ",\n" << item_indent << " \"agent_id\": " << json_string(config.agent_id) << ",\n" - << item_indent << " \"task_id\": " << json_string(config.agent_task_id) << "\n" + << item_indent << " \"task_id\": " << json_string(config.agent_task_id) << ",\n" + << item_indent << " \"deadline_missed\": " << (should_mark_deadline_missed(config, benchmark_result) ? "true" : "false") << ",\n" + << item_indent << " \"fallback_used\": " << (config.agent_fallback_used ? "true" : "false") << "\n" << item_indent << "},\n"; } output << item_indent << "{\n" + << item_indent << " \"schema_version\": \"inferedge-runtime-event-v1\",\n" + << item_indent << " \"event_index\": " << event_index++ << ",\n" << item_indent << " \"type\": \"tegrastats_summary\",\n" << item_indent << " \"status\": " << json_string(tegrastats_summary.status) << ",\n" - << item_indent << " \"sample_count\": " << tegrastats_summary.sample_count << "\n" + << item_indent << " \"sample_count\": " << tegrastats_summary.sample_count << ",\n" + << item_indent << " \"ram_used_mb_max\": " << tegrastats_summary.ram_used_mb_max << ",\n" + << item_indent << " \"max_temp_c\": " << tegrastats_summary.max_temp_c << ",\n" + << item_indent << " \"vdd_in_mw_max\": " << tegrastats_summary.vdd_in_mw_max << "\n" << item_indent << "}\n" << indent << "]"; } @@ -664,7 +770,7 @@ std::filesystem::path write_result_json( << "\n" << " },\n" << " \"runtime_health_snapshot\": "; - write_runtime_health_snapshot_json(output, config, engine_metadata, benchmark_result, 2); + write_runtime_health_snapshot_json(output, config, engine_metadata, benchmark_result, tegrastats_summary, 2); output << ",\n" << " \"runtime_error_classification\": "; diff --git a/tests/test_agent_runtime_result_contract.py b/tests/test_agent_runtime_result_contract.py index 9123a67..521aaa9 100644 --- a/tests/test_agent_runtime_result_contract.py +++ b/tests/test_agent_runtime_result_contract.py @@ -83,13 +83,22 @@ def test_runtime_output_records_optional_agent_block_when_manifest_is_provided(s self.assertEqual(health["schema_version"], "inferedge-runtime-health-v1") self.assertIn(health["status"], {"ok", "degraded", "error"}) self.assertEqual(health["engine_backend"], "onnxruntime") + self.assertIn("engine_available", health) + self.assertIn("engine_status_message", health) self.assertEqual(health["device"], "cpu") self.assertEqual(health["timeout_policy"], "latency_threshold") self.assertEqual(health["timeout_budget_ms"], 1) self.assertFalse(health["timeout_observed"]) + self.assertEqual(health["latency_budget_ms"], 33) + self.assertIn("latency_budget_exceeded", health) + self.assertIn("deadline_missed", health) + self.assertEqual(health["tegrastats_status"], "not_provided") error = result["runtime_error_classification"] self.assertEqual(error["schema_version"], "inferedge-runtime-error-v1") + self.assertIn(error["severity"], {"none", "warning", "error"}) + self.assertIn("retry_hint", error) + self.assertEqual(error["timeout_budget_ms"], 1) self.assertFalse(error["timeout_observed"]) if result["success"]: self.assertEqual(error["status"], "none") @@ -100,13 +109,21 @@ def test_runtime_output_records_optional_agent_block_when_manifest_is_provided(s runtime_events = result["runtime_events"] self.assertIsInstance(runtime_events, list) + self.assertEqual([event["event_index"] for event in runtime_events], list(range(len(runtime_events)))) + self.assertTrue(all(event["schema_version"] == "inferedge-runtime-event-v1" for event in runtime_events)) event_types = {event["type"] for event in runtime_events} self.assertIn("runtime_configured", event_types) self.assertIn("benchmark_completed", event_types) self.assertIn("runtime_error_classified", event_types) self.assertIn("agent_context_recorded", event_types) + benchmark_event = next(event for event in runtime_events if event["type"] == "benchmark_completed") + self.assertEqual(benchmark_event["latency_budget_ms"], 33) + self.assertIn("latency_budget_exceeded", benchmark_event) + self.assertIn("deadline_missed", benchmark_event) error_event = next(event for event in runtime_events if event["type"] == "runtime_error_classified") self.assertEqual(error_event["timeout_policy"], "latency_threshold") + self.assertEqual(error_event["timeout_budget_ms"], 1) + self.assertIn("retry_hint", error_event) self.assertFalse(error_event["timeout_observed"]) extra = result["extra"] diff --git a/tests/test_lab_result_schema.py b/tests/test_lab_result_schema.py index d9c164c..d009f31 100644 --- a/tests/test_lab_result_schema.py +++ b/tests/test_lab_result_schema.py @@ -159,6 +159,17 @@ def validate_optional_runtime_operation_evidence(result: dict) -> None: for field in ("success", "run_once", "timeout_observed"): if not isinstance(health.get(field), bool): raise AssertionError(f"runtime_health_snapshot.{field} must be a boolean") + for field in ( + "engine_available", + "latency_budget_exceeded", + "deadline_missed", + "thermal_memory_evidence_available", + ): + if field in health and not isinstance(health[field], bool): + raise AssertionError(f"runtime_health_snapshot.{field} must be a boolean when present") + for field in ("tegrastats_status", "engine_status_message"): + if field in health and not isinstance(health[field], str): + raise AssertionError(f"runtime_health_snapshot.{field} must be a string when present") timeout_budget = health.get("timeout_budget_ms") if timeout_budget is not None and ( isinstance(timeout_budget, bool) or not isinstance(timeout_budget, int) @@ -177,12 +188,17 @@ def validate_optional_runtime_operation_evidence(result: dict) -> None: for field in ("timeout_observed", "retryable"): if not isinstance(error.get(field), bool): raise AssertionError(f"runtime_error_classification.{field} must be a boolean") + if "severity" in error and not isinstance(error["severity"], str): + raise AssertionError("runtime_error_classification.severity must be a string when present") + if "retry_hint" in error and not isinstance(error["retry_hint"], str): + raise AssertionError("runtime_error_classification.retry_hint must be a string when present") events = result.get("runtime_events") if events is not None: if not isinstance(events, list): raise AssertionError("runtime_events must be an array when present") event_types = [] + event_indexes = [] for event in events: if not isinstance(event, dict): raise AssertionError("runtime_events items must be objects") @@ -190,6 +206,14 @@ def validate_optional_runtime_operation_evidence(result: dict) -> None: if not isinstance(event_type, str) or not event_type: raise AssertionError("runtime_events[].type must be a non-empty string") event_types.append(event_type) + if "schema_version" in event and not isinstance(event["schema_version"], str): + raise AssertionError("runtime_events[].schema_version must be a string when present") + if "event_index" in event: + if isinstance(event["event_index"], bool) or not isinstance(event["event_index"], int): + raise AssertionError("runtime_events[].event_index must be an integer when present") + event_indexes.append(event["event_index"]) + if event_indexes and event_indexes != list(range(len(event_indexes))): + raise AssertionError("runtime_events[].event_index must be sequential when present") for expected in ("runtime_configured", "benchmark_completed", "runtime_error_classified"): if expected not in event_types: raise AssertionError(f"runtime_events must include {expected}")