diff --git a/README.ko.md b/README.ko.md index 1f96a05..2765c2f 100644 --- a/README.ko.md +++ b/README.ko.md @@ -75,7 +75,7 @@ Runtime은 Forge `agent_manifest.json`을 선택적으로 읽어 기존 Lab-comp 이 기능은 reliable edge agent runtime 방향의 첫 Runtime-side contract입니다. `agent_id`, `task_id`, `agent_type`, priority, latency budget, queue wait, fallback usage, telemetry context를 기록하지만 기존 `result.json`의 top-level compare/report 필드는 변경하지 않습니다. -Runtime result JSON에는 `runtime_health_snapshot`, `runtime_error_classification`, `runtime_events`도 additive evidence로 기록됩니다. 이제 health snapshot은 backend availability, latency budget/deadline observation, tegrastats evidence availability를 함께 남기고, runtime events는 sequential `event_index`를 가진 lifecycle trace로 기록됩니다. `--timeout-ms`는 latency timeout 관측 기준을 남기는 옵션이며, production request cancellation을 의미하지 않습니다. 실행이 `skipped`로 끝나면 Runtime은 `runtime_execution_skipped`, `retryable: true`, `retry_hint: check_backend_availability`를 남겨 Lab/Orchestrator가 failure handling evidence로 해석할 수 있게 합니다. +Runtime result JSON에는 `runtime_health_snapshot`, `runtime_error_classification`, `runtime_events`, `runtime_operation_summary`도 additive evidence로 기록됩니다. 이제 health snapshot은 backend availability, latency budget/deadline observation, tegrastats evidence availability와 `health_reason`을 함께 남기고, runtime events는 sequential `event_index`를 가진 lifecycle trace로 기록됩니다. `runtime_operation_summary`는 Lab/Orchestrator/AIGuard handoff용 compact index로 `risk_labels`, `evidence_gaps`, retryability, conservative `recommended_action`을 남기되 `decision_owner: lab`, `scheduler_owner: orchestrator`, `production_cancellation: false`를 유지합니다. `--timeout-ms`는 latency timeout 관측 기준을 남기는 옵션이며, production request cancellation을 의미하지 않습니다. 실행이 `skipped`로 끝나면 Runtime은 `runtime_execution_skipped`, `retryable: true`, `retry_hint: check_backend_availability`를 남겨 Lab/Orchestrator가 failure handling evidence로 해석할 수 있게 합니다. 
예시: diff --git a/README.md b/README.md index a709fa7..8c1fc8f 100644 --- a/README.md +++ b/README.md @@ -490,6 +490,7 @@ Runtime result JSON also includes additive operation evidence blocks: - `runtime_health_snapshot`: execution health, backend/device context, backend availability, run count, latency/FPS summary, latency-budget/deadline observation, tegrastats evidence availability, and explicit timeout observation status. `--timeout-ms` records an observation threshold; it does not claim production request cancellation. - `runtime_error_classification`: structured success/error category, severity, retryability, retry hint, observed mean latency, and timeout budget for downstream report context. Skipped execution is recorded as `runtime_execution_skipped` with `retry_hint: check_backend_availability` so Lab/Orchestrator can explain runtime failure handling without treating Runtime as a worker daemon. - `runtime_events`: compact indexed lifecycle event log for configuration, benchmark completion, error classification, optional agent context, and tegrastats parsing. +- `runtime_operation_summary`: compact handoff index for Lab/Orchestrator/AIGuard with `health_reason`, `risk_labels`, `evidence_gaps`, retryability, and a conservative `recommended_action`. It keeps `decision_owner: lab`, `scheduler_owner: orchestrator`, and `production_cancellation: false`. These fields are evidence for Orchestrator/Lab analysis. Runtime still does not schedule tasks or own deployment decisions. 
diff --git a/docs/agent_runtime_result_contract.md b/docs/agent_runtime_result_contract.md index 9ea286a..61227cc 100644 --- a/docs/agent_runtime_result_contract.md +++ b/docs/agent_runtime_result_contract.md @@ -9,6 +9,7 @@ Runtime may also append additive operation evidence blocks: - `runtime_health_snapshot` - `runtime_error_classification` - `runtime_events` +- `runtime_operation_summary` These blocks support downstream runtime operation reporting without turning Runtime into a scheduler or deployment decision owner. @@ -57,6 +58,7 @@ threshold. It records: - `runtime_health_snapshot.timeout_observed: true` - `runtime_error_classification.category: "runtime_timeout_observed"` - `runtime_error_classification.retryable: true` +- `runtime_operation_summary.recommended_action: "review_latency_budget_or_degrade"` Lab treats this as deployment review evidence. Runtime still only records the observation; it does not cancel production requests or make deployment @@ -86,6 +88,7 @@ When provided, Runtime appends: "runs": 1, "run_once": false, "success": true, + "health_reason": "benchmark_completed", "latency_mean_ms": 0.0, "latency_p95_ms": 0.0, "latency_p99_ms": 0.0, @@ -150,14 +153,43 @@ When provided, Runtime appends: "status": "none", "category": "none", "severity": "none", + "health_reason": "benchmark_completed", "timeout_policy": "latency_threshold", "timeout_budget_ms": 1, "observed_mean_ms": 0.0, "timeout_observed": false, "retryable": false, "retry_hint": "none" + }, + { + "schema_version": "inferedge-runtime-event-v1", + "event_index": 3, + "type": "runtime_operation_summary_recorded", + "status": "ok", + "health_reason": "benchmark_completed", + "recommended_action": "none", + "risk_labels": [], + "evidence_gaps": ["thermal_memory_evidence_missing"] } ], + "runtime_operation_summary": { + "schema_version": "inferedge-runtime-operation-summary-v1", + "observation_scope": "single_runtime_result", + "decision_owner": "lab", + "scheduler_owner": "orchestrator", 
+ "production_cancellation": false, + "health_status": "ok", + "health_reason": "benchmark_completed", + "error_category": "none", + "retryable": false, + "recommended_action": "none", + "risk_labels": [], + "evidence_gaps": ["thermal_memory_evidence_missing"], + "timeout_observed": false, + "latency_budget_exceeded": false, + "deadline_missed": false, + "thermal_memory_evidence_available": false + }, "agent": { "schema_version": "inferedge-runtime-agent-task-v1", "source_contract": "inferedge-agent-manifest-v1", @@ -220,7 +252,11 @@ When provided, Runtime appends: - `execution_status` defaults to the Runtime benchmark status unless overridden. - `runtime_health_snapshot`, `runtime_error_classification`, and `runtime_events` are additive and safe for existing consumers to ignore. - `runtime_health_snapshot` includes backend availability, latency-budget/deadline observation, timeout observation, and tegrastats evidence availability when those values are known. +- `runtime_health_snapshot.health_reason` gives a compact reason such as `benchmark_completed`, `backend_unavailable_or_not_enabled`, `runtime_execution_skipped`, or `timeout_threshold_exceeded`. - `runtime_events` uses additive `inferedge-runtime-event-v1` entries with sequential `event_index` values so Lab/Orchestrator reports can show a compact lifecycle trace. +- `runtime_operation_summary` is an additive handoff index for Lab/Orchestrator/AIGuard. It repeats the health reason, retryability, risk labels, evidence gaps, and a conservative `recommended_action` without making the deployment decision itself. +- `runtime_operation_summary.decision_owner` must remain `lab`, and `scheduler_owner` must remain `orchestrator`. +- `runtime_operation_summary.production_cancellation` is always `false`; Runtime records observations only. - Runtime does not claim production request cancellation. 
`--timeout-ms` is an observation threshold: if a successful benchmark mean latency exceeds the configured threshold, Runtime records `timeout_observed: true`, `runtime_error_classification.category: runtime_timeout_observed`, and `retryable: true` for downstream reliability reporting. - If execution is skipped because Runtime cannot complete the configured benchmark, Runtime records `runtime_error_classification.category: runtime_execution_skipped`, `severity: warning`, `retryable: true`, and `retry_hint: check_backend_availability`. This is failure-handling evidence for Lab/Orchestrator reporting, not a production worker retry loop. - Without `--timeout-ms`, results record `timeout_policy: not_configured`, `timeout_budget_ms: null`, and `timeout_observed: false`. diff --git a/scripts/smoke_default.sh b/scripts/smoke_default.sh index ed04b52..7a3fe11 100755 --- a/scripts/smoke_default.sh +++ b/scripts/smoke_default.sh @@ -62,6 +62,7 @@ assert data["jetson_evidence"]["tegrastats_summary"]["status"] == "not_provided" health = data["runtime_health_snapshot"] assert health["status"] == "degraded", health assert health["success"] is False +assert health["health_reason"] == "backend_unavailable_or_not_enabled", health assert health["timeout_policy"] == "not_configured" assert health["timeout_observed"] is False error = data["runtime_error_classification"] @@ -73,6 +74,21 @@ assert error["retry_hint"] == "check_backend_availability", error events = {event["type"]: event for event in data["runtime_events"]} assert events["runtime_error_classified"]["category"] == "runtime_execution_skipped" assert events["runtime_error_classified"]["retryable"] is True +assert events["runtime_error_classified"]["health_reason"] == health["health_reason"] +summary_event = events["runtime_operation_summary_recorded"] +assert summary_event["health_reason"] == health["health_reason"], summary_event +assert summary_event["recommended_action"] == "check_backend_availability", summary_event 
+operation = data["runtime_operation_summary"] +assert operation["schema_version"] == "inferedge-runtime-operation-summary-v1", operation +assert operation["decision_owner"] == "lab", operation +assert operation["scheduler_owner"] == "orchestrator", operation +assert operation["production_cancellation"] is False, operation +assert operation["health_status"] == "degraded", operation +assert operation["health_reason"] == health["health_reason"], operation +assert operation["recommended_action"] == "check_backend_availability", operation +assert "runtime_execution_skipped" in operation["risk_labels"], operation +assert "backend_unavailable" in operation["risk_labels"], operation +assert "timeout_policy_not_configured" in operation["evidence_gaps"], operation PY INFEREDGE_RUNTIME_RESULT_JSON="${OUTPUT_PATH}" python3 tests/test_lab_result_schema.py @@ -108,6 +124,7 @@ health = data["runtime_health_snapshot"] assert health["timeout_policy"] == "latency_threshold" assert health["timeout_budget_ms"] == 1 assert health["timeout_observed"] is False +assert "health_reason" in health assert health["latency_budget_ms"] == 33 assert "latency_budget_exceeded" in health assert "deadline_missed" in health @@ -120,8 +137,17 @@ assert "retry_hint" in error events = {event["type"]: event for event in data["runtime_events"]} assert events["runtime_error_classified"]["timeout_policy"] == "latency_threshold" assert events["runtime_error_classified"]["timeout_budget_ms"] == 1 +assert events["runtime_error_classified"]["health_reason"] == health["health_reason"] assert events["benchmark_completed"]["latency_budget_ms"] == 33 +assert events["runtime_operation_summary_recorded"]["health_reason"] == health["health_reason"] assert [event["event_index"] for event in data["runtime_events"]] == list(range(len(data["runtime_events"]))) +operation = data["runtime_operation_summary"] +assert operation["decision_owner"] == "lab", operation +assert operation["scheduler_owner"] == "orchestrator", 
operation +assert operation["production_cancellation"] is False, operation +assert operation["health_reason"] == health["health_reason"], operation +assert isinstance(operation["risk_labels"], list), operation +assert isinstance(operation["evidence_gaps"], list), operation assert data["extra"]["agent_manifest_recorded"] is True PY diff --git a/src/result_writer.cpp b/src/result_writer.cpp index 28c6c75..24eddf8 100644 --- a/src/result_writer.cpp +++ b/src/result_writer.cpp @@ -259,6 +259,17 @@ void write_tegrastats_summary_json(std::ostream& output, const TegrastatsSummary << indent << "}"; } +void write_string_array_json(std::ostream& output, const std::vector<std::string>& values) { + output << '['; + for (std::size_t i = 0; i < values.size(); ++i) { + if (i > 0) { + output << ", "; + } + output << json_string(values[i]); + } + output << ']'; +} + bool should_mark_deadline_missed(const RuntimeConfig& config, const BenchmarkResult& benchmark_result) { if (config.agent_deadline_missed_overridden) { return config.agent_deadline_missed; @@ -398,6 +409,84 @@ bool latency_budget_exceeded(const RuntimeConfig& config, const BenchmarkResult& benchmark_result.mean_ms > static_cast<double>(config.agent_latency_budget_ms); } +std::string runtime_health_reason( + const RuntimeConfig& config, + const EngineMetadata& engine_metadata, + const BenchmarkResult& benchmark_result) { + if (timeout_observed(config, benchmark_result)) { + return "timeout_threshold_exceeded"; + } + if (benchmark_result.success) { + return "benchmark_completed"; + } + if (benchmark_result.status == "skipped" && !engine_metadata.available) { + return "backend_unavailable_or_not_enabled"; + } + if (benchmark_result.status == "skipped") { + return "runtime_execution_skipped"; + } + return "runtime_execution_error"; +} + +std::vector<std::string> runtime_operation_risk_labels( + const RuntimeConfig& config, + const EngineMetadata& engine_metadata, + const BenchmarkResult& benchmark_result) { + std::vector<std::string> labels; + if 
(!benchmark_result.success) { + if (benchmark_result.status == "skipped") { + labels.push_back("runtime_execution_skipped"); + } else { + labels.push_back("runtime_execution_error"); + } + if (!engine_metadata.available) { + labels.push_back("backend_unavailable"); + } + } + if (timeout_observed(config, benchmark_result)) { + labels.push_back("runtime_timeout_observed"); + } + if (latency_budget_exceeded(config, benchmark_result)) { + labels.push_back("latency_budget_exceeded"); + } + if (should_mark_deadline_missed(config, benchmark_result)) { + labels.push_back("deadline_missed"); + } + return labels; +} + +std::vector<std::string> runtime_operation_evidence_gaps( + const RuntimeConfig& config, + const TegrastatsSummary& tegrastats_summary) { + std::vector<std::string> gaps; + if (config.timeout_ms <= 0) { + gaps.push_back("timeout_policy_not_configured"); + } + if (tegrastats_summary.status != "parsed") { + gaps.push_back("thermal_memory_evidence_missing"); + } + return gaps; +} + +std::string runtime_operation_recommended_action( + const RuntimeConfig& config, + const EngineMetadata& engine_metadata, + const BenchmarkResult& benchmark_result) { + if (timeout_observed(config, benchmark_result) || latency_budget_exceeded(config, benchmark_result)) { + return "review_latency_budget_or_degrade"; + } + if (benchmark_result.success) { + return "none"; + } + if (benchmark_result.status == "skipped" && !engine_metadata.available) { + return "check_backend_availability"; + } + if (benchmark_result.status == "skipped") { + return "review_runtime_configuration"; + } + return "inspect_runtime_error"; +} + void write_runtime_health_snapshot_json( std::ostream& output, const RuntimeConfig& config, @@ -422,6 +511,8 @@ void write_runtime_health_snapshot_json( << indent << " \"runs\": " << config.runs << ",\n" << indent << " \"run_once\": " << (config.run_once ? "true" : "false") << ",\n" << indent << " \"success\": " << (benchmark_result.success ? 
"true" : "false") << ",\n" + << indent << " \"health_reason\": " + << json_string(runtime_health_reason(config, engine_metadata, benchmark_result)) << ",\n" << indent << " \"latency_mean_ms\": " << benchmark_result.mean_ms << ",\n" << indent << " \"latency_p95_ms\": " << benchmark_result.p95_ms << ",\n" << indent << " \"latency_p99_ms\": " << benchmark_result.p99_ms << ",\n" @@ -455,6 +546,54 @@ void write_runtime_health_snapshot_json( << indent << "}"; } +void write_runtime_operation_summary_json( + std::ostream& output, + const RuntimeConfig& config, + const EngineMetadata& engine_metadata, + const BenchmarkResult& benchmark_result, + const TegrastatsSummary& tegrastats_summary, + int indent_spaces) { + const std::string indent(static_cast<std::size_t>(indent_spaces), ' '); + const std::vector<std::string> risk_labels = + runtime_operation_risk_labels(config, engine_metadata, benchmark_result); + const std::vector<std::string> evidence_gaps = + runtime_operation_evidence_gaps(config, tegrastats_summary); + output + << "{\n" + << indent << " \"schema_version\": \"inferedge-runtime-operation-summary-v1\",\n" + << indent << " \"observation_scope\": \"single_runtime_result\",\n" + << indent << " \"decision_owner\": \"lab\",\n" + << indent << " \"scheduler_owner\": \"orchestrator\",\n" + << indent << " \"production_cancellation\": false,\n" + << indent << " \"health_status\": " + << json_string(runtime_health_status(config, benchmark_result)) << ",\n" + << indent << " \"health_reason\": " + << json_string(runtime_health_reason(config, engine_metadata, benchmark_result)) << ",\n" + << indent << " \"error_category\": " + << json_string(runtime_error_category(config, benchmark_result)) << ",\n" + << indent << " \"retryable\": " + << (runtime_retryable(config, benchmark_result) ? 
"true" : "false") << ",\n" + << indent << " \"recommended_action\": " + << json_string(runtime_operation_recommended_action(config, engine_metadata, benchmark_result)) << ",\n" + << indent << " \"risk_labels\": "; + write_string_array_json(output, risk_labels); + output + << ",\n" + << indent << " \"evidence_gaps\": "; + write_string_array_json(output, evidence_gaps); + output + << ",\n" + << indent << " \"timeout_observed\": " + << (timeout_observed(config, benchmark_result) ? "true" : "false") << ",\n" + << indent << " \"latency_budget_exceeded\": " + << (latency_budget_exceeded(config, benchmark_result) ? "true" : "false") << ",\n" + << indent << " \"deadline_missed\": " + << (should_mark_deadline_missed(config, benchmark_result) ? "true" : "false") << ",\n" + << indent << " \"thermal_memory_evidence_available\": " + << ((tegrastats_summary.status == "parsed") ? "true" : "false") << "\n" + << indent << "}"; +} + void write_runtime_error_classification_json( std::ostream& output, const RuntimeConfig& config, @@ -543,6 +682,8 @@ void write_runtime_events_json( << item_indent << " \"status\": " << json_string((benchmark_result.success && !observed_timeout) ? "none" : "classified") << ",\n" << item_indent << " \"category\": " << json_string(runtime_error_category(config, benchmark_result)) << ",\n" << item_indent << " \"severity\": " << json_string(runtime_error_severity(config, benchmark_result)) << ",\n" + << item_indent << " \"health_reason\": " + << json_string(runtime_health_reason(config, engine_metadata, benchmark_result)) << ",\n" << item_indent << " \"timeout_policy\": " << json_string(config.timeout_ms > 0 ? 
"latency_threshold" : "not_configured") << ",\n" << item_indent << " \"timeout_budget_ms\": "; @@ -574,6 +715,24 @@ void write_runtime_events_json( } output + << item_indent << "{\n" + << item_indent << " \"schema_version\": \"inferedge-runtime-event-v1\",\n" + << item_indent << " \"event_index\": " << event_index++ << ",\n" + << item_indent << " \"type\": \"runtime_operation_summary_recorded\",\n" + << item_indent << " \"status\": " << json_string(runtime_health_status(config, benchmark_result)) << ",\n" + << item_indent << " \"health_reason\": " + << json_string(runtime_health_reason(config, engine_metadata, benchmark_result)) << ",\n" + << item_indent << " \"recommended_action\": " + << json_string(runtime_operation_recommended_action(config, engine_metadata, benchmark_result)) << ",\n" + << item_indent << " \"risk_labels\": "; + write_string_array_json(output, runtime_operation_risk_labels(config, engine_metadata, benchmark_result)); + output + << ",\n" + << item_indent << " \"evidence_gaps\": "; + write_string_array_json(output, runtime_operation_evidence_gaps(config, tegrastats_summary)); + output + << "\n" + << item_indent << "},\n" << item_indent << "{\n" << item_indent << " \"schema_version\": \"inferedge-runtime-event-v1\",\n" << item_indent << " \"event_index\": " << event_index++ << ",\n" @@ -783,6 +942,10 @@ std::filesystem::path write_result_json( << ",\n" << " \"runtime_events\": "; write_runtime_events_json(output, config, engine_metadata, benchmark_result, tegrastats_summary, 2); + output + << ",\n" + << " \"runtime_operation_summary\": "; + write_runtime_operation_summary_json(output, config, engine_metadata, benchmark_result, tegrastats_summary, 2); if (!config.agent_manifest_path.empty()) { output << ",\n" diff --git a/tests/fixtures/runtime_timeout_observed_result.json b/tests/fixtures/runtime_timeout_observed_result.json index 2fcb667..b03dfef 100644 --- a/tests/fixtures/runtime_timeout_observed_result.json +++ 
b/tests/fixtures/runtime_timeout_observed_result.json @@ -120,6 +120,7 @@ "runs": 10, "run_once": false, "success": true, + "health_reason": "timeout_threshold_exceeded", "latency_mean_ms": 12.345, "latency_p95_ms": 13.8, "latency_p99_ms": 14.2, @@ -134,34 +135,81 @@ "schema_version": "inferedge-runtime-error-v1", "status": "classified", "category": "runtime_timeout_observed", + "severity": "warning", "message": "mean latency exceeded configured timeout threshold", + "observed_mean_ms": 12.345, + "timeout_budget_ms": 10, "timeout_observed": true, - "retryable": true + "retryable": true, + "retry_hint": "retry_or_degrade" }, "runtime_events": [ { + "schema_version": "inferedge-runtime-event-v1", + "event_index": 0, "type": "runtime_configured", "status": "ok", "engine_backend": "onnxruntime", "device": "cpu", - "input_mode": "dummy" + "input_mode": "dummy", + "timeout_policy": "latency_threshold" }, { + "schema_version": "inferedge-runtime-event-v1", + "event_index": 1, "type": "benchmark_completed", "status": "success", "success": true, "warmup": 3, "runs": 10, - "mean_ms": 12.345 + "mean_ms": 12.345, + "latency_budget_exceeded": false, + "deadline_missed": false }, { + "schema_version": "inferedge-runtime-event-v1", + "event_index": 2, "type": "runtime_error_classified", "status": "classified", "category": "runtime_timeout_observed", + "severity": "warning", + "health_reason": "timeout_threshold_exceeded", "timeout_policy": "latency_threshold", - "timeout_observed": true + "timeout_budget_ms": 10, + "observed_mean_ms": 12.345, + "timeout_observed": true, + "retryable": true, + "retry_hint": "retry_or_degrade" + }, + { + "schema_version": "inferedge-runtime-event-v1", + "event_index": 3, + "type": "runtime_operation_summary_recorded", + "status": "degraded", + "health_reason": "timeout_threshold_exceeded", + "recommended_action": "review_latency_budget_or_degrade", + "risk_labels": ["runtime_timeout_observed"], + "evidence_gaps": ["thermal_memory_evidence_missing"] 
} ], + "runtime_operation_summary": { + "schema_version": "inferedge-runtime-operation-summary-v1", + "observation_scope": "single_runtime_result", + "decision_owner": "lab", + "scheduler_owner": "orchestrator", + "production_cancellation": false, + "health_status": "degraded", + "health_reason": "timeout_threshold_exceeded", + "error_category": "runtime_timeout_observed", + "retryable": true, + "recommended_action": "review_latency_budget_or_degrade", + "risk_labels": ["runtime_timeout_observed"], + "evidence_gaps": ["thermal_memory_evidence_missing"], + "timeout_observed": true, + "latency_budget_exceeded": false, + "deadline_missed": false, + "thermal_memory_evidence_available": false + }, "extra": { "runtime": "inferedge-runtime", "json_export": "enabled", diff --git a/tests/test_agent_runtime_result_contract.py b/tests/test_agent_runtime_result_contract.py index 641bd41..58486eb 100644 --- a/tests/test_agent_runtime_result_contract.py +++ b/tests/test_agent_runtime_result_contract.py @@ -82,6 +82,7 @@ def test_runtime_output_records_optional_agent_block_when_manifest_is_provided(s health = result["runtime_health_snapshot"] self.assertEqual(health["schema_version"], "inferedge-runtime-health-v1") self.assertIn(health["status"], {"ok", "degraded", "error"}) + self.assertIn("health_reason", health) self.assertEqual(health["engine_backend"], "onnxruntime") self.assertIn("engine_available", health) self.assertIn("engine_status_message", health) @@ -123,6 +124,7 @@ def test_runtime_output_records_optional_agent_block_when_manifest_is_provided(s self.assertIn("runtime_configured", event_types) self.assertIn("benchmark_completed", event_types) self.assertIn("runtime_error_classified", event_types) + self.assertIn("runtime_operation_summary_recorded", event_types) self.assertIn("agent_context_recorded", event_types) benchmark_event = next(event for event in runtime_events if event["type"] == "benchmark_completed") self.assertEqual(benchmark_event["latency_budget_ms"], 
33) @@ -131,9 +133,39 @@ def test_runtime_output_records_optional_agent_block_when_manifest_is_provided(s error_event = next(event for event in runtime_events if event["type"] == "runtime_error_classified") self.assertEqual(error_event["timeout_policy"], "latency_threshold") self.assertEqual(error_event["timeout_budget_ms"], 1) + self.assertEqual(error_event["health_reason"], health["health_reason"]) self.assertIn("retry_hint", error_event) self.assertFalse(error_event["timeout_observed"]) self.assertEqual(error_event["retryable"], error["retryable"]) + operation_event = next( + event for event in runtime_events if event["type"] == "runtime_operation_summary_recorded" + ) + self.assertEqual(operation_event["health_reason"], health["health_reason"]) + self.assertIn("recommended_action", operation_event) + self.assertIsInstance(operation_event["risk_labels"], list) + self.assertIsInstance(operation_event["evidence_gaps"], list) + + operation_summary = result["runtime_operation_summary"] + self.assertEqual( + operation_summary["schema_version"], + "inferedge-runtime-operation-summary-v1", + ) + self.assertEqual(operation_summary["observation_scope"], "single_runtime_result") + self.assertEqual(operation_summary["decision_owner"], "lab") + self.assertEqual(operation_summary["scheduler_owner"], "orchestrator") + self.assertFalse(operation_summary["production_cancellation"]) + self.assertEqual(operation_summary["health_status"], health["status"]) + self.assertEqual(operation_summary["health_reason"], health["health_reason"]) + self.assertEqual(operation_summary["retryable"], error["retryable"]) + self.assertIn("recommended_action", operation_summary) + self.assertIsInstance(operation_summary["risk_labels"], list) + self.assertIsInstance(operation_summary["evidence_gaps"], list) + self.assertEqual(operation_summary["timeout_observed"], health["timeout_observed"]) + self.assertEqual( + operation_summary["latency_budget_exceeded"], + health["latency_budget_exceeded"], + ) + 
self.assertEqual(operation_summary["deadline_missed"], health["deadline_missed"]) extra = result["extra"] self.assertTrue(extra["agent_manifest_recorded"]) diff --git a/tests/test_lab_result_schema.py b/tests/test_lab_result_schema.py index d009f31..eb78437 100644 --- a/tests/test_lab_result_schema.py +++ b/tests/test_lab_result_schema.py @@ -218,6 +218,45 @@ def validate_optional_runtime_operation_evidence(result: dict) -> None: if expected not in event_types: raise AssertionError(f"runtime_events must include {expected}") + operation_summary = result.get("runtime_operation_summary") + if operation_summary is not None: + if not isinstance(operation_summary, dict): + raise AssertionError("runtime_operation_summary must be an object when present") + for field in ( + "schema_version", + "observation_scope", + "decision_owner", + "scheduler_owner", + "health_status", + "health_reason", + "error_category", + "recommended_action", + ): + if field not in operation_summary: + raise AssertionError(f"runtime_operation_summary.{field} is required") + if not isinstance(operation_summary[field], str): + raise AssertionError(f"runtime_operation_summary.{field} must be a string") + if operation_summary["schema_version"] != "inferedge-runtime-operation-summary-v1": + raise AssertionError("runtime_operation_summary.schema_version is invalid") + if operation_summary["decision_owner"] != "lab": + raise AssertionError("runtime_operation_summary.decision_owner must remain lab") + if operation_summary["scheduler_owner"] != "orchestrator": + raise AssertionError("runtime_operation_summary.scheduler_owner must remain orchestrator") + for field in ( + "production_cancellation", + "retryable", + "timeout_observed", + "latency_budget_exceeded", + "deadline_missed", + "thermal_memory_evidence_available", + ): + if not isinstance(operation_summary.get(field), bool): + raise AssertionError(f"runtime_operation_summary.{field} must be a boolean") + for field in ("risk_labels", "evidence_gaps"): 
+ values = operation_summary.get(field) + if not isinstance(values, list) or not all(isinstance(item, str) for item in values): + raise AssertionError(f"runtime_operation_summary.{field} must be a string array") + class JetsonEvidenceContractTest(unittest.TestCase): def test_runtime_binary_parses_tegrastats_log_when_available(self):