From 8261e36eea1d6ac5b5da9d9820b198be37970656 Mon Sep 17 00:00:00 2001 From: hyeokjun32 Date: Sat, 23 May 2026 22:20:50 +0900 Subject: [PATCH] Add runtime telemetry coverage metadata --- README.ko.md | 2 +- README.md | 1 + scripts/smoke_default.sh | 14 ++++ src/result_writer.cpp | 85 ++++++++++++++++++++- tests/test_agent_runtime_result_contract.py | 19 +++++ tests/test_lab_result_schema.py | 42 ++++++++++ 6 files changed, 161 insertions(+), 2 deletions(-) diff --git a/README.ko.md b/README.ko.md index 2765c2f..100791d 100644 --- a/README.ko.md +++ b/README.ko.md @@ -75,7 +75,7 @@ Runtime은 Forge `agent_manifest.json`을 선택적으로 읽어 기존 Lab-comp 이 기능은 reliable edge agent runtime 방향의 첫 Runtime-side contract입니다. `agent_id`, `task_id`, `agent_type`, priority, latency budget, queue wait, fallback usage, telemetry context를 기록하지만 기존 `result.json`의 top-level compare/report 필드는 변경하지 않습니다. -Runtime result JSON에는 `runtime_health_snapshot`, `runtime_error_classification`, `runtime_events`, `runtime_operation_summary`도 additive evidence로 기록됩니다. 이제 health snapshot은 backend availability, latency budget/deadline observation, tegrastats evidence availability와 `health_reason`을 함께 남기고, runtime events는 sequential `event_index`를 가진 lifecycle trace로 기록됩니다. `runtime_operation_summary`는 Lab/Orchestrator/AIGuard handoff용 compact index로 `risk_labels`, `evidence_gaps`, retryability, conservative `recommended_action`을 남기되 `decision_owner: lab`, `scheduler_owner: orchestrator`, `production_cancellation: false`를 유지합니다. `--timeout-ms`는 latency timeout 관측 기준을 남기는 옵션이며, production request cancellation을 의미하지 않습니다. 실행이 `skipped`로 끝나면 Runtime은 `runtime_execution_skipped`, `retryable: true`, `retry_hint: check_backend_availability`를 남겨 Lab/Orchestrator가 failure handling evidence로 해석할 수 있게 합니다. +Runtime result JSON에는 `runtime_health_snapshot`, `runtime_error_classification`, `runtime_events`, `runtime_operation_summary`도 additive evidence로 기록됩니다. 이제 health snapshot은 backend availability, latency budget/deadline observation, tegrastats evidence availability와 `health_reason`을 함께 남기고, runtime events는 sequential `event_index`를 가진 lifecycle trace로 기록됩니다. `runtime_operation_summary`는 Lab/Orchestrator/AIGuard handoff용 compact index로 `risk_labels`, `evidence_gaps`, retryability, conservative `recommended_action`을 남기되 `decision_owner: lab`, `scheduler_owner: orchestrator`, `production_cancellation: false`를 유지합니다. `runtime_telemetry.coverage`는 expected / observed / missing telemetry fields를 기록하고 `comparability_owner: edgeenv`, `missing_telemetry_is_failure: false`를 명시합니다. `--timeout-ms`는 latency timeout 관측 기준을 남기는 옵션이며, production request cancellation을 의미하지 않습니다. 실행이 `skipped`로 끝나면 Runtime은 `runtime_execution_skipped`, `retryable: true`, `retry_hint: check_backend_availability`를 남겨 Lab/Orchestrator가 failure handling evidence로 해석할 수 있게 합니다. 예시: diff --git a/README.md b/README.md index fb689b6..c12cf0c 100644 --- a/README.md +++ b/README.md @@ -500,6 +500,7 @@ Runtime Intelligence boundary: - `runtime_telemetry.schema_version` is `inferedge-runtime-telemetry-v1`. - `collection_mode` starts as `single_result_export`; EdgeEnv owns telemetry history accumulation and comparability-first regression. - Missing device telemetry remains explicit in `missing_fields` instead of being fabricated. +- `runtime_telemetry.coverage` records expected / observed / missing telemetry fields, with `comparability_owner: edgeenv` and `missing_telemetry_is_failure: false`. - Runtime exports telemetry evidence only. AIGuard may turn it into deterministic anomaly evidence, and Lab remains the deployment decision owner. The committed fixture diff --git a/scripts/smoke_default.sh b/scripts/smoke_default.sh index 156bb4d..305ae24 100755 --- a/scripts/smoke_default.sh +++ b/scripts/smoke_default.sh @@ -98,6 +98,16 @@ assert telemetry["latency"]["mean_ms"] == data["mean_ms"], telemetry assert telemetry["operation"]["timeout_observed"] == health["timeout_observed"], telemetry assert telemetry["production_monitoring"] is False, telemetry assert "queue_depth" in telemetry["missing_fields"], telemetry +coverage = telemetry["coverage"] +assert coverage["schema_version"] == "inferedge-runtime-telemetry-coverage-v1", coverage +assert coverage["comparability_owner"] == "edgeenv", coverage +assert coverage["missing_telemetry_is_failure"] is False, coverage +assert "queue_depth" in coverage["expected_fields"], coverage +assert "queue_depth" in coverage["missing_fields"], coverage +assert "telemetry_timestamp" in coverage["observed_fields"], coverage +assert coverage["missing_fields"] == telemetry["missing_fields"], coverage +assert events["runtime_telemetry_recorded"]["observed_field_count"] == coverage["observed_field_count"] +assert events["runtime_telemetry_recorded"]["missing_field_count"] == coverage["missing_field_count"] assert events["runtime_telemetry_recorded"]["schema"] == "inferedge-runtime-telemetry-v1" PY @@ -164,6 +174,10 @@ assert telemetry["operation"]["timeout_observed"] == health["timeout_observed"], assert telemetry["operation"]["latency_budget_exceeded"] == health["latency_budget_exceeded"], telemetry assert telemetry["operation"]["deadline_missed"] == health["deadline_missed"], telemetry assert telemetry["latency"]["p99_ms"] == data["p99_ms"], telemetry +coverage = telemetry["coverage"] +assert coverage["schema_version"] == "inferedge-runtime-telemetry-coverage-v1", coverage +assert coverage["comparability_owner"] == "edgeenv", coverage +assert coverage["missing_fields"] == telemetry["missing_fields"], coverage assert "runtime_telemetry_recorded" in events, events assert data["extra"]["agent_manifest_recorded"] is True PY diff --git a/src/result_writer.cpp b/src/result_writer.cpp index ba04ea3..79400df 100644 --- a/src/result_writer.cpp +++ b/src/result_writer.cpp @@ -270,6 +270,10 @@ void write_string_array_json(std::ostream& output, const std::vector& values, const std::string& target) { + return std::find(values.begin(), values.end(), target) != values.end(); +} + bool should_mark_deadline_missed(const RuntimeConfig& config, const BenchmarkResult& benchmark_result) { if (config.agent_deadline_missed_overridden) { return config.agent_deadline_missed; @@ -485,6 +489,77 @@ std::vector runtime_telemetry_missing_fields( return fields; } +std::vector runtime_telemetry_expected_fields() { + return { + "gpu_temperature_c", + "cpu_temperature_c", + "thermal_max_temperature_c", + "gpu_memory_used_mb", + "ram_used_mb", + "power_mode", + "throttling_detected", + "queue_depth", + "inference_interval_ms", + "runtime_uptime_sec", + "rolling_latency_mean_ms", + "rolling_latency_std_ms", + "telemetry_timestamp", + "execution_sequence_id", + }; +} + +std::vector runtime_telemetry_observed_fields( + const TegrastatsSummary& tegrastats_summary) { + const std::vector missing_fields = + runtime_telemetry_missing_fields(tegrastats_summary); + std::vector observed_fields; + for (const std::string& field : runtime_telemetry_expected_fields()) { + if (!contains_string(missing_fields, field)) { + observed_fields.push_back(field); + } + } + return observed_fields; +} + +void write_runtime_telemetry_coverage_json( + std::ostream& output, + const TegrastatsSummary& tegrastats_summary, + int indent_spaces) { + const std::string indent(static_cast(indent_spaces), ' '); + const std::vector expected_fields = runtime_telemetry_expected_fields(); + const std::vector observed_fields = + runtime_telemetry_observed_fields(tegrastats_summary); + const std::vector missing_fields = + runtime_telemetry_missing_fields(tegrastats_summary); + const double coverage_ratio = expected_fields.empty() + ? 0.0 + : static_cast(observed_fields.size()) / + static_cast(expected_fields.size()); + + output + << "{\n" + << indent << " \"schema_version\": \"inferedge-runtime-telemetry-coverage-v1\",\n" + << indent << " \"coverage_scope\": \"single_result_export\",\n" + << indent << " \"comparability_owner\": \"edgeenv\",\n" + << indent << " \"missing_telemetry_is_failure\": false,\n" + << indent << " \"expected_fields\": "; + write_string_array_json(output, expected_fields); + output + << ",\n" + << indent << " \"observed_fields\": "; + write_string_array_json(output, observed_fields); + output + << ",\n" + << indent << " \"missing_fields\": "; + write_string_array_json(output, missing_fields); + output + << ",\n" + << indent << " \"observed_field_count\": " << observed_fields.size() << ",\n" + << indent << " \"missing_field_count\": " << missing_fields.size() << ",\n" + << indent << " \"coverage_ratio\": " << coverage_ratio << "\n" + << indent << "}"; +} + std::string runtime_operation_recommended_action( const RuntimeConfig& config, const EngineMetadata& engine_metadata, @@ -709,6 +784,10 @@ void write_runtime_telemetry_json( << indent << " },\n" << indent << " \"missing_fields\": "; write_string_array_json(output, runtime_telemetry_missing_fields(tegrastats_summary)); + output + << ",\n" + << indent << " \"coverage\": "; + write_runtime_telemetry_coverage_json(output, tegrastats_summary, indent_spaces + 2); output << ",\n" << indent << " \"production_monitoring\": false\n" @@ -866,7 +945,11 @@ void write_runtime_events_json( << item_indent << " \"missing_fields\": "; write_string_array_json(output, runtime_telemetry_missing_fields(tegrastats_summary)); output - << "\n" + << ",\n" + << item_indent << " \"observed_field_count\": " + << runtime_telemetry_observed_fields(tegrastats_summary).size() << ",\n" + << item_indent << " \"missing_field_count\": " + << runtime_telemetry_missing_fields(tegrastats_summary).size() << "\n" << item_indent << "},\n" << item_indent << "{\n" << item_indent << " \"schema_version\": \"inferedge-runtime-event-v1\",\n" diff --git a/tests/test_agent_runtime_result_contract.py b/tests/test_agent_runtime_result_contract.py index 9ad8489..8e41fdb 100644 --- a/tests/test_agent_runtime_result_contract.py +++ b/tests/test_agent_runtime_result_contract.py @@ -184,6 +184,25 @@ def test_runtime_output_records_optional_agent_block_when_manifest_is_provided(s self.assertEqual(telemetry["latency"]["p99_ms"], result["p99_ms"]) self.assertFalse(telemetry["production_monitoring"]) self.assertIn("queue_depth", telemetry["missing_fields"]) + coverage = telemetry["coverage"] + self.assertEqual( + coverage["schema_version"], + "inferedge-runtime-telemetry-coverage-v1", + ) + self.assertEqual(coverage["comparability_owner"], "edgeenv") + self.assertFalse(coverage["missing_telemetry_is_failure"]) + self.assertIn("queue_depth", coverage["expected_fields"]) + self.assertIn("queue_depth", coverage["missing_fields"]) + self.assertIn("telemetry_timestamp", coverage["observed_fields"]) + self.assertEqual(coverage["missing_fields"], telemetry["missing_fields"]) + self.assertEqual( + coverage["observed_field_count"], + len(coverage["observed_fields"]), + ) + self.assertEqual( + coverage["missing_field_count"], + len(coverage["missing_fields"]), + ) extra = result["extra"] self.assertTrue(extra["agent_manifest_recorded"]) diff --git a/tests/test_lab_result_schema.py b/tests/test_lab_result_schema.py index b848a26..4ed2a8b 100644 --- a/tests/test_lab_result_schema.py +++ b/tests/test_lab_result_schema.py @@ -333,6 +333,48 @@ def validate_optional_runtime_telemetry(result: dict) -> None: if not isinstance(missing_fields, list) or not all(isinstance(item, str) for item in missing_fields): raise AssertionError("runtime_telemetry.missing_fields must be a string array") + coverage = telemetry.get("coverage") + if not isinstance(coverage, dict): + raise AssertionError("runtime_telemetry.coverage must be an object") + for field in ("schema_version", "coverage_scope", "comparability_owner"): + if not isinstance(coverage.get(field), str): + raise AssertionError(f"runtime_telemetry.coverage.{field} must be a string") + if coverage["schema_version"] != "inferedge-runtime-telemetry-coverage-v1": + raise AssertionError("runtime_telemetry.coverage.schema_version is invalid") + if coverage["coverage_scope"] != "single_result_export": + raise AssertionError("runtime_telemetry.coverage.coverage_scope is invalid") + if coverage["comparability_owner"] != "edgeenv": + raise AssertionError("runtime_telemetry.coverage.comparability_owner must be edgeenv") + if coverage.get("missing_telemetry_is_failure") is not False: + raise AssertionError("runtime_telemetry.coverage.missing_telemetry_is_failure must be false") + for field in ("expected_fields", "observed_fields", "missing_fields"): + values = coverage.get(field) + if not isinstance(values, list) or not all(isinstance(item, str) for item in values): + raise AssertionError(f"runtime_telemetry.coverage.{field} must be a string array") + if coverage["missing_fields"] != missing_fields: + raise AssertionError("runtime_telemetry.coverage.missing_fields must mirror runtime_telemetry.missing_fields") + for field in ("observed_field_count", "missing_field_count"): + if isinstance(coverage.get(field), bool) or not isinstance(coverage.get(field), int): + raise AssertionError(f"runtime_telemetry.coverage.{field} must be an integer") + if coverage["observed_field_count"] != len(coverage["observed_fields"]): + raise AssertionError("runtime_telemetry.coverage.observed_field_count mismatch") + if coverage["missing_field_count"] != len(coverage["missing_fields"]): + raise AssertionError("runtime_telemetry.coverage.missing_field_count mismatch") + coverage_ratio = coverage.get("coverage_ratio") + if isinstance(coverage_ratio, bool) or not isinstance(coverage_ratio, (int, float)): + raise AssertionError("runtime_telemetry.coverage.coverage_ratio must be numeric") + for expected in ( + "telemetry_timestamp", + "execution_sequence_id", + "inference_interval_ms", + "rolling_latency_mean_ms", + "rolling_latency_std_ms", + "queue_depth", + "gpu_memory_used_mb", + ): + if expected not in coverage["expected_fields"]: + raise AssertionError(f"runtime_telemetry.coverage.expected_fields missing {expected}") + class JetsonEvidenceContractTest(unittest.TestCase): def test_runtime_binary_parses_tegrastats_log_when_available(self):