From 3b5058abc6bd2026eb8c9b8d565f395fff8bef75 Mon Sep 17 00:00:00 2001 From: hyeokjun32 Date: Sun, 24 May 2026 23:00:43 +0900 Subject: [PATCH] test: harden runtime history seed contract --- README.md | 3 ++- docs/agent_runtime_result_contract.md | 2 +- scripts/smoke_default.sh | 8 ++++++++ tests/test_agent_runtime_result_contract.py | 8 ++++++++ tests/test_lab_result_schema.py | 12 ++++++++++++ 5 files changed, 31 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ef10d61..6213f6e 100644 --- a/README.md +++ b/README.md @@ -501,7 +501,8 @@ Runtime Intelligence boundary: - `collection_mode` starts as `single_result_export`; EdgeEnv owns telemetry history accumulation and comparability-first regression. - Missing device telemetry remains explicit in `missing_fields` instead of being fabricated. - `runtime_telemetry.coverage` records expected / observed / missing telemetry fields, with `comparability_owner: edgeenv` and `missing_telemetry_is_failure: false`. -- `runtime_telemetry.history_seed` uses `inferedge-runtime-telemetry-history-seed-v1`, keeps `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`, and exposes a single replay point that EdgeEnv can later accumulate into a local telemetry history. +- `runtime_telemetry.history_seed` uses `inferedge-runtime-telemetry-history-seed-v1`, keeps `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`, and exposes a `single_result_to_history` replay point that mirrors the Runtime telemetry timestamp, sequence id, latency values, operation flags, power mode, and telemetry source for EdgeEnv accumulation. +- EdgeEnv validates and preserves this seed as `runtime_telemetry_history_seed`; Lab may display the preserved marker in a Runtime Intelligence risk report, but Runtime does not own the registry or deployment decision. - Runtime exports telemetry evidence only. AIGuard may turn it into deterministic anomaly evidence, and Lab remains the deployment decision owner. The committed fixture diff --git a/docs/agent_runtime_result_contract.md b/docs/agent_runtime_result_contract.md index 0888450..4b47392 100644 --- a/docs/agent_runtime_result_contract.md +++ b/docs/agent_runtime_result_contract.md @@ -257,7 +257,7 @@ When provided, Runtime appends: - `runtime_operation_summary` is an additive handoff index for Lab/Orchestrator/AIGuard. It repeats the health reason, retryability, risk labels, evidence gaps, and a conservative `recommended_action` without making the deployment decision itself. - `runtime_operation_summary.decision_owner` must remain `lab`, and `scheduler_owner` must remain `orchestrator`. - `runtime_operation_summary.production_cancellation` is always `false`; Runtime records observations only. -- `runtime_telemetry.history_seed` is an additive `inferedge-runtime-telemetry-history-seed-v1` block for EdgeEnv telemetry history/replay. It keeps `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`, and one single-result telemetry point so downstream tools can accumulate history without Runtime becoming a telemetry store. +- `runtime_telemetry.history_seed` is an additive `inferedge-runtime-telemetry-history-seed-v1` block for EdgeEnv telemetry history/replay. It keeps `registry_owner: edgeenv`, `decision_owner: lab`, `production_monitoring: false`, `replay_scope: single_result_to_history`, and one single-result telemetry point so downstream tools can accumulate history without Runtime becoming a telemetry store. The replay point mirrors Runtime telemetry timestamp, sequence id, latency values, operation flags, power mode, and telemetry source. - Runtime does not claim production request cancellation. `--timeout-ms` is an observation threshold: if a successful benchmark mean latency exceeds the configured threshold, Runtime records `timeout_observed: true`, `runtime_error_classification.category: runtime_timeout_observed`, and `retryable: true` for downstream reliability reporting. - If execution is skipped because Runtime cannot complete the configured benchmark, Runtime records `runtime_error_classification.category: runtime_execution_skipped`, `severity: warning`, `retryable: true`, and `retry_hint: check_backend_availability`. This is failure-handling evidence for Lab/Orchestrator reporting, not a production worker retry loop. - Without `--timeout-ms`, results record `timeout_policy: not_configured`, `timeout_budget_ms: null`, and `timeout_observed: false`. diff --git a/scripts/smoke_default.sh b/scripts/smoke_default.sh index afa13e0..6d888db 100755 --- a/scripts/smoke_default.sh +++ b/scripts/smoke_default.sh @@ -108,9 +108,12 @@ assert "telemetry_timestamp" in coverage["observed_fields"], coverage assert coverage["missing_fields"] == telemetry["missing_fields"], coverage history_seed = telemetry["history_seed"] assert history_seed["schema_version"] == "inferedge-runtime-telemetry-history-seed-v1", history_seed +assert history_seed["evidence_role"] == "runtime_telemetry_history_seed", history_seed assert history_seed["registry_owner"] == "edgeenv", history_seed assert history_seed["decision_owner"] == "lab", history_seed +assert history_seed["source_result_schema_version"] == telemetry["source_result_schema_version"], history_seed assert history_seed["source_telemetry_schema_version"] == telemetry["schema_version"], history_seed +assert history_seed["replay_scope"] == "single_result_to_history", history_seed assert history_seed["production_monitoring"] is False, history_seed assert history_seed["missing_telemetry_is_failure"] is False, history_seed assert history_seed["replay_ready"] is True, history_seed @@ -122,6 +125,8 @@ assert history_seed["points"][0]["telemetry_timestamp"] == telemetry["telemetry_ assert history_seed["points"][0]["execution_sequence_id"] == telemetry["execution_sequence_id"], history_seed assert history_seed["points"][0]["mean_ms"] == telemetry["latency"]["mean_ms"], history_seed assert history_seed["points"][0]["timeout_observed"] == telemetry["operation"]["timeout_observed"], history_seed +assert history_seed["points"][0]["power_mode"] == telemetry["power_mode"], history_seed +assert history_seed["points"][0]["telemetry_source"] == telemetry["resource"]["telemetry_source"], history_seed assert events["runtime_telemetry_recorded"]["observed_field_count"] == coverage["observed_field_count"] assert events["runtime_telemetry_recorded"]["missing_field_count"] == coverage["missing_field_count"] assert events["runtime_telemetry_recorded"]["schema"] == "inferedge-runtime-telemetry-v1" @@ -197,6 +202,9 @@ assert coverage["missing_fields"] == telemetry["missing_fields"], coverage history_seed = telemetry["history_seed"] assert history_seed["registry_owner"] == "edgeenv", history_seed assert history_seed["decision_owner"] == "lab", history_seed +assert history_seed["source_result_schema_version"] == telemetry["source_result_schema_version"], history_seed +assert history_seed["source_telemetry_schema_version"] == telemetry["schema_version"], history_seed +assert history_seed["replay_scope"] == "single_result_to_history", history_seed assert history_seed["source_result"]["compare_key"] == data["compare_key"], history_seed assert history_seed["points"][0]["p99_ms"] == telemetry["latency"]["p99_ms"], history_seed assert history_seed["points"][0]["deadline_missed"] == telemetry["operation"]["deadline_missed"], history_seed diff --git a/tests/test_agent_runtime_result_contract.py b/tests/test_agent_runtime_result_contract.py index 3229938..6c405eb 100644 --- a/tests/test_agent_runtime_result_contract.py +++ b/tests/test_agent_runtime_result_contract.py @@ -208,12 +208,18 @@ def test_runtime_output_records_optional_agent_block_when_manifest_is_provided(s history_seed["schema_version"], "inferedge-runtime-telemetry-history-seed-v1", ) + self.assertEqual(history_seed["evidence_role"], "runtime_telemetry_history_seed") self.assertEqual(history_seed["registry_owner"], "edgeenv") self.assertEqual(history_seed["decision_owner"], "lab") + self.assertEqual( + history_seed["source_result_schema_version"], + telemetry["source_result_schema_version"], + ) self.assertEqual( history_seed["source_telemetry_schema_version"], telemetry["schema_version"], ) + self.assertEqual(history_seed["replay_scope"], "single_result_to_history") self.assertFalse(history_seed["production_monitoring"]) self.assertFalse(history_seed["missing_telemetry_is_failure"]) self.assertTrue(history_seed["replay_ready"]) @@ -236,6 +242,8 @@ def test_runtime_output_records_optional_agent_block_when_manifest_is_provided(s self.assertEqual(point["p99_ms"], telemetry["latency"]["p99_ms"]) self.assertEqual(point["timeout_observed"], telemetry["operation"]["timeout_observed"]) self.assertEqual(point["deadline_missed"], telemetry["operation"]["deadline_missed"]) + self.assertEqual(point["power_mode"], telemetry["power_mode"]) + self.assertEqual(point["telemetry_source"], telemetry["resource"]["telemetry_source"]) extra = result["extra"] self.assertTrue(extra["agent_manifest_recorded"]) diff --git a/tests/test_lab_result_schema.py b/tests/test_lab_result_schema.py index c3f08a0..ee8d50b 100644 --- a/tests/test_lab_result_schema.py +++ b/tests/test_lab_result_schema.py @@ -396,12 +396,18 @@ def validate_runtime_telemetry_history_seed(history_seed: dict, telemetry: dict) raise AssertionError(f"runtime_telemetry.history_seed.{field} must be a string") if history_seed["schema_version"] != "inferedge-runtime-telemetry-history-seed-v1": raise AssertionError("runtime_telemetry.history_seed.schema_version is invalid") + if history_seed["evidence_role"] != "runtime_telemetry_history_seed": + raise AssertionError("runtime_telemetry.history_seed.evidence_role is invalid") if history_seed["registry_owner"] != "edgeenv": raise AssertionError("runtime_telemetry.history_seed.registry_owner must be edgeenv") if history_seed["decision_owner"] != "lab": raise AssertionError("runtime_telemetry.history_seed.decision_owner must be lab") + if history_seed["source_result_schema_version"] != telemetry["source_result_schema_version"]: + raise AssertionError("runtime_telemetry.history_seed source result schema mismatch") if history_seed["source_telemetry_schema_version"] != telemetry["schema_version"]: raise AssertionError("runtime_telemetry.history_seed source telemetry schema mismatch") + if history_seed["replay_scope"] != "single_result_to_history": + raise AssertionError("runtime_telemetry.history_seed.replay_scope is invalid") for field in ("replay_ready", "production_monitoring", "missing_telemetry_is_failure"): if not isinstance(history_seed.get(field), bool): raise AssertionError(f"runtime_telemetry.history_seed.{field} must be a boolean") @@ -409,6 +415,8 @@ def validate_runtime_telemetry_history_seed(history_seed: dict, telemetry: dict) raise AssertionError("runtime_telemetry.history_seed.production_monitoring must be false") if history_seed["missing_telemetry_is_failure"] is not False: raise AssertionError("runtime_telemetry.history_seed.missing_telemetry_is_failure must be false") + if history_seed["replay_ready"] is not True: + raise AssertionError("runtime_telemetry.history_seed.replay_ready must be true") for field in ("recommended_registry_key_fields", "time_series_fields"): values = history_seed.get(field) if not isinstance(values, list) or not all(isinstance(item, str) for item in values): @@ -456,6 +464,10 @@ def validate_runtime_telemetry_history_seed(history_seed: dict, telemetry: dict) for field in ("timeout_observed", "latency_budget_exceeded", "deadline_missed"): if first_point.get(field) != telemetry["operation"][field]: raise AssertionError(f"runtime_telemetry.history_seed point {field} mismatch") + if first_point.get("power_mode") != telemetry["power_mode"]: + raise AssertionError("runtime_telemetry.history_seed point power_mode mismatch") + if first_point.get("telemetry_source") != telemetry["resource"]["telemetry_source"]: + raise AssertionError("runtime_telemetry.history_seed point telemetry_source mismatch") class JetsonEvidenceContractTest(unittest.TestCase):