From 2b18329a23bab9deac1ed9e3b85fa456e3c23d6d Mon Sep 17 00:00:00 2001 From: RMANOV <96174405+RMANOV@users.noreply.github.com> Date: Sat, 25 Apr 2026 14:47:58 +0300 Subject: [PATCH] fix(sim): harden replay evidence safety checks --- python/tests/test_strix_sim_replay.py | 47 +++++++++++++++++++++++++++ scripts/strix_sim_replay.py | 22 +++++++++---- 2 files changed, 62 insertions(+), 7 deletions(-) diff --git a/python/tests/test_strix_sim_replay.py b/python/tests/test_strix_sim_replay.py index c1448f2..f25d3fc 100644 --- a/python/tests/test_strix_sim_replay.py +++ b/python/tests/test_strix_sim_replay.py @@ -80,6 +80,23 @@ def test_replay_outputs_public_safe_paths(tmp_path): assert replay["scenario"]["path"] == "/scenario.yaml" +def test_public_path_redacts_parent_traversal(tmp_path, monkeypatch): + module = _load_module() + root = tmp_path / "repo" + root.mkdir() + outside = tmp_path / "secret" / "scenario.yaml" + outside.parent.mkdir() + outside.write_text("scenario_id: external\n", encoding="utf-8") + + monkeypatch.setattr(module, "ROOT", root) + + reported = module.public_path(root / ".." / "secret" / "scenario.yaml") + + assert reported == "/scenario.yaml" + assert ".." not in reported + assert "secret" not in reported + + def test_replay_html_embeds_visualizer_data(tmp_path): module = _load_module() scenario = tmp_path / "scenario.yaml" @@ -94,6 +111,36 @@ def test_replay_html_embeds_visualizer_data(tmp_path): assert str(tmp_path) not in html +def test_replay_html_escapes_scenario_id_in_title(tmp_path): + module = _load_module() + scenario = tmp_path / "scenario.yaml" + _write_scenario(scenario) + data = scenario.read_text(encoding="utf-8").replace("scenario_id: replay_case", "scenario_id: \"&id\"") + scenario.write_text(data, encoding="utf-8") + replay = module.build_replay(scenario, tick_s=10) + + html = module.render_html(replay) + + assert "STRIX Software Replay - <bad>&id" in html + assert "STRIX Software Replay - <bad>&id" not in html + + +def test_envelope_fails_when_required_metric_is_missing(tmp_path): + module = _load_module() + scenario = tmp_path / "scenario.yaml" + _write_scenario(scenario) + data = scenario.read_text(encoding="utf-8").replace( + " area_coverage_pct:\n min: 0\n max: 100\n", + " missing_metric:\n min: 1\n max: 2\n", + ) + scenario.write_text(data, encoding="utf-8") + + replay = module.build_replay(scenario, tick_s=10) + + assert replay["envelope"]["status"] == "failed" + assert replay["envelope"]["checks"][0]["status"] == "not_observed" + + def test_write_replay_creates_json_and_html(tmp_path): module = _load_module() scenario = tmp_path / "scenario.yaml" diff --git a/scripts/strix_sim_replay.py b/scripts/strix_sim_replay.py index 5499cf5..701adb4 100644 --- a/scripts/strix_sim_replay.py +++ b/scripts/strix_sim_replay.py @@ -13,6 +13,7 @@ import argparse import hashlib +import html import json import math import random @@ -54,13 +55,18 @@ class AgentState: def public_path(path: Path) -> str: path_str = str(path) - if path.is_relative_to(ROOT): - return str(path.relative_to(ROOT)) + + root = ROOT.resolve(strict=False) if path_str.startswith("\\\\") or path_str[:3].replace("\\", "/").endswith(":/"): return f"/{PureWindowsPath(path_str).name or '.'}" - if path.is_absolute(): - return f"/{path.name or '.'}" - return path_str + + candidate = path if path.is_absolute() else ROOT / path + normalized = candidate.resolve(strict=False) + if normalized.is_relative_to(root): + return str(normalized.relative_to(root)) + if normalized.is_absolute(): + return f"/{normalized.name or '.'}" + return f"/{path.name or '.'}" def git_value(args: list[str]) -> str | None: @@ -328,6 +334,7 @@ def replay_metrics( base_coverage = 100.0 * alive_fraction metrics: dict[str, float | int] = { "active_agents": active_agents, + "area_coverage_pct": round(base_coverage, 3), "offline_agents": total_agents - active_agents, "frame_count": len(frames), "mean_energy_remaining_pct": round( @@ -389,7 +396,7 @@ def evaluate_envelope(metrics: dict[str, float | int], scenario: dict[str, Any]) "max": bounds.get("max"), } ) - failed = [check for check in checks if check["status"] == "failed"] + failed = [check for check in checks if check["status"] != "passed"] return { "status": "failed" if failed else "passed", "checks": checks, @@ -507,12 +514,13 @@ def world_bounds(replay: dict[str, Any]) -> dict[str, float]: def render_html(replay: dict[str, Any]) -> str: replay_json = json.dumps({**replay, "world": world_bounds(replay)}, sort_keys=True).replace(" - STRIX Software Replay - {replay['scenario']['id']} + STRIX Software Replay - {scenario_id}