From b962b10d22b76a5019e3142424fb50a2eb8f3306 Mon Sep 17 00:00:00 2001
From: RMANOV <96174405+RMANOV@users.noreply.github.com>
Date: Sat, 25 Apr 2026 10:45:54 +0300
Subject: [PATCH] fix(testing): keep evidence reports public-safe

---
 python/tests/test_strix_scenario_contract.py |  5 +++
 python/tests/test_strix_test_matrix.py       | 35 ++++++++++++++++++++
 scripts/strix_scenario_contract.py           | 21 +++++++++---
 scripts/strix_test_matrix.py                 | 23 +++++++++++++
 4 files changed, 79 insertions(+), 5 deletions(-)

diff --git a/python/tests/test_strix_scenario_contract.py b/python/tests/test_strix_scenario_contract.py
index 5bddb77..cc06131 100644
--- a/python/tests/test_strix_scenario_contract.py
+++ b/python/tests/test_strix_scenario_contract.py
@@ -45,6 +45,7 @@ def test_validate_scenario_accepts_minimal_contract(tmp_path):
 
     assert result["status"] == "passed"
     assert result["errors"] == []
+    assert str(tmp_path) not in result["path"]
 
 
 def test_validate_scenario_rejects_missing_seed_and_bad_bounds(tmp_path):
@@ -94,7 +95,9 @@ def test_validate_directory_rejects_missing_directory(tmp_path):
     report = module.validate_directory(tmp_path / "missing")
 
     assert report["summary"]["failed"] == 1
+    assert str(tmp_path) not in report["scenario_dir"]
     assert "does not exist" in report["results"][0]["errors"][0]
+    assert str(tmp_path) not in report["results"][0]["errors"][0]
 
 
 def test_validate_directory_rejects_file_path(tmp_path):
@@ -106,6 +109,7 @@ def test_validate_directory_rejects_file_path(tmp_path):
 
     assert report["summary"]["failed"] == 1
     assert "not a directory" in report["results"][0]["errors"][0]
+    assert str(tmp_path) not in report["results"][0]["errors"][0]
 
 
 def test_validate_directory_rejects_empty_directory(tmp_path):
@@ -115,6 +119,7 @@ def test_validate_directory_rejects_empty_directory(tmp_path):
 
     assert report["summary"]["failed"] == 1
     assert "no scenario files" in report["results"][0]["errors"][0]
+    assert str(tmp_path) not in report["results"][0]["errors"][0]
 
 
 def test_write_report_outputs_json(tmp_path):
diff --git a/python/tests/test_strix_test_matrix.py b/python/tests/test_strix_test_matrix.py
index 9fe1b96..485393d 100644
--- a/python/tests/test_strix_test_matrix.py
+++ b/python/tests/test_strix_test_matrix.py
@@ -60,6 +60,22 @@ def test_matrix_loads_and_selects_non_manual_entries(tmp_path):
     assert [entry["id"] for entry in selected] == ["pass"]
 
 
+def test_matrix_rejects_empty_command_lists(tmp_path):
+    """load_matrix must raise ValueError when an entry's command list is empty."""
+    module = _load_module()
+    matrix_path = tmp_path / "matrix.json"
+    _write_matrix(matrix_path)
+    payload = json.loads(matrix_path.read_text(encoding="utf-8"))
+    payload["commands"][0]["command"] = []
+    matrix_path.write_text(json.dumps(payload), encoding="utf-8")
+    try:
+        module.load_matrix(matrix_path)
+    except ValueError as err:
+        assert "must contain at least one argument" in str(err)
+    else:
+        raise AssertionError("expected load_matrix to reject empty command list")
+
+
 def test_dry_run_report_does_not_execute_commands(tmp_path):
     module = _load_module()
     matrix_path = tmp_path / "matrix.json"
@@ -123,6 +139,25 @@ def test_run_entry_captures_missing_executable():
     assert "definitely-not-a-strix-command" in result["stderr_tail"]
 
 
+def test_run_entry_captures_empty_command():
+    """run_entry reports an empty command as a failed result instead of raising."""
+    module = _load_module()
+    empty_entry = {
+        "id": "empty",
+        "command": [],
+        "expected_exit": 0,
+        "timeout_s": 10,
+        "tags": ["unit"],
+    }
+
+    outcome = module.run_entry(empty_entry, dry_run=False)
+
+    # No process is started, so there is no exit code to report.
+    assert outcome["status"] == "failed"
+    assert outcome["exit_code"] is None
+    assert "at least one argument" in outcome["stderr_tail"]
+
+
 def test_empty_selection_is_a_failed_report(tmp_path):
     module = _load_module()
     matrix_path = tmp_path / "matrix.json"
diff --git a/scripts/strix_scenario_contract.py b/scripts/strix_scenario_contract.py
index a31e06a..d1b9d57 100644
--- a/scripts/strix_scenario_contract.py
+++ b/scripts/strix_scenario_contract.py
@@ -26,6 +26,17 @@
 SCENARIO_ID_RE = re.compile(r"^[a-z0-9][a-z0-9_-]*$")
 
 
+def public_path(path: Path) -> str:
+    """Render *path* for reports without exposing the local checkout layout."""
+
+    if path.is_relative_to(ROOT):
+        return str(path.relative_to(ROOT))
+    if not path.is_absolute():
+        return str(path)
+    tail = path.name or "."
+    return f"<external>/{tail}"
+
+
 def load_yaml(path: Path) -> dict[str, Any]:
     data = yaml.safe_load(path.read_text(encoding="utf-8"))
     if not isinstance(data, dict):
@@ -123,7 +134,7 @@ def validate_scenario(path: Path) -> dict[str, Any]:
 
     status = "failed" if errors else "passed"
     return {
-        "path": str(path.relative_to(ROOT) if path.is_relative_to(ROOT) else path),
+        "path": public_path(path),
         "scenario_id": scenario_id,
         "status": status,
         "errors": errors,
@@ -132,24 +143,24 @@ def validate_scenario(path: Path) -> dict[str, Any]:
 
 
 def validate_directory(scenario_dir: Path) -> dict[str, Any]:
-    scenario_dir_str = str(scenario_dir.relative_to(ROOT) if scenario_dir.is_relative_to(ROOT) else scenario_dir)
+    scenario_dir_str = public_path(scenario_dir)
 
     if not scenario_dir.exists():
         return directory_failure_report(
             scenario_dir_str,
-            f"scenario directory does not exist: {scenario_dir}",
+            f"scenario directory does not exist: {scenario_dir_str}",
         )
     if not scenario_dir.is_dir():
         return directory_failure_report(
             scenario_dir_str,
-            f"scenario path is not a directory: {scenario_dir}",
+            f"scenario path is not a directory: {scenario_dir_str}",
         )
 
     files = sorted(scenario_dir.glob("*.yaml"))
     if not files:
         return directory_failure_report(
             scenario_dir_str,
-            f"no scenario files found in directory: {scenario_dir}",
+            f"no scenario files found in directory: {scenario_dir_str}",
         )
 
     results = [validate_scenario(path) for path in files]
diff --git a/scripts/strix_test_matrix.py b/scripts/strix_test_matrix.py
index a0ed777..c4b647d 100644
--- a/scripts/strix_test_matrix.py
+++ b/scripts/strix_test_matrix.py
@@ -57,6 +57,8 @@ def load_matrix(path: Path) -> dict[str, Any]:
         command = entry.get("command")
         if not isinstance(command, list) or not all(isinstance(part, str) for part in command):
             raise ValueError(f"{path}: {command_id}: 'command' must be a list of strings")
+        if not command:
+            raise ValueError(f"{path}: {command_id}: 'command' must contain at least one argument")
         tags = entry.get("tags", [])
         if not isinstance(tags, list) or not all(isinstance(tag, str) for tag in tags):
             raise ValueError(f"{path}: {command_id}: 'tags' must be a list of strings")
@@ -109,6 +111,17 @@ def run_entry(entry: dict[str, Any], dry_run: bool) -> dict[str, Any]:
             "stderr_tail": "",
         }
 
+    if not command:
+        elapsed = time.monotonic() - started
+        return {
+            **base_result,
+            "status": "failed",
+            "exit_code": None,
+            "elapsed_s": round(elapsed, 3),
+            "stdout_tail": "",
+            "stderr_tail": "command must contain at least one argument",
+        }
+
     try:
         completed = subprocess.run(
             command,
@@ -140,6 +153,16 @@ def run_entry(entry: dict[str, Any], dry_run: bool) -> dict[str, Any]:
             "stdout_tail": "",
             "stderr_tail": str(exc),
         }
+    except (ValueError, IndexError) as exc:
+        elapsed = time.monotonic() - started
+        return {
+            **base_result,
+            "status": "failed",
+            "exit_code": None,
+            "elapsed_s": round(elapsed, 3),
+            "stdout_tail": "",
+            "stderr_tail": str(exc),
+        }
 
     elapsed = time.monotonic() - started
     status = "passed" if completed.returncode == expected_exit else "failed"