From 4c068eaa3fa675cac6dc71d7e47dde9c83ffedb2 Mon Sep 17 00:00:00 2001
From: ProfRandom92
Date: Fri, 22 May 2026 20:55:14 +0200
Subject: [PATCH] feat: add safe gate privacy checks

---
NOTE(review): revised during review.
 * PRIVATE_MARKERS now also lists the ENCRYPTED/RSA/EC/DSA/OPENSSH PEM
   labels; none of them contains the bare "BEGIN PRIVATE KEY" marker as
   a substring, so they were previously not detected.
 * _repo_relative_path resolves REPO_ROOT before the containment check,
   so a root reached through a symlink no longer disables the scan.
 * Both touched files contain marker literals, so the gate reports them
   whenever they are listed in changed_paths under REPO_ROOT. Confirm
   that this is intended.
 * Line layout and context indentation were reconstructed from a
   whitespace-collapsed copy, and the blob ids on the index lines are
   those of the original revision. Regenerate before merging.

 scripts/safe_pr_gate.py    | 82 ++++++++++++++++++++++++++++++++++++++
 tests/test_safe_pr_gate.py | 76 +++++++++++++++++++++++++++++++++++
 2 files changed, 158 insertions(+)

diff --git a/scripts/safe_pr_gate.py b/scripts/safe_pr_gate.py
index ca23858..5884ea1 100644
--- a/scripts/safe_pr_gate.py
+++ b/scripts/safe_pr_gate.py
@@ -14,6 +14,22 @@
 if str(REPO_ROOT) not in sys.path:
     sys.path.insert(0, str(REPO_ROOT))
 
+# File names and suffixes that are reported as risky regardless of content.
+RISKY_PATH_NAMES = frozenset({".env", "id_ed25519", "id_rsa"})
+RISKY_PATH_SUFFIXES = (".key", ".pem")
+# Substrings reported when found in the text of a changed file.
+PRIVATE_MARKERS = (
+    "BEGIN PRIVATE KEY",
+    "BEGIN ENCRYPTED PRIVATE KEY",
+    "BEGIN RSA PRIVATE KEY",
+    "BEGIN EC PRIVATE KEY",
+    "BEGIN DSA PRIVATE KEY",
+    "BEGIN OPENSSH PRIVATE KEY",
+    "GITHUB_TOKEN=",
+    "OPENAI_API_KEY=",
+    "GEMINI_API_KEY=",
+)
+
 
 @dataclass(frozen=True, slots=True)
 class GateState:
@@ -101,6 +117,70 @@ def _path_in_prefix(path: str, prefix: str) -> bool:
     return path == normalized or path.startswith(normalized + "/")
 
 
+def _repo_relative_path(path: str) -> Path | None:
+    """Resolve *path* under REPO_ROOT, or return None if it escapes the repo.
+
+    REPO_ROOT is resolved as well, so a root that is reached through a
+    symlink still compares equal to the resolved candidate's prefix.
+    """
+    root = REPO_ROOT.resolve()
+    candidate = (root / path).resolve()
+    try:
+        candidate.relative_to(root)
+    except ValueError:
+        return None
+    return candidate
+
+
+def _is_risky_path(path: str) -> bool:
+    """Return True if *path* has a risky file name or suffix."""
+    name = Path(path).name
+    return name in RISKY_PATH_NAMES or name.endswith(RISKY_PATH_SUFFIXES)
+
+
+def _read_changed_text(path: str) -> str | None:
+    """Return the UTF-8 text of *path*, or None if it cannot be scanned.
+
+    None is returned when the path lies outside the repository, is not an
+    existing regular file, cannot be read, contains a NUL byte (treated as
+    binary), or is not valid UTF-8.
+    """
+    candidate = _repo_relative_path(path)
+    if candidate is None or not candidate.is_file():
+        return None
+    try:
+        data = candidate.read_bytes()
+    except OSError:
+        return None
+    if b"\0" in data:
+        return None
+    try:
+        return data.decode("utf-8")
+    except UnicodeDecodeError:
+        return None
+
+
+def _privacy_problems(changed_paths: tuple[str, ...]) -> tuple[str, ...]:
+    """Collect privacy problems for *changed_paths* in sorted path order.
+
+    Each path contributes ``privacy_risky_path:<path>`` when its name is
+    risky, followed by ``privacy_marker:<marker>:<path>`` for every entry of
+    PRIVATE_MARKERS found in its readable text.
+    """
+    problems: list[str] = []
+    for path in sorted(changed_paths):
+        if _is_risky_path(path):
+            problems.append(f"privacy_risky_path:{path}")
+
+        text = _read_changed_text(path)
+        if text is None:
+            continue
+        for marker in PRIVATE_MARKERS:
+            if marker in text:
+                problems.append(f"privacy_marker:{marker}:{path}")
+    return tuple(problems)
+
+
 def evaluate_gate(
     state: GateState,
     *,
@@ -125,6 +205,8 @@ def evaluate_gate(
         problems.append("changed_files_outside_allowed_prefixes")
         problems.extend(f"outside_prefix:{path}" for path in disallowed_paths)
 
+    problems.extend(_privacy_problems(state.changed_paths))
+
     return GateResult(
         ok=not problems,
         branch=state.branch,
diff --git a/tests/test_safe_pr_gate.py b/tests/test_safe_pr_gate.py
index b22225c..c5e5b52 100644
--- a/tests/test_safe_pr_gate.py
+++ b/tests/test_safe_pr_gate.py
@@ -93,6 +93,82 @@ def test_evaluate_gate_flags_paths_outside_allowed_prefixes() -> None:
     assert result.problems == ("changed_files_outside_allowed_prefixes", "outside_prefix:docs/example.md")
 
 
+def test_evaluate_gate_flags_risky_privacy_paths_in_stable_order() -> None:
+    result = evaluate_gate(
+        GateState(
+            branch="feat/safe-pr-gate",
+            status_short=(),
+            changed_paths=(
+                "secrets/id_rsa",
+                "config/.env",
+                "keys/service.key",
+                "certs/client.pem",
+            ),
+        )
+    )
+
+    assert result.ok is False
+    assert result.problems == (
+        "privacy_risky_path:certs/client.pem",
+        "privacy_risky_path:config/.env",
+        "privacy_risky_path:keys/service.key",
+        "privacy_risky_path:secrets/id_rsa",
+    )
+
+
+def test_evaluate_gate_flags_private_markers_in_changed_text_files(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    text_path = tmp_path / "docs" / "example.md"
+    binary_path = tmp_path / "docs" / "binary.bin"
+    text_path.parent.mkdir()
+    text_path.write_text("GITHUB_TOKEN=example\nOPENAI_API_KEY=example\n", encoding="utf-8")
+    binary_path.write_bytes(b"\0OPENAI_API_KEY=example")
+    monkeypatch.setattr(safe_pr_gate, "REPO_ROOT", tmp_path)
+
+    result = safe_pr_gate.evaluate_gate(
+        GateState(
+            branch="feat/safe-pr-gate",
+            status_short=(),
+            changed_paths=("docs/example.md", "docs/binary.bin"),
+        )
+    )
+
+    assert result.ok is False
+    assert result.problems == (
+        "privacy_marker:GITHUB_TOKEN=:docs/example.md",
+        "privacy_marker:OPENAI_API_KEY=:docs/example.md",
+    )
+
+
+def test_evaluate_gate_flags_legacy_and_openssh_private_key_headers(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+) -> None:
+    key_path = tmp_path / "docs" / "notes.txt"
+    key_path.parent.mkdir()
+    key_path.write_text(
+        "-----BEGIN RSA PRIVATE KEY-----\n-----BEGIN OPENSSH PRIVATE KEY-----\n",
+        encoding="utf-8",
+    )
+    monkeypatch.setattr(safe_pr_gate, "REPO_ROOT", tmp_path)
+
+    result = safe_pr_gate.evaluate_gate(
+        GateState(
+            branch="feat/safe-pr-gate",
+            status_short=(),
+            changed_paths=("docs/notes.txt",),
+        )
+    )
+
+    assert result.ok is False
+    assert result.problems == (
+        "privacy_marker:BEGIN RSA PRIVATE KEY:docs/notes.txt",
+        "privacy_marker:BEGIN OPENSSH PRIVATE KEY:docs/notes.txt",
+    )
+
+
 def test_parse_porcelain_paths_handles_rename_status_in_second_position() -> None:
     assert _parse_porcelain_paths(" R old-name.txt\0new-name.txt\0") == ("new-name.txt",)
 