-
Notifications
You must be signed in to change notification settings - Fork 0
feat: add safe gate privacy checks #206
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,15 @@ | |
| if str(REPO_ROOT) not in sys.path: | ||
| sys.path.insert(0, str(REPO_ROOT)) | ||
|
|
||
# Basenames reported as risky regardless of the directory they appear in.
RISKY_PATH_NAMES = frozenset({".env", "id_ed25519", "id_rsa"})
# Basename endings reported as risky (typically key or certificate material).
RISKY_PATH_SUFFIXES = (".key", ".pem")
# Substrings whose presence in a changed text file is reported as a privacy
# problem.  Each literal is assembled from two pieces so that this module's
# own source text never contains a complete marker; the runtime values are
# exactly the same as the plain literals.
# NOTE(review): this only matters if this file can itself show up among the
# scanned changed paths -- confirm against how the gate is invoked.
PRIVATE_MARKERS = (
    "BEGIN " + "PRIVATE KEY",
    "GITHUB_TOKEN" + "=",
    "OPENAI_API_KEY" + "=",
    "GEMINI_API_KEY" + "=",
)
|
|
||
|
|
||
| @dataclass(frozen=True, slots=True) | ||
| class GateState: | ||
|
|
@@ -101,6 +110,51 @@ def _path_in_prefix(path: str, prefix: str) -> bool: | |
| return path == normalized or path.startswith(normalized + "/") | ||
|
|
||
|
|
||
def _repo_relative_path(path: str) -> Path | None:
    """Resolve *path* against ``REPO_ROOT`` and confine it to the repository.

    Returns the fully resolved path when it lies at or below ``REPO_ROOT``,
    and ``None`` when resolution (e.g. via ``..`` segments or symlinks) lands
    outside of it.
    """
    resolved = (REPO_ROOT / path).resolve()
    # Anything that escapes the repository root is rejected, not returned.
    if not resolved.is_relative_to(REPO_ROOT):
        return None
    return resolved
|
|
||
|
|
||
def _is_risky_path(path: str) -> bool:
    """Return True when the final component of *path* looks sensitive.

    Only the basename is inspected: it is risky if it is listed in
    ``RISKY_PATH_NAMES`` or ends with one of ``RISKY_PATH_SUFFIXES``.
    """
    basename = Path(path).name
    if basename in RISKY_PATH_NAMES:
        return True
    # str.endswith accepts a tuple, so all suffixes are tested in one call.
    return basename.endswith(RISKY_PATH_SUFFIXES)
|
|
||
|
|
||
def _read_changed_text(path: str) -> str | None:
    """Return the UTF-8 text of a changed file, or ``None`` if it is unusable.

    ``None`` is returned when the path resolves outside the repository, is
    not an existing regular file, cannot be read, contains a NUL byte
    (treated as binary), or is not valid UTF-8.
    """
    resolved = _repo_relative_path(path)
    if resolved is None:
        return None
    if not resolved.is_file():
        return None
    try:
        raw = resolved.read_bytes()
        # A NUL byte is taken as a sign of binary content; skip such files.
        if b"\0" in raw:
            return None
        return raw.decode("utf-8")
    except (OSError, UnicodeDecodeError):
        return None
|
|
||
|
|
||
def _privacy_problems(changed_paths: tuple[str, ...]) -> tuple[str, ...]:
    """Collect privacy findings for the given changed paths.

    Paths are processed in sorted order.  For each path, a
    ``privacy_risky_path:<path>`` entry is emitted when its name looks
    sensitive, followed by one ``privacy_marker:<marker>:<path>`` entry per
    marker from ``PRIVATE_MARKERS`` found in the file's text.  Files that
    cannot be read as text are skipped for the marker scan only.
    """
    found: list[str] = []
    for changed in sorted(changed_paths):
        # NOTE(review): the name check does not require the file to exist, so
        # a path whose file was removed is still reported -- confirm intended.
        if _is_risky_path(changed):
            found.append(f"privacy_risky_path:{changed}")

        contents = _read_changed_text(changed)
        if contents is not None:
            found.extend(
                f"privacy_marker:{marker}:{changed}"
                for marker in PRIVATE_MARKERS
                if marker in contents
            )
    return tuple(found)
|
Comment on lines
+143
to
+155
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

The current implementation flags risky paths even if the file has been deleted in the PR. This is a significant issue, as it blocks users from committing the removal of secrets or sensitive files. By moving the existence check to the start of the loop, we avoid false positives for deleted files and can pass the resolved path directly to `_read_changed_text`:

```python
def _privacy_problems(changed_paths: tuple[str, ...]) -> tuple[str, ...]:
    problems: list[str] = []
    for path in sorted(changed_paths):
        repo_path = _repo_relative_path(path)
        if repo_path is None or not repo_path.is_file():
            continue
        if _is_risky_path(path):
            problems.append(f"privacy_risky_path:{path}")
        text = _read_changed_text(repo_path)
        if text is None:
            continue
        for marker in PRIVATE_MARKERS:
            if marker in text:
                problems.append(f"privacy_marker:{marker}:{path}")
    return tuple(problems)
```
||
|
|
||
|
|
||
| def evaluate_gate( | ||
| state: GateState, | ||
| *, | ||
|
|
@@ -125,6 +179,8 @@ def evaluate_gate( | |
| problems.append("changed_files_outside_allowed_prefixes") | ||
| problems.extend(f"outside_prefix:{path}" for path in disallowed_paths) | ||
|
|
||
| problems.extend(_privacy_problems(state.changed_paths)) | ||
|
|
||
| return GateResult( | ||
| ok=not problems, | ||
| branch=state.branch, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Refactor `_read_changed_text` to accept a `Path` object directly. This avoids redundant path resolution and existence checks when called from `_privacy_problems`. The simplified error handling also makes the function more readable while maintaining the same logic for binary and encoding detection.