From 318a026cfc2e76c1d5c91db06dfd9fb2957f271d Mon Sep 17 00:00:00 2001 From: David Sarkisyan <281478990+srkyn@users.noreply.github.com> Date: Fri, 22 May 2026 11:01:43 -0400 Subject: [PATCH] Add GitHub API context adapter --- LICENSE | 21 ++++ Makefile | 2 + README.md | 11 ++ docs/github_api_integration.md | 56 ++++++++++ src/github_repo_context.py | 180 ++++++++++++++++++++++++++++++ tests/test_github_repo_context.py | 61 ++++++++++ 6 files changed, 331 insertions(+) create mode 100644 LICENSE create mode 100644 docs/github_api_integration.md create mode 100644 src/github_repo_context.py create mode 100644 tests/test_github_repo_context.py diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..aa00aca --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 David Sarkisyan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Makefile b/Makefile index d5655a9..c2e1446 100644 --- a/Makefile +++ b/Makefile @@ -10,3 +10,5 @@ run: cloudtrail-demo: python cloudtrail_detector.py --file data/cloudtrail/sample_cloudtrail_iam_events.json +github-context: + python -m src.github_repo_context srkyn/IdentityRiskGraph diff --git a/README.md b/README.md index 07c627e..c8ce502 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ ![pytest](https://img.shields.io/badge/tests-pytest-green) ![MITRE ATT&CK](https://img.shields.io/badge/MITRE-ATT%26CK-red) ![CloudTrail](https://img.shields.io/badge/AWS-CloudTrail_IAM-orange) +![License](https://img.shields.io/badge/license-MIT-green) **Identity-first detection engineering for AWS IAM, nested access paths, and SOC-style risk investigation.** @@ -109,6 +110,7 @@ python -m streamlit run app.py - Detection-as-code foundation in `rules/cloudtrail_iam_rules.yaml` - Optional YAML detection engine for simple CloudTrail IAM rule execution - Splunk-friendly JSON export +- GitHub REST API adapter for public repository context and review notes - Pytest suite and GitHub Actions CI ## Architecture @@ -236,6 +238,14 @@ python cloudtrail_detector.py --file data/cloudtrail/suspicious_cloudtrail_event python cloudtrail_detector.py --engine yaml --file data/cloudtrail/suspicious_cloudtrail_events.json ``` +Fetch public GitHub repository context: + +```powershell +python -m src.github_repo_context srkyn/IdentityRiskGraph +``` + +This optional adapter reads public GitHub REST API metadata and prints review notes for repository visibility, maintenance state, topics, issue workflow, and licensing. See [docs/github_api_integration.md](docs/github_api_integration.md). 
+ Makefile shortcuts: ```bash @@ -243,6 +253,7 @@ make install make test make run make cloudtrail-demo +make github-context ``` ## Tests diff --git a/docs/github_api_integration.md b/docs/github_api_integration.md new file mode 100644 index 0000000..b56df6e --- /dev/null +++ b/docs/github_api_integration.md @@ -0,0 +1,56 @@ +# GitHub API Integration + +IdentityRiskGraph includes a small GitHub REST API adapter for public repository context. + +The adapter is intentionally narrow. It reads public repository metadata and turns it into short review notes that help an analyst understand project hygiene signals before opening the code. + +## What It Reads + +- repository owner and name +- description +- default branch +- visibility +- archived/fork state +- open issue count +- star count +- pushed timestamp +- topics +- issue/wiki/discussion settings +- license name + +## What It Does Not Do + +- does not scrape GitHub pages +- does not write to GitHub +- does not store API responses +- does not require a token for public repositories +- does not inspect private code +- does not treat repository metadata as a security verdict + +## Example + +```powershell +python -m src.github_repo_context srkyn/IdentityRiskGraph +``` + +Example output: + +```text +# GitHub Repository Context: srkyn/IdentityRiskGraph + +Description: Identity-first detection engineering app for CloudTrail IAM events, nested access paths, and explainable SOC risk investigation. +Default branch: main +URL: https://github.com/srkyn/IdentityRiskGraph + +| Signal | Status | Note | +|---|---|---| +| visibility | public | Public metadata can be reviewed without credentials. | +| repository state | active | Recent maintenance signals can support trust. | +| issue workflow | enabled | Issues provide a visible review path for fixes and follow-up. | +| topics | 12 topics | Topics improve discoverability and make project intent easier to scan. | +| license | not declared | Add a license if reuse is intended. 
|
+```
+
+## Why It Fits
+
+Identity investigations often start with a small set of observable signals. This adapter applies the same habit to public GitHub projects: collect context, avoid overclaiming, and write down what the signal does or does not prove.
diff --git a/src/github_repo_context.py b/src/github_repo_context.py
new file mode 100644
index 0000000..ff53ac8
--- /dev/null
+++ b/src/github_repo_context.py
@@ -0,0 +1,260 @@
+from __future__ import annotations
+
+import argparse
+import json
+import os
+from dataclasses import dataclass
+from typing import Any
+from urllib.error import HTTPError, URLError
+from urllib.parse import quote
+from urllib.request import Request, urlopen
+
+
+GITHUB_API_ROOT = "https://api.github.com"
+
+
+@dataclass(frozen=True)
+class GitHubRepoContext:
+    """Normalized subset of a GitHub REST API repository payload."""
+
+    owner: str
+    name: str
+    description: str
+    default_branch: str
+    visibility: str
+    archived: bool
+    fork: bool
+    open_issues_count: int
+    stargazers_count: int
+    pushed_at: str
+    html_url: str
+    topics: tuple[str, ...]
+    has_issues: bool
+    has_wiki: bool
+    has_discussions: bool
+    license_name: str | None
+
+
+@dataclass(frozen=True)
+class GitHubRepoReviewNote:
+    """One review table row: a signal, its status, and a short note."""
+
+    signal: str
+    status: str
+    note: str
+
+
+def build_repo_api_url(owner: str, repo: str) -> str:
+    """Return the REST API URL for the repository ``owner/repo``.
+
+    Both parts are stripped and percent-encoded as single path segments, so a
+    value containing ``/``, ``?`` or ``#`` cannot point the request at a
+    different API endpoint.
+
+    Raises:
+        ValueError: If either part is empty or is a ``.``/``..`` path segment.
+    """
+    owner = owner.strip()
+    repo = repo.strip()
+    if not owner or not repo:
+        raise ValueError("owner and repo are required")
+    if owner in {".", ".."} or repo in {".", ".."}:
+        raise ValueError("owner and repo must not be relative path segments")
+    return f"{GITHUB_API_ROOT}/repos/{quote(owner, safe='')}/{quote(repo, safe='')}"
+
+
+def fetch_repo_context(owner: str, repo: str, token: str | None = None) -> GitHubRepoContext:
+    """Fetch public GitHub repository metadata with an optional token.
+
+    When ``token`` is not given, the ``GITHUB_TOKEN`` environment variable is
+    used if set. This does not store responses, write files, or call any
+    non-GitHub service.
+
+    Raises:
+        ValueError: If ``owner`` or ``repo`` is rejected by ``build_repo_api_url``.
+        RuntimeError: If the request fails or the body is not a JSON object.
+    """
+    token = token or os.getenv("GITHUB_TOKEN")
+    headers = {
+        "Accept": "application/vnd.github+json",
+        "User-Agent": "IdentityRiskGraph-public-repo-context",
+        "X-GitHub-Api-Version": "2022-11-28",
+    }
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+
+    request = Request(build_repo_api_url(owner, repo), headers=headers)
+    try:
+        with urlopen(request, timeout=10) as response:
+            body = response.read()
+    except HTTPError as exc:
+        raise RuntimeError(f"GitHub API returned HTTP {exc.code} for {owner}/{repo}") from exc
+    except URLError as exc:
+        raise RuntimeError(f"GitHub API request failed for {owner}/{repo}: {exc.reason}") from exc
+    except OSError as exc:
+        # Failures while reading the body (such as a socket timeout) are not
+        # wrapped in URLError, so they are reported the same way here.
+        raise RuntimeError(f"GitHub API request failed for {owner}/{repo}: {exc}") from exc
+
+    try:
+        payload = json.loads(body.decode("utf-8"))
+    except ValueError as exc:
+        # UnicodeDecodeError and json.JSONDecodeError are both ValueError subclasses.
+        raise RuntimeError(f"GitHub API returned an invalid JSON body for {owner}/{repo}") from exc
+    if not isinstance(payload, dict):
+        raise RuntimeError(f"GitHub API returned an unexpected payload for {owner}/{repo}")
+
+    return parse_repo_context(payload)
+
+
+def _as_int(value: Any) -> int:
+    """Coerce a payload counter to ``int``; missing or malformed values become 0."""
+    try:
+        return int(value or 0)
+    except (TypeError, ValueError):
+        return 0
+
+
+def parse_repo_context(payload: dict[str, Any]) -> GitHubRepoContext:
+    """Normalize a repository payload into a ``GitHubRepoContext``.
+
+    Missing or ``null`` fields fall back to ``""``, ``False``, ``0`` or an empty
+    tuple. ``owner``, ``license`` and ``topics`` values of an unexpected type are
+    treated as missing. ``license_name`` is ``None`` when no license name is set.
+    """
+    owner = payload.get("owner")
+    if not isinstance(owner, dict):
+        owner = {}
+    license_info = payload.get("license")
+    if not isinstance(license_info, dict):
+        license_info = {}
+    topics = payload.get("topics")
+    if not isinstance(topics, list):
+        topics = []
+
+    # Without an explicit "visibility", use the boolean "private" flag rather
+    # than assuming the repository is public.
+    fallback_visibility = "private" if payload.get("private") else "public"
+
+    return GitHubRepoContext(
+        owner=str(owner.get("login") or ""),
+        name=str(payload.get("name") or ""),
+        description=str(payload.get("description") or ""),
+        default_branch=str(payload.get("default_branch") or ""),
+        visibility=str(payload.get("visibility") or fallback_visibility),
+        archived=bool(payload.get("archived")),
+        fork=bool(payload.get("fork")),
+        open_issues_count=_as_int(payload.get("open_issues_count")),
+        stargazers_count=_as_int(payload.get("stargazers_count")),
+        pushed_at=str(payload.get("pushed_at") or ""),
+        html_url=str(payload.get("html_url") or ""),
+        topics=tuple(str(topic) for topic in topics),
+        has_issues=bool(payload.get("has_issues")),
+        has_wiki=bool(payload.get("has_wiki")),
+        has_discussions=bool(payload.get("has_discussions")),
+        license_name=str(license_info.get("name") or "") or None,
+    )
+
+
+def review_repo_context(context: GitHubRepoContext) -> list[GitHubRepoReviewNote]:
+    """Return one review note per signal, in a fixed order.
+
+    The order is visibility, repository state, issue workflow, topics, license.
+    """
+    topic_count = len(context.topics)
+    return [
+        GitHubRepoReviewNote(
+            signal="visibility",
+            status=context.visibility,
+            note=(
+                "Public metadata can be reviewed without credentials."
+                if context.visibility == "public"
+                else "Review access boundaries before sharing."
+            ),
+        ),
+        GitHubRepoReviewNote(
+            signal="repository state",
+            status="archived" if context.archived else "active",
+            note=(
+                "Archived repositories should be treated as historical context."
+                if context.archived
+                else "Recent maintenance signals can support trust."
+            ),
+        ),
+        GitHubRepoReviewNote(
+            signal="issue workflow",
+            status="enabled" if context.has_issues else "disabled",
+            note=(
+                "Issues provide a visible review path for fixes and follow-up."
+                if context.has_issues
+                else "No public issue workflow is exposed."
+            ),
+        ),
+        GitHubRepoReviewNote(
+            signal="topics",
+            status=f"{topic_count} topic" if topic_count == 1 else f"{topic_count} topics",
+            note=(
+                "Topics improve discoverability and make project intent easier to scan."
+                if context.topics
+                else "Missing topics can reduce discoverability."
+            ),
+        ),
+        GitHubRepoReviewNote(
+            signal="license",
+            status=context.license_name or "not declared",
+            note=(
+                "A license clarifies reuse expectations."
+                if context.license_name
+                else "Add a license if reuse is intended."
+            ),
+        ),
+    ]
+
+
+def _table_cell(text: str) -> str:
+    """Collapse whitespace and escape ``|`` so text stays inside one table cell."""
+    return " ".join(str(text).split()).replace("|", "\\|")
+
+
+def format_review_notes(context: GitHubRepoContext, notes: list[GitHubRepoReviewNote]) -> str:
+    """Render the context header and the review notes as a Markdown document."""
+    lines = [
+        f"# GitHub Repository Context: {context.owner}/{context.name}",
+        "",
+        f"Description: {context.description or 'No description provided.'}",
+        f"Default branch: {context.default_branch or 'unknown'}",
+        f"URL: {context.html_url or 'unknown'}",
+        "",
+        "| Signal | Status | Note |",
+        "|---|---|---|",
+    ]
+    for note in notes:
+        row = " | ".join(_table_cell(cell) for cell in (note.signal, note.status, note.note))
+        lines.append(f"| {row} |")
+    return "\n".join(lines)
+
+
+def main(argv: list[str] | None = None) -> None:
+    """Print review notes for the repository named on the command line.
+
+    ``argv`` defaults to ``sys.argv[1:]``. Invalid input and fetch failures exit
+    with a one-line message instead of a traceback.
+    """
+    parser = argparse.ArgumentParser(
+        description="Fetch public GitHub repository context for identity/security review notes."
+    )
+    parser.add_argument("repo", help="Repository in owner/name format, for example srkyn/IdentityRiskGraph")
+    args = parser.parse_args(argv)
+
+    owner, separator, repo = args.repo.partition("/")
+    if not separator or "/" in repo:
+        raise SystemExit("repo must be in owner/name format")
+    try:
+        context = fetch_repo_context(owner, repo)
+    except (RuntimeError, ValueError) as exc:
+        raise SystemExit(str(exc)) from exc
+    print(format_review_notes(context, review_repo_context(context)))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_github_repo_context.py b/tests/test_github_repo_context.py
new file mode 100644
index 0000000..934ec01
--- /dev/null
+++ b/tests/test_github_repo_context.py
@@ -0,0 +1,95 @@
+import pytest
+
+from src.github_repo_context import (
+    build_repo_api_url,
+    format_review_notes,
+    parse_repo_context,
+    review_repo_context,
+)
+
+
+def test_build_repo_api_url_requires_owner_and_repo():
+    assert build_repo_api_url("srkyn", "IdentityRiskGraph") == "https://api.github.com/repos/srkyn/IdentityRiskGraph"
+    with pytest.raises(ValueError):
+        build_repo_api_url("", "IdentityRiskGraph")
+    with pytest.raises(ValueError):
+        build_repo_api_url("srkyn", "   ")
+
+
+def test_build_repo_api_url_encodes_path_segments():
+    assert build_repo_api_url("srkyn", "a/b?c") == "https://api.github.com/repos/srkyn/a%2Fb%3Fc"
+
+
+def test_parse_repo_context_normalizes_github_api_payload():
+    context = parse_repo_context(
+        {
+            "name": "IdentityRiskGraph",
+            "description": "Identity-first detection engineering app.",
+            "default_branch": "main",
+            "visibility": "public",
+            "archived": False,
+            "fork": False,
+            "open_issues_count": 3,
+            "stargazers_count": 4,
+            "pushed_at": "2026-05-22T14:00:00Z",
+            "html_url": "https://github.com/srkyn/IdentityRiskGraph",
+            "topics": ["iam", "soc", "github-api"],
+            "has_issues": True,
+            "has_wiki": False,
+            "has_discussions": True,
+            "license": {"name": "MIT License"},
+            "owner": {"login": "srkyn"},
+        }
+    )
+
+    assert context.owner == "srkyn"
+    assert context.name == "IdentityRiskGraph"
+    assert context.default_branch == "main"
+    assert context.topics == ("iam", "soc", "github-api")
+    assert context.license_name == "MIT License"
+    assert context.has_issues is True
+
+
+def test_parse_repo_context_tolerates_missing_and_malformed_fields():
+    context = parse_repo_context(
+        {"private": True, "owner": "srkyn", "license": {}, "open_issues_count": "n/a"}
+    )
+
+    assert context.visibility == "private"
+    assert context.owner == ""
+    assert context.license_name is None
+    assert context.open_issues_count == 0
+
+
+def test_review_repo_context_returns_human_review_notes():
+    context = parse_repo_context(
+        {
+            "name": "sample",
+            "visibility": "public",
+            "archived": False,
+            "has_issues": True,
+            "topics": ["security"],
+            "license": {"name": "MIT License"},
+            "owner": {"login": "srkyn"},
+        }
+    )
+
+    notes = review_repo_context(context)
+
+    assert [note.signal for note in notes] == [
+        "visibility",
+        "repository state",
+        "issue workflow",
+        "topics",
+        "license",
+    ]
+    assert any(note.status == "public" for note in notes)
+    assert all(note.note for note in notes)
+
+
+def test_format_review_notes_escapes_table_cells():
+    context = parse_repo_context({"name": "sample", "license": {"name": "A|B"}, "owner": {"login": "srkyn"}})
+
+    rendered = format_review_notes(context, review_repo_context(context))
+
+    assert "| license | A\\|B | A license clarifies reuse expectations. |" in rendered