Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions .github/workflows/ai_security_tracker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Scheduled workflow: runs the tracker script once a day, lets it open an
# issue with the generated report, and keeps the report as a build artifact.
name: AI & AI Security Trending Tracker

on:
  schedule:
    # Runs daily at 08:00 UTC (16:00 Beijing Time)
    - cron: '0 8 * * *'
  workflow_dispatch: # Allow manual triggering

# Token scope for this workflow: issue write access plus read-only contents.
permissions:
  issues: write
  contents: read

jobs:
  track-ai-projects:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Run AI Security Tracker
        env:
          # Environment variables handed to the tracker script.
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          CREATE_ISSUE: 'true'
          # Must stay in sync with the artifact `path` in the upload step below.
          OUTPUT_DIR: output
        run: python scripts/github_trending_tracker.py

      - name: Upload report artifact
        uses: actions/upload-artifact@v4
        with:
          name: ai-security-report
          path: output/ai_security_report.md
          retention-days: 30
11 changes: 10 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,13 @@ build/
.vscode/

### Mac OS ###
.DS_Store
.DS_Store

### Python ###
__pycache__/
*.py[cod]
*$py.class
*.egg-info/

### Tracker Output ###
output/
232 changes: 232 additions & 0 deletions scripts/github_trending_tracker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
"""
GitHub AI & AI Security Trending Projects Tracker

Searches GitHub for high-quality open source projects related to AI and AI Security,
filters by recent activity and star growth, and generates a formatted report.
The report can be posted as a GitHub Issue for daily tracking.
"""

import json
import os
import sys
from datetime import datetime, timedelta, timezone
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen


# Base URL of the GitHub REST API; endpoint paths are appended to it.
GITHUB_API = "https://api.github.com"

# Free-text search phrases; each one is sent as a separate repository search.
SEARCH_QUERIES = [
    "AI Security",
    "LLM Security",
    "AI safety",
    "adversarial attack defense",
    "model jailbreak detection",
    "data poisoning",
    "prompt injection",
    "AI red team",
]

# Default minimum star count for the `stars:>=` search qualifier.
MIN_STARS = 50
# `per_page` value sent with every search request.
MAX_RESULTS_PER_QUERY = 5
# Look-back window, in days, used to build the `pushed:>=` search qualifier.
RECENT_DAYS = 30


def github_request(url, token=None):
    """Fetch *url* and return its body parsed as JSON.

    Args:
        url: Fully-built request URL.
        token: Optional API token; when truthy it is sent as a bearer
            ``Authorization`` header.

    Returns:
        The decoded JSON payload of the response.
    """
    request_headers = {
        "Accept": "application/vnd.github.v3+json",
        "User-Agent": "AI-Security-Tracker",
    }
    if token:
        request_headers["Authorization"] = f"Bearer {token}"

    # 30-second timeout so a stalled connection cannot hang the run forever.
    with urlopen(Request(url, headers=request_headers), timeout=30) as response:
        raw_body = response.read()
    return json.loads(raw_body.decode("utf-8"))


def search_repos(query, token=None, min_stars=MIN_STARS, pushed_after=None):
    """Run a single repository search and return the raw API response.

    Args:
        query: Free-text search phrase.
        token: Optional API token forwarded to ``github_request``.
        min_stars: Lower bound for the ``stars:>=`` qualifier.
        pushed_after: Optional ``YYYY-MM-DD`` string; when truthy a
            ``pushed:>=`` qualifier is added.

    Returns:
        Whatever ``github_request`` returns for the search URL.
    """
    # Imported locally, as in the original, so the block does not depend on
    # the file-level import list.
    from urllib.parse import quote

    terms = [query, f"stars:>={min_stars}"]
    terms += [f"pushed:>={pushed_after}"] if pushed_after else []
    encoded_query = quote(" ".join(terms))

    # Results are ordered by star count, highest first.
    endpoint = (
        f"{GITHUB_API}/search/repositories"
        f"?q={encoded_query}"
        f"&sort=stars&order=desc&per_page={MAX_RESULTS_PER_QUERY}"
    )
    return github_request(endpoint, token)


def collect_trending_repos(token=None):
    """Collect trending repos across all search queries, deduplicated.

    Every phrase in ``SEARCH_QUERIES`` is searched with a ``pushed:>=`` cutoff
    of ``RECENT_DAYS`` days before the current UTC date. A failing query is
    reported on stderr and skipped so the remaining queries still run.

    Args:
        token: Optional API token forwarded to ``search_repos``.

    Returns:
        A list of repository dicts, unique by ``full_name``, sorted by
        ``stargazers_count`` in descending order.
    """
    cutoff = datetime.now(timezone.utc) - timedelta(days=RECENT_DAYS)
    pushed_after = cutoff.strftime("%Y-%m-%d")

    seen = set()
    repos = []

    for query in SEARCH_QUERIES:
        try:
            result = search_repos(query, token=token, pushed_after=pushed_after)
        except (OSError, ValueError) as exc:
            # OSError covers HTTPError/URLError (both are subclasses) as well
            # as socket timeouts raised while the body is being read, which
            # urllib does not wrap in URLError. ValueError covers a response
            # body that is not valid JSON (json.JSONDecodeError).
            print(f"Warning: search failed for '{query}': {exc}", file=sys.stderr)
            continue

        for item in result.get("items", []):
            full_name = item["full_name"]
            if full_name in seen:
                # The same repo commonly matches several phrases; keep the
                # first occurrence only.
                continue
            seen.add(full_name)
            repos.append(item)

    repos.sort(key=lambda r: r.get("stargazers_count", 0), reverse=True)
    return repos


def format_report(repos):
    """Format the collected repos into a Markdown report.

    Args:
        repos: Collection of repository dicts. ``full_name`` and ``html_url``
            are required on every entry; ``description``,
            ``stargazers_count``, ``language``, ``topics`` and ``pushed_at``
            are optional and may be missing or ``None``.

    Returns:
        The report as one newline-joined Markdown string. Entries are
        numbered from 1 in the order given; this function does not sort.

    Raises:
        KeyError: If an entry lacks ``full_name`` or ``html_url``.
    """
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    lines = [
        "# 🤖 AI & AI Security Trending Projects Report",
        "",
        f"> Generated on **{now}**",
        f"> Filter: stars ≥ {MIN_STARS}, updated within last {RECENT_DAYS} days",
        "",
    ]

    if not repos:
        lines.append("No matching projects found for this period.")
        return "\n".join(lines)

    for i, repo in enumerate(repos, 1):
        name = repo["full_name"]
        url = repo["html_url"]
        # `or` rather than a .get() default: a key that is present with a
        # null value must fall back too, otherwise slicing/iterating None
        # below would raise TypeError.
        description = repo.get("description") or "No description provided."
        stars = repo.get("stargazers_count") or 0
        language = repo.get("language") or "N/A"
        topics = repo.get("topics") or []
        pushed_at = repo.get("pushed_at")
        # Keep only the leading YYYY-MM-DD part of the timestamp.
        updated = pushed_at[:10] if pushed_at else "N/A"

        lines.append(f"## {i}. [{name}]({url})")
        lines.append("")
        lines.append(
            f"⭐ **Stars**: {stars} | 🗓 **Last Updated**: {updated} | 💻 **Language**: {language}"
        )
        lines.append("")
        lines.append(f"**Core Functionality**: {description}")
        lines.append("")

        # Infer AI Security application value from topics and description
        value = infer_security_value(description, topics)
        lines.append(f"**AI / AI Security Application Value**: {value}")
        lines.append("")
        lines.append("---")
        lines.append("")

    return "\n".join(lines)


def infer_security_value(description, topics):
    """Infer the AI security application value based on description and topics.

    The description and topics are lower-cased and joined into one text, with
    hyphens and underscores turned into spaces. Repository topics are
    hyphenated slugs (``prompt-injection``, ``red-team``), so without this
    step the multi-word keywords below could never match a topic.

    Args:
        description: Repository description; ``None`` is treated as empty.
        topics: Iterable of topic strings; ``None`` is treated as empty.

    Returns:
        The labels of all matching keywords joined by ``"; "`` in table
        order. When no keyword matches, a single generic fallback label.
    """
    text_parts = [description or ""]
    text_parts.extend(topics or [])
    all_text = " ".join(text_parts).lower()
    all_text = all_text.replace("-", " ").replace("_", " ")

    keyword_map = {
        "jailbreak": "Model jailbreak detection and prevention",
        "adversarial": "Adversarial attack research and defense",
        "red team": "AI red teaming and security evaluation",
        "prompt injection": "Prompt injection detection and defense",
        "data poison": "Data poisoning analysis and mitigation",
        "safety": "AI safety alignment and guardrails",
        "llm security": "Large Language Model security hardening",
        "vulnerability": "AI system vulnerability assessment",
        "robustness": "Model robustness and reliability testing",
        "alignment": "AI alignment and value safety research",
        "guardrail": "AI output guardrails and content filtering",
        "watermark": "AI-generated content watermarking and detection",
        "audit": "AI model auditing and compliance",
        "privacy": "AI privacy protection and federated learning",
        "trojan": "Neural trojan detection and defense",
        "backdoor": "Model backdoor detection and mitigation",
        "fairness": "AI fairness and bias detection",
    }

    # Plain substring matching, so "audit" also matches "auditing", etc.
    value_parts = [
        label for keyword, label in keyword_map.items() if keyword in all_text
    ]

    if not value_parts:
        # No specific keyword matched: fall back to the broadest category
        # that applies, checking security terms before LLM terms.
        if any(kw in all_text for kw in ["security", "secure", "defense", "detect"]):
            value_parts.append("AI security tooling and infrastructure")
        elif any(kw in all_text for kw in ["llm", "language model", "gpt", "transformer"]):
            value_parts.append("Large Language Model research and applications")
        else:
            value_parts.append("AI ecosystem tooling and open-source infrastructure")

    return "; ".join(value_parts)


def create_github_issue(title, body, token, repo_owner, repo_name):
    """Open a new issue carrying the report and return the API response.

    Args:
        title: Issue title.
        body: Issue body (the Markdown report).
        token: API token sent as a bearer ``Authorization`` header.
        repo_owner: Owner part of the target repository.
        repo_name: Name part of the target repository.

    Returns:
        The decoded JSON response describing the created issue.
    """
    # Every issue is tagged with the fixed "ai-tracker" label.
    payload = {"title": title, "body": body, "labels": ["ai-tracker"]}

    request = Request(
        f"{GITHUB_API}/repos/{repo_owner}/{repo_name}/issues",
        data=json.dumps(payload).encode("utf-8"),
        headers={
            "Accept": "application/vnd.github.v3+json",
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
            "User-Agent": "AI-Security-Tracker",
        },
        method="POST",
    )

    with urlopen(request, timeout=30) as response:
        created = json.loads(response.read().decode("utf-8"))

    print(f"Issue created: {created['html_url']}")
    return created


def main():
    """Collect repositories, write the report, optionally open an issue.

    Reads ``GITHUB_TOKEN``, ``GITHUB_REPOSITORY``, ``OUTPUT_DIR`` and
    ``CREATE_ISSUE`` from the environment. The report is always written to
    ``<OUTPUT_DIR>/ai_security_report.md`` and echoed to stdout. An issue is
    created only when ``CREATE_ISSUE`` is "true" (case-insensitive), a token
    is set and the repository name contains a ``/``.
    """
    token = os.environ.get("GITHUB_TOKEN")
    repo_full = os.environ.get("GITHUB_REPOSITORY", "")

    print("Collecting trending AI & AI Security projects...")
    repos = collect_trending_repos(token=token)
    print(f"Found {len(repos)} unique repositories.")

    report = format_report(repos)

    # Persist the report so the workflow can upload it as an artifact.
    output_dir = os.environ.get("OUTPUT_DIR", ".")
    os.makedirs(output_dir, exist_ok=True)
    report_path = os.path.join(output_dir, "ai_security_report.md")
    with open(report_path, "w", encoding="utf-8") as report_file:
        report_file.write(report)
    print(f"Report written to {report_path}")

    issue_requested = os.environ.get("CREATE_ISSUE", "false").lower() == "true"
    if issue_requested and token and "/" in repo_full:
        owner, _, name = repo_full.partition("/")
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        try:
            create_github_issue(
                f"🤖 AI & AI Security Trending Report - {today}",
                report,
                token,
                owner,
                name,
            )
        except (HTTPError, URLError) as exc:
            # Best effort: the report file already exists, so a failed issue
            # only produces a warning.
            print(f"Warning: failed to create issue: {exc}", file=sys.stderr)

    # Echo the full report so it is visible in the job log.
    print("\n" + report)


# Run the tracker only when executed as a script, not when imported
# (the test module imports this file).
if __name__ == "__main__":
    main()
108 changes: 108 additions & 0 deletions scripts/test_tracker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
"""Tests for github_trending_tracker.py"""

import json
import os
import sys
import unittest
from unittest.mock import MagicMock, patch

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))

from github_trending_tracker import (
collect_trending_repos,
format_report,
infer_security_value,
)

# Minimal repository record carrying the fields the tracker reads; shared by
# the report-formatting and collection tests below.
SAMPLE_REPO = {
    "full_name": "example/ai-security-tool",
    "html_url": "https://github.com/example/ai-security-tool",
    "description": "A tool for LLM security testing and jailbreak detection",
    "stargazers_count": 1500,
    "language": "Python",
    "topics": ["llm", "security", "jailbreak"],
    "pushed_at": "2026-03-10T12:00:00Z",
}


class TestInferSecurityValue(unittest.TestCase):
    """Keyword, topic and fallback behaviour of ``infer_security_value``."""

    @staticmethod
    def _lowered(description, topics):
        """Return the inferred label lower-cased for case-insensitive checks."""
        return infer_security_value(description, topics).lower()

    def test_jailbreak_keyword(self):
        """A 'jailbreak' description yields a jailbreak label."""
        self.assertIn("jailbreak", self._lowered("jailbreak detection tool", []))

    def test_adversarial_keyword(self):
        """An 'adversarial' description yields an adversarial label."""
        self.assertIn(
            "adversarial", self._lowered("adversarial attack defense framework", [])
        )

    def test_prompt_injection_keyword(self):
        """A 'prompt injection' description yields a prompt-injection label."""
        self.assertIn(
            "prompt injection", self._lowered("prompt injection scanner", [])
        )

    def test_topics_based_inference(self):
        """Keywords found only in the topics still produce a label."""
        self.assertIn("red team", self._lowered("a tool", ["red team", "ai"]))

    def test_generic_security(self):
        """Without a specific keyword, security wording hits the security fallback."""
        self.assertIn("security", self._lowered("security scanning tool", []))

    def test_generic_llm(self):
        """Without a specific keyword, LLM wording hits the language-model fallback."""
        self.assertIn("language model", self._lowered("llm framework for chatbot", []))

    def test_fallback(self):
        """Text matching nothing still yields a generic AI label."""
        label = infer_security_value("machine learning toolkit", [])
        self.assertIn("AI", label)


class TestFormatReport(unittest.TestCase):
    """Markdown output of ``format_report``."""

    def test_empty_repos(self):
        """An empty input produces the 'nothing found' notice."""
        self.assertIn("No matching projects found", format_report([]))

    def test_single_repo(self):
        """All displayed fields of one repo show up in the report."""
        rendered = format_report([SAMPLE_REPO])
        expected_fragments = (
            "example/ai-security-tool",
            "https://github.com/example/ai-security-tool",
            "1500",
            "Python",
            "Core Functionality",
            "AI / AI Security Application Value",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, rendered)

    def test_report_header(self):
        """The report starts with the fixed title."""
        rendered = format_report([SAMPLE_REPO])
        self.assertIn("AI & AI Security Trending Projects Report", rendered)

    def test_multiple_repos_sorted(self):
        """Entries appear in the order given (input is pre-sorted by stars)."""
        second_repo = {
            **SAMPLE_REPO,
            "full_name": "other/ai-tool",
            "html_url": "https://github.com/other/ai-tool",
            "stargazers_count": 500,
        }
        rendered = format_report([SAMPLE_REPO, second_repo])
        self.assertLess(
            rendered.index("example/ai-security-tool"),
            rendered.index("other/ai-tool"),
        )


class TestCollectTrendingRepos(unittest.TestCase):
    """Deduplication and error tolerance of ``collect_trending_repos``."""

    def test_deduplication(self):
        """A repo returned by every query is kept exactly once."""
        with patch("github_trending_tracker.github_request") as fake_request:
            fake_request.return_value = {"items": [SAMPLE_REPO]}
            collected = collect_trending_repos(token=None)

        self.assertEqual(len(collected), 1)
        self.assertEqual(collected[0]["full_name"], "example/ai-security-tool")

    def test_handles_api_error(self):
        """HTTP failures on every query give an empty list, not an exception."""
        from urllib.error import HTTPError

        rate_limited = HTTPError(
            url="", code=403, msg="rate limited", hdrs={}, fp=None
        )
        with patch("github_trending_tracker.github_request") as fake_request:
            fake_request.side_effect = rate_limited
            collected = collect_trending_repos(token=None)

        self.assertEqual(collected, [])


# Allow running this test module directly with `python test_tracker.py`.
if __name__ == "__main__":
    unittest.main()