From 01228c274a5bad758af54a77e7d6b8093abe3170 Mon Sep 17 00:00:00 2001 From: hapo-nghialuu Date: Sun, 25 Jan 2026 15:47:07 +0700 Subject: [PATCH 1/4] feat: AI Code Reviewer v2 - severity-based actions and project context --- docs/PLAN-auto-resolve.md | 40 ++++ scripts/ai_reviewer.py | 461 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 501 insertions(+) create mode 100644 docs/PLAN-auto-resolve.md create mode 100644 scripts/ai_reviewer.py diff --git a/docs/PLAN-auto-resolve.md b/docs/PLAN-auto-resolve.md new file mode 100644 index 00000000..192ac131 --- /dev/null +++ b/docs/PLAN-auto-resolve.md @@ -0,0 +1,40 @@ +# Plan: Auto-Resolve AI Review Threads + +## Goal Description +Enhance `ai_reviewer.py` to automatically resolve outdated review threads created by the bot. This ensures that when new code is pushed or the AI re-runs, the PR doesn't get cluttered with old, potentially irrelevant comments. + +## Proposed Changes + +### Logic Script (`scripts/ai_reviewer.py`) +1. **Add `resolve_thread(thread_id)` function**: + * Construct a GraphQL mutation: + ```graphql + mutation ResolveThread($threadId: ID!) { + resolveReviewThread(input: {threadId: $threadId}) { + thread { + isResolved + } + } + } + ``` + * Execute this via `requests.post("https://api.github.com/graphql", ...)` using `GITHUB_TOKEN`. + +2. **Add `get_unresolved_bot_threads(pr_node_id)` function**: + * Use GraphQL query to fetch all review threads for the PR. + * Filter for threads where: + * `isResolved` is `false`. + * The author is `github-actions[bot]`. + +3. **Update `main` flow**: + * **Step 1 (New)**: Before analyzing code, fetch all unresolved bot threads. + * **Step 2 (New)**: Resolve them all. (Strategy: "Flush & Refresh"). + * **Step 3**: Analyze code and post new comments as usual. + +### Rationale for "Flush & Refresh" +Attempting to map old comments to new line numbers is complex and error-prone. The most reliable "stateless" approach for an AI bot is to clear its previous "state" (unresolved threads) and re-evaluate the current state of the code. + +## Verification Plan +1. **Manual Verification**: + * Create a PR with a bug. Let bot comment. + * Push a fix (or just an empty commit to trigger re-run). + * Verify that the *old* comment is marked as "Resolved" (check icon) in the GitHub UI. diff --git a/scripts/ai_reviewer.py b/scripts/ai_reviewer.py new file mode 100644 index 00000000..fc83f7ad --- /dev/null +++ b/scripts/ai_reviewer.py @@ -0,0 +1,461 @@ +""" +AI Code Reviewer v2 +==================== +Automatically reviews Pull Requests using OpenRouter AI and posts reviews with severity-based actions. 
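+
+Example finding returned by the model (illustrative values only):
+    {"filename": "app/db.py", "line_number": 42,
+     "severity": "critical", "comment": "SQL query built via string concatenation"}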
+ +Features: +- Severity classification: critical, high, medium, low +- Smart action: Request Changes for critical/high, Comment for medium/low +- Project context injection for better understanding +- Auto-resolve outdated bot comments + +Model: mistralai/devstral-2512:free (configurable via MODEL_NAME) +""" + +import os +import json +import requests +import logging +from github import Github, GithubException +from openai import OpenAI, AuthenticationError, RateLimitError + +# Configure Logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') + +# Constants +IGNORED_EXTENSIONS = ['.json', '.md', '.txt', '.yml', '.yaml', '.lock', '.png', '.jpg', '.jpeg', '.gif', '.svg'] +IGNORED_DIRS = ['dist', 'build', 'node_modules', '.github', '.git', '__pycache__'] +MODEL_NAME = os.getenv("AI_MODEL", "mistralai/devstral-2512:free") +GITHUB_GRAPHQL_URL = "https://api.github.com/graphql" + +# Severity levels that trigger Request Changes +BLOCKING_SEVERITIES = ['critical', 'high'] + + +def should_review(filename): + """Check if file should be reviewed based on extension and path.""" + if any(filename.endswith(ext) for ext in IGNORED_EXTENSIONS): + return False + if any(part in filename.split('/') for part in IGNORED_DIRS): + return False + return True + + +def get_project_context(repo): + """Fetch project context files (README, ARCHITECTURE) for better AI understanding.""" + context_files = ['README.md', 'ARCHITECTURE.md', 'docs/ARCHITECTURE.md'] + context = [] + + for filepath in context_files: + try: + content = repo.get_contents(filepath) + if content and hasattr(content, 'decoded_content'): + text = content.decoded_content.decode('utf-8') + # Limit context size per file + if len(text) > 2000: + text = text[:2000] + "\n...[truncated]..." + context.append(f"### {filepath}\n{text}") + except Exception: + # File doesn't exist, skip silently + pass + + return "\n\n".join(context) if context else "No project documentation found." + + +def get_file_list(repo): + """Get a list of all files in the repository for context.""" + try: + contents = repo.get_contents("") + files = [] + while contents: + item = contents.pop(0) + if item.type == "dir" and item.name not in IGNORED_DIRS: + try: + contents.extend(repo.get_contents(item.path)) + except Exception: + pass + elif item.type == "file": + files.append(item.path) + # Limit to 100 files for context + if len(files) >= 100: + break + return files + except Exception as e: + logging.warning(f"Could not fetch file list: {e}") + return [] + + +def get_pr_diff(repo, pr_number): + """Fetch PR diff and full file content for context.""" + pr = repo.get_pull(pr_number) + files_data = [] + + for file in pr.get_files(): + if not should_review(file.filename): + continue + + if file.status == 'removed': + continue + + file_info = { + "filename": file.filename, + "status": file.status, + "patch": file.patch + } + + # Try to get full file content for better context + try: + content = repo.get_contents(file.filename, ref=pr.head.sha) + if content and hasattr(content, 'decoded_content'): + full_content = content.decoded_content.decode('utf-8') + # Limit full content size + if len(full_content) <= 5000: + file_info["full_content"] = full_content + except Exception: + pass + + files_data.append(file_info) + + return pr, files_data + + +def resolve_thread(thread_id, token): + """Resolve a specific review thread using GraphQL.""" + mutation = """ + mutation ResolveThread($threadId: ID!) 
{ + resolveReviewThread(input: {threadId: $threadId}) { + thread { isResolved } + } + } + """ + headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"} + payload = {"query": mutation, "variables": {"threadId": thread_id}} + + try: + response = requests.post(GITHUB_GRAPHQL_URL, json=payload, headers=headers, timeout=30) + response.raise_for_status() + data = response.json() + if 'errors' in data: + logging.error(f"GraphQL Error resolving thread: {data['errors']}") + else: + logging.info(f"Resolved thread {thread_id}") + except Exception as e: + logging.error(f"Failed to resolve thread {thread_id}: {e}") + + +def resolve_existing_comments(repo_owner, repo_name, pr_number, token): + """Resolve all unresolved bot review threads on the PR.""" + query = """ + query($owner: String!, $repo: String!, $prNumber: Int!) { + repository(owner: $owner, name: $repo) { + pullRequest(number: $prNumber) { + reviewThreads(last: 100) { + nodes { + id + isResolved + comments(first: 1) { + nodes { author { login } } + } + } + } + } + } + } + """ + headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"} + payload = {"query": query, "variables": {"owner": repo_owner, "repo": repo_name, "prNumber": pr_number}} + + try: + response = requests.post(GITHUB_GRAPHQL_URL, json=payload, headers=headers, timeout=30) + response.raise_for_status() + data = response.json() + + if 'errors' in data: + logging.error(f"GraphQL Error: {data['errors']}") + return + + pr_data = data.get('data', {}).get('repository', {}).get('pullRequest') + if not pr_data: + return + + threads = pr_data.get('reviewThreads', {}).get('nodes', []) + bot_threads = [] + + for thread in threads: + if thread.get('isResolved'): + continue + comments = thread.get('comments', {}).get('nodes', []) + if comments: + author = comments[0].get('author') + if author and author.get('login') == 'github-actions[bot]': + bot_threads.append(thread.get('id')) + + if bot_threads: + logging.info(f"Resolving {len(bot_threads)} old bot threads...") + for tid in bot_threads: + resolve_thread(tid, token) + + except Exception as e: + logging.error(f"Failed to resolve threads: {e}") + + +def analyze_code_with_openrouter(files_data, project_context, file_list): + """Send code to OpenRouter AI for review with severity classification.""" + api_key = os.getenv("OPENROUTER_API_KEY") + if not api_key: + logging.error("OPENROUTER_API_KEY not found.") + return [] + + try: + client = OpenAI( + base_url="https://openrouter.ai/api/v1", + api_key=api_key, + default_headers={ + "HTTP-Referer": "https://github.com/hapo-nghialuu/antigravity-kit", + "X-Title": "AI Code Reviewer v2" + } + ) + except Exception as e: + logging.error(f"Failed to init OpenAI client: {e}") + return [] + + # Build comprehensive prompt with project context + prompt = f""" +Bạn là một Senior Code Reviewer chuyên nghiệp. Review code trong Pull Request này. + +## PROJECT CONTEXT +{project_context} + +## PROJECT FILES +{', '.join(file_list[:50])} + +## SEVERITY LEVELS (QUAN TRỌNG - PHẢI TRẢ VỀ) +- **critical**: Security vulnerabilities (hardcoded secrets, SQL injection, XSS), data loss bugs +- **high**: Logic bugs gây crash, infinite loops, memory leaks +- **medium**: Performance issues, bad practices, potential bugs +- **low**: Minor suggestions, style (chỉ khi rất cần thiết) + +## RULES +1. Chỉ báo lỗi BẠN CHẮC CHẮN 100% là bug thực sự. +2. Code đã có try-except? -> KHÔNG báo "thiếu xử lý lỗi". +3. Nếu không chắc chắn -> Trả về [] (rỗng). +4. 
PHẢI trả về `severity` cho mỗi issue. + +## OUTPUT FORMAT (JSON only, no markdown) +[ + {{ + "filename": "path/to/file.py", + "line_number": 42, + "severity": "critical|high|medium|low", + "comment": "Mô tả vấn đề ngắn gọn bằng tiếng Việt" + }} +] + +## CODE TO REVIEW +{json.dumps(files_data, ensure_ascii=False)} +""" + + try: + response = client.chat.completions.create( + model=MODEL_NAME, + messages=[ + {"role": "system", "content": "You are a strict code reviewer. Respond with valid JSON only."}, + {"role": "user", "content": prompt} + ], + ) + + if not response.choices: + return [] + + content = response.choices[0].message.content or "" + content = content.strip() + + # Clean markdown formatting + if content.startswith("```json"): + content = content[7:] + if content.startswith("```"): + content = content[3:] + if content.endswith("```"): + content = content[:-3] + content = content.strip() + + if not content: + return [] + + logging.info("AI response received.") + return json.loads(content) + + except AuthenticationError: + logging.error("OpenRouter authentication failed.") + return [] + except RateLimitError: + logging.error("OpenRouter rate limit exceeded.") + return [] + except json.JSONDecodeError as e: + logging.error(f"JSON parse error: {e}") + return [] + except Exception as e: + logging.error(f"OpenRouter error: {e}") + return [] + + +def submit_review(pr, comments): + """Submit a PR review with appropriate action based on severity.""" + if not comments: + logging.info("No issues found. LGTM! ✅") + return + + try: + commit = pr.get_commits().reversed[0] + except Exception as e: + logging.error(f"Failed to get commit: {e}") + return + + # Classify comments by severity + has_blocking = any(c.get('severity', '').lower() in BLOCKING_SEVERITIES for c in comments) + + # Build review body + summary_parts = [] + if has_blocking: + summary_parts.append("🚨 **Phát hiện vấn đề nghiêm trọng cần xử lý trước khi merge.**") + else: + summary_parts.append("📝 **Một số góp ý để cải thiện code.**") + + summary_parts.append("\n_Đây là AI review tự động. Vui lòng verify trước khi áp dụng._") + + # Determine review event + event = "REQUEST_CHANGES" if has_blocking else "COMMENT" + logging.info(f"Submitting review with {len(comments)} comments. 
Event: {event}") + + # Build review comments + review_comments = [] + for note in comments: + filename = note.get('filename') + line = note.get('line_number') + severity = note.get('severity', 'medium') + comment_text = note.get('comment', '') + + if not comment_text or not filename: + continue + + # Add severity badge + severity_badge = { + 'critical': '🔴 CRITICAL', + 'high': '🟠 HIGH', + 'medium': '🟡 MEDIUM', + 'low': '🟢 LOW' + }.get(severity.lower(), '⚪') + + body = f"**{severity_badge}**\n\n{comment_text}" + + # Validate line number + try: + line_int = int(line) if line else None + if line_int and line_int > 0: + review_comments.append({ + "path": filename, + "line": line_int, + "body": body, + "side": "RIGHT" + }) + except (ValueError, TypeError): + logging.warning(f"Invalid line for {filename}: {line}") + + # Submit review + try: + if review_comments: + pr.create_review( + commit=commit, + body="\n".join(summary_parts), + event=event, + comments=review_comments + ) + logging.info(f"Review submitted successfully: {event}") + else: + # No valid line comments, post as issue comment + for note in comments: + body = f"🤖 **AI Review** ({note.get('severity', 'info').upper()}): {note.get('comment', '')}" + pr.create_issue_comment(body) + + except GithubException as e: + logging.error(f"GitHub API error: {e}") + # Fallback to issue comments + for note in comments: + try: + body = f"🤖 **AI Review**: {note.get('comment', '')}" + pr.create_issue_comment(body) + except Exception: + pass + except Exception as e: + logging.error(f"Failed to submit review: {e}") + + +def main(): + github_token = os.getenv("GITHUB_TOKEN") + event_path = os.getenv("GITHUB_EVENT_PATH") + + if not github_token or not event_path: + logging.error("Missing GITHUB_TOKEN or GITHUB_EVENT_PATH.") + return + + try: + with open(event_path, 'r') as f: + event_data = json.load(f) + except Exception as e: + logging.error(f"Failed to read event: {e}") + return + + if 'pull_request' not in event_data: + logging.info("Not a pull_request event.") + return + + try: + pr_number = event_data['pull_request']['number'] + repo_name = event_data['repository']['full_name'] + except KeyError as e: + logging.error(f"Missing field: {e}") + return + + if '/' not in repo_name: + logging.error(f"Invalid repo format: {repo_name}") + return + + owner_name, repository_name = repo_name.split('/', 1) + logging.info(f"🔍 Starting AI review for PR #{pr_number} in {repo_name}") + + # Initialize GitHub client + g = Github(github_token) + try: + repo = g.get_repo(repo_name) + except GithubException as e: + logging.error(f"Repo access failed: {e}") + return + + # Step 1: Resolve old bot threads + logging.info("Resolving old bot comments...") + resolve_existing_comments(owner_name, repository_name, pr_number, github_token) + + # Step 2: Get project context + logging.info("Fetching project context...") + project_context = get_project_context(repo) + file_list = get_file_list(repo) + + # Step 3: Get PR diff + pr, files_data = get_pr_diff(repo, pr_number) + + if not files_data: + logging.info("No reviewable files.") + return + + logging.info(f"Analyzing {len(files_data)} files...") + + # Step 4: AI Analysis + comments = analyze_code_with_openrouter(files_data, project_context, file_list) + + # Step 5: Submit review + submit_review(pr, comments) + logging.info("🎉 Review complete!") + + +if __name__ == "__main__": + main() From 103b183e163f8eee87da579db5a90c856400ebd0 Mon Sep 17 00:00:00 2001 From: hapo-nghialuu Date: Sun, 25 Jan 2026 15:53:54 +0700 Subject: [PATCH 
2/4] ci: add missing AI Reviewer workflow

---
 .github/workflows/ai-review.yml | 36 +++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 .github/workflows/ai-review.yml

diff --git a/.github/workflows/ai-review.yml b/.github/workflows/ai-review.yml
new file mode 100644
index 00000000..ae3d23fb
--- /dev/null
+++ b/.github/workflows/ai-review.yml
@@ -0,0 +1,36 @@
+name: AI Reviewer
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+
+permissions:
+  contents: read
+  pull-requests: write
+  issues: write
+
+jobs:
+  review:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # Fetch all history for diffs
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install PyGithub openai requests
+
+      - name: Run AI Reviewer
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
+          AI_MODEL: "mistralai/devstral-2512:free"  # Configurable via env
+        run: python scripts/ai_reviewer.py

From 7428164963e1b15fc71af76e4cd32fc9cd00581b Mon Sep 17 00:00:00 2001
From: hapo-nghialuu
Date: Sun, 25 Jan 2026 16:07:52 +0700
Subject: [PATCH 3/4] fix: address AI review feedback (docstring, exception handling)

---
 scripts/ai_reviewer.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/scripts/ai_reviewer.py b/scripts/ai_reviewer.py
index fc83f7ad..e0d09a56 100644
--- a/scripts/ai_reviewer.py
+++ b/scripts/ai_reviewer.py
@@ -9,7 +9,15 @@
 - Project context injection for better understanding
 - Auto-resolve outdated bot comments
 
-Model: mistralai/devstral-2512:free (configurable via MODEL_NAME)
+Usage:
+    Run this script in a GitHub Action triggered by `pull_request` events.
+    Ensure GITHUB_TOKEN, OPENROUTER_API_KEY, and GITHUB_EVENT_PATH are set.
+
+Environment Variables:
+    GITHUB_TOKEN: GitHub Action token
+    OPENROUTER_API_KEY: API key for OpenRouter
+    GITHUB_EVENT_PATH: Path to the GitHub event JSON file
+    AI_MODEL: Model to use (default: mistralai/devstral-2512:free)
 """
 
 import os
@@ -401,8 +409,14 @@ def main():
     try:
         with open(event_path, 'r') as f:
             event_data = json.load(f)
+    except FileNotFoundError:
+        logging.error(f"Event file not found at {event_path}")
+        return
+    except json.JSONDecodeError as e:
+        logging.error(f"Failed to parse event file: {e}")
+        return
     except Exception as e:
-        logging.error(f"Failed to read event: {e}")
+        logging.error(f"Unexpected error reading event file: {e}")
         return
 
     if 'pull_request' not in event_data:

From a36a40e989356651b2b904d58e1e669813070a30 Mon Sep 17 00:00:00 2001
From: hapo-nghialuu
Date: Sun, 25 Jan 2026 16:45:25 +0700
Subject: [PATCH 4/4] feat: implement smart auto-resolve (sync comments instead of wipe)

---
 scripts/ai_reviewer.py | 356 +++++++++++++++++------------------------
 1 file changed, 147 insertions(+), 209 deletions(-)

diff --git a/scripts/ai_reviewer.py b/scripts/ai_reviewer.py
index e0d09a56..dd766a56 100644
--- a/scripts/ai_reviewer.py
+++ b/scripts/ai_reviewer.py
@@ -1,13 +1,13 @@
 """
-AI Code Reviewer v2
-====================
+AI Code Reviewer v2 (Smart Auto-Resolve)
+========================================
 Automatically reviews Pull Requests using OpenRouter AI and posts reviews with severity-based actions.
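 
 Example finding returned by the model (illustrative values only):
     {"filename": "app/db.py", "line_number": 42,
      "severity": "critical", "comment": "SQL query built via string concatenation"}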
Features: - Severity classification: critical, high, medium, low - Smart action: Request Changes for critical/high, Comment for medium/low - Project context injection for better understanding -- Auto-resolve outdated bot comments +- Smart Auto-resolve: Only resolves threads if the issue is no longer detected by AI Usage: Run this script in a GitHub Action triggered by `pull_request` events. @@ -59,12 +59,10 @@ def get_project_context(repo): content = repo.get_contents(filepath) if content and hasattr(content, 'decoded_content'): text = content.decoded_content.decode('utf-8') - # Limit context size per file if len(text) > 2000: text = text[:2000] + "\n...[truncated]..." context.append(f"### {filepath}\n{text}") except Exception: - # File doesn't exist, skip silently pass return "\n\n".join(context) if context else "No project documentation found." @@ -84,12 +82,10 @@ def get_file_list(repo): pass elif item.type == "file": files.append(item.path) - # Limit to 100 files for context if len(files) >= 100: break return files - except Exception as e: - logging.warning(f"Could not fetch file list: {e}") + except Exception: return [] @@ -101,7 +97,6 @@ def get_pr_diff(repo, pr_number): for file in pr.get_files(): if not should_review(file.filename): continue - if file.status == 'removed': continue @@ -110,49 +105,24 @@ def get_pr_diff(repo, pr_number): "status": file.status, "patch": file.patch } - - # Try to get full file content for better context try: content = repo.get_contents(file.filename, ref=pr.head.sha) if content and hasattr(content, 'decoded_content'): full_content = content.decoded_content.decode('utf-8') - # Limit full content size if len(full_content) <= 5000: file_info["full_content"] = full_content except Exception: pass - files_data.append(file_info) return pr, files_data -def resolve_thread(thread_id, token): - """Resolve a specific review thread using GraphQL.""" - mutation = """ - mutation ResolveThread($threadId: ID!) { - resolveReviewThread(input: {threadId: $threadId}) { - thread { isResolved } - } - } +def get_existing_bot_threads(repo_owner, repo_name, pr_number, token): + """ + Fetch all unresolved review threads created by the bot. + Returns a list of dicts: {'id': ..., 'filename': ..., 'line': ..., 'body': ...} """ - headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"} - payload = {"query": mutation, "variables": {"threadId": thread_id}} - - try: - response = requests.post(GITHUB_GRAPHQL_URL, json=payload, headers=headers, timeout=30) - response.raise_for_status() - data = response.json() - if 'errors' in data: - logging.error(f"GraphQL Error resolving thread: {data['errors']}") - else: - logging.info(f"Resolved thread {thread_id}") - except Exception as e: - logging.error(f"Failed to resolve thread {thread_id}: {e}") - - -def resolve_existing_comments(repo_owner, repo_name, pr_number, token): - """Resolve all unresolved bot review threads on the PR.""" query = """ query($owner: String!, $repo: String!, $prNumber: Int!) 
{ repository(owner: $owner, name: $repo) { @@ -161,8 +131,13 @@ def resolve_existing_comments(repo_owner, repo_name, pr_number, token): nodes { id isResolved + line # The current line in the file comments(first: 1) { - nodes { author { login } } + nodes { + path + body + author { login } + } } } } @@ -180,38 +155,119 @@ def resolve_existing_comments(repo_owner, repo_name, pr_number, token): if 'errors' in data: logging.error(f"GraphQL Error: {data['errors']}") - return + return [] pr_data = data.get('data', {}).get('repository', {}).get('pullRequest') if not pr_data: - return + return [] - threads = pr_data.get('reviewThreads', {}).get('nodes', []) + threads_raw = pr_data.get('reviewThreads', {}).get('nodes', []) bot_threads = [] - for thread in threads: + for thread in threads_raw: if thread.get('isResolved'): continue + comments = thread.get('comments', {}).get('nodes', []) - if comments: - author = comments[0].get('author') - if author and author.get('login') == 'github-actions[bot]': - bot_threads.append(thread.get('id')) + if not comments: + continue + + first_comment = comments[0] + author = first_comment.get('author') + + # Check if this thread belongs to github-actions[bot] + if author and author.get('login') == 'github-actions[bot]': + bot_threads.append({ + 'id': thread.get('id'), + 'line': thread.get('line'), # Current line in PR head + 'filename': first_comment.get('path'), + 'body': first_comment.get('body') + }) - if bot_threads: - logging.info(f"Resolving {len(bot_threads)} old bot threads...") - for tid in bot_threads: - resolve_thread(tid, token) + return bot_threads except Exception as e: - logging.error(f"Failed to resolve threads: {e}") + logging.error(f"Failed to fetch existing threads: {e}") + return [] + + +def resolve_thread(thread_id, token): + """Resolve a specific thread via GraphQL.""" + mutation = """ + mutation ResolveThread($threadId: ID!) { + resolveReviewThread(input: {threadId: $threadId}) { + thread { isResolved } + } + } + """ + headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"} + payload = {"query": mutation, "variables": {"threadId": thread_id}} + try: + requests.post(GITHUB_GRAPHQL_URL, json=payload, headers=headers, timeout=30) + logging.info(f"Resolved thread {thread_id}") + except Exception as e: + logging.error(f"Failed to resolve thread {thread_id}: {e}") + + +def manage_review_threads(repo_owner, repo_name, pr_number, new_comments, token): + """ + Smart Auto-Resolve Logic: + 1. Fetch existing unresolved bot threads. + 2. Match new comments to existing threads (by filename + approx line). + 3. If matched: Keep thread open (do NOT post new comment). + 4. If old thread has no match in new comments: Resolve it (Fixed). + 5. Return list of TRULY NEW comments to post. + """ + logging.info("Syncing comments (Smart Auto-Resolve)...") + + old_threads = get_existing_bot_threads(repo_owner, repo_name, pr_number, token) + matched_thread_ids = set() + comments_to_post = [] + + # Map new comments to finding signature + # Since line numbers can shift, we use a relaxed match: +/- 3 lines + + for new_cmt in new_comments: + new_file = new_cmt.get('filename') + new_line = int(new_cmt.get('line_number', 0)) + + found_match = False + + for old_th in old_threads: + if old_th['id'] in matched_thread_ids: + continue # Already matched + + old_file = old_th['filename'] + old_line = old_th['line'] + + # If thread has no line (outdated), we can't match by line reliably. + # But let's assume valid threads have lines. 
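+            # The ±3-line window below is a deliberate heuristic: new pushes
+            # usually shift code slightly, so an exact-line match would both
+            # re-post duplicate comments and resolve threads that are still valid.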
+ if old_line is None: + continue + + if new_file == old_file and abs(new_line - old_line) <= 3: + # MATCH FOUND! + matched_thread_ids.add(old_th['id']) + found_match = True + logging.info(f"Matched existing thread {old_th['id']} for {new_file}:{new_line}. Keeping open.") + break + + if not found_match: + comments_to_post.append(new_cmt) + + # Resolve unmatched old threads + for old_th in old_threads: + if old_th['id'] not in matched_thread_ids: + logging.info(f"Issue at {old_th['filename']}:{old_th['line']} no longer detected. Resolving...") + resolve_thread(old_th['id'], token) + + return comments_to_post def analyze_code_with_openrouter(files_data, project_context, file_list): - """Send code to OpenRouter AI for review with severity classification.""" + """Send code review request to AI.""" api_key = os.getenv("OPENROUTER_API_KEY") if not api_key: - logging.error("OPENROUTER_API_KEY not found.") return [] try: @@ -223,11 +279,9 @@ def analyze_code_with_openrouter(files_data, project_context, file_list): "X-Title": "AI Code Reviewer v2" } ) - except Exception as e: - logging.error(f"Failed to init OpenAI client: {e}") + except Exception: return [] - # Build comprehensive prompt with project context prompt = f""" Bạn là một Senior Code Reviewer chuyên nghiệp. Review code trong Pull Request này. @@ -272,128 +326,45 @@ def analyze_code_with_openrouter(files_data, project_context, file_list): ], ) - if not response.choices: - return [] - content = response.choices[0].message.content or "" - content = content.strip() - - # Clean markdown formatting - if content.startswith("```json"): - content = content[7:] - if content.startswith("```"): - content = content[3:] - if content.endswith("```"): - content = content[:-3] - content = content.strip() - - if not content: - return [] - + content = content.replace("```json", "").replace("```", "").strip() + if not content: return [] + logging.info("AI response received.") return json.loads(content) - except AuthenticationError: - logging.error("OpenRouter authentication failed.") - return [] - except RateLimitError: - logging.error("OpenRouter rate limit exceeded.") - return [] - except json.JSONDecodeError as e: - logging.error(f"JSON parse error: {e}") - return [] except Exception as e: - logging.error(f"OpenRouter error: {e}") + logging.error(f"AI Error: {e}") return [] def submit_review(pr, comments): - """Submit a PR review with appropriate action based on severity.""" + """Submit review comments.""" if not comments: - logging.info("No issues found. LGTM! ✅") - return - - try: - commit = pr.get_commits().reversed[0] - except Exception as e: - logging.error(f"Failed to get commit: {e}") + logging.info("No new issues found.") return - # Classify comments by severity + # Check for blocking issues has_blocking = any(c.get('severity', '').lower() in BLOCKING_SEVERITIES for c in comments) - - # Build review body - summary_parts = [] - if has_blocking: - summary_parts.append("🚨 **Phát hiện vấn đề nghiêm trọng cần xử lý trước khi merge.**") - else: - summary_parts.append("📝 **Một số góp ý để cải thiện code.**") - - summary_parts.append("\n_Đây là AI review tự động. Vui lòng verify trước khi áp dụng._") - - # Determine review event + summary = "🚨 **Phát hiện vấn đề nghiêm trọng.**" if has_blocking else "📝 **Góp ý cải thiện code.**" event = "REQUEST_CHANGES" if has_blocking else "COMMENT" - logging.info(f"Submitting review with {len(comments)} comments. 
Event: {event}") - - # Build review comments + review_comments = [] for note in comments: filename = note.get('filename') - line = note.get('line_number') - severity = note.get('severity', 'medium') - comment_text = note.get('comment', '') + line = int(note.get('line_number', 0)) + if not line or not filename: continue - if not comment_text or not filename: - continue - - # Add severity badge - severity_badge = { - 'critical': '🔴 CRITICAL', - 'high': '🟠 HIGH', - 'medium': '🟡 MEDIUM', - 'low': '🟢 LOW' - }.get(severity.lower(), '⚪') - - body = f"**{severity_badge}**\n\n{comment_text}" - - # Validate line number - try: - line_int = int(line) if line else None - if line_int and line_int > 0: - review_comments.append({ - "path": filename, - "line": line_int, - "body": body, - "side": "RIGHT" - }) - except (ValueError, TypeError): - logging.warning(f"Invalid line for {filename}: {line}") + severity = note.get('severity', 'medium').lower() + badge = {'critical': '🔴', 'high': '🟠', 'medium': '🟡', 'low': '🟢'}.get(severity, '⚪') + body = f"**{badge} {severity.upper()}**\n\n{note.get('comment')}" + + review_comments.append({"path": filename, "line": line, "body": body, "side": "RIGHT"}) - # Submit review try: if review_comments: - pr.create_review( - commit=commit, - body="\n".join(summary_parts), - event=event, - comments=review_comments - ) - logging.info(f"Review submitted successfully: {event}") - else: - # No valid line comments, post as issue comment - for note in comments: - body = f"🤖 **AI Review** ({note.get('severity', 'info').upper()}): {note.get('comment', '')}" - pr.create_issue_comment(body) - - except GithubException as e: - logging.error(f"GitHub API error: {e}") - # Fallback to issue comments - for note in comments: - try: - body = f"🤖 **AI Review**: {note.get('comment', '')}" - pr.create_issue_comment(body) - except Exception: - pass + pr.create_review(commit=pr.get_commits().reversed[0], body=summary, event=event, comments=review_comments) + logging.info(f"Submitted review: {event} with {len(review_comments)} comments.") except Exception as e: logging.error(f"Failed to submit review: {e}") @@ -401,74 +372,41 @@ def submit_review(pr, comments): def main(): github_token = os.getenv("GITHUB_TOKEN") event_path = os.getenv("GITHUB_EVENT_PATH") - - if not github_token or not event_path: - logging.error("Missing GITHUB_TOKEN or GITHUB_EVENT_PATH.") - return + if not github_token or not event_path: return try: with open(event_path, 'r') as f: event_data = json.load(f) - except FileNotFoundError: - logging.error(f"Event file not found at {event_path}") - return - except json.JSONDecodeError as e: - logging.error(f"Failed to parse event file: {e}") - return - except Exception as e: - logging.error(f"Unexpected error reading event file: {e}") - return + except Exception: return - if 'pull_request' not in event_data: - logging.info("Not a pull_request event.") - return - - try: - pr_number = event_data['pull_request']['number'] - repo_name = event_data['repository']['full_name'] - except KeyError as e: - logging.error(f"Missing field: {e}") - return - - if '/' not in repo_name: - logging.error(f"Invalid repo format: {repo_name}") - return + if 'pull_request' not in event_data: return - owner_name, repository_name = repo_name.split('/', 1) - logging.info(f"🔍 Starting AI review for PR #{pr_number} in {repo_name}") + pr_number = event_data['pull_request']['number'] + repo_name = event_data['repository']['full_name'] + owner, repo_name_only = repo_name.split('/', 1) - # Initialize GitHub client + 
logging.info(f"🔍 AI Review (Smart Mode) for PR #{pr_number}") + g = Github(github_token) - try: - repo = g.get_repo(repo_name) - except GithubException as e: - logging.error(f"Repo access failed: {e}") - return - - # Step 1: Resolve old bot threads - logging.info("Resolving old bot comments...") - resolve_existing_comments(owner_name, repository_name, pr_number, github_token) + repo = g.get_repo(repo_name) - # Step 2: Get project context - logging.info("Fetching project context...") + # 1. Fetch Context & Analyze project_context = get_project_context(repo) file_list = get_file_list(repo) - - # Step 3: Get PR diff pr, files_data = get_pr_diff(repo, pr_number) - + if not files_data: - logging.info("No reviewable files.") + logging.info("No files to review.") return - logging.info(f"Analyzing {len(files_data)} files...") + ai_comments = analyze_code_with_openrouter(files_data, project_context, file_list) - # Step 4: AI Analysis - comments = analyze_code_with_openrouter(files_data, project_context, file_list) + # 2. Smart Auto-Resolve & Sync + final_comments = manage_review_threads(owner, repo_name_only, pr_number, ai_comments, github_token) - # Step 5: Submit review - submit_review(pr, comments) - logging.info("🎉 Review complete!") + # 3. Post (only new) comments + submit_review(pr, final_comments) + logging.info("Done.") if __name__ == "__main__":