From 72bc142d390a615896be1a96cf81b5f3aaa56cdb Mon Sep 17 00:00:00 2001 From: Lavanya Kappagantu Date: Fri, 12 Dec 2025 11:06:38 -0800 Subject: [PATCH 1/4] Adding the points allocated to users in a PR comment --- .github/workflows/points.yml | 80 ++---- scripts/config_points.yml | 47 +++- scripts/track_points.py | 520 +++++++++++++++++++++++++++++++++++ 3 files changed, 580 insertions(+), 67 deletions(-) create mode 100644 scripts/track_points.py diff --git a/.github/workflows/points.yml b/.github/workflows/points.yml index c87cdace..e162097c 100644 --- a/.github/workflows/points.yml +++ b/.github/workflows/points.yml @@ -1,29 +1,37 @@ -name: Points Allocation +name: Contributor Points Tracker on: pull_request_review: - types: [submitted] + types: [submitted, edited, dismissed] issue_comment: - types: [created] + types: [created, edited] + pull_request: + types: [opened, closed, reopened, labeled, unlabeled] + pull_request_review_comment: + types: [created, edited] + issues: + types: [opened, closed, labeled] permissions: - contents: write - pull-requests: write + contents: read + pull-requests: read + issues: write jobs: - assign-points: + track-points: runs-on: ubuntu-latest - # Only run for PR reviews or comments on PRs (not regular issues) + # Only run for PR-related events or issue events (not regular issue comments) if: > github.event_name == 'pull_request_review' || + github.event_name == 'pull_request_review_comment' || + github.event_name == 'pull_request' || + github.event_name == 'issues' || (github.event_name == 'issue_comment' && github.event.issue.pull_request != null) steps: - name: Checkout repository uses: actions/checkout@v4 with: - ref: main token: ${{ secrets.GITHUB_TOKEN }} - fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v5 @@ -33,51 +41,13 @@ jobs: cache: 'pip' - name: Install dependencies - run: pip install PyYAML - - - name: Run points script - id: assign_points run: | - set +e # Don't exit on error - python scripts/assign_points.py - exit_code=$? - echo "exit_code=$exit_code" >> $GITHUB_OUTPUT - - # Exit codes: - # 0 = Success (points awarded) - # 2 = No-op (no points, but not an error) - # 1 or other = Actual error - - if [ $exit_code -eq 0 ] || [ $exit_code -eq 2 ]; then - exit 0 - else - exit $exit_code - fi - - - name: Update leaderboard markdown - if: steps.assign_points.outputs.exit_code == '0' - run: python scripts/update_leaderboard.py + pip install PyYAML requests - - name: Create Pull Request - if: steps.assign_points.outputs.exit_code == '0' - uses: peter-evans/create-pull-request@v6 - continue-on-error: true - with: - token: ${{ secrets.GITHUB_TOKEN }} - add: 'leaderboard.json,LEADERBOARD.md' - commit-message: "Update leaderboard" - branch: leaderboard-update-${{ github.run_id }} - delete-branch: true - title: "Update contributor leaderboard" - body: | - ## Leaderboard Update - - This PR updates the contributor leaderboard based on recent PR review activity. - - **Triggered by:** ${{ github.event_name }} - **Run:** ${{ github.run_number }} - - Please review and merge to update the leaderboard. - labels: | - leaderboard - automated + - name: Calculate and update points + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_EVENT_NAME: ${{ github.event_name }} + GITHUB_EVENT_PATH: ${{ github.event_path }} + run: python scripts/track_points.py diff --git a/scripts/config_points.yml b/scripts/config_points.yml index 627bf51f..9e2908b5 100644 --- a/scripts/config_points.yml +++ b/scripts/config_points.yml @@ -1,23 +1,46 @@ # Contributor Points Configuration # -# Scoring is based on actual review actions, not keywords. -# All contributions are valued and tracked automatically. +# This configuration defines point values for various contribution activities. +# Points are calculated automatically by the workflow and displayed in PR comments. # # Scoring Rules: -# 1. Any PR review submission = review_submission points (base points) -# 2. Substantial review (100+ characters, excluding whitespace) = detailed_review bonus (additive) -# 3. PR approval (state=approved) = approve_pr bonus (additive) -# 4. PR comment (not a full review) = pr_comment points +# 1. Review submission = base points +# 2. Detailed review (100+ characters) = bonus points (additive) +# 3. PR approval = bonus points (additive) +# 4. PR comment = fixed points +# 5. PR merged = points for author +# 6. Special labels (bug, priority, docs) = bonus points +# 7. First-time contributor = bonus points # # Examples: # - Simple review with short comment = 5 points -# - Review with detailed comment (100+ characters) = 5 + 5 = 10 points -# - Approved PR = 5 + 3 = 8 points -# - Approved PR with detailed feedback = 5 + 5 + 3 = 13 points -# - Comment on PR (not a review) = 2 points +# - Review with detailed comment (100+ chars) = 10 points (5 + 5) +# - Approved PR with detailed feedback = 13 points (5 + 5 + 3) +# - PR comment = 2 points +# - PR merged with bug fix = 10 points (5 + 5) points: + # Review & Comment Points review_submission: 5 # Base points for submitting any PR review - detailed_review: 5 # Bonus for substantial review (100+ characters of feedback) + detailed_review: 5 # Bonus for substantial review (100+ characters) approve_pr: 3 # Bonus for approving a PR - pr_comment: 2 # Points for commenting on a PR (not a full review) \ No newline at end of file + + # PR Author Points + pr_merged: 5 # Points when PR is successfully merged + + # Label-Based Bonuses (for PR authors) + bug_fix: 5 # Bonus for fixing bugs + high_priority: 3 # Bonus for high-priority work + critical_bug: 10 # Bonus for critical bug fixes + documentation: 4 # Bonus for documentation contributions + performance_improvement: 6 # Bonus for performance enhancements + security_fix: 15 # Bonus for security fixes + + # Special Bonuses + first_time_contributor: 5 # Bonus for first-time contributors + + # Future Implementation (not currently calculated) + # speed_bonus_24h: 3 # TODO: Bonus for PRs merged within 24 hours + # test_coverage: 8 # TODO: Bonus for adding comprehensive tests + # mentorship: 10 # TODO: Points for mentoring sessions + # issue_triage: 2 # TODO: Points for triaging issues diff --git a/scripts/track_points.py b/scripts/track_points.py new file mode 100644 index 00000000..60a560a4 --- /dev/null +++ b/scripts/track_points.py @@ -0,0 +1,520 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +""" +Contributor Points Tracker + +This script: +1. Monitors all PR-related events (reviews, comments, approvals, merges, etc.) +2. Calculates points for all contributors on a PR +3. Updates a single PR comment with live point tracking +4. Exports metadata in comment for external pipeline to parse and store in Kusto + +Supported events: +- pull_request_review (submitted, edited, dismissed) +- issue_comment (on PRs only) +- pull_request (opened, closed, labeled, etc.) +- pull_request_review_comment + +Points are calculated based on: +- Review submission: 5 points +- Detailed review (100+ chars): +5 bonus +- PR approval: +3 bonus +- PR comment: 2 points +- PR merged: 5 points (for author) +- Bug fix (labeled): +5 bonus +- High priority: +3 bonus +- Test coverage: +8 points +- Documentation: +4 points +- First-time contributor: +5 bonus +""" + +import os +import sys +import json +import yaml +import requests +from datetime import datetime, timezone +from typing import Dict, List, Optional, Tuple + +# Configuration +CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'config_points.yml') +GITHUB_TOKEN = os.getenv('GITHUB_TOKEN') +GITHUB_REPOSITORY = os.getenv('GITHUB_REPOSITORY') # Format: owner/repo +GITHUB_EVENT_NAME = os.getenv('GITHUB_EVENT_NAME') +GITHUB_EVENT_PATH = os.getenv('GITHUB_EVENT_PATH') + +# Comment identifier for finding the bot's tracking comment +COMMENT_MARKER = "" + +# Keywords for detecting performance improvement suggestions in reviews +PERFORMANCE_KEYWORDS = [ + 'performance', 'performant', 'optimization', 'optimize', + 'fast', 'faster', 'efficient', 'efficiency', 'speed' +] + +def load_config() -> dict: + """Load points configuration from YAML file.""" + if not os.path.exists(CONFIG_FILE): + print(f"ERROR: Config file not found: {CONFIG_FILE}", file=sys.stderr) + sys.exit(1) + + try: + with open(CONFIG_FILE, 'r', encoding='utf-8') as f: + config = yaml.safe_load(f) + + # Validate required keys exist + required_keys = ['review_submission', 'detailed_review', 'approve_pr', 'pr_merged'] + if 'points' not in config: + print(f"ERROR: Config missing 'points' section", file=sys.stderr) + sys.exit(1) + + missing = [key for key in required_keys if key not in config['points']] + if missing: + print(f"ERROR: Config missing required keys: {missing}", file=sys.stderr) + sys.exit(1) + + return config + except Exception as e: + print(f"ERROR: Failed to load config: {e}", file=sys.stderr) + sys.exit(1) + +def load_event() -> dict: + """Load GitHub event payload.""" + if not GITHUB_EVENT_PATH or not os.path.exists(GITHUB_EVENT_PATH): + print(f"ERROR: Event file not found: {GITHUB_EVENT_PATH}", file=sys.stderr) + sys.exit(1) + + try: + with open(GITHUB_EVENT_PATH, 'r', encoding='utf-8') as f: + event = json.load(f) + return event + except Exception as e: + print(f"ERROR: Failed to load event: {e}", file=sys.stderr) + sys.exit(1) + +def get_pr_number(event: dict) -> Optional[int]: + """Extract PR number from event.""" + if 'pull_request' in event: + return event['pull_request']['number'] + elif 'issue' in event and 'pull_request' in event['issue']: + return event['issue']['number'] + elif 'review' in event: + pr_url = event['review'].get('pull_request_url', '') + if pr_url: + try: + return int(pr_url.split('/')[-1]) + except (ValueError, IndexError): + print(f"WARNING: Could not parse PR number from URL: {pr_url}", file=sys.stderr) + return None + return None + +def get_issue_number(event: dict) -> Optional[int]: + """Extract issue number from event (for non-PR issues only).""" + if 'issue' in event and 'pull_request' not in event['issue']: + return event['issue']['number'] + return None + +def is_issue_event(event: dict) -> bool: + """Check if this is an issue event (not a PR).""" + return GITHUB_EVENT_NAME == 'issues' or (GITHUB_EVENT_NAME == 'issue_comment' and 'issue' in event and 'pull_request' not in event['issue']) + +def github_api_request(method: str, endpoint: str, data: Optional[dict] = None) -> dict: + """Make GitHub API request.""" + url = f"https://api.github.com/repos/{GITHUB_REPOSITORY}/{endpoint}" + headers = { + 'Authorization': f'token {GITHUB_TOKEN}', + 'Accept': 'application/vnd.github.v3+json' + } + + try: + if method == 'GET': + response = requests.get(url, headers=headers) + elif method == 'POST': + response = requests.post(url, headers=headers, json=data) + elif method == 'PATCH': + response = requests.patch(url, headers=headers, json=data) + + response.raise_for_status() + return response.json() if response.text else {} + except Exception as e: + print(f"ERROR: GitHub API request failed: {e}", file=sys.stderr) + return {} + +def get_all_pr_activity(pr_number: int) -> Tuple[List[dict], List[dict], dict]: + """ + Fetch all activity on a PR: reviews, comments, and PR details. + + Returns: + Tuple of (reviews, comments, pr_details) + """ + reviews = github_api_request('GET', f'pulls/{pr_number}/reviews') + comments = github_api_request('GET', f'issues/{pr_number}/comments') + pr_details = github_api_request('GET', f'pulls/{pr_number}') + + return ( + reviews if isinstance(reviews, list) else [], + comments if isinstance(comments, list) else [], + pr_details if isinstance(pr_details, dict) else {} + ) + +def calculate_review_points(review: dict, config: dict) -> Tuple[int, List[str]]: + """Calculate points for a single review.""" + points = config['points']['review_submission'] # Base: 5 points + breakdown = ['Review submission: +5 points'] + + # Detailed review bonus (100+ characters) + body = review.get('body', '').strip() + if len(body) >= 100: + points += config['points']['detailed_review'] # +5 points + breakdown.append(f'Detailed feedback ({len(body)} characters): +5 points') + + # Performance improvement suggestion bonus + body_lower = body.lower() + if any(keyword in body_lower for keyword in PERFORMANCE_KEYWORDS): + bonus = config['points'].get('performance_improvement', 6) + points += bonus + breakdown.append(f'Performance improvement suggestion: +{bonus} points') + + # Approval bonus + if review.get('state') == 'APPROVED': + points += config['points']['approve_pr'] # +3 points + breakdown.append('Approved PR: +3 points') + + return points, breakdown + +def calculate_pr_author_points(pr_details: dict, config: dict) -> Tuple[int, List[str]]: + """Calculate points for PR author based on PR characteristics.""" + points = 0 + breakdown = [] + + # PR merged + if pr_details.get('merged'): + points += config['points'].get('pr_merged', 5) + breakdown.append('PR merged: +5 points') + + # Check labels for bonuses + labels = [label['name'].lower() for label in pr_details.get('labels', [])] + + # Bug fix: Check if PR is linked to an issue with 'bug' label + linked_issues = get_linked_issues(pr_details) + for issue in linked_issues: + issue_labels = [label['name'].lower() for label in issue.get('labels', [])] + if any('bug' in label for label in issue_labels): + bonus = config['points'].get('bug_fix', 5) + points += bonus + breakdown.append(f'Bug fix (closes issue #{issue["number"]}): +{bonus} points') + break # Only award once even if multiple bug issues linked + + # Security fix: Check if PR is linked to a security issue OR has security labels + has_security = any('security' in label or 'vulnerability' in label for label in labels) + + # Also check linked issues for security labels (only if not already found) + if not has_security: + for issue in linked_issues: + issue_labels = [label['name'].lower() for label in issue.get('labels', [])] + if any('security' in label or 'vulnerability' in label for label in issue_labels): + has_security = True + breakdown_source = f'closes issue #{issue["number"]}' + break + else: + breakdown_source = 'PR labeled' + + # Award security bonus only once + if has_security: + bonus = config['points'].get('security_fix', 15) + points += bonus + breakdown.append(f'Security fix ({breakdown_source}): +{bonus} points') + + # Documentation: Check both labels AND files changed + has_docs = any('documentation' in label or 'docs' in label for label in labels) + + # Also check if PR modifies documentation files + if not has_docs: + pr_number = pr_details.get('number') + if pr_number: + files = github_api_request('GET', f'pulls/{pr_number}/files') + if isinstance(files, list): + has_docs = any( + 'readme' in f['filename'].lower() or + 'docs/' in f['filename'].lower() or + f['filename'].lower().endswith('.md') + for f in files if f.get('additions', 0) > 0 + ) + + if has_docs: + bonus = config['points'].get('documentation', 4) + points += bonus + breakdown.append(f'Documentation: +{bonus} points') + + # Check if first-time contributor + author = pr_details.get('user', {}).get('login') + pr_number = pr_details.get('number') + if author and pr_number and is_first_time_contributor(author, pr_number): + bonus = config['points'].get('first_time_contributor', 5) + points += bonus + breakdown.append(f'First-time contributor: +{bonus} points') + + return points, breakdown + +def get_linked_issues(pr_details: dict) -> List[dict]: + """Get issues linked to this PR by checking PR body for closing keywords.""" + linked_issues = [] + pr_body = pr_details.get('body', '') or '' + pr_number = pr_details.get('number') + + # Keywords that link issues: closes, fixes, resolves (case-insensitive) + import re + # Pattern matches: "closes #123", "fixes #456", "resolves #789", etc. + pattern = r'(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*:?\s*#(\d+)' + matches = re.findall(pattern, pr_body, re.IGNORECASE) + + for issue_number in matches: + issue = github_api_request('GET', f'issues/{issue_number}') + if issue and isinstance(issue, dict): + linked_issues.append(issue) + + return linked_issues + +def is_first_time_contributor(username: str, current_pr_number: int) -> bool: + """Check if the current PR is the user's first merged PR.""" + # Check if user has any other merged PRs (excluding current one) + # Limit to recent 100 PRs to avoid API rate limits and performance issues + try: + search_result = github_api_request('GET', f'pulls?state=all&creator={username}&per_page=100') + if isinstance(search_result, list): + # Get all merged PRs excluding the current one + other_merged_prs = [ + pr for pr in search_result + if pr.get('merged_at') and pr.get('number') != current_pr_number + ] + # First-time contributor if no other merged PRs exist + return len(other_merged_prs) == 0 + except Exception as e: + print(f"WARNING: Error checking first-time contributor status: {e}", file=sys.stderr) + return False + +def calculate_issue_points(issue: dict, config: dict) -> Tuple[int, List[str]]: + """Calculate points for issue creator based on priority.""" + points = 0 + breakdown = [] + + labels = [label['name'].lower() for label in issue.get('labels', [])] + + # High priority issue creation + if any('priority' in label and 'high' in label for label in labels): + bonus = config['points'].get('high_priority', 3) + points += bonus + breakdown.append(f'High priority issue created: +{bonus} points') + + # Critical bug issue + if any('critical' in label and 'bug' in label for label in labels): + bonus = config['points'].get('critical_bug', 10) + points += bonus + breakdown.append(f'Critical bug reported: +{bonus} points') + + # Security vulnerability reported + if any('security' in label or 'vulnerability' in label for label in labels): + bonus = config['points'].get('security_fix', 15) + points += bonus + breakdown.append(f'Security vulnerability reported: +{bonus} points') + + return points, breakdown + +def aggregate_contributor_points(reviews: List[dict], comments: List[dict], pr_details: dict, config: dict) -> Dict[str, dict]: + """ + Aggregate points for all contributors on a PR. + + Returns: + Dict mapping username to {'total': int, 'activities': [list of activity dicts]} + """ + contributors = {} + + # Process reviews + for review in reviews: + username = review['user']['login'] + if username not in contributors: + contributors[username] = {'total': 0, 'activities': []} + + points, breakdown = calculate_review_points(review, config) + contributors[username]['total'] += points + contributors[username]['activities'].append({ + 'type': 'review', + 'points': points, + 'breakdown': breakdown, + 'timestamp': review['submitted_at'], + 'state': review['state'] + }) + + # Add PR author points (if PR is merged or has special labels) + pr_author = pr_details.get('user', {}).get('login') + if pr_author: + author_points, author_breakdown = calculate_pr_author_points(pr_details, config) + if author_points > 0: + if pr_author not in contributors: + contributors[pr_author] = {'total': 0, 'activities': []} + + contributors[pr_author]['total'] += author_points + contributors[pr_author]['activities'].append({ + 'type': 'pr_author', + 'points': author_points, + 'breakdown': author_breakdown, + 'timestamp': pr_details.get('merged_at') or pr_details.get('created_at') + }) + + return contributors + +def format_comment_body(pr_number: int, contributors: Dict[str, dict]) -> str: + """Format the PR comment body with points tracking.""" + total_points = sum(c['total'] for c in contributors.values()) + timestamp = datetime.now(timezone.utc).strftime('%B %d, %Y at %I:%M %p UTC') + + # Header + comment = f"{COMMENT_MARKER}\n\n" + comment += "## 🏆 Contributor Points Tracker\n\n" + comment += f"**Total Points on This PR: {total_points} points**\n\n" + comment += "### Points by Contributor\n\n" + + # Sort contributors by points (descending) + sorted_contributors = sorted(contributors.items(), key=lambda x: x[1]['total'], reverse=True) + + for username, data in sorted_contributors: + comment += f"#### @{username} - **{data['total']} points**\n\n" + + # Group activities by type + for activity in data['activities']: + timestamp_str = activity['timestamp'].split('T')[0] # Just the date + comment += f"**{activity['type'].replace('_', ' ').title()}** ({timestamp_str}):\n" + for item in activity['breakdown']: + comment += f"- ✅ {item}\n" + comment += "\n" + + # Footer + comment += "---\n\n" + comment += "### How Points Are Calculated\n\n" + comment += "| Action | Points |\n" + comment += "|--------|--------|\n" + comment += "| Review submission | 5 |\n" + comment += "| Detailed review (100+ chars) | +5 bonus |\n" + comment += "| Performance improvement suggestion | +6 bonus |\n" + comment += "| PR approval | +3 bonus |\n" + comment += "| PR merged | 5 |\n" + comment += "| Bug fix (closes issue) | +5 bonus |\n" + comment += "| Security fix/vulnerability | +15 bonus |\n" + comment += "| Documentation | +4 bonus |\n" + comment += "| First-time contributor | +5 bonus |\n" + comment += "| High priority issue created | +3 bonus |\n" + comment += "| Critical bug reported | +10 bonus |\n\n" + comment += f"*Last updated: {timestamp}*\n\n" + + # Metadata for external pipeline parsing + metadata = { + 'pr_number': pr_number, + 'total_points': total_points, + 'contributors': { + username: { + 'total': data['total'], + 'activity_count': len(data['activities']) + } + for username, data in contributors.items() + }, + 'last_updated': datetime.now(timezone.utc).isoformat() + } + comment += f"\n" + + return comment + +def find_existing_comment(pr_number: int) -> Optional[int]: + """Find the bot's existing tracking comment on the PR.""" + comments = github_api_request('GET', f'issues/{pr_number}/comments') + if not isinstance(comments, list): + return None + + for comment in comments: + if COMMENT_MARKER in comment.get('body', ''): + return comment['id'] + + return None + +def update_or_create_comment(pr_number: int, body: str): + """Update existing tracking comment or create a new one.""" + existing_comment_id = find_existing_comment(pr_number) + + if existing_comment_id: + # Update existing comment + result = github_api_request('PATCH', f'issues/comments/{existing_comment_id}', {'body': body}) + print(f"✅ Updated tracking comment (ID: {existing_comment_id})") + else: + # Create new comment + result = github_api_request('POST', f'issues/{pr_number}/comments', {'body': body}) + print(f"✅ Created new tracking comment") + +def main(): + """Main execution function.""" + if not GITHUB_TOKEN: + print("ERROR: GITHUB_TOKEN not set", file=sys.stderr) + sys.exit(1) + + print(f"🔄 Processing {GITHUB_EVENT_NAME} event...") + + # Load configuration and event + config = load_config() + event = load_event() + + # Check if this is an issue event (not a PR) + if is_issue_event(event): + print("📋 Processing issue event...") + issue_number = get_issue_number(event) + if not issue_number: + print("â„šī¸ Could not extract issue number") + sys.exit(0) + + # Get issue details + issue = github_api_request('GET', f'issues/{issue_number}') + if not issue or not isinstance(issue, dict): + print("❌ Failed to fetch issue details") + sys.exit(1) + + # Calculate points for issue creator + issue_creator = issue.get('user', {}).get('login') + points, breakdown = calculate_issue_points(issue, config) + + if points > 0 and issue_creator: + print(f" Issue #{issue_number} by @{issue_creator}: {points} points") + for item in breakdown: + print(f" - {item}") + print("â„šī¸ External pipeline should parse this event and store in Kusto") + else: + print("â„šī¸ No points awarded for this issue") + + sys.exit(0) + + # Handle PR events + pr_number = get_pr_number(event) + if not pr_number: + print("â„šī¸ Not a PR-related event, skipping") + sys.exit(0) + + print(f"📝 Processing PR #{pr_number}...") + + # Fetch all PR activity + reviews, comments, pr_details = get_all_pr_activity(pr_number) + print(f" Found {len(reviews)} reviews and {len(comments)} comments") + + # Calculate points for all contributors + contributors = aggregate_contributor_points(reviews, comments, pr_details, config) + print(f" Calculated points for {len(contributors)} contributors") + + if not contributors: + print("â„šī¸ No points to award yet") + sys.exit(0) + + # Format and update comment + comment_body = format_comment_body(pr_number, contributors) + update_or_create_comment(pr_number, comment_body) + + print("✅ Points tracking complete!") + print("â„šī¸ External pipeline can parse comment metadata for Kusto ingestion") + +if __name__ == '__main__': + main() From 07bf7ea84b61747388dac3aa8e2383d538749c46 Mon Sep 17 00:00:00 2001 From: Lavanya Kappagantu Date: Mon, 22 Dec 2025 14:54:09 -0800 Subject: [PATCH 2/4] addressing comments --- .github/workflows/points.yml | 6 +- scripts/track_points.py | 109 +++++++++++++++++++---------------- 2 files changed, 64 insertions(+), 51 deletions(-) diff --git a/.github/workflows/points.yml b/.github/workflows/points.yml index e162097c..077179f1 100644 --- a/.github/workflows/points.yml +++ b/.github/workflows/points.yml @@ -1,3 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + name: Contributor Points Tracker on: @@ -14,7 +17,7 @@ on: permissions: contents: read - pull-requests: read + pull-requests: write issues: write jobs: @@ -32,6 +35,7 @@ jobs: uses: actions/checkout@v4 with: token: ${{ secrets.GITHUB_TOKEN }} + ref: ${{ github.event.repository.default_branch }} - name: Set up Python uses: actions/setup-python@v5 diff --git a/scripts/track_points.py b/scripts/track_points.py index 60a560a4..32c0936d 100644 --- a/scripts/track_points.py +++ b/scripts/track_points.py @@ -20,18 +20,20 @@ - Review submission: 5 points - Detailed review (100+ chars): +5 bonus - PR approval: +3 bonus -- PR comment: 2 points - PR merged: 5 points (for author) -- Bug fix (labeled): +5 bonus -- High priority: +3 bonus -- Test coverage: +8 points -- Documentation: +4 points +- Bug fix (closes issue): +5 bonus +- Security fix/vulnerability: +15 bonus +- Documentation: +4 bonus +- Performance improvement: +6 bonus - First-time contributor: +5 bonus +- High priority issue: +3 bonus +- Critical bug reported: +10 bonus """ import os import sys import json +import re import yaml import requests from datetime import datetime, timezone @@ -123,7 +125,7 @@ def github_api_request(method: str, endpoint: str, data: Optional[dict] = None) """Make GitHub API request.""" url = f"https://api.github.com/repos/{GITHUB_REPOSITORY}/{endpoint}" headers = { - 'Authorization': f'token {GITHUB_TOKEN}', + 'Authorization': f'Bearer {GITHUB_TOKEN}', 'Accept': 'application/vnd.github.v3+json' } @@ -137,6 +139,16 @@ def github_api_request(method: str, endpoint: str, data: Optional[dict] = None) response.raise_for_status() return response.json() if response.text else {} + except requests.exceptions.HTTPError as e: + if e.response.status_code == 401: + print(f"ERROR: Authentication failed - check GITHUB_TOKEN", file=sys.stderr) + sys.exit(1) + elif e.response.status_code == 403: + print(f"ERROR: API rate limit exceeded or access forbidden", file=sys.stderr) + sys.exit(1) + else: + print(f"ERROR: GitHub API request failed ({e.response.status_code}): {e}", file=sys.stderr) + return {} except Exception as e: print(f"ERROR: GitHub API request failed: {e}", file=sys.stderr) return {} @@ -208,6 +220,7 @@ def calculate_pr_author_points(pr_details: dict, config: dict) -> Tuple[int, Lis # Security fix: Check if PR is linked to a security issue OR has security labels has_security = any('security' in label or 'vulnerability' in label for label in labels) + breakdown_source = 'PR labeled' if has_security else '' # Also check linked issues for security labels (only if not already found) if not has_security: @@ -217,8 +230,6 @@ def calculate_pr_author_points(pr_details: dict, config: dict) -> Tuple[int, Lis has_security = True breakdown_source = f'closes issue #{issue["number"]}' break - else: - breakdown_source = 'PR labeled' # Award security bonus only once if has_security: @@ -248,9 +259,7 @@ def calculate_pr_author_points(pr_details: dict, config: dict) -> Tuple[int, Lis breakdown.append(f'Documentation: +{bonus} points') # Check if first-time contributor - author = pr_details.get('user', {}).get('login') - pr_number = pr_details.get('number') - if author and pr_number and is_first_time_contributor(author, pr_number): + if is_first_time_contributor(pr_details): bonus = config['points'].get('first_time_contributor', 5) points += bonus breakdown.append(f'First-time contributor: +{bonus} points') @@ -261,38 +270,29 @@ def get_linked_issues(pr_details: dict) -> List[dict]: """Get issues linked to this PR by checking PR body for closing keywords.""" linked_issues = [] pr_body = pr_details.get('body', '') or '' - pr_number = pr_details.get('number') # Keywords that link issues: closes, fixes, resolves (case-insensitive) - import re # Pattern matches: "closes #123", "fixes #456", "resolves #789", etc. - pattern = r'(?:close[sd]?|fix(?:e[sd])?|resolve[sd]?)\s*:?\s*#(\d+)' + pattern = r'\b(closes?|fixe[sd]|resolved?)\b\s*:?\s*#(\d+)' matches = re.findall(pattern, pr_body, re.IGNORECASE) - for issue_number in matches: + # Extract just the issue numbers (group 2 from the matches) + issue_numbers = [match[1] for match in matches] + + for issue_number in issue_numbers: issue = github_api_request('GET', f'issues/{issue_number}') if issue and isinstance(issue, dict): linked_issues.append(issue) return linked_issues -def is_first_time_contributor(username: str, current_pr_number: int) -> bool: - """Check if the current PR is the user's first merged PR.""" - # Check if user has any other merged PRs (excluding current one) - # Limit to recent 100 PRs to avoid API rate limits and performance issues - try: - search_result = github_api_request('GET', f'pulls?state=all&creator={username}&per_page=100') - if isinstance(search_result, list): - # Get all merged PRs excluding the current one - other_merged_prs = [ - pr for pr in search_result - if pr.get('merged_at') and pr.get('number') != current_pr_number - ] - # First-time contributor if no other merged PRs exist - return len(other_merged_prs) == 0 - except Exception as e: - print(f"WARNING: Error checking first-time contributor status: {e}", file=sys.stderr) - return False +def is_first_time_contributor(pr_details: dict) -> bool: + """ + Check if the PR author is a first-time contributor using the 'author_association' field. + Returns True if the author_association is 'FIRST_TIME_CONTRIBUTOR'. + """ + author_association = pr_details.get('author_association', '') + return author_association.upper() == 'FIRST_TIME_CONTRIBUTOR' def calculate_issue_points(issue: dict, config: dict) -> Tuple[int, List[str]]: """Calculate points for issue creator based on priority.""" @@ -321,7 +321,7 @@ def calculate_issue_points(issue: dict, config: dict) -> Tuple[int, List[str]]: return points, breakdown -def aggregate_contributor_points(reviews: List[dict], comments: List[dict], pr_details: dict, config: dict) -> Dict[str, dict]: +def aggregate_contributor_points(reviews: List[dict], pr_details: dict, config: dict) -> Dict[str, dict]: """ Aggregate points for all contributors on a PR. @@ -364,7 +364,7 @@ def aggregate_contributor_points(reviews: List[dict], comments: List[dict], pr_d return contributors -def format_comment_body(pr_number: int, contributors: Dict[str, dict]) -> str: +def format_comment_body(pr_number: int, contributors: Dict[str, dict], config: dict) -> str: """Format the PR comment body with points tracking.""" total_points = sum(c['total'] for c in contributors.values()) timestamp = datetime.now(timezone.utc).strftime('%B %d, %Y at %I:%M %p UTC') @@ -383,7 +383,13 @@ def format_comment_body(pr_number: int, contributors: Dict[str, dict]) -> str: # Group activities by type for activity in data['activities']: - timestamp_str = activity['timestamp'].split('T')[0] # Just the date + # Robustly parse ISO 8601 timestamp, fallback to original string if parsing fails + try: + ts = activity['timestamp'].replace('Z', '+00:00') if activity.get('timestamp') else '' + dt = datetime.fromisoformat(ts) + timestamp_str = dt.strftime('%Y-%m-%d') + except Exception: + timestamp_str = activity.get('timestamp', 'Unknown date').split('T')[0] if activity.get('timestamp') else 'Unknown date' comment += f"**{activity['type'].replace('_', ' ').title()}** ({timestamp_str}):\n" for item in activity['breakdown']: comment += f"- ✅ {item}\n" @@ -394,17 +400,20 @@ def format_comment_body(pr_number: int, contributors: Dict[str, dict]) -> str: comment += "### How Points Are Calculated\n\n" comment += "| Action | Points |\n" comment += "|--------|--------|\n" - comment += "| Review submission | 5 |\n" - comment += "| Detailed review (100+ chars) | +5 bonus |\n" - comment += "| Performance improvement suggestion | +6 bonus |\n" - comment += "| PR approval | +3 bonus |\n" - comment += "| PR merged | 5 |\n" - comment += "| Bug fix (closes issue) | +5 bonus |\n" - comment += "| Security fix/vulnerability | +15 bonus |\n" - comment += "| Documentation | +4 bonus |\n" - comment += "| First-time contributor | +5 bonus |\n" - comment += "| High priority issue created | +3 bonus |\n" - comment += "| Critical bug reported | +10 bonus |\n\n" + + # Dynamically generate points table from config + points_config = config.get('points', {}) + comment += f"| Review submission | {points_config.get('review_submission', 5)} |\n" + comment += f"| Detailed review (100+ chars) | +{points_config.get('detailed_review', 5)} bonus |\n" + comment += f"| Performance improvement suggestion | +{points_config.get('performance_improvement', 6)} bonus |\n" + comment += f"| PR approval | +{points_config.get('approve_pr', 3)} bonus |\n" + comment += f"| PR merged | {points_config.get('pr_merged', 5)} |\n" + comment += f"| Bug fix (closes issue) | +{points_config.get('bug_fix', 5)} bonus |\n" + comment += f"| Security fix/vulnerability | +{points_config.get('security_fix', 15)} bonus |\n" + comment += f"| Documentation | +{points_config.get('documentation', 4)} bonus |\n" + comment += f"| First-time contributor | +{points_config.get('first_time_contributor', 5)} bonus |\n" + comment += f"| High priority issue created | +{points_config.get('high_priority', 3)} bonus |\n" + comment += f"| Critical bug reported | +{points_config.get('critical_bug', 10)} bonus |\n\n" comment += f"*Last updated: {timestamp}*\n\n" # Metadata for external pipeline parsing @@ -442,11 +451,11 @@ def update_or_create_comment(pr_number: int, body: str): if existing_comment_id: # Update existing comment - result = github_api_request('PATCH', f'issues/comments/{existing_comment_id}', {'body': body}) + github_api_request('PATCH', f'issues/comments/{existing_comment_id}', {'body': body}) print(f"✅ Updated tracking comment (ID: {existing_comment_id})") else: # Create new comment - result = github_api_request('POST', f'issues/{pr_number}/comments', {'body': body}) + github_api_request('POST', f'issues/{pr_number}/comments', {'body': body}) print(f"✅ Created new tracking comment") def main(): @@ -502,7 +511,7 @@ def main(): print(f" Found {len(reviews)} reviews and {len(comments)} comments") # Calculate points for all contributors - contributors = aggregate_contributor_points(reviews, comments, pr_details, config) + contributors = aggregate_contributor_points(reviews, pr_details, config) print(f" Calculated points for {len(contributors)} contributors") if not contributors: @@ -510,7 +519,7 @@ def main(): sys.exit(0) # Format and update comment - comment_body = format_comment_body(pr_number, contributors) + comment_body = format_comment_body(pr_number, contributors, config) update_or_create_comment(pr_number, comment_body) print("✅ Points tracking complete!") From 02a3df274e1ed285e906f8f9709da9579964474e Mon Sep 17 00:00:00 2001 From: Lavanya Kappagantu Date: Mon, 22 Dec 2025 15:07:22 -0800 Subject: [PATCH 3/4] addressed comments --- .github/workflows/points.yml | 2 +- scripts/config_points.yml | 8 +++----- scripts/track_points.py | 29 ++++++++++++++++++++++++----- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/.github/workflows/points.yml b/.github/workflows/points.yml index 077179f1..0a0b63f4 100644 --- a/.github/workflows/points.yml +++ b/.github/workflows/points.yml @@ -29,7 +29,7 @@ jobs: github.event_name == 'pull_request_review_comment' || github.event_name == 'pull_request' || github.event_name == 'issues' || - (github.event_name == 'issue_comment' && github.event.issue.pull_request != null) + (github.event_name == 'issue_comment' && github.event.issue.pull_request) steps: - name: Checkout repository uses: actions/checkout@v4 diff --git a/scripts/config_points.yml b/scripts/config_points.yml index 9e2908b5..9069a7b1 100644 --- a/scripts/config_points.yml +++ b/scripts/config_points.yml @@ -7,16 +7,14 @@ # 1. Review submission = base points # 2. Detailed review (100+ characters) = bonus points (additive) # 3. PR approval = bonus points (additive) -# 4. PR comment = fixed points -# 5. PR merged = points for author -# 6. Special labels (bug, priority, docs) = bonus points -# 7. First-time contributor = bonus points +# 4. PR merged = points for author +# 5. Special labels (bug, priority, docs) = bonus points +# 6. First-time contributor = bonus points # # Examples: # - Simple review with short comment = 5 points # - Review with detailed comment (100+ chars) = 10 points (5 + 5) # - Approved PR with detailed feedback = 13 points (5 + 5 + 3) -# - PR comment = 2 points # - PR merged with bug fix = 10 points (5 + 5) points: diff --git a/scripts/track_points.py b/scripts/track_points.py index 32c0936d..23a4b851 100644 --- a/scripts/track_points.py +++ b/scripts/track_points.py @@ -41,6 +41,8 @@ # Configuration CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'config_points.yml') +# GITHUB_TOKEN is provided by GitHub Actions with limited repository scope +# It expires after workflow completion and is never logged or exposed GITHUB_TOKEN = os.getenv('GITHUB_TOKEN') GITHUB_REPOSITORY = os.getenv('GITHUB_REPOSITORY') # Format: owner/repo GITHUB_EVENT_NAME = os.getenv('GITHUB_EVENT_NAME') @@ -49,6 +51,9 @@ # Comment identifier for finding the bot's tracking comment COMMENT_MARKER = "" +# Minimum character count for detailed review bonus +DETAILED_REVIEW_MIN_CHARS = 100 + # Keywords for detecting performance improvement suggestions in reviews PERFORMANCE_KEYWORDS = [ 'performance', 'performant', 'optimization', 'optimize', @@ -76,6 +81,20 @@ def load_config() -> dict: print(f"ERROR: Config missing required keys: {missing}", file=sys.stderr) sys.exit(1) + # Set defaults for optional keys + optional_defaults = { + 'performance_improvement': 6, + 'bug_fix': 5, + 'security_fix': 15, + 'documentation': 4, + 'first_time_contributor': 5, + 'high_priority': 3, + 'critical_bug': 10 + } + for key, default_value in optional_defaults.items(): + if key not in config['points']: + config['points'][key] = default_value + return config except Exception as e: print(f"ERROR: Failed to load config: {e}", file=sys.stderr) @@ -175,9 +194,9 @@ def calculate_review_points(review: dict, config: dict) -> Tuple[int, List[str]] points = config['points']['review_submission'] # Base: 5 points breakdown = ['Review submission: +5 points'] - # Detailed review bonus (100+ characters) + # Detailed review bonus (threshold defined by DETAILED_REVIEW_MIN_CHARS constant) body = review.get('body', '').strip() - if len(body) >= 100: + if len(body) >= DETAILED_REVIEW_MIN_CHARS: points += config['points']['detailed_review'] # +5 points breakdown.append(f'Detailed feedback ({len(body)} characters): +5 points') @@ -294,8 +313,8 @@ def is_first_time_contributor(pr_details: dict) -> bool: author_association = pr_details.get('author_association', '') return author_association.upper() == 'FIRST_TIME_CONTRIBUTOR' -def calculate_issue_points(issue: dict, config: dict) -> Tuple[int, List[str]]: - """Calculate points for issue creator based on priority.""" +def calculate_issue_creator_points(issue: dict, config: dict) -> Tuple[int, List[str]]: + """Calculate points awarded to the issue creator based on issue priority and labels.""" points = 0 breakdown = [] @@ -486,7 +505,7 @@ def main(): # Calculate points for issue creator issue_creator = issue.get('user', {}).get('login') - points, breakdown = calculate_issue_points(issue, config) + points, breakdown = calculate_issue_creator_points(issue, config) if points > 0 and issue_creator: print(f" Issue #{issue_number} by @{issue_creator}: {points} points") From 169f995db118c906eb2972064151d06be6330853 Mon Sep 17 00:00:00 2001 From: Lavanya Kappagantu Date: Mon, 22 Dec 2025 15:34:49 -0800 Subject: [PATCH 4/4] removed dead code and optimized the existing code --- leaderboard.json | 1 - scripts/assign_points.py | 282 ---------------------------------- scripts/track_points.py | 186 ++++++++++++---------- scripts/update_leaderboard.py | 203 ------------------------ 4 files changed, 102 insertions(+), 570 deletions(-) delete mode 100644 leaderboard.json delete mode 100644 scripts/assign_points.py delete mode 100644 scripts/update_leaderboard.py diff --git a/leaderboard.json b/leaderboard.json deleted file mode 100644 index 9e26dfee..00000000 --- a/leaderboard.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/scripts/assign_points.py b/scripts/assign_points.py deleted file mode 100644 index 1bb60045..00000000 --- a/scripts/assign_points.py +++ /dev/null @@ -1,282 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import os -import json -import yaml -import sys - -# Exit codes used by this script: -# 0 = Success - points were awarded and leaderboard updated -# 1 = Error - something went wrong (missing config, permissions, etc.) -# 2 = No-op - no points awarded, but not an error (duplicate event, no criteria matched) - -# Path to config file inside scripts folder -CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'config_points.yml') -PROCESSED_FILE = os.path.join(os.path.dirname(__file__), 'processed_ids.json') -# Path to leaderboard in repository root (one level up from scripts/) -LEADERBOARD_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'leaderboard.json') - -def load_config(): - """ - Load the points configuration from config_points.yml. - - Returns: - dict: Configuration dictionary with 'points' section - - Exits: - 1 if config file is missing or contains invalid YAML - """ - if not os.path.exists(CONFIG_FILE): - print(f"ERROR: Config file not found: {CONFIG_FILE}", file=sys.stderr) - print("Expected location: scripts/config_points.yml", file=sys.stderr) - sys.exit(1) - - try: - with open(CONFIG_FILE, 'r', encoding='utf-8') as f: - config = yaml.safe_load(f) - except yaml.YAMLError as e: - print(f"ERROR: Invalid YAML syntax in config file: {e}", file=sys.stderr) - print(f"File location: {CONFIG_FILE}", file=sys.stderr) - sys.exit(1) - except Exception as e: - print(f"ERROR: Failed to read config file: {e}", file=sys.stderr) - sys.exit(1) - - # Validate that config has the expected structure - if not isinstance(config, dict) or 'points' not in config: - print(f"ERROR: Invalid config structure in {CONFIG_FILE}", file=sys.stderr) - print("Expected format: { points: { review_submission: 5, detailed_review: 5, approve_pr: 3, pr_comment: 2 } }", file=sys.stderr) - sys.exit(1) - - return config - -def load_event(): - event_path = os.getenv('GITHUB_EVENT_PATH') - if not event_path: - print("ERROR: GITHUB_EVENT_PATH is not set.") - sys.exit(1) - if not os.path.exists(event_path): - print(f"ERROR: Event file not found: {event_path}") - sys.exit(1) - - try: - with open(event_path, 'r', encoding='utf-8') as f: - event = json.load(f) - except json.JSONDecodeError as e: - print(f"ERROR: Invalid JSON in event file: {e}", file=sys.stderr) - print(f"File location: {event_path}", file=sys.stderr) - sys.exit(1) - - # Validate that this is a PR-related event, not a regular issue comment - if 'issue' in event and 'pull_request' not in event.get('issue', {}): - print("INFO: Skipping - this is a comment on a regular issue, not a pull request.") - sys.exit(2) # Exit code 2 = no-op - - return event - -def load_processed_ids(): - if os.path.exists(PROCESSED_FILE): - with open(PROCESSED_FILE, 'r', encoding='utf-8') as f: - try: - return set(json.load(f)) - except json.JSONDecodeError: - return set() - return set() - -def save_processed_ids(ids): - """ - Save processed event IDs to prevent duplicate scoring. - - This is critical for data integrity - if this fails after points - are awarded, the same event could be scored multiple times on retry. - """ - try: - with open(PROCESSED_FILE, 'w', encoding='utf-8') as f: - json.dump(list(ids), f, indent=2) - except PermissionError as e: - print(f"ERROR: Permission denied when saving processed IDs to {PROCESSED_FILE}: {e}", file=sys.stderr) - print("Check file permissions and ensure the workflow has write access.", file=sys.stderr) - sys.exit(1) - except IOError as e: - print(f"ERROR: Failed to write processed IDs to {PROCESSED_FILE}: {e}", file=sys.stderr) - print("This may be due to disk space issues or file system problems.", file=sys.stderr) - sys.exit(1) - except Exception as e: - print(f"ERROR: Unexpected error saving processed IDs: {e}", file=sys.stderr) - sys.exit(1) - -def extract_user(event): - """ - Extract user login from GitHub event with multiple fallback strategies. - - Priority: - 1. review.user.login (for pull_request_review events) - 2. comment.user.login (for issue_comment events) - 3. sender.login (top-level event sender) - - Returns: - tuple: (user_login: str, source: str) or (None, None) if extraction fails - """ - # Try review user first - review = event.get('review') - if review and isinstance(review, dict): - review_user = review.get('user') - if review_user and isinstance(review_user, dict): - login = review_user.get('login') - if login: - return login, 'review.user' - - # Try comment user second - comment = event.get('comment') - if comment and isinstance(comment, dict): - comment_user = comment.get('user') - if comment_user and isinstance(comment_user, dict): - login = comment_user.get('login') - if login: - return login, 'comment.user' - - # Fallback to top-level sender (most reliable) - sender = event.get('sender') - if sender and isinstance(sender, dict): - login = sender.get('login') - if login: - return login, 'sender' - - # All extraction methods failed - return None, None - -def detect_points(event, cfg): - """ - Calculate points for a GitHub event based on review actions. - - Scoring Rules: - 1. Any PR review submission = review_submission points (base points) - 2. PR approval (state=approved) = approve_pr bonus (additive) - 3. Substantial review (comment length >= 100 characters) = detailed_review bonus (additive) - - Scoring Examples: - - Simple review with short comment = 5 points (base) - - Review with detailed feedback (100+ chars) = 5 + 5 = 10 points - - Approved PR = 5 + 3 = 8 points - - Approved PR with detailed feedback = 5 + 3 + 5 = 13 points - - Comment on PR (not a review) = 2 points - """ - action = event.get('action', '') - review = event.get('review') or {} - comment = event.get('comment') or {} - - review_body = review.get('body') or '' - review_state = (review.get('state') or '').lower() - - user, source = extract_user(event) - - if not user: - print("ERROR: Unable to extract user from event. Checked review.user, comment.user, and sender fields.") - print("Event structure:", json.dumps({ - 'has_review': 'review' in event, - 'has_comment': 'comment' in event, - 'has_sender': 'sender' in event, - 'action': action - }, indent=2)) - sys.exit(1) - - print(f"User identified: {user} (source: {source})") - - points = 0 - scoring_breakdown = [] - - # Determine if this is a review or just a comment - is_review = action == "submitted" and event.get('review') is not None and event.get('review') - is_comment = event.get('comment') is not None and event.get('comment') and not is_review - - if is_review: - # Base points for any PR review submission - points += cfg['points']['review_submission'] - scoring_breakdown.append(f"review_submission: +{cfg['points']['review_submission']}") - - # Bonus for substantial review (100+ characters) - if len(review_body.strip()) >= 100: - points += cfg['points']['detailed_review'] - scoring_breakdown.append(f"detailed_review: +{cfg['points']['detailed_review']}") - - # Bonus for approving the PR - if review_state == "approved": - points += cfg['points']['approve_pr'] - scoring_breakdown.append(f"approve_pr: +{cfg['points']['approve_pr']}") - - elif is_comment: - # Points for commenting on a PR (less than review) - points += cfg['points']['pr_comment'] - scoring_breakdown.append(f"pr_comment: +{cfg['points']['pr_comment']}") - - # Log scoring breakdown for transparency - if scoring_breakdown: - print(f"Scoring breakdown: {', '.join(scoring_breakdown)} = {points} total") - else: - print("No scoring criteria matched.") - - return points, user - -def update_leaderboard(user, points): - """ - Update the leaderboard with awarded points for a user. - - Args: - user: GitHub username - points: Points to award - """ - leaderboard = {} - - if os.path.exists(LEADERBOARD_FILE): - with open(LEADERBOARD_FILE, 'r', encoding='utf-8') as f: - try: - leaderboard = json.load(f) - except json.JSONDecodeError: - leaderboard = {} - - leaderboard[user] = leaderboard.get(user, 0) + points - - try: - with open(LEADERBOARD_FILE, 'w', encoding='utf-8') as f: - json.dump(leaderboard, f, indent=2) - except PermissionError as e: - print(f"ERROR: Permission denied when saving leaderboard to {LEADERBOARD_FILE}: {e}", file=sys.stderr) - print("Check file permissions and ensure the workflow has write access.", file=sys.stderr) - sys.exit(1) - except IOError as e: - print(f"ERROR: Failed to write leaderboard to {LEADERBOARD_FILE}: {e}", file=sys.stderr) - print("This may be due to disk space issues or file system problems.", file=sys.stderr) - sys.exit(1) - except Exception as e: - print(f"ERROR: Unexpected error saving leaderboard: {e}", file=sys.stderr) - sys.exit(1) - -def main(): - cfg = load_config() - event = load_event() - points, user = detect_points(event, cfg) - - # Extract unique ID for duplicate prevention - event_id = event.get('review', {}).get('id') or event.get('comment', {}).get('id') - if not event_id: - print("No unique ID found in event. Skipping duplicate check.") - sys.exit(2) # Exit code 2 = no-op (not an error) - - processed_ids = load_processed_ids() - if event_id in processed_ids: - print(f"Event {event_id} already processed. Skipping scoring.") - sys.exit(2) # Exit code 2 = no-op (not an error) - - if points <= 0: - print("No points awarded for this event.") - sys.exit(2) # Exit code 2 = no-op (not an error) - - # Update leaderboard first, then mark as processed - # This order ensures we can retry if processed_ids save fails - update_leaderboard(user, points) - processed_ids.add(event_id) - save_processed_ids(processed_ids) - sys.exit(0) # Exit code 0 = success (points awarded) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/scripts/track_points.py b/scripts/track_points.py index 23a4b851..e0b5943c 100644 --- a/scripts/track_points.py +++ b/scripts/track_points.py @@ -214,70 +214,69 @@ def calculate_review_points(review: dict, config: dict) -> Tuple[int, List[str]] return points, breakdown +def _check_label_in_issues(linked_issues: List[dict], label_keywords: List[str]) -> Optional[Tuple[bool, str]]: + """Helper: Check if any linked issue has labels matching keywords.""" + for issue in linked_issues: + issue_labels = [label['name'].lower() for label in issue.get('labels', [])] + if any(keyword in label for keyword in label_keywords for label in issue_labels): + return True, f'closes issue #{issue["number"]}' + return False, '' + +def _has_documentation_changes(pr_number: int) -> bool: + """Helper: Check if PR modifies documentation files.""" + files = github_api_request('GET', f'pulls/{pr_number}/files') + if isinstance(files, list): + return any( + 'readme' in f['filename'].lower() or + 'docs/' in f['filename'].lower() or + f['filename'].lower().endswith('.md') + for f in files if f.get('additions', 0) > 0 + ) + return False + def calculate_pr_author_points(pr_details: dict, config: dict) -> Tuple[int, List[str]]: """Calculate points for PR author based on PR characteristics.""" points = 0 breakdown = [] + labels = [label['name'].lower() for label in pr_details.get('labels', [])] + linked_issues = get_linked_issues(pr_details) # PR merged if pr_details.get('merged'): points += config['points'].get('pr_merged', 5) breakdown.append('PR merged: +5 points') - # Check labels for bonuses - labels = [label['name'].lower() for label in pr_details.get('labels', [])] - - # Bug fix: Check if PR is linked to an issue with 'bug' label - linked_issues = get_linked_issues(pr_details) - for issue in linked_issues: - issue_labels = [label['name'].lower() for label in issue.get('labels', [])] - if any('bug' in label for label in issue_labels): - bonus = config['points'].get('bug_fix', 5) - points += bonus - breakdown.append(f'Bug fix (closes issue #{issue["number"]}): +{bonus} points') - break # Only award once even if multiple bug issues linked + # Bug fix bonus + found, source = _check_label_in_issues(linked_issues, ['bug']) + if found: + bonus = config['points'].get('bug_fix', 5) + points += bonus + breakdown.append(f'Bug fix ({source}): +{bonus} points') - # Security fix: Check if PR is linked to a security issue OR has security labels + # Security fix bonus has_security = any('security' in label or 'vulnerability' in label for label in labels) breakdown_source = 'PR labeled' if has_security else '' - # Also check linked issues for security labels (only if not already found) if not has_security: - for issue in linked_issues: - issue_labels = [label['name'].lower() for label in issue.get('labels', [])] - if any('security' in label or 'vulnerability' in label for label in issue_labels): - has_security = True - breakdown_source = f'closes issue #{issue["number"]}' - break - - # Award security bonus only once + found, source = _check_label_in_issues(linked_issues, ['security', 'vulnerability']) + has_security, breakdown_source = found, source + if has_security: bonus = config['points'].get('security_fix', 15) points += bonus breakdown.append(f'Security fix ({breakdown_source}): +{bonus} points') - # Documentation: Check both labels AND files changed + # Documentation bonus has_docs = any('documentation' in label or 'docs' in label for label in labels) - - # Also check if PR modifies documentation files - if not has_docs: - pr_number = pr_details.get('number') - if pr_number: - files = github_api_request('GET', f'pulls/{pr_number}/files') - if isinstance(files, list): - has_docs = any( - 'readme' in f['filename'].lower() or - 'docs/' in f['filename'].lower() or - f['filename'].lower().endswith('.md') - for f in files if f.get('additions', 0) > 0 - ) + if not has_docs and pr_details.get('number'): + has_docs = _has_documentation_changes(pr_details['number']) if has_docs: bonus = config['points'].get('documentation', 4) points += bonus breakdown.append(f'Documentation: +{bonus} points') - # Check if first-time contributor + # First-time contributor bonus if is_first_time_contributor(pr_details): bonus = config['points'].get('first_time_contributor', 5) points += bonus @@ -383,74 +382,93 @@ def aggregate_contributor_points(reviews: List[dict], pr_details: dict, config: return contributors +def _format_timestamp(timestamp: str) -> str: + """Helper: Parse and format ISO 8601 timestamp.""" + try: + ts = timestamp.replace('Z', '+00:00') if timestamp else '' + dt = datetime.fromisoformat(ts) + return dt.strftime('%Y-%m-%d') + except Exception: + return timestamp.split('T')[0] if timestamp else 'Unknown date' + +def _build_points_table(config: dict) -> str: + """Helper: Build the points calculation table from config.""" + points_config = config.get('points', {}) + table = "| Action | Points |\n|--------|--------|\n" + + points_map = [ + ('Review submission', points_config.get('review_submission', 5), False), + ('Detailed review (100+ chars)', points_config.get('detailed_review', 5), True), + ('Performance improvement suggestion', points_config.get('performance_improvement', 6), True), + ('PR approval', points_config.get('approve_pr', 3), True), + ('PR merged', points_config.get('pr_merged', 5), False), + ('Bug fix (closes issue)', points_config.get('bug_fix', 5), True), + ('Security fix/vulnerability', points_config.get('security_fix', 15), True), + ('Documentation', points_config.get('documentation', 4), True), + ('First-time contributor', points_config.get('first_time_contributor', 5), True), + ('High priority issue created', points_config.get('high_priority', 3), True), + ('Critical bug reported', points_config.get('critical_bug', 10), True), + ] + + for action, points, is_bonus in points_map: + value = f"+{points} bonus" if is_bonus else str(points) + table += f"| {action} | {value} |\n" + + return table + def format_comment_body(pr_number: int, contributors: Dict[str, dict], config: dict) -> str: """Format the PR comment body with points tracking.""" total_points = sum(c['total'] for c in contributors.values()) timestamp = datetime.now(timezone.utc).strftime('%B %d, %Y at %I:%M %p UTC') # Header - comment = f"{COMMENT_MARKER}\n\n" - comment += "## 🏆 Contributor Points Tracker\n\n" - comment += f"**Total Points on This PR: {total_points} points**\n\n" - comment += "### Points by Contributor\n\n" - - # Sort contributors by points (descending) + lines = [ + COMMENT_MARKER, + "", + "## 🏆 Contributor Points Tracker", + "", + f"**Total Points on This PR: {total_points} points**", + "", + "### Points by Contributor", + "" + ] + + # Contributors sorted_contributors = sorted(contributors.items(), key=lambda x: x[1]['total'], reverse=True) - for username, data in sorted_contributors: - comment += f"#### @{username} - **{data['total']} points**\n\n" + lines.append(f"#### @{username} - **{data['total']} points**") + lines.append("") - # Group activities by type for activity in data['activities']: - # Robustly parse ISO 8601 timestamp, fallback to original string if parsing fails - try: - ts = activity['timestamp'].replace('Z', '+00:00') if activity.get('timestamp') else '' - dt = datetime.fromisoformat(ts) - timestamp_str = dt.strftime('%Y-%m-%d') - except Exception: - timestamp_str = activity.get('timestamp', 'Unknown date').split('T')[0] if activity.get('timestamp') else 'Unknown date' - comment += f"**{activity['type'].replace('_', ' ').title()}** ({timestamp_str}):\n" - for item in activity['breakdown']: - comment += f"- ✅ {item}\n" - comment += "\n" + timestamp_str = _format_timestamp(activity.get('timestamp', '')) + lines.append(f"**{activity['type'].replace('_', ' ').title()}** ({timestamp_str}):") + lines.extend(f"- ✅ {item}" for item in activity['breakdown']) + lines.append("") # Footer - comment += "---\n\n" - comment += "### How Points Are Calculated\n\n" - comment += "| Action | Points |\n" - comment += "|--------|--------|\n" - - # Dynamically generate points table from config - points_config = config.get('points', {}) - comment += f"| Review submission | {points_config.get('review_submission', 5)} |\n" - comment += f"| Detailed review (100+ chars) | +{points_config.get('detailed_review', 5)} bonus |\n" - comment += f"| Performance improvement suggestion | +{points_config.get('performance_improvement', 6)} bonus |\n" - comment += f"| PR approval | +{points_config.get('approve_pr', 3)} bonus |\n" - comment += f"| PR merged | {points_config.get('pr_merged', 5)} |\n" - comment += f"| Bug fix (closes issue) | +{points_config.get('bug_fix', 5)} bonus |\n" - comment += f"| Security fix/vulnerability | +{points_config.get('security_fix', 15)} bonus |\n" - comment += f"| Documentation | +{points_config.get('documentation', 4)} bonus |\n" - comment += f"| First-time contributor | +{points_config.get('first_time_contributor', 5)} bonus |\n" - comment += f"| High priority issue created | +{points_config.get('high_priority', 3)} bonus |\n" - comment += f"| Critical bug reported | +{points_config.get('critical_bug', 10)} bonus |\n\n" - comment += f"*Last updated: {timestamp}*\n\n" - - # Metadata for external pipeline parsing + lines.extend([ + "---", + "", + "### How Points Are Calculated", + "", + _build_points_table(config), + f"*Last updated: {timestamp}*", + "" + ]) + + # Metadata metadata = { 'pr_number': pr_number, 'total_points': total_points, 'contributors': { - username: { - 'total': data['total'], - 'activity_count': len(data['activities']) - } + username: {'total': data['total'], 'activity_count': len(data['activities'])} for username, data in contributors.items() }, 'last_updated': datetime.now(timezone.utc).isoformat() } - comment += f"\n" + lines.append(f"") - return comment + return '\n'.join(lines) def find_existing_comment(pr_number: int) -> Optional[int]: """Find the bot's existing tracking comment on the PR.""" diff --git a/scripts/update_leaderboard.py b/scripts/update_leaderboard.py deleted file mode 100644 index 0e65e54e..00000000 --- a/scripts/update_leaderboard.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. - -import json -import os -import sys -import argparse -from datetime import datetime, timezone - -LB_JSON = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'leaderboard.json') -OUT_MD = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'LEADERBOARD.md') - -def parse_args(): - parser = argparse.ArgumentParser( - description="Update LEADERBOARD.md from leaderboard.json and set the 'top' contributor for the README badge." - ) - parser.add_argument("--limit", type=int, default=0, - help="Show only the top N contributors in LEADERBOARD.md (0 = show all).") - parser.add_argument("--no-badge", action="store_true", - help="Do not write the 'top' key back into leaderboard.json.") - parser.add_argument("--create-if-missing", action="store_true", default=True, - help="Create an empty leaderboard if none exists (default: True).") - return parser.parse_args() - -def create_empty_leaderboard(): - """ - Create an empty leaderboard.json file with a 'top' field. - """ - empty_leaderboard = { - "top": "None", - "_comment": "This file tracks contributor points. Run assign_points.py to populate." - } - try: - with open(LB_JSON, 'w', encoding='utf-8') as f: - json.dump(empty_leaderboard, f, indent=2, ensure_ascii=False) - print(f"Created empty leaderboard at: {LB_JSON}", file=sys.stderr) - return empty_leaderboard - except Exception as e: - print(f"ERROR: Failed to create leaderboard.json: {e}", file=sys.stderr) - sys.exit(1) - -def load_leaderboard(create_if_missing=True): - """ - Load leaderboard.json with improved error handling. - - Args: - create_if_missing: If True, creates an empty leaderboard when missing - - Returns: - dict: Leaderboard data or empty dict on unrecoverable error - """ - if not os.path.exists(LB_JSON): - if create_if_missing: - print(f"WARNING: No leaderboard.json found at {LB_JSON}", file=sys.stderr) - print("Creating empty leaderboard. Run assign_points.py to populate it.", file=sys.stderr) - return create_empty_leaderboard() - else: - print(f"ERROR: No leaderboard.json found at {LB_JSON}", file=sys.stderr) - print("Run assign_points.py first to create the leaderboard.", file=sys.stderr) - sys.exit(1) - - try: - with open(LB_JSON, 'r', encoding='utf-8') as f: - data = json.load(f) - except json.JSONDecodeError as e: - print(f"ERROR: leaderboard.json contains invalid JSON: {e}", file=sys.stderr) - print(f"File location: {LB_JSON}", file=sys.stderr) - print("Fix the JSON syntax or delete the file to recreate it.", file=sys.stderr) - sys.exit(1) - except Exception as e: - print(f"ERROR: Failed to read leaderboard.json: {e}", file=sys.stderr) - sys.exit(1) - - if not isinstance(data, dict): - print(f"ERROR: leaderboard.json must be a JSON object (dict), got {type(data).__name__}", file=sys.stderr) - print(f"File location: {LB_JSON}", file=sys.stderr) - print("Expected format: {\"username\": points, ...}", file=sys.stderr) - sys.exit(1) - - return data - -def normalize_scores(leaderboard): - """ - Ensure points are integers and filter out non-user keys other than 'top'. - Returns a list of (user, points) tuples suitable for sorting. - """ - items = [] - for user, points in leaderboard.items(): - # Skip metadata fields - if user in ('top', '_comment'): - continue - try: - # Convert numeric strings/floats to int safely - points_int = int(float(points)) - except (ValueError, TypeError): - # If points cannot be parsed, skip this user - print(f"WARNING: Skipping '{user}' due to non-numeric points: {points}", file=sys.stderr) - continue - items.append((user, points_int)) - return items - -def sort_contributors(items): - """ - Sort by points descending, then by user name ascending for stable tie ordering. - """ - return sorted(items, key=lambda x: (-x[1], x[0].lower())) - -def write_badge_top(leaderboard, items, no_badge=False): - """ - Write 'top' contributor back to leaderboard.json unless disabled. - """ - if no_badge: - return - - top_user = items[0][0] if items else "None" - leaderboard['top'] = top_user - - try: - with open(LB_JSON, 'w', encoding='utf-8') as f: - json.dump(leaderboard, f, indent=2, ensure_ascii=False) - print(f"Updated top contributor: {top_user}") - except Exception as e: - print(f"WARNING: Failed to write updated leaderboard.json: {e}", file=sys.stderr) - # Non-fatal: continue to write the MD even if we couldn't update the badge key - -def render_markdown(items, limit=0): - """ - Build the markdown leaderboard table with optional row limit and a 'Last updated' footer. - """ - if limit > 0: - items = items[:limit] - - lines = [] - lines.append("# Contributor Leaderboard\n\n") - - if not items: - lines.append("_No contributors yet. Be the first!_\n\n") - else: - lines.append("| Rank | User | Points |\n") - lines.append("|------|------|--------|\n") - - for rank, (user, points) in enumerate(items, start=1): - # Add medal emoji for top 3 - medal = "" - if rank == 1: - medal = "đŸĨ‡ " - elif rank == 2: - medal = "đŸĨˆ " - elif rank == 3: - medal = "đŸĨ‰ " - - lines.append(f"| {rank} | {medal}{user} | {points} |\n") - - lines.append("\n") - - # Footer with timestamp (UTC) - ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC") - lines.append(f"_Last updated: {ts}_\n") - - return "".join(lines) - -def write_markdown(markdown): - try: - with open(OUT_MD, 'w', encoding='utf-8') as f: - f.write(markdown) - print(f"Leaderboard written to: {OUT_MD}") - except Exception as e: - print(f"ERROR: Failed to write LEADERBOARD.md: {e}", file=sys.stderr) - sys.exit(1) - -def main(): - args = parse_args() - - print("=" * 60) - print("Updating Contributor Leaderboard") - print("=" * 60) - - # Load leaderboard with improved error handling - leaderboard = load_leaderboard(create_if_missing=args.create_if_missing) - - # Normalize and sort contributors - items = normalize_scores(leaderboard) - items = sort_contributors(items) - - if not items: - print("No valid contributors found in leaderboard.") - print("This is normal if no points have been awarded yet.") - else: - print(f"Found {len(items)} contributor(s)") - - # Update badge source unless disabled - write_badge_top(leaderboard, items, no_badge=args.no_badge) - - # Generate Markdown - md = render_markdown(items, limit=args.limit) - write_markdown(md) - - top_user = items[0][0] if items else "None" - print("=" * 60) - print(f"SUCCESS: Top contributor is {top_user}") - print("=" * 60) - -if __name__ == "__main__": - main() \ No newline at end of file