diff --git a/.github/workflows/points.yml b/.github/workflows/points.yml index c87cdace..0a0b63f4 100644 --- a/.github/workflows/points.yml +++ b/.github/workflows/points.yml @@ -1,29 +1,41 @@ -name: Points Allocation +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +name: Contributor Points Tracker on: pull_request_review: - types: [submitted] + types: [submitted, edited, dismissed] issue_comment: - types: [created] + types: [created, edited] + pull_request: + types: [opened, closed, reopened, labeled, unlabeled] + pull_request_review_comment: + types: [created, edited] + issues: + types: [opened, closed, labeled] permissions: - contents: write + contents: read pull-requests: write + issues: write jobs: - assign-points: + track-points: runs-on: ubuntu-latest - # Only run for PR reviews or comments on PRs (not regular issues) + # Only run for PR-related events or issue events (not regular issue comments) if: > github.event_name == 'pull_request_review' || - (github.event_name == 'issue_comment' && github.event.issue.pull_request != null) + github.event_name == 'pull_request_review_comment' || + github.event_name == 'pull_request' || + github.event_name == 'issues' || + (github.event_name == 'issue_comment' && github.event.issue.pull_request) steps: - name: Checkout repository uses: actions/checkout@v4 with: - ref: main token: ${{ secrets.GITHUB_TOKEN }} - fetch-depth: 0 + ref: ${{ github.event.repository.default_branch }} - name: Set up Python uses: actions/setup-python@v5 @@ -33,51 +45,13 @@ jobs: cache: 'pip' - name: Install dependencies - run: pip install PyYAML - - - name: Run points script - id: assign_points run: | - set +e # Don't exit on error - python scripts/assign_points.py - exit_code=$? 
- echo "exit_code=$exit_code" >> $GITHUB_OUTPUT - - # Exit codes: - # 0 = Success (points awarded) - # 2 = No-op (no points, but not an error) - # 1 or other = Actual error - - if [ $exit_code -eq 0 ] || [ $exit_code -eq 2 ]; then - exit 0 - else - exit $exit_code - fi - - - name: Update leaderboard markdown - if: steps.assign_points.outputs.exit_code == '0' - run: python scripts/update_leaderboard.py + pip install PyYAML requests - - name: Create Pull Request - if: steps.assign_points.outputs.exit_code == '0' - uses: peter-evans/create-pull-request@v6 - continue-on-error: true - with: - token: ${{ secrets.GITHUB_TOKEN }} - add: 'leaderboard.json,LEADERBOARD.md' - commit-message: "Update leaderboard" - branch: leaderboard-update-${{ github.run_id }} - delete-branch: true - title: "Update contributor leaderboard" - body: | - ## Leaderboard Update - - This PR updates the contributor leaderboard based on recent PR review activity. - - **Triggered by:** ${{ github.event_name }} - **Run:** ${{ github.run_number }} - - Please review and merge to update the leaderboard. - labels: | - leaderboard - automated + - name: Calculate and update points + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} + GITHUB_EVENT_NAME: ${{ github.event_name }} + GITHUB_EVENT_PATH: ${{ github.event_path }} + run: python scripts/track_points.py diff --git a/leaderboard.json b/leaderboard.json deleted file mode 100644 index 9e26dfee..00000000 --- a/leaderboard.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/scripts/assign_points.py b/scripts/assign_points.py deleted file mode 100644 index 1bb60045..00000000 --- a/scripts/assign_points.py +++ /dev/null @@ -1,282 +0,0 @@ -# Copyright (c) Microsoft. All rights reserved. 
- -import os -import json -import yaml -import sys - -# Exit codes used by this script: -# 0 = Success - points were awarded and leaderboard updated -# 1 = Error - something went wrong (missing config, permissions, etc.) -# 2 = No-op - no points awarded, but not an error (duplicate event, no criteria matched) - -# Path to config file inside scripts folder -CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'config_points.yml') -PROCESSED_FILE = os.path.join(os.path.dirname(__file__), 'processed_ids.json') -# Path to leaderboard in repository root (one level up from scripts/) -LEADERBOARD_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'leaderboard.json') - -def load_config(): - """ - Load the points configuration from config_points.yml. - - Returns: - dict: Configuration dictionary with 'points' section - - Exits: - 1 if config file is missing or contains invalid YAML - """ - if not os.path.exists(CONFIG_FILE): - print(f"ERROR: Config file not found: {CONFIG_FILE}", file=sys.stderr) - print("Expected location: scripts/config_points.yml", file=sys.stderr) - sys.exit(1) - - try: - with open(CONFIG_FILE, 'r', encoding='utf-8') as f: - config = yaml.safe_load(f) - except yaml.YAMLError as e: - print(f"ERROR: Invalid YAML syntax in config file: {e}", file=sys.stderr) - print(f"File location: {CONFIG_FILE}", file=sys.stderr) - sys.exit(1) - except Exception as e: - print(f"ERROR: Failed to read config file: {e}", file=sys.stderr) - sys.exit(1) - - # Validate that config has the expected structure - if not isinstance(config, dict) or 'points' not in config: - print(f"ERROR: Invalid config structure in {CONFIG_FILE}", file=sys.stderr) - print("Expected format: { points: { review_submission: 5, detailed_review: 5, approve_pr: 3, pr_comment: 2 } }", file=sys.stderr) - sys.exit(1) - - return config - -def load_event(): - event_path = os.getenv('GITHUB_EVENT_PATH') - if not event_path: - print("ERROR: GITHUB_EVENT_PATH is not set.") - sys.exit(1) - if not 
os.path.exists(event_path): - print(f"ERROR: Event file not found: {event_path}") - sys.exit(1) - - try: - with open(event_path, 'r', encoding='utf-8') as f: - event = json.load(f) - except json.JSONDecodeError as e: - print(f"ERROR: Invalid JSON in event file: {e}", file=sys.stderr) - print(f"File location: {event_path}", file=sys.stderr) - sys.exit(1) - - # Validate that this is a PR-related event, not a regular issue comment - if 'issue' in event and 'pull_request' not in event.get('issue', {}): - print("INFO: Skipping - this is a comment on a regular issue, not a pull request.") - sys.exit(2) # Exit code 2 = no-op - - return event - -def load_processed_ids(): - if os.path.exists(PROCESSED_FILE): - with open(PROCESSED_FILE, 'r', encoding='utf-8') as f: - try: - return set(json.load(f)) - except json.JSONDecodeError: - return set() - return set() - -def save_processed_ids(ids): - """ - Save processed event IDs to prevent duplicate scoring. - - This is critical for data integrity - if this fails after points - are awarded, the same event could be scored multiple times on retry. - """ - try: - with open(PROCESSED_FILE, 'w', encoding='utf-8') as f: - json.dump(list(ids), f, indent=2) - except PermissionError as e: - print(f"ERROR: Permission denied when saving processed IDs to {PROCESSED_FILE}: {e}", file=sys.stderr) - print("Check file permissions and ensure the workflow has write access.", file=sys.stderr) - sys.exit(1) - except IOError as e: - print(f"ERROR: Failed to write processed IDs to {PROCESSED_FILE}: {e}", file=sys.stderr) - print("This may be due to disk space issues or file system problems.", file=sys.stderr) - sys.exit(1) - except Exception as e: - print(f"ERROR: Unexpected error saving processed IDs: {e}", file=sys.stderr) - sys.exit(1) - -def extract_user(event): - """ - Extract user login from GitHub event with multiple fallback strategies. - - Priority: - 1. review.user.login (for pull_request_review events) - 2. 
comment.user.login (for issue_comment events) - 3. sender.login (top-level event sender) - - Returns: - tuple: (user_login: str, source: str) or (None, None) if extraction fails - """ - # Try review user first - review = event.get('review') - if review and isinstance(review, dict): - review_user = review.get('user') - if review_user and isinstance(review_user, dict): - login = review_user.get('login') - if login: - return login, 'review.user' - - # Try comment user second - comment = event.get('comment') - if comment and isinstance(comment, dict): - comment_user = comment.get('user') - if comment_user and isinstance(comment_user, dict): - login = comment_user.get('login') - if login: - return login, 'comment.user' - - # Fallback to top-level sender (most reliable) - sender = event.get('sender') - if sender and isinstance(sender, dict): - login = sender.get('login') - if login: - return login, 'sender' - - # All extraction methods failed - return None, None - -def detect_points(event, cfg): - """ - Calculate points for a GitHub event based on review actions. - - Scoring Rules: - 1. Any PR review submission = review_submission points (base points) - 2. PR approval (state=approved) = approve_pr bonus (additive) - 3. Substantial review (comment length >= 100 characters) = detailed_review bonus (additive) - - Scoring Examples: - - Simple review with short comment = 5 points (base) - - Review with detailed feedback (100+ chars) = 5 + 5 = 10 points - - Approved PR = 5 + 3 = 8 points - - Approved PR with detailed feedback = 5 + 3 + 5 = 13 points - - Comment on PR (not a review) = 2 points - """ - action = event.get('action', '') - review = event.get('review') or {} - comment = event.get('comment') or {} - - review_body = review.get('body') or '' - review_state = (review.get('state') or '').lower() - - user, source = extract_user(event) - - if not user: - print("ERROR: Unable to extract user from event. 
Checked review.user, comment.user, and sender fields.") - print("Event structure:", json.dumps({ - 'has_review': 'review' in event, - 'has_comment': 'comment' in event, - 'has_sender': 'sender' in event, - 'action': action - }, indent=2)) - sys.exit(1) - - print(f"User identified: {user} (source: {source})") - - points = 0 - scoring_breakdown = [] - - # Determine if this is a review or just a comment - is_review = action == "submitted" and event.get('review') is not None and event.get('review') - is_comment = event.get('comment') is not None and event.get('comment') and not is_review - - if is_review: - # Base points for any PR review submission - points += cfg['points']['review_submission'] - scoring_breakdown.append(f"review_submission: +{cfg['points']['review_submission']}") - - # Bonus for substantial review (100+ characters) - if len(review_body.strip()) >= 100: - points += cfg['points']['detailed_review'] - scoring_breakdown.append(f"detailed_review: +{cfg['points']['detailed_review']}") - - # Bonus for approving the PR - if review_state == "approved": - points += cfg['points']['approve_pr'] - scoring_breakdown.append(f"approve_pr: +{cfg['points']['approve_pr']}") - - elif is_comment: - # Points for commenting on a PR (less than review) - points += cfg['points']['pr_comment'] - scoring_breakdown.append(f"pr_comment: +{cfg['points']['pr_comment']}") - - # Log scoring breakdown for transparency - if scoring_breakdown: - print(f"Scoring breakdown: {', '.join(scoring_breakdown)} = {points} total") - else: - print("No scoring criteria matched.") - - return points, user - -def update_leaderboard(user, points): - """ - Update the leaderboard with awarded points for a user. 
- - Args: - user: GitHub username - points: Points to award - """ - leaderboard = {} - - if os.path.exists(LEADERBOARD_FILE): - with open(LEADERBOARD_FILE, 'r', encoding='utf-8') as f: - try: - leaderboard = json.load(f) - except json.JSONDecodeError: - leaderboard = {} - - leaderboard[user] = leaderboard.get(user, 0) + points - - try: - with open(LEADERBOARD_FILE, 'w', encoding='utf-8') as f: - json.dump(leaderboard, f, indent=2) - except PermissionError as e: - print(f"ERROR: Permission denied when saving leaderboard to {LEADERBOARD_FILE}: {e}", file=sys.stderr) - print("Check file permissions and ensure the workflow has write access.", file=sys.stderr) - sys.exit(1) - except IOError as e: - print(f"ERROR: Failed to write leaderboard to {LEADERBOARD_FILE}: {e}", file=sys.stderr) - print("This may be due to disk space issues or file system problems.", file=sys.stderr) - sys.exit(1) - except Exception as e: - print(f"ERROR: Unexpected error saving leaderboard: {e}", file=sys.stderr) - sys.exit(1) - -def main(): - cfg = load_config() - event = load_event() - points, user = detect_points(event, cfg) - - # Extract unique ID for duplicate prevention - event_id = event.get('review', {}).get('id') or event.get('comment', {}).get('id') - if not event_id: - print("No unique ID found in event. Skipping duplicate check.") - sys.exit(2) # Exit code 2 = no-op (not an error) - - processed_ids = load_processed_ids() - if event_id in processed_ids: - print(f"Event {event_id} already processed. 
Skipping scoring.") - sys.exit(2) # Exit code 2 = no-op (not an error) - - if points <= 0: - print("No points awarded for this event.") - sys.exit(2) # Exit code 2 = no-op (not an error) - - # Update leaderboard first, then mark as processed - # This order ensures we can retry if processed_ids save fails - update_leaderboard(user, points) - processed_ids.add(event_id) - save_processed_ids(processed_ids) - sys.exit(0) # Exit code 0 = success (points awarded) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/scripts/config_points.yml b/scripts/config_points.yml index 627bf51f..9069a7b1 100644 --- a/scripts/config_points.yml +++ b/scripts/config_points.yml @@ -1,23 +1,44 @@ # Contributor Points Configuration # -# Scoring is based on actual review actions, not keywords. -# All contributions are valued and tracked automatically. +# This configuration defines point values for various contribution activities. +# Points are calculated automatically by the workflow and displayed in PR comments. # # Scoring Rules: -# 1. Any PR review submission = review_submission points (base points) -# 2. Substantial review (100+ characters, excluding whitespace) = detailed_review bonus (additive) -# 3. PR approval (state=approved) = approve_pr bonus (additive) -# 4. PR comment (not a full review) = pr_comment points +# 1. Review submission = base points +# 2. Detailed review (100+ characters) = bonus points (additive) +# 3. PR approval = bonus points (additive) +# 4. PR merged = points for author +# 5. Special labels (bug, priority, docs) = bonus points +# 6. 
First-time contributor = bonus points # # Examples: # - Simple review with short comment = 5 points -# - Review with detailed comment (100+ characters) = 5 + 5 = 10 points -# - Approved PR = 5 + 3 = 8 points -# - Approved PR with detailed feedback = 5 + 5 + 3 = 13 points -# - Comment on PR (not a review) = 2 points +# - Review with detailed comment (100+ chars) = 10 points (5 + 5) +# - Approved PR with detailed feedback = 13 points (5 + 5 + 3) +# - PR merged with bug fix = 10 points (5 + 5) points: + # Review & Comment Points review_submission: 5 # Base points for submitting any PR review - detailed_review: 5 # Bonus for substantial review (100+ characters of feedback) + detailed_review: 5 # Bonus for substantial review (100+ characters) approve_pr: 3 # Bonus for approving a PR - pr_comment: 2 # Points for commenting on a PR (not a full review) \ No newline at end of file + + # PR Author Points + pr_merged: 5 # Points when PR is successfully merged + + # Label-Based Bonuses (for PR authors) + bug_fix: 5 # Bonus for fixing bugs + high_priority: 3 # Bonus for high-priority work + critical_bug: 10 # Bonus for critical bug fixes + documentation: 4 # Bonus for documentation contributions + performance_improvement: 6 # Bonus for performance enhancements + security_fix: 15 # Bonus for security fixes + + # Special Bonuses + first_time_contributor: 5 # Bonus for first-time contributors + + # Future Implementation (not currently calculated) + # speed_bonus_24h: 3 # TODO: Bonus for PRs merged within 24 hours + # test_coverage: 8 # TODO: Bonus for adding comprehensive tests + # mentorship: 10 # TODO: Points for mentoring sessions + # issue_triage: 2 # TODO: Points for triaging issues diff --git a/scripts/track_points.py b/scripts/track_points.py new file mode 100644 index 00000000..e0b5943c --- /dev/null +++ b/scripts/track_points.py @@ -0,0 +1,566 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. 
+ +""" +Contributor Points Tracker + +This script: +1. Monitors all PR-related events (reviews, comments, approvals, merges, etc.) +2. Calculates points for all contributors on a PR +3. Updates a single PR comment with live point tracking +4. Exports metadata in comment for external pipeline to parse and store in Kusto + +Supported events: +- pull_request_review (submitted, edited, dismissed) +- issue_comment (on PRs only) +- pull_request (opened, closed, labeled, etc.) +- pull_request_review_comment + +Points are calculated based on: +- Review submission: 5 points +- Detailed review (100+ chars): +5 bonus +- PR approval: +3 bonus +- PR merged: 5 points (for author) +- Bug fix (closes issue): +5 bonus +- Security fix/vulnerability: +15 bonus +- Documentation: +4 bonus +- Performance improvement: +6 bonus +- First-time contributor: +5 bonus +- High priority issue: +3 bonus +- Critical bug reported: +10 bonus +""" + +import os +import sys +import json +import re +import yaml +import requests +from datetime import datetime, timezone +from typing import Dict, List, Optional, Tuple + +# Configuration +CONFIG_FILE = os.path.join(os.path.dirname(__file__), 'config_points.yml') +# GITHUB_TOKEN is provided by GitHub Actions with limited repository scope +# It expires after workflow completion and is never logged or exposed +GITHUB_TOKEN = os.getenv('GITHUB_TOKEN') +GITHUB_REPOSITORY = os.getenv('GITHUB_REPOSITORY') # Format: owner/repo +GITHUB_EVENT_NAME = os.getenv('GITHUB_EVENT_NAME') +GITHUB_EVENT_PATH = os.getenv('GITHUB_EVENT_PATH') + +# Comment identifier for finding the bot's tracking comment +COMMENT_MARKER = "" + +# Minimum character count for detailed review bonus +DETAILED_REVIEW_MIN_CHARS = 100 + +# Keywords for detecting performance improvement suggestions in reviews +PERFORMANCE_KEYWORDS = [ + 'performance', 'performant', 'optimization', 'optimize', + 'fast', 'faster', 'efficient', 'efficiency', 'speed' +] + +def load_config() -> dict: + """Load points 
configuration from YAML file.""" + if not os.path.exists(CONFIG_FILE): + print(f"ERROR: Config file not found: {CONFIG_FILE}", file=sys.stderr) + sys.exit(1) + + try: + with open(CONFIG_FILE, 'r', encoding='utf-8') as f: + config = yaml.safe_load(f) + + # Validate required keys exist + required_keys = ['review_submission', 'detailed_review', 'approve_pr', 'pr_merged'] + if 'points' not in config: + print(f"ERROR: Config missing 'points' section", file=sys.stderr) + sys.exit(1) + + missing = [key for key in required_keys if key not in config['points']] + if missing: + print(f"ERROR: Config missing required keys: {missing}", file=sys.stderr) + sys.exit(1) + + # Set defaults for optional keys + optional_defaults = { + 'performance_improvement': 6, + 'bug_fix': 5, + 'security_fix': 15, + 'documentation': 4, + 'first_time_contributor': 5, + 'high_priority': 3, + 'critical_bug': 10 + } + for key, default_value in optional_defaults.items(): + if key not in config['points']: + config['points'][key] = default_value + + return config + except Exception as e: + print(f"ERROR: Failed to load config: {e}", file=sys.stderr) + sys.exit(1) + +def load_event() -> dict: + """Load GitHub event payload.""" + if not GITHUB_EVENT_PATH or not os.path.exists(GITHUB_EVENT_PATH): + print(f"ERROR: Event file not found: {GITHUB_EVENT_PATH}", file=sys.stderr) + sys.exit(1) + + try: + with open(GITHUB_EVENT_PATH, 'r', encoding='utf-8') as f: + event = json.load(f) + return event + except Exception as e: + print(f"ERROR: Failed to load event: {e}", file=sys.stderr) + sys.exit(1) + +def get_pr_number(event: dict) -> Optional[int]: + """Extract PR number from event.""" + if 'pull_request' in event: + return event['pull_request']['number'] + elif 'issue' in event and 'pull_request' in event['issue']: + return event['issue']['number'] + elif 'review' in event: + pr_url = event['review'].get('pull_request_url', '') + if pr_url: + try: + return int(pr_url.split('/')[-1]) + except (ValueError, 
IndexError): + print(f"WARNING: Could not parse PR number from URL: {pr_url}", file=sys.stderr) + return None + return None + +def get_issue_number(event: dict) -> Optional[int]: + """Extract issue number from event (for non-PR issues only).""" + if 'issue' in event and 'pull_request' not in event['issue']: + return event['issue']['number'] + return None + +def is_issue_event(event: dict) -> bool: + """Check if this is an issue event (not a PR).""" + return GITHUB_EVENT_NAME == 'issues' or (GITHUB_EVENT_NAME == 'issue_comment' and 'issue' in event and 'pull_request' not in event['issue']) + +def github_api_request(method: str, endpoint: str, data: Optional[dict] = None) -> dict: + """Make GitHub API request.""" + url = f"https://api.github.com/repos/{GITHUB_REPOSITORY}/{endpoint}" + headers = { + 'Authorization': f'Bearer {GITHUB_TOKEN}', + 'Accept': 'application/vnd.github.v3+json' + } + + try: + if method == 'GET': + response = requests.get(url, headers=headers) + elif method == 'POST': + response = requests.post(url, headers=headers, json=data) + elif method == 'PATCH': + response = requests.patch(url, headers=headers, json=data) + + response.raise_for_status() + return response.json() if response.text else {} + except requests.exceptions.HTTPError as e: + if e.response.status_code == 401: + print(f"ERROR: Authentication failed - check GITHUB_TOKEN", file=sys.stderr) + sys.exit(1) + elif e.response.status_code == 403: + print(f"ERROR: API rate limit exceeded or access forbidden", file=sys.stderr) + sys.exit(1) + else: + print(f"ERROR: GitHub API request failed ({e.response.status_code}): {e}", file=sys.stderr) + return {} + except Exception as e: + print(f"ERROR: GitHub API request failed: {e}", file=sys.stderr) + return {} + +def get_all_pr_activity(pr_number: int) -> Tuple[List[dict], List[dict], dict]: + """ + Fetch all activity on a PR: reviews, comments, and PR details. 
+ + Returns: + Tuple of (reviews, comments, pr_details) + """ + reviews = github_api_request('GET', f'pulls/{pr_number}/reviews') + comments = github_api_request('GET', f'issues/{pr_number}/comments') + pr_details = github_api_request('GET', f'pulls/{pr_number}') + + return ( + reviews if isinstance(reviews, list) else [], + comments if isinstance(comments, list) else [], + pr_details if isinstance(pr_details, dict) else {} + ) + +def calculate_review_points(review: dict, config: dict) -> Tuple[int, List[str]]: + """Calculate points for a single review.""" + points = config['points']['review_submission'] # Base: 5 points + breakdown = ['Review submission: +5 points'] + + # Detailed review bonus (threshold defined by DETAILED_REVIEW_MIN_CHARS constant) + body = review.get('body', '').strip() + if len(body) >= DETAILED_REVIEW_MIN_CHARS: + points += config['points']['detailed_review'] # +5 points + breakdown.append(f'Detailed feedback ({len(body)} characters): +5 points') + + # Performance improvement suggestion bonus + body_lower = body.lower() + if any(keyword in body_lower for keyword in PERFORMANCE_KEYWORDS): + bonus = config['points'].get('performance_improvement', 6) + points += bonus + breakdown.append(f'Performance improvement suggestion: +{bonus} points') + + # Approval bonus + if review.get('state') == 'APPROVED': + points += config['points']['approve_pr'] # +3 points + breakdown.append('Approved PR: +3 points') + + return points, breakdown + +def _check_label_in_issues(linked_issues: List[dict], label_keywords: List[str]) -> Optional[Tuple[bool, str]]: + """Helper: Check if any linked issue has labels matching keywords.""" + for issue in linked_issues: + issue_labels = [label['name'].lower() for label in issue.get('labels', [])] + if any(keyword in label for keyword in label_keywords for label in issue_labels): + return True, f'closes issue #{issue["number"]}' + return False, '' + +def _has_documentation_changes(pr_number: int) -> bool: + """Helper: Check 
if PR modifies documentation files.""" + files = github_api_request('GET', f'pulls/{pr_number}/files') + if isinstance(files, list): + return any( + 'readme' in f['filename'].lower() or + 'docs/' in f['filename'].lower() or + f['filename'].lower().endswith('.md') + for f in files if f.get('additions', 0) > 0 + ) + return False + +def calculate_pr_author_points(pr_details: dict, config: dict) -> Tuple[int, List[str]]: + """Calculate points for PR author based on PR characteristics.""" + points = 0 + breakdown = [] + labels = [label['name'].lower() for label in pr_details.get('labels', [])] + linked_issues = get_linked_issues(pr_details) + + # PR merged + if pr_details.get('merged'): + points += config['points'].get('pr_merged', 5) + breakdown.append('PR merged: +5 points') + + # Bug fix bonus + found, source = _check_label_in_issues(linked_issues, ['bug']) + if found: + bonus = config['points'].get('bug_fix', 5) + points += bonus + breakdown.append(f'Bug fix ({source}): +{bonus} points') + + # Security fix bonus + has_security = any('security' in label or 'vulnerability' in label for label in labels) + breakdown_source = 'PR labeled' if has_security else '' + + if not has_security: + found, source = _check_label_in_issues(linked_issues, ['security', 'vulnerability']) + has_security, breakdown_source = found, source + + if has_security: + bonus = config['points'].get('security_fix', 15) + points += bonus + breakdown.append(f'Security fix ({breakdown_source}): +{bonus} points') + + # Documentation bonus + has_docs = any('documentation' in label or 'docs' in label for label in labels) + if not has_docs and pr_details.get('number'): + has_docs = _has_documentation_changes(pr_details['number']) + + if has_docs: + bonus = config['points'].get('documentation', 4) + points += bonus + breakdown.append(f'Documentation: +{bonus} points') + + # First-time contributor bonus + if is_first_time_contributor(pr_details): + bonus = config['points'].get('first_time_contributor', 5) + 
points += bonus + breakdown.append(f'First-time contributor: +{bonus} points') + + return points, breakdown + +def get_linked_issues(pr_details: dict) -> List[dict]: + """Get issues linked to this PR by checking PR body for closing keywords.""" + linked_issues = [] + pr_body = pr_details.get('body', '') or '' + + # Keywords that link issues: closes, fixes, resolves (case-insensitive) + # Pattern matches: "closes #123", "fixes #456", "resolves #789", etc. + pattern = r'\b(closes?|fixe[sd]|resolved?)\b\s*:?\s*#(\d+)' + matches = re.findall(pattern, pr_body, re.IGNORECASE) + + # Extract just the issue numbers (group 2 from the matches) + issue_numbers = [match[1] for match in matches] + + for issue_number in issue_numbers: + issue = github_api_request('GET', f'issues/{issue_number}') + if issue and isinstance(issue, dict): + linked_issues.append(issue) + + return linked_issues + +def is_first_time_contributor(pr_details: dict) -> bool: + """ + Check if the PR author is a first-time contributor using the 'author_association' field. + Returns True if the author_association is 'FIRST_TIME_CONTRIBUTOR'. 
+ """ + author_association = pr_details.get('author_association', '') + return author_association.upper() == 'FIRST_TIME_CONTRIBUTOR' + +def calculate_issue_creator_points(issue: dict, config: dict) -> Tuple[int, List[str]]: + """Calculate points awarded to the issue creator based on issue priority and labels.""" + points = 0 + breakdown = [] + + labels = [label['name'].lower() for label in issue.get('labels', [])] + + # High priority issue creation + if any('priority' in label and 'high' in label for label in labels): + bonus = config['points'].get('high_priority', 3) + points += bonus + breakdown.append(f'High priority issue created: +{bonus} points') + + # Critical bug issue + if any('critical' in label and 'bug' in label for label in labels): + bonus = config['points'].get('critical_bug', 10) + points += bonus + breakdown.append(f'Critical bug reported: +{bonus} points') + + # Security vulnerability reported + if any('security' in label or 'vulnerability' in label for label in labels): + bonus = config['points'].get('security_fix', 15) + points += bonus + breakdown.append(f'Security vulnerability reported: +{bonus} points') + + return points, breakdown + +def aggregate_contributor_points(reviews: List[dict], pr_details: dict, config: dict) -> Dict[str, dict]: + """ + Aggregate points for all contributors on a PR. 
+ + Returns: + Dict mapping username to {'total': int, 'activities': [list of activity dicts]} + """ + contributors = {} + + # Process reviews + for review in reviews: + username = review['user']['login'] + if username not in contributors: + contributors[username] = {'total': 0, 'activities': []} + + points, breakdown = calculate_review_points(review, config) + contributors[username]['total'] += points + contributors[username]['activities'].append({ + 'type': 'review', + 'points': points, + 'breakdown': breakdown, + 'timestamp': review['submitted_at'], + 'state': review['state'] + }) + + # Add PR author points (if PR is merged or has special labels) + pr_author = pr_details.get('user', {}).get('login') + if pr_author: + author_points, author_breakdown = calculate_pr_author_points(pr_details, config) + if author_points > 0: + if pr_author not in contributors: + contributors[pr_author] = {'total': 0, 'activities': []} + + contributors[pr_author]['total'] += author_points + contributors[pr_author]['activities'].append({ + 'type': 'pr_author', + 'points': author_points, + 'breakdown': author_breakdown, + 'timestamp': pr_details.get('merged_at') or pr_details.get('created_at') + }) + + return contributors + +def _format_timestamp(timestamp: str) -> str: + """Helper: Parse and format ISO 8601 timestamp.""" + try: + ts = timestamp.replace('Z', '+00:00') if timestamp else '' + dt = datetime.fromisoformat(ts) + return dt.strftime('%Y-%m-%d') + except Exception: + return timestamp.split('T')[0] if timestamp else 'Unknown date' + +def _build_points_table(config: dict) -> str: + """Helper: Build the points calculation table from config.""" + points_config = config.get('points', {}) + table = "| Action | Points |\n|--------|--------|\n" + + points_map = [ + ('Review submission', points_config.get('review_submission', 5), False), + ('Detailed review (100+ chars)', points_config.get('detailed_review', 5), True), + ('Performance improvement suggestion', 
points_config.get('performance_improvement', 6), True), + ('PR approval', points_config.get('approve_pr', 3), True), + ('PR merged', points_config.get('pr_merged', 5), False), + ('Bug fix (closes issue)', points_config.get('bug_fix', 5), True), + ('Security fix/vulnerability', points_config.get('security_fix', 15), True), + ('Documentation', points_config.get('documentation', 4), True), + ('First-time contributor', points_config.get('first_time_contributor', 5), True), + ('High priority issue created', points_config.get('high_priority', 3), True), + ('Critical bug reported', points_config.get('critical_bug', 10), True), + ] + + for action, points, is_bonus in points_map: + value = f"+{points} bonus" if is_bonus else str(points) + table += f"| {action} | {value} |\n" + + return table + +def format_comment_body(pr_number: int, contributors: Dict[str, dict], config: dict) -> str: + """Format the PR comment body with points tracking.""" + total_points = sum(c['total'] for c in contributors.values()) + timestamp = datetime.now(timezone.utc).strftime('%B %d, %Y at %I:%M %p UTC') + + # Header + lines = [ + COMMENT_MARKER, + "", + "## 🏆 Contributor Points Tracker", + "", + f"**Total Points on This PR: {total_points} points**", + "", + "### Points by Contributor", + "" + ] + + # Contributors + sorted_contributors = sorted(contributors.items(), key=lambda x: x[1]['total'], reverse=True) + for username, data in sorted_contributors: + lines.append(f"#### @{username} - **{data['total']} points**") + lines.append("") + + for activity in data['activities']: + timestamp_str = _format_timestamp(activity.get('timestamp', '')) + lines.append(f"**{activity['type'].replace('_', ' ').title()}** ({timestamp_str}):") + lines.extend(f"- ✅ {item}" for item in activity['breakdown']) + lines.append("") + + # Footer + lines.extend([ + "---", + "", + "### How Points Are Calculated", + "", + _build_points_table(config), + f"*Last updated: {timestamp}*", + "" + ]) + + # Metadata + metadata = { + 
'pr_number': pr_number,
+        'total_points': total_points,
+        'contributors': {
+            username: {'total': data['total'], 'activity_count': len(data['activities'])}
+            for username, data in contributors.items()
+        },
+        'last_updated': datetime.now(timezone.utc).isoformat()
+    }
+    lines.append(f"<!-- POINTS_METADATA: {json.dumps(metadata)} -->")
+
+    return '\n'.join(lines)
+
+def find_existing_comment(pr_number: int) -> Optional[int]:
+    """Find the bot's existing tracking comment on the PR."""
+    comments = github_api_request('GET', f'issues/{pr_number}/comments')
+    if not isinstance(comments, list):
+        return None
+
+    for comment in comments:
+        if COMMENT_MARKER in comment.get('body', ''):
+            return comment['id']
+
+    return None
+
+def update_or_create_comment(pr_number: int, body: str):
+    """Update existing tracking comment or create a new one."""
+    existing_comment_id = find_existing_comment(pr_number)
+
+    if existing_comment_id:
+        # Update existing comment
+        github_api_request('PATCH', f'issues/comments/{existing_comment_id}', {'body': body})
+        print(f"✅ Updated tracking comment (ID: {existing_comment_id})")
+    else:
+        # Create new comment
+        github_api_request('POST', f'issues/{pr_number}/comments', {'body': body})
+        print(f"✅ Created new tracking comment")
+
+def main():
+    """Main execution function."""
+    if not GITHUB_TOKEN:
+        print("ERROR: GITHUB_TOKEN not set", file=sys.stderr)
+        sys.exit(1)
+
+    print(f"🔄 Processing {GITHUB_EVENT_NAME} event...")
+
+    # Load configuration and event
+    config = load_config()
+    event = load_event()
+
+    # Check if this is an issue event (not a PR)
+    if is_issue_event(event):
+        print("📋 Processing issue event...")
+        issue_number = get_issue_number(event)
+        if not issue_number:
+            print("ℹ️ Could not extract issue number")
+            sys.exit(0)
+
+        # Get issue details
+        issue = github_api_request('GET', f'issues/{issue_number}')
+        if not issue or not isinstance(issue, dict):
+            print("❌ Failed to fetch issue details")
+            sys.exit(1)
+
+        # Calculate points for issue creator
+        issue_creator = issue.get('user', {}).get('login')
+        points, breakdown = calculate_issue_creator_points(issue, config)
+
+        if points > 0 and issue_creator:
+            print(f"   Issue #{issue_number} by @{issue_creator}: {points} points")
+            for item in breakdown:
+                print(f"      - {item}")
+            print("ℹ️ External pipeline should parse this event and store in Kusto")
+        else:
+            print("ℹ️ No points awarded for this issue")
+
+        sys.exit(0)
+
+    # Handle PR events
+    pr_number = get_pr_number(event)
+    if not pr_number:
+        print("ℹ️ Not a PR-related event, skipping")
+        sys.exit(0)
+
+    print(f"📝 Processing PR #{pr_number}...")
+
+    # Fetch all PR activity
+    reviews, comments, pr_details = get_all_pr_activity(pr_number)
+    print(f"   Found {len(reviews)} reviews and {len(comments)} comments")
+
+    # Calculate points for all contributors
+    contributors = aggregate_contributor_points(reviews, pr_details, config)
+    print(f"   Calculated points for {len(contributors)} contributors")
+
+    if not contributors:
+        print("ℹ️ No points to award yet")
+        sys.exit(0)
+
+    # Format and update comment
+    comment_body = format_comment_body(pr_number, contributors, config)
+    update_or_create_comment(pr_number, comment_body)
+
+    print("✅ Points tracking complete!")
+    print("ℹ️ External pipeline can parse comment metadata for Kusto ingestion")
+
+if __name__ == '__main__':
+    main()
diff --git a/scripts/update_leaderboard.py b/scripts/update_leaderboard.py
deleted file mode 100644
index 0e65e54e..00000000
--- a/scripts/update_leaderboard.py
+++ /dev/null
@@ -1,203 +0,0 @@
-# Copyright (c) Microsoft. All rights reserved.
- -import json -import os -import sys -import argparse -from datetime import datetime, timezone - -LB_JSON = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'leaderboard.json') -OUT_MD = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'LEADERBOARD.md') - -def parse_args(): - parser = argparse.ArgumentParser( - description="Update LEADERBOARD.md from leaderboard.json and set the 'top' contributor for the README badge." - ) - parser.add_argument("--limit", type=int, default=0, - help="Show only the top N contributors in LEADERBOARD.md (0 = show all).") - parser.add_argument("--no-badge", action="store_true", - help="Do not write the 'top' key back into leaderboard.json.") - parser.add_argument("--create-if-missing", action="store_true", default=True, - help="Create an empty leaderboard if none exists (default: True).") - return parser.parse_args() - -def create_empty_leaderboard(): - """ - Create an empty leaderboard.json file with a 'top' field. - """ - empty_leaderboard = { - "top": "None", - "_comment": "This file tracks contributor points. Run assign_points.py to populate." - } - try: - with open(LB_JSON, 'w', encoding='utf-8') as f: - json.dump(empty_leaderboard, f, indent=2, ensure_ascii=False) - print(f"Created empty leaderboard at: {LB_JSON}", file=sys.stderr) - return empty_leaderboard - except Exception as e: - print(f"ERROR: Failed to create leaderboard.json: {e}", file=sys.stderr) - sys.exit(1) - -def load_leaderboard(create_if_missing=True): - """ - Load leaderboard.json with improved error handling. - - Args: - create_if_missing: If True, creates an empty leaderboard when missing - - Returns: - dict: Leaderboard data or empty dict on unrecoverable error - """ - if not os.path.exists(LB_JSON): - if create_if_missing: - print(f"WARNING: No leaderboard.json found at {LB_JSON}", file=sys.stderr) - print("Creating empty leaderboard. 
Run assign_points.py to populate it.", file=sys.stderr) - return create_empty_leaderboard() - else: - print(f"ERROR: No leaderboard.json found at {LB_JSON}", file=sys.stderr) - print("Run assign_points.py first to create the leaderboard.", file=sys.stderr) - sys.exit(1) - - try: - with open(LB_JSON, 'r', encoding='utf-8') as f: - data = json.load(f) - except json.JSONDecodeError as e: - print(f"ERROR: leaderboard.json contains invalid JSON: {e}", file=sys.stderr) - print(f"File location: {LB_JSON}", file=sys.stderr) - print("Fix the JSON syntax or delete the file to recreate it.", file=sys.stderr) - sys.exit(1) - except Exception as e: - print(f"ERROR: Failed to read leaderboard.json: {e}", file=sys.stderr) - sys.exit(1) - - if not isinstance(data, dict): - print(f"ERROR: leaderboard.json must be a JSON object (dict), got {type(data).__name__}", file=sys.stderr) - print(f"File location: {LB_JSON}", file=sys.stderr) - print("Expected format: {\"username\": points, ...}", file=sys.stderr) - sys.exit(1) - - return data - -def normalize_scores(leaderboard): - """ - Ensure points are integers and filter out non-user keys other than 'top'. - Returns a list of (user, points) tuples suitable for sorting. - """ - items = [] - for user, points in leaderboard.items(): - # Skip metadata fields - if user in ('top', '_comment'): - continue - try: - # Convert numeric strings/floats to int safely - points_int = int(float(points)) - except (ValueError, TypeError): - # If points cannot be parsed, skip this user - print(f"WARNING: Skipping '{user}' due to non-numeric points: {points}", file=sys.stderr) - continue - items.append((user, points_int)) - return items - -def sort_contributors(items): - """ - Sort by points descending, then by user name ascending for stable tie ordering. - """ - return sorted(items, key=lambda x: (-x[1], x[0].lower())) - -def write_badge_top(leaderboard, items, no_badge=False): - """ - Write 'top' contributor back to leaderboard.json unless disabled. 
- """ - if no_badge: - return - - top_user = items[0][0] if items else "None" - leaderboard['top'] = top_user - - try: - with open(LB_JSON, 'w', encoding='utf-8') as f: - json.dump(leaderboard, f, indent=2, ensure_ascii=False) - print(f"Updated top contributor: {top_user}") - except Exception as e: - print(f"WARNING: Failed to write updated leaderboard.json: {e}", file=sys.stderr) - # Non-fatal: continue to write the MD even if we couldn't update the badge key - -def render_markdown(items, limit=0): - """ - Build the markdown leaderboard table with optional row limit and a 'Last updated' footer. - """ - if limit > 0: - items = items[:limit] - - lines = [] - lines.append("# Contributor Leaderboard\n\n") - - if not items: - lines.append("_No contributors yet. Be the first!_\n\n") - else: - lines.append("| Rank | User | Points |\n") - lines.append("|------|------|--------|\n") - - for rank, (user, points) in enumerate(items, start=1): - # Add medal emoji for top 3 - medal = "" - if rank == 1: - medal = "đŸĨ‡ " - elif rank == 2: - medal = "đŸĨˆ " - elif rank == 3: - medal = "đŸĨ‰ " - - lines.append(f"| {rank} | {medal}{user} | {points} |\n") - - lines.append("\n") - - # Footer with timestamp (UTC) - ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC") - lines.append(f"_Last updated: {ts}_\n") - - return "".join(lines) - -def write_markdown(markdown): - try: - with open(OUT_MD, 'w', encoding='utf-8') as f: - f.write(markdown) - print(f"Leaderboard written to: {OUT_MD}") - except Exception as e: - print(f"ERROR: Failed to write LEADERBOARD.md: {e}", file=sys.stderr) - sys.exit(1) - -def main(): - args = parse_args() - - print("=" * 60) - print("Updating Contributor Leaderboard") - print("=" * 60) - - # Load leaderboard with improved error handling - leaderboard = load_leaderboard(create_if_missing=args.create_if_missing) - - # Normalize and sort contributors - items = normalize_scores(leaderboard) - items = sort_contributors(items) - - if not items: - 
print("No valid contributors found in leaderboard.") - print("This is normal if no points have been awarded yet.") - else: - print(f"Found {len(items)} contributor(s)") - - # Update badge source unless disabled - write_badge_top(leaderboard, items, no_badge=args.no_badge) - - # Generate Markdown - md = render_markdown(items, limit=args.limit) - write_markdown(md) - - top_user = items[0][0] if items else "None" - print("=" * 60) - print(f"SUCCESS: Top contributor is {top_user}") - print("=" * 60) - -if __name__ == "__main__": - main() \ No newline at end of file