From 7cae5016b42f571c2cad9b4350d9b9154c923a94 Mon Sep 17 00:00:00 2001
From: Zack Koppert <zkoppert@github.com>
Date: Fri, 30 May 2025 16:50:48 -0700
Subject: [PATCH 1/7] Add CI that runs lint and tests on each pull request

---
 .github/workflows/python-ci.yml | 36 +++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 .github/workflows/python-ci.yml

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
new file mode 100644
index 0000000..a3aed2a
--- /dev/null
+++ b/.github/workflows/python-ci.yml
@@ -0,0 +1,36 @@
+---
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Python package
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.11", "3.12"]  # quoted: YAML floats would turn 3.10 into 3.1
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          pip install -r requirements.txt -r requirements-test.txt
+      - name: Lint with flake8 and pylint
+        run: |
+          make lint
+      - name: Test with pytest
+        run: |
+          make test

From 1d92be3ff1cf7e09c879c61bc9a7de25049dde93 Mon Sep 17 00:00:00 2001
From: Zack Koppert <zkoppert@github.com>
Date: Fri, 30 May 2025 16:53:24 -0700
Subject: [PATCH 2/7] docs: update docs link

---
 .github/workflows/python-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index a3aed2a..ebc75bc 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -1,6 +1,6 @@
 ---
 # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
-# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+# For more information see: https://docs.github.com/en/actions/use-cases-and-examples/building-and-testing/building-and-testing-python
 
 name: Python package
 

From 0ea54ce50bfc2abedbc93ecfb5a094de0e1b19a2 Mon Sep 17 00:00:00 2001
From: Zack Koppert <zkoppert@github.com>
Date: Fri, 30 May 2025 16:56:11 -0700
Subject: [PATCH 3/7] perf: cache pip dependencies to speed up CI runs

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 .github/workflows/python-ci.yml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index ebc75bc..b5a46c3 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -25,6 +25,13 @@ jobs:
         uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
         with:
           python-version: ${{ matrix.python-version }}
+      - name: Cache pip dependencies
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt', '**/requirements-test.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
       - name: Install dependencies
         run: |
           pip install -r requirements.txt -r requirements-test.txt

From 36a223549e77e69509dfe9fcd2c377b34d23156e Mon Sep 17 00:00:00 2001
From: Zack Koppert <zkoppert@github.com>
Date: Fri, 30 May 2025 20:21:36 -0700
Subject: [PATCH 4/7] fix: lint fixes and docstrings

Signed-off-by: Zack Koppert <zkoppert@github.com>
---
 test_user_engagement_metrics.py |  62 ++++++++++-
 user_engagement_metrics.py      | 179 +++++++++++++++++++++++++-------
 2 files changed, 201 insertions(+), 40 deletions(-)

diff --git a/test_user_engagement_metrics.py b/test_user_engagement_metrics.py
index ff2f65b..d5411b5 100644
--- a/test_user_engagement_metrics.py
+++ b/test_user_engagement_metrics.py
@@ -1,3 +1,10 @@
+"""
+Test suite for the GitHub User Engagement Metrics module.
+
+This module contains unit tests for all the functions in the user_engagement_metrics.py
+module, including API interactions, file operations, and data processing logic.
+"""
+
 import json
 import time
 from unittest.mock import MagicMock
@@ -8,6 +15,16 @@
 
 @pytest.fixture(autouse=True)
 def patch_globals(tmp_path, monkeypatch):
+    """
+    Fixture to patch global file paths to use temporary test directories.
+
+    This automatically runs for all tests, ensuring that tests don't use
+    or modify the real data files.
+
+    Args:
+        tmp_path: pytest fixture providing a temporary directory
+        monkeypatch: pytest fixture for modifying objects
+    """
     # Patch file paths to use test dir
     monkeypatch.setattr(
         user_engagement_metrics, "USERNAMES_FILE", str(tmp_path / "usernames.txt")
@@ -24,6 +41,12 @@ def patch_globals(tmp_path, monkeypatch):
 
 
 def test_safe_get_rate_limit(monkeypatch):
+    """
+    Test that safe_get handles GitHub API rate limits correctly.
+
+    This test verifies that when a rate limit response is received,
+    the function waits and retries the request.
+    """
     m_resp = MagicMock()
     m_resp.status_code = 403
     m_resp.headers = {
@@ -34,7 +57,8 @@ def test_safe_get_rate_limit(monkeypatch):
 
     call_count = {"count": 0}
 
-    def fake_requests_get(*a, **kw):
+    def fake_requests_get(*_a, **_kw):
+        """Fake requests.get to simulate rate limiting."""
         call_count["count"] += 1
         # simulate second call as success
         if call_count["count"] > 1:
@@ -52,6 +76,9 @@ def fake_requests_get(*a, **kw):
 
 
 def test_get_user_profile(monkeypatch):
+    """
+    Test that get_user_profile correctly calls the GitHub API and processes the result.
+    """
     monkeypatch.setattr(
         user_engagement_metrics,
         "safe_get",
@@ -61,12 +88,13 @@ def test_get_user_profile(monkeypatch):
 
 
 def test_get_user_repos(monkeypatch):
+    """
+    Test that get_user_repos collects the repos from a single page shorter than the page size.
+    """
     # Simulate 2 pages, then empty
-    responses = [
-        [{"id": 1}, {"id": 2}]
-    ]
+    responses = [[{"id": 1}, {"id": 2}]]
 
-    def safe_get(url, params=None):
+    def safe_get(_url, _params=None):
         return MagicMock(json=lambda: responses.pop(0))
 
     monkeypatch.setattr(user_engagement_metrics, "safe_get", safe_get)
@@ -75,6 +103,9 @@ def safe_get(url, params=None):
 
 
 def test_get_starred_repos_count_no_link(monkeypatch):
+    """
+    Test that get_starred_repos_count works correctly when no pagination Link header is present.
+    """
     m_resp = MagicMock()
     m_resp.headers = {}
     m_resp.json.return_value = [1, 2, 3]
@@ -83,6 +114,9 @@ def test_get_starred_repos_count_no_link(monkeypatch):
 
 
 def test_get_starred_repos_count_with_link(monkeypatch):
+    """
+    Test that get_starred_repos_count correctly parses the Link header for total count.
+    """
     m_resp = MagicMock()
     m_resp.headers = {
         "Link": '<https://api.github.com/user/123/starred?page=42>; rel="last"'
@@ -93,6 +127,9 @@ def test_get_starred_repos_count_with_link(monkeypatch):
 
 
 def test_get_orgs(monkeypatch):
+    """
+    Test that get_orgs correctly processes the API response.
+    """
     monkeypatch.setattr(
         user_engagement_metrics,
         "safe_get",
@@ -102,6 +139,9 @@ def test_get_orgs(monkeypatch):
 
 
 def test_search_user_contributions_commit(monkeypatch):
+    """
+    Test that search_user_contributions correctly handles commit searches.
+    """
     m_resp = MagicMock()
     m_resp.json.return_value = {"total_count": 123}
     monkeypatch.setattr(
@@ -113,6 +153,9 @@ def test_search_user_contributions_commit(monkeypatch):
 
 
 def test_search_user_contributions_issue(monkeypatch):
+    """
+    Test that search_user_contributions correctly handles issue searches.
+    """
     m_resp = MagicMock()
     m_resp.json.return_value = {"total_count": 99}
     monkeypatch.setattr(
@@ -124,6 +167,9 @@ def test_search_user_contributions_issue(monkeypatch):
 
 
 def test_load_completed_usernames(tmp_path, monkeypatch):
+    """
+    Test that load_completed_usernames correctly reads and processes the checkpoint file.
+    """
     file_path = tmp_path / "completed_usernames.txt"
     file_path.write_text("a\nb\n\nc\n")
     monkeypatch.setattr(user_engagement_metrics, "CHECKPOINT_FILE", str(file_path))
@@ -131,6 +177,9 @@ def test_load_completed_usernames(tmp_path, monkeypatch):
 
 
 def test_append_completed_username(tmp_path, monkeypatch):
+    """
+    Test that append_completed_username correctly writes to the checkpoint file.
+    """
     file_path = tmp_path / "completed_usernames.txt"
     monkeypatch.setattr(user_engagement_metrics, "CHECKPOINT_FILE", str(file_path))
     user_engagement_metrics.append_completed_username("dude")
@@ -138,6 +187,9 @@ def test_append_completed_username(tmp_path, monkeypatch):
 
 
 def test_append_result(tmp_path, monkeypatch):
+    """
+    Test that append_result correctly writes results to the output file.
+    """
     file_path = tmp_path / "user_results.jsonl"
     monkeypatch.setattr(user_engagement_metrics, "OUTPUT_FILE", str(file_path))
     user_engagement_metrics.append_result({"foo": "bar"})
diff --git a/user_engagement_metrics.py b/user_engagement_metrics.py
index 4168a1b..a6d45e1 100644
--- a/user_engagement_metrics.py
+++ b/user_engagement_metrics.py
@@ -1,7 +1,23 @@
+"""
+GitHub User Engagement Metrics Collector
+
+This module fetches and aggregates GitHub user engagement metrics for a list of usernames.
+It collects data on repositories, contributions, organizations, and user profiles from the
+GitHub API. The script handles rate limiting, retries, and checkpointing to resume
+interrupted operations.
+
+Usage:
+    1. Add GitHub usernames to 'usernames.txt' (one per line)
+    2. Replace 'your_token' with a valid GitHub API token
+    3. Run the script to collect metrics
+    4. Results are stored in 'user_results.jsonl'
+"""
+
 import json
 import os
 import random
 import time
+from re import search
 
 import requests
 
@@ -16,12 +32,31 @@
 
 
 def safe_get(url, params=None, extra_headers=None, max_retries=5):
+    """
+    Make a GET request to the GitHub API with automatic rate limit handling and retries.
+
+    This function handles rate limits by sleeping until the reset time when limits are hit.
+    It also implements exponential backoff for server errors.
+
+    Args:
+        url (str): The API endpoint URL to request
+        params (dict, optional): Query parameters for the request. Defaults to None.
+        extra_headers (dict, optional): Additional headers to include in the request.
+                                        Defaults to None.
+        max_retries (int, optional): Maximum number of retry attempts for failed requests.
+                                     Defaults to 5.
+
+    Returns:
+        requests.Response: The response object from the successful request
+    """
     retries = 0
     while True:
         combined_headers = headers.copy()
         if extra_headers:
             combined_headers.update(extra_headers)
-        response = requests.get(url, headers=combined_headers, params=params)
+        response = requests.get(
+            url, headers=combined_headers, params=params, timeout=10
+        )
         if response.status_code == 403:
             remaining = int(response.headers.get("X-RateLimit-Remaining", "1"))
             if remaining == 0:
@@ -44,57 +79,107 @@ def safe_get(url, params=None, extra_headers=None, max_retries=5):
                 time.sleep(wait)
                 retries += 1
                 continue
-            else:
-                print(f"Max retries reached for {url}. Skipping.")
-                return response
+            print(f"Max retries reached for {url}. Skipping.")
+            return response
         return response
 
 
-def get_user_profile(username):
-    url = f"{GITHUB_API}/users/{username}"
+def get_user_profile(user):
+    """
+    Fetch a GitHub user's profile information.
+
+    Args:
+        user (str): The GitHub username to fetch profile for
+
+    Returns:
+        dict: User profile data from the GitHub API
+    """
+    url = f"{GITHUB_API}/users/{user}"
     return safe_get(url).json()
 
 
-def get_user_repos(username):
-    repos = []
+def get_user_repos(user_name):
+    """
+    Fetch all public repositories for a GitHub user.
+
+    This function handles pagination to retrieve all repositories even if
+    the user has more than 100 repos (the per_page size requested on each call).
+
+    Args:
+        user_name (str): The GitHub username to fetch repositories for
+
+    Returns:
+        list: A list of repository objects from the GitHub API
+    """
+    repositories = []
     page = 1
     while True:
-        url = f"{GITHUB_API}/users/{username}/repos"
+        url = f"{GITHUB_API}/users/{user_name}/repos"
         params = {"per_page": 100, "page": page}
         res = safe_get(url, params=params).json()
         if not res or "message" in res:
             break
-        repos.extend(res)
+        repositories.extend(res)
         if len(res) < 100:
             break
         page += 1
-    return repos
+    return repositories
+
 
+def get_starred_repos_count(user):
+    """
+    Get the total count of repositories starred by a GitHub user.
 
-def get_starred_repos_count(username):
-    url = f"{GITHUB_API}/users/{username}/starred"
+    This function efficiently determines the count by examining the pagination
+    links rather than fetching all starred repos.
+
+    Args:
+        user (str): The GitHub username to check
+
+    Returns:
+        int: The number of repositories starred by the user
+    """
+    url = f"{GITHUB_API}/users/{user}/starred"
     params = {"per_page": 1}
     res = safe_get(url, params=params)
     link = res.headers.get("Link", "")
     if 'rel="last"' in link:
-        import re
-
-        match = re.search(r'page=(\d+)>; rel="last"', link)
+        match = search(r'page=(\d+)>; rel="last"', link)
         if match:
             return int(match.group(1))
     return len(res.json())
 
 
-def get_orgs(username):
-    url = f"{GITHUB_API}/users/{username}/orgs"
+def get_orgs(user):
+    """
+    Fetch all organizations a GitHub user is a member of.
+
+    Args:
+        user (str): The GitHub username to check
+
+    Returns:
+        list: A list of organization objects from the GitHub API
+    """
+    url = f"{GITHUB_API}/users/{user}/orgs"
     return safe_get(url).json()
 
 
-def search_user_contributions(username, type_):
+def search_user_contributions(user, type_):
+    """
+    Search for a user's public contributions of a specific type.
+
+    Args:
+        user (str): The GitHub username to check
+        type_ (str): The type of contribution to search for:
+                     'pr' (pull requests), 'issue', or 'commit'
+
+    Returns:
+        int: The total count of contributions of the specified type
+    """
     q_map = {
-        "pr": f"type:pr author:{username}",
-        "issue": f"type:issue author:{username}",
-        "commit": f"author:{username}",
+        "pr": f"type:pr author:{user}",
+        "issue": f"type:issue author:{user}",
+        "commit": f"author:{user}",
     }
     if type_ == "commit":
         url = f"{GITHUB_API}/search/commits"
@@ -107,26 +192,47 @@ def search_user_contributions(username, type_):
 
 
 def load_completed_usernames():
+    """
+    Load the set of usernames that have already been processed.
+
+    This function reads the checkpoint file to determine which users
+    have been successfully processed in previous runs.
+
+    Returns:
+        set: A set of usernames that have already been processed
+    """
     if os.path.exists(CHECKPOINT_FILE):
-        with open(CHECKPOINT_FILE, "r") as f:
-            return set(line.strip() for line in f if line.strip())
+        with open(CHECKPOINT_FILE, "r", encoding="utf-8") as checkpoint_file_load:
+            return set(line.strip() for line in checkpoint_file_load if line.strip())
     return set()
 
 
-def append_completed_username(username):
-    with open(CHECKPOINT_FILE, "a") as f:
-        f.write(username + "\n")
+def append_completed_username(completed_username):
+    """
+    Mark a username as completed by adding it to the checkpoint file.
 
+    Args:
+        completed_username (str): The GitHub username to mark as completed
+    """
+    with open(CHECKPOINT_FILE, "a", encoding="utf-8") as checkpoint_file_append:
+        checkpoint_file_append.write(completed_username + "\n")
 
-def append_result(result):
-    with open(OUTPUT_FILE, "a") as f:
-        f.write(json.dumps(result) + "\n")
 
+def append_result(user_result):
+    """
+    Append a user's engagement metrics to the output file.
 
-if __name__ == "__main__":
+    Args:
+        user_result (dict): The user's engagement metrics to save
+    """
+    with open(OUTPUT_FILE, "a", encoding="utf-8") as output_file_append:
+        output_file_append.write(json.dumps(user_result) + "\n")
+
+
+if __name__ == "__main__":  # pragma: no cover
     completed = load_completed_usernames()
-    with open(USERNAMES_FILE, "r") as f:
-        usernames = [line.strip() for line in f if line.strip()]
+    with open(USERNAMES_FILE, "r", encoding="utf-8") as file:
+        usernames = [line.strip() for line in file if line.strip()]
 
     print(f"Loaded {len(usernames)} usernames, {len(completed)} already completed.")
     if TOKEN == "your_token":
@@ -162,7 +268,10 @@ def append_result(result):
                 "total_public_commits": commit_count,
             }
             append_result(result)
-        except Exception as e:
-            print(f"Error processing {username}: {e}")
+        except requests.RequestException as e:
+            print(f"Network error processing {username}: {e}")
+            continue
+        except (KeyError, ValueError) as e:
+            print(f"Data error processing {username}: {e}")
             continue
         append_completed_username(username)

From 5a4e28becd5e672960d8eff7750d9b94aafd1ba2 Mon Sep 17 00:00:00 2001
From: Zack Koppert <zkoppert@github.com>
Date: Fri, 30 May 2025 20:24:53 -0700
Subject: [PATCH 5/7] fix: update required packages for testing

Signed-off-by: Zack Koppert <zkoppert@github.com>
---
 requirements-test.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements-test.txt b/requirements-test.txt
index fbf31c1..93cc3c4 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -4,4 +4,5 @@ flake8
 isort
 pylint
 mypy
-black
\ No newline at end of file
+black
+types-requests

From 3df64ff64b329345cbcb4a1093401badfe27a0d0 Mon Sep 17 00:00:00 2001
From: Zack Koppert <zkoppert@github.com>
Date: Fri, 30 May 2025 20:33:39 -0700
Subject: [PATCH 6/7] fix: accept and discard keyword arguments in test stub via **kwargs

Signed-off-by: Zack Koppert <zkoppert@github.com>
---
 test_user_engagement_metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test_user_engagement_metrics.py b/test_user_engagement_metrics.py
index d5411b5..d776851 100644
--- a/test_user_engagement_metrics.py
+++ b/test_user_engagement_metrics.py
@@ -94,7 +94,7 @@ def test_get_user_repos(monkeypatch):
     # Simulate 2 pages, then empty
     responses = [[{"id": 1}, {"id": 2}]]
 
-    def safe_get(_url, _params=None):
+    def safe_get(_url, **kwargs):
         return MagicMock(json=lambda: responses.pop(0))
 
     monkeypatch.setattr(user_engagement_metrics, "safe_get", safe_get)

From 0ef8bb4e23c3ff95fd49f32b737cf055fa210651 Mon Sep 17 00:00:00 2001
From: Zack Koppert <zkoppert@github.com>
Date: Fri, 30 May 2025 20:37:02 -0700
Subject: [PATCH 7/7] fix: use pinned hash in action reference

---
 .github/workflows/python-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml
index b5a46c3..33fa9ad 100644
--- a/.github/workflows/python-ci.yml
+++ b/.github/workflows/python-ci.yml
@@ -26,7 +26,7 @@ jobs:
         with:
           python-version: ${{ matrix.python-version }}
       - name: Cache pip dependencies
-        uses: actions/cache@v3
+        uses: actions/cache@2f8e54208210a422b2efd51efaa6bd6d7ca8920f # v3.4.3
         with:
           path: ~/.cache/pip
           key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt', '**/requirements-test.txt') }}