From da8cb65cbe8e603ce8ef6972a59140ddeb4393f5 Mon Sep 17 00:00:00 2001
From: David Lawrence
Date: Wed, 21 Jan 2026 16:57:08 -0500
Subject: [PATCH 01/12] feat: Added comprehensive unit testing and github action to run tests on new pull requests
---
 .github/workflows/tests.yml |   75 ++
 TESTING.md                  |  662 ++++++++++++
 pytest.ini                  |   49 +
 requirements.txt            |    6 +
 test_main.py                | 2106 +++++++++++++++++++++++++++++++++++
 5 files changed, 2898 insertions(+)
 create mode 100644 .github/workflows/tests.yml
 create mode 100644 TESTING.md
 create mode 100644 pytest.ini
 create mode 100644 test_main.py

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..87e2800
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,75 @@
+name: Tests and Linting
+
+on:
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install black flake8 mypy isort
+
+      - name: Run black
+        run: black --check main.py test_main.py
+
+      - name: Run isort
+        run: isort --check-only main.py test_main.py
+
+      - name: Run flake8
+        run: flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503
+
+      - name: Run mypy
+        run: mypy main.py --no-strict-optional --ignore-missing-imports
+
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Run unit tests with coverage
+        run: |
+          pytest -m "not integration and not slow" --cov=main --cov-report=term-missing --cov-fail-under=80
+
+      - name: Run all tests
+        run: |
+          pytest --cov=main --cov-report=xml --cov-report=html
+
+      - name: Upload coverage reports
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-reports
+          path: |
+            htmlcov/
+            coverage.xml
+
+  integration-test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Run integration test with docker-compose
+        run: |
+          docker-compose up --build --abort-on-container-exit --exit-code-from github-etl
+
+      - name: Cleanup
+        if: always()
+        run: docker-compose down -v
diff --git a/TESTING.md b/TESTING.md
new file mode 100644
index 0000000..c0bb5dd
--- /dev/null
+++ b/TESTING.md
@@ -0,0 +1,662 @@
+# Testing Guide for GitHub ETL
+
+This document describes comprehensive testing for the GitHub ETL pipeline, including
+unit tests, integration tests, Docker testing, linting, and CI/CD workflows.
+
+## Table of Contents
+
+1. [Unit Testing](#unit-testing)
+2. [Test Organization](#test-organization)
+3. [Running Tests](#running-tests)
+4. [Code Coverage](#code-coverage)
+5. [Linting and Code Quality](#linting-and-code-quality)
+6. [CI/CD Integration](#cicd-integration)
+7. [Docker Testing](#docker-testing)
+8. [Adding New Tests](#adding-new-tests)
+
+---
+
+## Unit Testing
+
+The test suite in `test_main.py` provides comprehensive coverage for all functions in `main.py`.
+The suite contains **95 tests** (92 unit and 3 integration) covering 9 functions, with a minimum of 80% code coverage enforced in CI.
+
+### Test Structure
+
+Tests are organized into 10 test classes:
+
+1. **TestSetupLogging** (1 test) - Logging configuration
+2. **TestSleepForRateLimit** (4 tests) - Rate limit handling
+3. **TestExtractPullRequests** (14 tests) - PR extraction with pagination and enrichment
+4. **TestExtractCommits** (9 tests) - Commit and file extraction
+5. **TestExtractReviewers** (6 tests) - Reviewer extraction
+6. **TestExtractComments** (7 tests) - Comment extraction (uses /issues endpoint)
+7. **TestTransformData** (26 tests) - Data transformation for all 4 BigQuery tables
+8. **TestLoadData** (8 tests) - BigQuery data loading
+9. **TestMain** (17 tests) - Main ETL orchestration
+10. **TestIntegration** (3 tests) - End-to-end integration tests (marked with `@pytest.mark.integration`)
+
+### Fixtures
+
+Reusable fixtures are defined at the top of `test_main.py`:
+
+- `mock_session` - Mocked `requests.Session`
+- `mock_bigquery_client` - Mocked BigQuery client
+- `mock_pr_response` - Realistic pull request response
+- `mock_commit_response` - Realistic commit with files
+- `mock_reviewer_response` - Realistic reviewer response
+- `mock_comment_response` - Realistic comment response
+
+## Test Organization
+
+### Function Coverage
+
+| Function | Tests | Coverage Target | Key Test Areas |
+|----------|-------|-----------------|----------------|
+| `setup_logging()` | 1 | 100% | Logger configuration |
+| `sleep_for_rate_limit()` | 4 | 100% | Rate limit sleep logic, edge cases |
+| `extract_pull_requests()` | 14 | 90%+ | Pagination, rate limits, enrichment, error handling |
+| `extract_commits()` | 9 | 85%+ | Commit/file fetching, rate limits, errors |
+| `extract_reviewers()` | 6 | 85%+ | Reviewer states, rate limits, errors |
+| `extract_comments()` | 7 | 85%+ | Comment fetching (via /issues), rate limits |
+| `transform_data()` | 26 | 95%+ | Bug ID extraction, 4 tables, field mapping |
+| `load_data()` | 8 | 90%+ | BigQuery insertion, snapshot dates, errors |
+| `main()` | 17 | 85%+ | Env vars, orchestration, chunking |
+
+**Overall Target: 85-90% coverage** (80% minimum enforced in CI)
+
+### Critical Test Cases
+
+#### Bug ID Extraction
+Tests verify the regex pattern matches:
+- `Bug 1234567 - Fix` → 1234567
+- `bug 1234567` → 1234567
+- `b=1234567` → 1234567
+- `Bug #1234567` → 1234567
+- Filters out IDs >= 100000000
+
+#### Data Transformation
+Tests ensure correct transformation for all 4 BigQuery tables:
+- **pull_requests**: PR metadata, bug IDs, labels, date_approved
+- **commits**: Flattened files (one row per file), commit metadata
+- **reviewers**: Review states, date_approved calculation
+- **comments**: Character count, status mapping from reviews
+
+#### Rate Limiting
+Tests verify rate limit handling at all API levels:
+- Pull requests pagination
+- Commit fetching
+- Reviewer fetching
+- Comment fetching
+
+## Running Tests
+
+### All Tests with Coverage
+
+```bash
+pytest
+```
+
+This runs all tests with coverage reporting (configured in `pytest.ini`).
+
+### Fast Unit Tests Only (Skip Integration)
+
+```bash
+pytest -m "not integration and not slow"
+```
+
+Use this for fast feedback during development.
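+
+The `integration` and `slow` markers used by the `-m` filter are the ones registered
+under `markers` in `pytest.ini`. As a minimal sketch (the test name here is
+hypothetical), a test opts into the integration suite like this:
+
+```python
+import pytest
+
+
+@pytest.mark.integration
+def test_full_pipeline_against_emulator():
+    """Selected by `pytest -m integration`; skipped by the fast filter above."""
+    ...
+```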
+
+### Specific Test Class
+
+```bash
+pytest test_main.py::TestTransformData
+```
+
+### Specific Test Function
+
+```bash
+pytest test_main.py::TestTransformData::test_bug_id_extraction_basic -v
+```
+
+### With Verbose Output
+
+```bash
+pytest -v
+```
+
+### With Coverage Report
+
+```bash
+# Terminal report
+pytest --cov=main --cov-report=term-missing
+
+# HTML report
+pytest --cov=main --cov-report=html
+open htmlcov/index.html
+```
+
+### Integration Tests Only
+
+```bash
+pytest -m integration
+```
+
+## Code Coverage
+
+### Coverage Requirements
+
+- **Minimum**: 80% (enforced in CI via `--cov-fail-under=80`)
+- **Target**: 85-90%
+- **Current**: Run `pytest --cov=main` to see current coverage
+
+### Coverage Configuration
+
+Coverage settings are in `pytest.ini`:
+
+```ini
+[pytest]
+addopts =
+    --cov=main
+    --cov-report=term-missing
+    --cov-report=html
+    --cov-branch
+    --cov-fail-under=80
+```
+
+### Viewing Coverage
+
+```bash
+# Generate HTML coverage report
+pytest --cov=main --cov-report=html
+
+# Open in browser
+xdg-open htmlcov/index.html # Linux
+open htmlcov/index.html # macOS
+```
+
+The HTML report shows:
+- Line-by-line coverage
+- Branch coverage
+- Missing lines highlighted
+- Per-file coverage percentages
+
+## Linting and Code Quality
+
+### Available Linters
+
+The project uses these linting tools (defined in `requirements.txt`):
+
+- **black** - Code formatting
+- **isort** - Import sorting
+- **flake8** - Style and syntax checking
+- **mypy** - Static type checking
+
+### Running Linters
+
+```bash
+# Run black (auto-format)
+black main.py test_main.py
+
+# Check formatting without changes
+black --check main.py test_main.py
+
+# Sort imports
+isort main.py test_main.py
+
+# Check import sorting
+isort --check-only main.py test_main.py
+
+# Run flake8
+flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503
+
+# Run mypy
+mypy main.py --no-strict-optional --ignore-missing-imports
+```
+
+### All Linting Checks
+
+```bash
+# Run all linters in sequence
+black --check main.py test_main.py && \
+isort --check-only main.py test_main.py && \
+flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503 && \
+mypy main.py --no-strict-optional --ignore-missing-imports
+```
+
+## CI/CD Integration
+
+### GitHub Actions Workflow
+
+The `.github/workflows/tests.yml` workflow runs on pull requests that target `main`:
+
+**Lint Job:**
+1. Runs black (format check)
+2. Runs isort (import check)
+3. Runs flake8 (style check)
+4. Runs mypy (type check)
+
+**Test Job:**
+1. Runs fast unit tests with 80% coverage threshold
+2. Runs all tests (including integration)
+3. Uploads coverage reports as artifacts
+
+**Integration Test Job:**
+1. Runs the docker-compose environment and fails the job if the ETL container exits non-zero
+
+### Workflow Triggers
+
+- Pull requests targeting the `main` branch
+- To run on pushes as well, extend the `on:` block in `tests.yml`
+
+### Viewing Results
+
+- Check the Actions tab in GitHub
+- Coverage artifacts are uploaded for each run
+- Failed linting or tests block merges when branch protection requires these checks
+
+## Docker Testing
+
+### Overview
+
+The `docker-compose.yml` configuration provides a complete local testing environment with:
+
+1. **Mock GitHub API** - A Flask-based mock service that simulates the GitHub Pull Requests API
+2. **BigQuery Emulator** - A local BigQuery instance for testing data loads
+3. **ETL Service** - The main GitHub ETL application configured to use the mock services
+
+### Quick Start
+
+#### Start all services
+
+```bash
+docker-compose up --build
+```
+
+This will:
+
+- Build and start the mock GitHub API (port 5000)
+- Start the BigQuery emulator (ports 9050, 9060)
+- Build and run the ETL service
+
+The ETL service will automatically:
+
+- Fetch 250 mock pull requests from the mock GitHub API
+- Transform the data
+- Load it into the BigQuery emulator
+
+#### View logs
+
+```bash
+# All services
+docker-compose logs -f
+
+# Specific service
+docker-compose logs -f github-etl
+docker-compose logs -f bigquery-emulator
+docker-compose logs -f mock-github-api
+```
+
+#### Stop services
+
+```bash
+docker-compose down
+```
+
+### Architecture
+
+#### Mock GitHub API Service
+
+- **Port**: 5000
+- **Endpoint**: `http://localhost:5000/repos/{owner}/{repo}/pulls`
+- **Mock data**: Generates 250 sample pull requests with realistic data
+- **Features**:
+  - Pagination support (per_page, page parameters)
+  - Realistic PR data (numbers, titles, states, timestamps, users, etc.)
+  - Mock rate limit headers
+  - No authentication required
+
+#### BigQuery Emulator Service
+
+- **Ports**:
+  - 9050 (BigQuery API)
+  - 9060 (Discovery/Admin API)
+- **Configuration**: Uses `data.yml` to define the schema
+- **Project**: test-project
+- **Dataset**: test_dataset
+- **Table**: pull_requests
+
+#### ETL Service
+
+The ETL service is configured via environment variables in `docker-compose.yml`:
+
+```yaml
+environment:
+  GITHUB_REPOS: "mozilla/firefox"
+  GITHUB_API_URL: "http://mock-github-api:5000" # Points to mock API
+  BIGQUERY_PROJECT: "test"
+  BIGQUERY_DATASET: "github_etl"
+  BIGQUERY_EMULATOR_HOST: "http://bigquery-emulator:9050"
+```
+
+### Customization
+
+#### Using Real GitHub API
+
+To test with the real GitHub API instead of the mock:
+
+1. Set `GITHUB_TOKEN` environment variable
+2. Remove or comment out `GITHUB_API_URL` in docker-compose.yml
+3. Update `depends_on` to not require mock-github-api
+
+```bash
+export GITHUB_TOKEN="your_github_token"
+docker-compose up github-etl bigquery-emulator
+```
+
+#### Adjusting Mock Data
+
+Edit `mock_github_api.py` to customize:
+
+- Total number of PRs (default: 250)
+- PR field values
+- Pagination behavior
+
+#### Modifying BigQuery Schema
+
+Edit `data.yml` to change the table schema. The schema matches the fields
+extracted in `main.py`'s `transform_data()` function.
+
+### Querying the BigQuery Emulator
+
+You can query the BigQuery emulator using the BigQuery Python client:
+
+```python
+from google.cloud import bigquery
+from google.api_core.client_options import ClientOptions
+
+client = bigquery.Client(
+    project="test-project",
+    client_options=ClientOptions(api_endpoint="http://localhost:9050")
+)
+
+query = """
+SELECT pr_number, title, state, user_login
+FROM `test-project.test_dataset.pull_requests`
+LIMIT 10
+"""
+
+for row in client.query(query):
+    print(f"PR #{row.pr_number}: {row.title} - {row.state}")
+```
+
+Or use the `bq` command-line tool with the emulator endpoint.
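+
+When scripting queries like the one above (for example in CI), the emulator may not
+be ready the moment its container starts. The snippet below is a sketch of a simple
+readiness probe, not part of `main.py`; it assumes the emulator is reachable on
+`localhost:9050` as configured in `docker-compose.yml`:
+
+```python
+import time
+
+import requests
+
+
+def wait_for_emulator(endpoint="http://localhost:9050", timeout=30.0):
+    """Poll the emulator endpoint until the port answers or the timeout expires."""
+    deadline = time.time() + timeout
+    while time.time() < deadline:
+        try:
+            # Any HTTP response at all means the server socket is up.
+            requests.get(endpoint, timeout=2)
+            return True
+        except requests.ConnectionError:
+            time.sleep(1)
+    return False
+```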
+
+### Troubleshooting Docker Services
+
+#### Services not starting
+
+Check if ports are already in use:
+
+```bash
+lsof -i :5000 # Mock GitHub API
+lsof -i :9050 # BigQuery emulator
+```
+
+#### ETL fails to connect
+
+Ensure services are healthy:
+
+```bash
+docker-compose ps
+```
+
+Check service logs:
+
+```bash
+docker-compose logs bigquery-emulator
+docker-compose logs mock-github-api
+```
+
+#### Schema mismatch errors
+
+Verify `data.yml` schema matches fields in `main.py:transform_data()`.
+
+### Development Workflow
+
+1. Make changes to `main.py`
+2. Restart the ETL service: `docker-compose restart github-etl`
+3. View logs: `docker-compose logs -f github-etl`
+
+The `main.py` file is mounted as a volume, so changes are reflected without rebuilding.
+
+### Cleanup
+
+Remove all containers and volumes:
+
+```bash
+docker-compose down -v
+```
+
+Remove built images:
+
+```bash
+docker-compose down --rmi all
+```
+
+---
+
+## Adding New Tests
+
+### Testing Patterns
+
+#### 1. Mock External Dependencies
+
+Always mock external API calls and BigQuery operations:
+
+```python
+@patch("requests.Session")
+def test_api_call(mock_session_class):
+    mock_session = MagicMock()
+    mock_session_class.return_value = mock_session
+
+    mock_response = Mock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = [{"id": 1}]
+
+    mock_session.get.return_value = mock_response
+    # Test code here
+```
+
+#### 2. Use Fixtures
+
+Leverage existing fixtures for common test data:
+
+```python
+def test_with_fixtures(mock_session, mock_pr_response):
+    # Use mock_session and mock_pr_response
+    pass
+```
+
+#### 3. Test Edge Cases
+
+Always test:
+- Empty inputs
+- None values
+- Missing fields
+- Rate limits
+- API errors (404, 500, etc.)
+- Boundary conditions
+
+#### 4. Verify Call Arguments
+
+Check that functions are called with correct parameters:
+
+```python
+mock_extract.assert_called_once_with(
+    session=mock_session,
+    repo="mozilla/firefox",
+    github_api_url="https://api.github.com"
+)
+```
+
+### Example: Adding a New Test
+
+```python
+class TestNewFunction:
+    """Tests for new_function."""
+
+    def test_basic_functionality(self, mock_session):
+        """Test basic happy path."""
+        # Arrange
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"result": "success"}
+        mock_session.get.return_value = mock_response
+
+        # Act
+        result = main.new_function(mock_session, "arg1")
+
+        # Assert
+        assert result == {"result": "success"}
+        mock_session.get.assert_called_once()
+
+    def test_error_handling(self, mock_session):
+        """Test error handling."""
+        mock_response = Mock()
+        mock_response.status_code = 500
+        mock_response.text = "Internal Error"
+        mock_session.get.return_value = mock_response
+
+        with pytest.raises(SystemExit) as exc_info:
+            main.new_function(mock_session, "arg1")
+
+        assert "500" in str(exc_info.value)
+```
+
+### Test Organization Guidelines
+
+1. **Group related tests** in test classes
+2. **Use descriptive names** like `test_handles_rate_limit_on_commits`
+3. **One assertion concept per test** - Test one thing at a time
+4. **Arrange-Act-Assert pattern** - Structure tests clearly
+5. **Add docstrings** to explain what each test verifies
+
+### Mocking Patterns
+
+#### Mocking Time
+
+```python
+@patch("time.time")
+@patch("time.sleep")
+def test_with_time(mock_sleep, mock_time):
+    mock_time.return_value = 1000
+    # Test code
+```
+
+#### Mocking Environment Variables
+
+```python
+with patch.dict(os.environ, {"VAR_NAME": "value"}, clear=True):
+    ...  # test code that reads VAR_NAME
+```
+
+#### Mocking Generators
+
+```python
+mock_extract.return_value = iter([[{"id": 1}], [{"id": 2}]])
+```
+
+### Running Tests During Development
+
+```bash
+# Auto-run tests on file changes (requires pytest-watch)
+pip install pytest-watch
+ptw -- --cov=main -m "not integration"
+```
+
+### Debugging Tests
+
+```bash
+# Drop into debugger on failures
+pytest --pdb
+
+# Show print statements
+pytest -s
+
+# Verbose with full diff
+pytest -vv
+```
+
+### Coverage Tips
+
+If coverage is below 80%:
+
+1. Run `pytest --cov=main --cov-report=term-missing` to see missing lines
+2. Look for untested branches (if/else paths)
+3. Check error handling paths
+4. Verify edge cases are covered
+
+## Resources
+
+- [pytest documentation](https://docs.pytest.org/)
+- [pytest-cov documentation](https://pytest-cov.readthedocs.io/)
+- [unittest.mock documentation](https://docs.python.org/3/library/unittest.mock.html)
+
+## Troubleshooting
+
+### Tests Pass Locally But Fail in CI
+
+- Check Python version (must be 3.11)
+- Verify all dependencies are in `requirements.txt`
+- Look for environment-specific issues
+
+### Coverage Dropped Below 80%
+
+- Run locally: `pytest --cov=main --cov-report=html`
+- Open `htmlcov/index.html` to see uncovered lines
+- Add tests for missing coverage
+
+### Import Errors
+
+- Ensure `PYTHONPATH` includes project root
+- Check that `__init__.py` files exist if needed
+- Verify module names match file names
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..d4a601a
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,49 @@
+[pytest]
+# Pytest configuration for GitHub ETL project
+
+# Test discovery patterns
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+
+# Output options
+addopts =
+    -v
+    --strict-markers
+    --tb=short
+    --cov=main
+    --cov-report=term-missing
+    --cov-report=html
+    --cov-branch
+    --cov-fail-under=80
+
+# --cov-fail-under above sets the minimum coverage threshold (adjust as needed)
+
+# Test paths
+testpaths = .
+
+# Markers for organizing tests
+markers =
+    unit: Unit tests for individual functions
+    integration: Integration tests that test multiple components
+    slow: Tests that take longer to run
+
+# Logging
+log_cli = false
+log_cli_level = INFO
+log_cli_format = %(asctime)s [%(levelname)8s] %(message)s
+log_cli_date_format = %Y-%m-%d %H:%M:%S
+
+# Coverage options (note: coverage.py does not read pytest.ini; move these to .coveragerc or setup.cfg if needed)
+[coverage:run]
+source = .
+omit = + test_*.py + .venv/* + venv/* + */site-packages/* + +[coverage:report] +precision = 2 +show_missing = true +skip_covered = false diff --git a/requirements.txt b/requirements.txt index 008aa8a..8ede7d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,9 @@ google-cloud-bigquery==3.25.0 pytest>=7.0.0 pytest-mock>=3.10.0 pytest-cov>=4.0.0 + +# Linting and formatting tools +black>=24.0.0 +flake8>=7.0.0 +mypy>=1.8.0 +isort>=5.13.0 diff --git a/test_main.py b/test_main.py new file mode 100644 index 0000000..7165677 --- /dev/null +++ b/test_main.py @@ -0,0 +1,2106 @@ +#!/usr/bin/env python3 +""" +Comprehensive test suite for GitHub ETL main.py + +This test suite provides complete coverage for all functions in main.py, +including extraction, transformation, loading, and orchestration logic. +""" + +import logging +import os +import sys +import time +from datetime import datetime, timezone +from unittest.mock import Mock, MagicMock, patch, call +import pytest +import requests +from google.cloud import bigquery + +import main + + +# ============================================================================= +# FIXTURES +# ============================================================================= + + +@pytest.fixture +def mock_session(): + """Provide a mocked requests.Session for testing.""" + session = Mock(spec=requests.Session) + session.headers = {} + return session + + +@pytest.fixture +def mock_bigquery_client(): + """Provide a mocked BigQuery client for testing.""" + client = Mock(spec=bigquery.Client) + client.project = "test-project" + client.insert_rows_json = Mock(return_value=[]) + return client + + +@pytest.fixture +def mock_pr_response(): + """Provide a realistic pull request response for testing.""" + return { + "number": 123, + "title": "Bug 1234567 - Fix login issue", + "state": "closed", + "created_at": "2024-01-01T10:00:00Z", + "updated_at": "2024-01-02T10:00:00Z", + "merged_at": "2024-01-02T10:00:00Z", + "user": {"login": "testuser"}, + "head": {"ref": "fix-branch"}, + "base": {"ref": "main"}, + "labels": [{"name": "bug"}, {"name": "priority-high"}], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + + +@pytest.fixture +def mock_commit_response(): + """Provide a realistic commit response with files.""" + return { + "sha": "abc123def456", + "commit": { + "author": { + "name": "Test Author", + "email": "test@example.com", + "date": "2024-01-01T12:00:00Z", + } + }, + "files": [ + { + "filename": "src/login.py", + "additions": 10, + "deletions": 5, + "changes": 15, + }, + { + "filename": "tests/test_login.py", + "additions": 20, + "deletions": 2, + "changes": 22, + }, + ], + } + + +@pytest.fixture +def mock_reviewer_response(): + """Provide a realistic reviewer response.""" + return { + "id": 789, + "user": {"login": "reviewer1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + "body": "LGTM", + } + + +@pytest.fixture +def mock_comment_response(): + """Provide a realistic comment response.""" + return { + "id": 456, + "user": {"login": "commenter1"}, + "created_at": "2024-01-01T14:00:00Z", + "body": "This looks good to me", + "pull_request_review_id": None, + } + + +# ============================================================================= +# TEST CLASSES +# ============================================================================= + + +class TestSetupLogging: + """Tests for setup_logging function.""" + + def test_setup_logging_configures_logger(self): + """Test that setup_logging configures the root logger 
correctly.""" + main.setup_logging() + + root_logger = logging.getLogger() + assert root_logger.level == logging.INFO + assert len(root_logger.handlers) > 0 + + # Check that at least one handler is a StreamHandler + has_stream_handler = any( + isinstance(handler, logging.StreamHandler) + for handler in root_logger.handlers + ) + assert has_stream_handler + + +class TestSleepForRateLimit: + """Tests for sleep_for_rate_limit function.""" + + @patch("time.time") + @patch("time.sleep") + def test_sleep_for_rate_limit_when_remaining_is_zero( + self, mock_sleep, mock_time + ): + """Test that sleep_for_rate_limit sleeps until reset time.""" + mock_time.return_value = 1000 + + mock_response = Mock() + mock_response.headers = { + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": "1120", # 120 seconds from now + } + + main.sleep_for_rate_limit(mock_response) + + mock_sleep.assert_called_once_with(120) + + @patch("time.time") + @patch("time.sleep") + def test_sleep_for_rate_limit_when_reset_already_passed( + self, mock_sleep, mock_time + ): + """Test that sleep_for_rate_limit doesn't sleep negative time.""" + mock_time.return_value = 2000 + + mock_response = Mock() + mock_response.headers = { + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": "1500", # Already passed + } + + main.sleep_for_rate_limit(mock_response) + + # Should sleep for 0 seconds (max of 0 and negative value) + mock_sleep.assert_called_once_with(0) + + @patch("time.sleep") + def test_sleep_for_rate_limit_when_remaining_not_zero(self, mock_sleep): + """Test that sleep_for_rate_limit doesn't sleep when remaining > 0.""" + mock_response = Mock() + mock_response.headers = { + "X-RateLimit-Remaining": "5", + "X-RateLimit-Reset": "1500", + } + + main.sleep_for_rate_limit(mock_response) + + # Should not sleep when remaining > 0 + mock_sleep.assert_not_called() + + @patch("time.sleep") + def test_sleep_for_rate_limit_with_missing_headers(self, mock_sleep): + """Test sleep_for_rate_limit with missing rate limit headers.""" + mock_response = Mock() + mock_response.headers = {} + + main.sleep_for_rate_limit(mock_response) + + # Should not sleep when headers are missing (defaults to remaining=1) + mock_sleep.assert_not_called() + + +class TestExtractPullRequests: + """Tests for extract_pull_requests function.""" + + def test_extract_single_page(self, mock_session): + """Test extracting data from a single page of results.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + {"number": 1, "title": "PR 1"}, + {"number": 2, "title": "PR 2"}, + ] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + # Mock the extract functions + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + assert len(result) == 1 + assert len(result[0]) == 2 + assert result[0][0]["number"] == 1 + assert result[0][1]["number"] == 2 + + def test_extract_multiple_pages(self, mock_session): + """Test extracting data across multiple pages with pagination.""" + # First page response + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [ + {"number": 1, "title": "PR 1"}, + {"number": 2, "title": "PR 2"}, + ] + mock_response_1.links = { + "next": { + "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2" + } + } + + # Second page response + 
mock_response_2 = Mock() + mock_response_2.status_code = 200 + mock_response_2.json.return_value = [{"number": 3, "title": "PR 3"}] + mock_response_2.links = {} + + mock_session.get.side_effect = [mock_response_1, mock_response_2] + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + assert len(result) == 2 + assert len(result[0]) == 2 + assert len(result[1]) == 1 + assert result[0][0]["number"] == 1 + assert result[1][0]["number"] == 3 + + def test_enriches_prs_with_commit_data(self, mock_session): + """Test that PRs are enriched with commit data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + mock_commits = [{"sha": "abc123"}] + + with patch( + "main.extract_commits", return_value=mock_commits + ) as mock_extract_commits, patch( + "main.extract_reviewers", return_value=[] + ), patch( + "main.extract_comments", return_value=[] + ): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + assert result[0][0]["commit_data"] == mock_commits + mock_extract_commits.assert_called_once() + + def test_enriches_prs_with_reviewer_data(self, mock_session): + """Test that PRs are enriched with reviewer data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + mock_reviewers = [{"id": 789, "state": "APPROVED"}] + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=mock_reviewers + ) as mock_extract_reviewers, patch( + "main.extract_comments", return_value=[] + ): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + assert result[0][0]["reviewer_data"] == mock_reviewers + mock_extract_reviewers.assert_called_once() + + def test_enriches_prs_with_comment_data(self, mock_session): + """Test that PRs are enriched with comment data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + mock_comments = [{"id": 456, "body": "Great work!"}] + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch( + "main.extract_comments", return_value=mock_comments + ) as mock_extract_comments: + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + assert result[0][0]["comment_data"] == mock_comments + mock_extract_comments.assert_called_once() + + @patch("main.sleep_for_rate_limit") + def test_handles_rate_limit(self, mock_sleep, mock_session): + """Test that extract_pull_requests handles rate limiting correctly.""" + # Rate limit response + mock_response_rate_limit = Mock() + mock_response_rate_limit.status_code = 403 + mock_response_rate_limit.headers = {"X-RateLimit-Remaining": "0"} + + # Successful response after rate limit + mock_response_success = Mock() + mock_response_success.status_code = 200 + mock_response_success.json.return_value = [ + {"number": 1, "title": "PR 1"} + ] + mock_response_success.links = {} + + mock_session.get.side_effect = [ 
+ mock_response_rate_limit, + mock_response_success, + ] + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + mock_sleep.assert_called_once_with(mock_response_rate_limit) + assert len(result) == 1 + + def test_handles_api_error_404(self, mock_session): + """Test that extract_pull_requests raises SystemExit on 404.""" + mock_response = Mock() + mock_response.status_code = 404 + mock_response.text = "Not Found" + + mock_session.get.return_value = mock_response + + with pytest.raises(SystemExit) as exc_info: + list(main.extract_pull_requests(mock_session, "mozilla/nonexistent")) + + assert "GitHub API error 404" in str(exc_info.value) + + def test_handles_api_error_500(self, mock_session): + """Test that extract_pull_requests raises SystemExit on 500.""" + mock_response = Mock() + mock_response.status_code = 500 + mock_response.text = "Internal Server Error" + + mock_session.get.return_value = mock_response + + with pytest.raises(SystemExit) as exc_info: + list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert "GitHub API error 500" in str(exc_info.value) + + def test_stops_on_empty_batch(self, mock_session): + """Test that extraction stops when an empty batch is returned.""" + # First page with data + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [{"number": 1}] + mock_response_1.links = { + "next": { + "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2" + } + } + + # Second page empty + mock_response_2 = Mock() + mock_response_2.status_code = 200 + mock_response_2.json.return_value = [] + mock_response_2.links = {} + + mock_session.get.side_effect = [mock_response_1, mock_response_2] + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + # Should only have 1 chunk from first page + assert len(result) == 1 + assert len(result[0]) == 1 + + def test_invalid_page_number_handling(self, mock_session): + """Test handling of invalid page number in pagination.""" + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [{"number": 1}] + mock_response_1.links = { + "next": { + "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=invalid" + } + } + + mock_session.get.return_value = mock_response_1 + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + # Should stop pagination on invalid page number + assert len(result) == 1 + + def test_custom_github_api_url(self, mock_session): + """Test using custom GitHub API URL.""" + custom_url = "https://mock-github.example.com" + + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + list( + main.extract_pull_requests( + mock_session, "mozilla/firefox", 
github_api_url=custom_url + ) + ) + + # Verify custom URL was used + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] + + def test_skips_prs_without_number_field(self, mock_session): + """Test that PRs without 'number' field are skipped.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + {"number": 1, "title": "PR 1"}, + {"title": "PR without number"}, # Missing number field + {"number": 2, "title": "PR 2"}, + ] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + with patch("main.extract_commits", return_value=[]) as mock_commits, patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + # extract_commits should only be called for PRs with number field + assert mock_commits.call_count == 2 + + +class TestExtractCommits: + """Tests for extract_commits function.""" + + def test_fetch_commits_with_files(self, mock_session): + """Test fetching commits with files for a PR.""" + # Mock commits list response + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [ + {"sha": "abc123"}, + {"sha": "def456"}, + ] + + # Mock individual commit responses + commit_detail_1 = Mock() + commit_detail_1.status_code = 200 + commit_detail_1.json.return_value = { + "sha": "abc123", + "files": [{"filename": "file1.py", "additions": 10}], + } + + commit_detail_2 = Mock() + commit_detail_2.status_code = 200 + commit_detail_2.json.return_value = { + "sha": "def456", + "files": [{"filename": "file2.py", "deletions": 5}], + } + + mock_session.get.side_effect = [ + commits_response, + commit_detail_1, + commit_detail_2, + ] + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert len(result) == 2 + assert result[0]["sha"] == "abc123" + assert result[0]["files"][0]["filename"] == "file1.py" + assert result[1]["sha"] == "def456" + assert result[1]["files"][0]["filename"] == "file2.py" + + def test_multiple_files_per_commit(self, mock_session): + """Test handling multiple files in a single commit.""" + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [{"sha": "abc123"}] + + commit_detail = Mock() + commit_detail.status_code = 200 + commit_detail.json.return_value = { + "sha": "abc123", + "files": [ + {"filename": "file1.py", "additions": 10}, + {"filename": "file2.py", "additions": 20}, + {"filename": "file3.py", "deletions": 5}, + ], + } + + mock_session.get.side_effect = [commits_response, commit_detail] + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert len(result) == 1 + assert len(result[0]["files"]) == 3 + + @patch("main.sleep_for_rate_limit") + def test_rate_limit_on_commits_list(self, mock_sleep, mock_session): + """Test rate limit handling when fetching commits list.""" + # Rate limit response + rate_limit_response = Mock() + rate_limit_response.status_code = 403 + rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} + + # Success response + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = [] + + mock_session.get.side_effect = [rate_limit_response, success_response] + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + mock_sleep.assert_called_once() + assert result == [] + + def test_api_error_on_commits_list(self, mock_session): + 
"""Test API error handling when fetching commits list.""" + error_response = Mock() + error_response.status_code = 500 + error_response.text = "Internal Server Error" + + mock_session.get.return_value = error_response + + with pytest.raises(SystemExit) as exc_info: + main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert "GitHub API error 500" in str(exc_info.value) + + def test_api_error_on_individual_commit(self, mock_session): + """Test API error when fetching individual commit details.""" + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [{"sha": "abc123"}] + + commit_error = Mock() + commit_error.status_code = 404 + commit_error.text = "Commit not found" + + mock_session.get.side_effect = [commits_response, commit_error] + + with pytest.raises(SystemExit) as exc_info: + main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert "GitHub API error 404" in str(exc_info.value) + + def test_commit_without_sha_field(self, mock_session): + """Test handling commits without sha field.""" + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [ + {"sha": "abc123"}, + {}, # Missing sha field + ] + + commit_detail_1 = Mock() + commit_detail_1.status_code = 200 + commit_detail_1.json.return_value = {"sha": "abc123", "files": []} + + commit_detail_2 = Mock() + commit_detail_2.status_code = 200 + commit_detail_2.json.return_value = {"files": []} + + mock_session.get.side_effect = [commits_response, commit_detail_1, commit_detail_2] + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + # Should handle the commit without sha gracefully + assert len(result) == 2 + + def test_custom_github_api_url(self, mock_session): + """Test using custom GitHub API URL for commits.""" + custom_url = "https://mock-github.example.com" + + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [] + + mock_session.get.return_value = commits_response + + main.extract_commits( + mock_session, "mozilla/firefox", 123, github_api_url=custom_url + ) + + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] + + def test_empty_commits_list(self, mock_session): + """Test handling PR with no commits.""" + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [] + + mock_session.get.return_value = commits_response + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert result == [] + + +class TestExtractReviewers: + """Tests for extract_reviewers function.""" + + def test_fetch_reviewers(self, mock_session): + """Test fetching reviewers for a PR.""" + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [ + { + "id": 789, + "user": {"login": "reviewer1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + }, + { + "id": 790, + "user": {"login": "reviewer2"}, + "state": "CHANGES_REQUESTED", + "submitted_at": "2024-01-01T16:00:00Z", + }, + ] + + mock_session.get.return_value = reviewers_response + + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + assert len(result) == 2 + assert result[0]["state"] == "APPROVED" + assert result[1]["state"] == "CHANGES_REQUESTED" + + def test_multiple_review_states(self, mock_session): + """Test handling multiple different review states.""" + reviewers_response = Mock() + reviewers_response.status_code = 200 + 
reviewers_response.json.return_value = [ + {"id": 1, "state": "APPROVED", "user": {"login": "user1"}}, + {"id": 2, "state": "CHANGES_REQUESTED", "user": {"login": "user2"}}, + {"id": 3, "state": "COMMENTED", "user": {"login": "user3"}}, + {"id": 4, "state": "DISMISSED", "user": {"login": "user4"}}, + ] + + mock_session.get.return_value = reviewers_response + + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + assert len(result) == 4 + states = [r["state"] for r in result] + assert "APPROVED" in states + assert "CHANGES_REQUESTED" in states + assert "COMMENTED" in states + + def test_empty_reviewers_list(self, mock_session): + """Test handling PR with no reviewers.""" + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [] + + mock_session.get.return_value = reviewers_response + + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + assert result == [] + + @patch("main.sleep_for_rate_limit") + def test_rate_limit_handling(self, mock_sleep, mock_session): + """Test rate limit handling when fetching reviewers.""" + rate_limit_response = Mock() + rate_limit_response.status_code = 403 + rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} + + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = [] + + mock_session.get.side_effect = [rate_limit_response, success_response] + + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + mock_sleep.assert_called_once() + assert result == [] + + def test_api_error(self, mock_session): + """Test API error handling when fetching reviewers.""" + error_response = Mock() + error_response.status_code = 500 + error_response.text = "Internal Server Error" + + mock_session.get.return_value = error_response + + with pytest.raises(SystemExit) as exc_info: + main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + assert "GitHub API error 500" in str(exc_info.value) + + def test_custom_github_api_url(self, mock_session): + """Test using custom GitHub API URL for reviewers.""" + custom_url = "https://mock-github.example.com" + + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [] + + mock_session.get.return_value = reviewers_response + + main.extract_reviewers( + mock_session, "mozilla/firefox", 123, github_api_url=custom_url + ) + + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] + + +class TestExtractComments: + """Tests for extract_comments function.""" + + def test_fetch_comments(self, mock_session): + """Test fetching comments for a PR.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [ + { + "id": 456, + "user": {"login": "commenter1"}, + "body": "This looks good", + "created_at": "2024-01-01T14:00:00Z", + }, + { + "id": 457, + "user": {"login": "commenter2"}, + "body": "I have concerns", + "created_at": "2024-01-01T15:00:00Z", + }, + ] + + mock_session.get.return_value = comments_response + + result = main.extract_comments(mock_session, "mozilla/firefox", 123) + + assert len(result) == 2 + assert result[0]["id"] == 456 + assert result[1]["id"] == 457 + + def test_uses_issues_endpoint(self, mock_session): + """Test that comments use /issues endpoint not /pulls.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] + + mock_session.get.return_value = comments_response + + 
main.extract_comments(mock_session, "mozilla/firefox", 123) + + call_args = mock_session.get.call_args + url = call_args[0][0] + assert "/issues/123/comments" in url + assert "/pulls/123/comments" not in url + + def test_multiple_comments(self, mock_session): + """Test handling multiple comments.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [ + {"id": i, "user": {"login": f"user{i}"}, "body": f"Comment {i}"} + for i in range(1, 11) + ] + + mock_session.get.return_value = comments_response + + result = main.extract_comments(mock_session, "mozilla/firefox", 123) + + assert len(result) == 10 + + def test_empty_comments_list(self, mock_session): + """Test handling PR with no comments.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] + + mock_session.get.return_value = comments_response + + result = main.extract_comments(mock_session, "mozilla/firefox", 123) + + assert result == [] + + @patch("main.sleep_for_rate_limit") + def test_rate_limit_handling(self, mock_sleep, mock_session): + """Test rate limit handling when fetching comments.""" + rate_limit_response = Mock() + rate_limit_response.status_code = 403 + rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} + + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = [] + + mock_session.get.side_effect = [rate_limit_response, success_response] + + result = main.extract_comments(mock_session, "mozilla/firefox", 123) + + mock_sleep.assert_called_once() + assert result == [] + + def test_api_error(self, mock_session): + """Test API error handling when fetching comments.""" + error_response = Mock() + error_response.status_code = 404 + error_response.text = "Not Found" + + mock_session.get.return_value = error_response + + with pytest.raises(SystemExit) as exc_info: + main.extract_comments(mock_session, "mozilla/firefox", 123) + + assert "GitHub API error 404" in str(exc_info.value) + + def test_custom_github_api_url(self, mock_session): + """Test using custom GitHub API URL for comments.""" + custom_url = "https://mock-github.example.com" + + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] + + mock_session.get.return_value = comments_response + + main.extract_comments( + mock_session, "mozilla/firefox", 123, github_api_url=custom_url + ) + + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] + + +class TestTransformData: + """Tests for transform_data function.""" + + def test_basic_pr_transformation(self): + """Test basic pull request field mapping.""" + raw_data = [ + { + "number": 123, + "title": "Fix login bug", + "state": "closed", + "created_at": "2024-01-01T10:00:00Z", + "updated_at": "2024-01-02T10:00:00Z", + "merged_at": "2024-01-02T12:00:00Z", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["pull_requests"]) == 1 + pr = result["pull_requests"][0] + assert pr["pull_request_id"] == 123 + assert pr["current_status"] == "closed" + assert pr["date_created"] == "2024-01-01T10:00:00Z" + assert pr["date_modified"] == "2024-01-02T10:00:00Z" + assert pr["date_landed"] == "2024-01-02T12:00:00Z" + assert pr["target_repository"] == "mozilla/firefox" + + def test_bug_id_extraction_basic(self): + """Test bug ID extraction from PR title.""" + test_cases = [ + 
("Bug 1234567 - Fix issue", 1234567), + ("bug 1234567: Update code", 1234567), + ("Fix for bug 7654321", 7654321), + ("b=9876543 - Change behavior", 9876543), + ] + + for title, expected_bug_id in test_cases: + raw_data = [ + { + "number": 1, + "title": title, + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] == expected_bug_id + + def test_bug_id_extraction_with_hash(self): + """Test bug ID extraction with # symbol.""" + raw_data = [ + { + "number": 1, + "title": "Bug #1234567 - Fix issue", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] == 1234567 + + def test_bug_id_filter_large_numbers(self): + """Test that bug IDs >= 100000000 are filtered out.""" + raw_data = [ + { + "number": 1, + "title": "Bug 999999999 - Invalid bug ID", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] is None + + def test_bug_id_no_match(self): + """Test PR title with no bug ID.""" + raw_data = [ + { + "number": 1, + "title": "Update documentation", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] is None + + def test_labels_extraction(self): + """Test labels array extraction.""" + raw_data = [ + { + "number": 1, + "title": "PR with labels", + "state": "open", + "labels": [ + {"name": "bug"}, + {"name": "priority-high"}, + {"name": "needs-review"}, + ], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + labels = result["pull_requests"][0]["labels"] + assert len(labels) == 3 + assert "bug" in labels + assert "priority-high" in labels + assert "needs-review" in labels + + def test_labels_empty_list(self): + """Test handling empty labels list.""" + raw_data = [ + { + "number": 1, + "title": "PR without labels", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["labels"] == [] + + def test_commit_transformation(self): + """Test commit fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with commits", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc123", + "commit": { + "author": { + "name": "Test Author", + "date": "2024-01-01T12:00:00Z", + } + }, + "files": [ + { + "filename": "src/main.py", + "additions": 10, + "deletions": 5, + } + ], + } + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["commits"]) == 1 + commit = result["commits"][0] + assert commit["pull_request_id"] == 123 + assert commit["target_repository"] == "mozilla/firefox" + assert commit["commit_sha"] == "abc123" + assert commit["date_created"] == "2024-01-01T12:00:00Z" + assert commit["author_username"] == "Test Author" + assert commit["filename"] == "src/main.py" + assert commit["lines_added"] == 10 + assert 
commit["lines_removed"] == 5 + + def test_commit_file_flattening(self): + """Test that each file becomes a separate row.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple files", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc123", + "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, + "files": [ + {"filename": "file1.py", "additions": 10, "deletions": 5}, + {"filename": "file2.py", "additions": 20, "deletions": 2}, + {"filename": "file3.py", "additions": 5, "deletions": 15}, + ], + } + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Should have 3 rows in commits table (one per file) + assert len(result["commits"]) == 3 + filenames = [c["filename"] for c in result["commits"]] + assert "file1.py" in filenames + assert "file2.py" in filenames + assert "file3.py" in filenames + + def test_multiple_commits_with_files(self): + """Test multiple commits with multiple files per PR.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple commits", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "commit1", + "commit": {"author": {"name": "Author1", "date": "2024-01-01"}}, + "files": [ + {"filename": "file1.py", "additions": 10, "deletions": 0} + ], + }, + { + "sha": "commit2", + "commit": {"author": {"name": "Author2", "date": "2024-01-02"}}, + "files": [ + {"filename": "file2.py", "additions": 5, "deletions": 2}, + {"filename": "file3.py", "additions": 8, "deletions": 3}, + ], + }, + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Should have 3 rows total (1 file from commit1, 2 files from commit2) + assert len(result["commits"]) == 3 + assert result["commits"][0]["commit_sha"] == "commit1" + assert result["commits"][1]["commit_sha"] == "commit2" + assert result["commits"][2]["commit_sha"] == "commit2" + + def test_reviewer_transformation(self): + """Test reviewer fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with reviewers", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 789, + "user": {"login": "reviewer1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + } + ], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["reviewers"]) == 1 + reviewer = result["reviewers"][0] + assert reviewer["pull_request_id"] == 123 + assert reviewer["target_repository"] == "mozilla/firefox" + assert reviewer["reviewer_username"] == "reviewer1" + assert reviewer["status"] == "APPROVED" + assert reviewer["date_reviewed"] == "2024-01-01T15:00:00Z" + + def test_multiple_review_states(self): + """Test handling multiple review states.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple reviews", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + }, + { + "id": 2, + "user": {"login": "user2"}, + "state": "CHANGES_REQUESTED", + "submitted_at": "2024-01-01T16:00:00Z", + }, + { + "id": 3, + "user": {"login": "user3"}, + "state": "COMMENTED", + "submitted_at": "2024-01-01T17:00:00Z", + }, + ], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["reviewers"]) == 3 + states = [r["status"] for r in result["reviewers"]] + assert "APPROVED" in 
states + assert "CHANGES_REQUESTED" in states + assert "COMMENTED" in states + + def test_date_approved_from_earliest_approval(self): + """Test that date_approved is set to earliest APPROVED review.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple approvals", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "state": "APPROVED", + "submitted_at": "2024-01-02T15:00:00Z", + }, + { + "id": 2, + "user": {"login": "user2"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T14:00:00Z", # Earliest + }, + { + "id": 3, + "user": {"login": "user3"}, + "state": "APPROVED", + "submitted_at": "2024-01-03T16:00:00Z", + }, + ], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + pr = result["pull_requests"][0] + assert pr["date_approved"] == "2024-01-01T14:00:00Z" + + def test_comment_transformation(self): + """Test comment fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with comments", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 456, + "user": {"login": "commenter1"}, + "body": "This looks great!", + "created_at": "2024-01-01T14:00:00Z", + "pull_request_review_id": None, + } + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["comments"]) == 1 + comment = result["comments"][0] + assert comment["pull_request_id"] == 123 + assert comment["target_repository"] == "mozilla/firefox" + assert comment["comment_id"] == 456 + assert comment["author_username"] == "commenter1" + assert comment["date_created"] == "2024-01-01T14:00:00Z" + assert comment["character_count"] == 17 + + def test_comment_character_count(self): + """Test character count calculation for comments.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "body": "Short", + "created_at": "2024-01-01", + }, + { + "id": 2, + "user": {"login": "user2"}, + "body": "This is a much longer comment with more text", + "created_at": "2024-01-01", + }, + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert result["comments"][0]["character_count"] == 5 + assert result["comments"][1]["character_count"] == 44 + + def test_comment_status_from_review(self): + """Test that comment status is mapped from review_id_statuses.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 789, + "user": {"login": "reviewer"}, + "state": "APPROVED", + "submitted_at": "2024-01-01", + } + ], + "comment_data": [ + { + "id": 456, + "user": {"login": "commenter"}, + "body": "LGTM", + "created_at": "2024-01-01", + "pull_request_review_id": 789, + } + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Comment should have status from the review + assert result["comments"][0]["status"] == "APPROVED" + + def test_comment_empty_body(self): + """Test handling comments with empty or None body.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "body": None, + "created_at": "2024-01-01", + }, + { + "id": 2, + "user": {"login": "user2"}, + "body": "", + "created_at": "2024-01-01", + 
}, + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert result["comments"][0]["character_count"] == 0 + assert result["comments"][1]["character_count"] == 0 + + def test_empty_raw_data(self): + """Test handling empty input list.""" + result = main.transform_data([], "mozilla/firefox") + + assert result["pull_requests"] == [] + assert result["commits"] == [] + assert result["reviewers"] == [] + assert result["comments"] == [] + + def test_pr_without_commits_reviewers_comments(self): + """Test PR with no commits, reviewers, or comments.""" + raw_data = [ + { + "number": 123, + "title": "Minimal PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["pull_requests"]) == 1 + assert len(result["commits"]) == 0 + assert len(result["reviewers"]) == 0 + assert len(result["comments"]) == 0 + + def test_return_structure(self): + """Test that transform_data returns dict with 4 keys.""" + raw_data = [ + { + "number": 1, + "title": "Test", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert isinstance(result, dict) + assert "pull_requests" in result + assert "commits" in result + assert "reviewers" in result + assert "comments" in result + + def test_all_tables_have_target_repository(self): + """Test that all tables include target_repository field.""" + raw_data = [ + { + "number": 123, + "title": "Test PR", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc", + "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, + "files": [{"filename": "test.py", "additions": 1, "deletions": 0}], + } + ], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "reviewer"}, + "state": "APPROVED", + "submitted_at": "2024-01-01", + } + ], + "comment_data": [ + { + "id": 2, + "user": {"login": "commenter"}, + "body": "Test", + "created_at": "2024-01-01", + } + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert result["pull_requests"][0]["target_repository"] == "mozilla/firefox" + assert result["commits"][0]["target_repository"] == "mozilla/firefox" + assert result["reviewers"][0]["target_repository"] == "mozilla/firefox" + assert result["comments"][0]["target_repository"] == "mozilla/firefox" + + +class TestLoadData: + """Tests for load_data function.""" + + @patch("main.datetime") + def test_load_all_tables(self, mock_datetime, mock_bigquery_client): + """Test loading all 4 tables to BigQuery.""" + mock_datetime.now.return_value.strftime.return_value = "2024-01-15" + + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [{"commit_sha": "abc"}], + "reviewers": [{"reviewer_username": "user1"}], + "comments": [{"comment_id": 123}], + } + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + # Should call insert_rows_json 4 times (once per table) + assert mock_bigquery_client.insert_rows_json.call_count == 4 + + @patch("main.datetime") + def test_adds_snapshot_date(self, mock_datetime, mock_bigquery_client): + """Test that snapshot_date is added to all rows.""" + mock_datetime.now.return_value.strftime.return_value = "2024-01-15" + + transformed_data = { + "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], + "commits": [], + "reviewers": [], + "comments": [], + } + + 
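        # load_data is expected to stamp the mocked current date onto every
        # row before insertion; the assertion below checks each row.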
main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + call_args = mock_bigquery_client.insert_rows_json.call_args + rows = call_args[0][1] + assert all(row["snapshot_date"] == "2024-01-15" for row in rows) + + def test_constructs_correct_table_ref(self, mock_bigquery_client): + """Test that table_ref is constructed correctly.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } + + main.load_data(mock_bigquery_client, "my_dataset", transformed_data) + + call_args = mock_bigquery_client.insert_rows_json.call_args + table_ref = call_args[0][0] + assert table_ref == "test-project.my_dataset.pull_requests" + + def test_empty_transformed_data_skipped(self, mock_bigquery_client): + """Test that empty transformed_data dict is skipped.""" + transformed_data = {} + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + mock_bigquery_client.insert_rows_json.assert_not_called() + + def test_skips_empty_tables_individually(self, mock_bigquery_client): + """Test that empty tables are skipped individually.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], # Empty, should be skipped + "reviewers": [], # Empty, should be skipped + "comments": [{"comment_id": 456}], + } + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + # Should only call insert_rows_json twice (for PRs and comments) + assert mock_bigquery_client.insert_rows_json.call_count == 2 + + def test_only_pull_requests_table(self, mock_bigquery_client): + """Test loading only pull_requests table.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + assert mock_bigquery_client.insert_rows_json.call_count == 1 + + def test_raises_exception_on_insert_errors(self, mock_bigquery_client): + """Test that Exception is raised on BigQuery insert errors.""" + mock_bigquery_client.insert_rows_json.return_value = [ + {"index": 0, "errors": ["Insert failed"]} + ] + + transformed_data = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + + with pytest.raises(Exception) as exc_info: + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + assert "BigQuery insert errors" in str(exc_info.value) + + def test_verifies_client_insert_called_correctly(self, mock_bigquery_client): + """Test that client.insert_rows_json is called with correct arguments.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], + "commits": [], + "reviewers": [], + "comments": [], + } + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + call_args = mock_bigquery_client.insert_rows_json.call_args + table_ref, rows = call_args[0] + + assert "pull_requests" in table_ref + assert len(rows) == 2 + + +class TestMain: + """Tests for main function.""" + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_requires_github_repos( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that GITHUB_REPOS is required.""" + with patch.dict( + os.environ, + {"BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test"}, + clear=True, + ): + with pytest.raises(SystemExit) as exc_info: + main.main() + + assert "GITHUB_REPOS" in str(exc_info.value) + + @patch("main.setup_logging") + 
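    # Stacked @patch decorators apply bottom-up, so the innermost patch
    # (requests.Session) is passed to the test as the first mock argument.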
@patch("main.bigquery.Client") + @patch("requests.Session") + def test_requires_bigquery_project( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that BIGQUERY_PROJECT is required.""" + with patch.dict( + os.environ, {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_DATASET": "test"}, clear=True + ): + with pytest.raises(SystemExit) as exc_info: + main.main() + + assert "BIGQUERY_PROJECT" in str(exc_info.value) + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_requires_bigquery_dataset( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that BIGQUERY_DATASET is required.""" + with patch.dict( + os.environ, {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test"}, clear=True + ): + with pytest.raises(SystemExit) as exc_info: + main.main() + + assert "BIGQUERY_DATASET" in str(exc_info.value) + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_github_token_optional_with_warning( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that GITHUB_TOKEN is optional but warns if missing.""" + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])): + # Should not raise, but should log warning + result = main.main() + assert result == 0 + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_splits_github_repos_by_comma( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that GITHUB_REPOS is split by comma.""" + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])) as mock_extract: + main.main() + + # Should be called twice (once per repo) + assert mock_extract.call_count == 2 + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_honors_github_api_url( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that GITHUB_API_URL is honored.""" + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + "GITHUB_API_URL": "https://custom-api.example.com", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])) as mock_extract: + main.main() + + call_kwargs = mock_extract.call_args[1] + assert call_kwargs["github_api_url"] == "https://custom-api.example.com" + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_honors_bigquery_emulator_host( + self, mock_session_class, mock_bq_client_class, mock_setup_logging + ): + """Test that BIGQUERY_EMULATOR_HOST is honored.""" + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + "BIGQUERY_EMULATOR_HOST": "http://localhost:9050", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])): + main.main() + + # Verify BigQuery client was created with emulator settings + mock_bq_client_class.assert_called_once() + + @patch("main.setup_logging") + 
@patch("main.bigquery.Client") + @patch("requests.Session") + def test_creates_session_with_headers( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that session is created with Accept and User-Agent headers.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])): + main.main() + + # Verify session headers were set + assert mock_session.headers.update.called + call_args = mock_session.headers.update.call_args[0][0] + assert "Accept" in call_args + assert "User-Agent" in call_args + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_sets_authorization_header_with_token( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that Authorization header is set when token provided.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "test-token-123", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])): + main.main() + + # Verify Authorization header was set + assert mock_session.headers.__setitem__.called + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + @patch("main.extract_pull_requests") + @patch("main.transform_data") + @patch("main.load_data") + def test_single_repo_successful_etl( + self, + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, + ): + """Test successful ETL for single repository.""" + mock_extract.return_value = iter([[{"number": 1}]]) + mock_transform.return_value = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + mock_extract.assert_called_once() + mock_transform.assert_called_once() + mock_load.assert_called_once() + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + @patch("main.extract_pull_requests") + @patch("main.transform_data") + @patch("main.load_data") + def test_multiple_repos_processing( + self, + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, + ): + """Test processing multiple repositories.""" + mock_extract.return_value = iter([[{"number": 1}]]) + mock_transform.return_value = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev,mozilla/addons", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + # Should process 3 repositories + assert mock_extract.call_count == 3 + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + @patch("main.extract_pull_requests") + @patch("main.transform_data") + @patch("main.load_data") + def 
test_processes_chunks_iteratively( + self, + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, + ): + """Test that chunks are processed iteratively from generator.""" + # Return 3 chunks + mock_extract.return_value = iter([ + [{"number": 1}], + [{"number": 2}], + [{"number": 3}], + ]) + mock_transform.return_value = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + # Transform and load should be called 3 times (once per chunk) + assert mock_transform.call_count == 3 + assert mock_load.call_count == 3 + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_returns_zero_on_success( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that main returns 0 on success.""" + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])): + result = main.main() + + assert result == 0 + + +@pytest.mark.integration +class TestIntegration: + """Integration tests that test multiple components together.""" + + @patch("main.setup_logging") + @patch("main.load_data") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_end_to_end_with_mocked_github( + self, mock_session_class, mock_bq_client, mock_load, mock_setup_logging + ): + """Test end-to-end flow with mocked GitHub responses.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + # Mock PR response + pr_response = Mock() + pr_response.status_code = 200 + pr_response.json.return_value = [ + {"number": 1, "title": "Bug 1234567 - Test PR", "state": "open"} + ] + pr_response.links = {} + + # Mock commits, reviewers, comments responses + empty_response = Mock() + empty_response.status_code = 200 + empty_response.json.return_value = [] + + mock_session.get.side_effect = [ + pr_response, + empty_response, + empty_response, + empty_response, + ] + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + mock_load.assert_called_once() + + # Verify transformed data structure + call_args = mock_load.call_args[0] + transformed_data = call_args[2] + assert "pull_requests" in transformed_data + assert len(transformed_data["pull_requests"]) == 1 + + @patch("main.setup_logging") + @patch("main.load_data") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_bug_id_extraction_through_pipeline( + self, mock_session_class, mock_bq_client, mock_load, mock_setup_logging + ): + """Test bug ID extraction through full pipeline.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + pr_response = Mock() + pr_response.status_code = 200 + pr_response.json.return_value = [ + {"number": 1, "title": "Bug 9876543 - Fix critical issue", "state": "closed"} + ] + pr_response.links = {} + + empty_response = Mock() + empty_response.status_code = 200 + empty_response.json.return_value = [] + + mock_session.get.side_effect = [ + 
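            # Ordering mirrors the extraction flow: one page of PRs, then the
            # commits, reviewers, and comments requests for that single PR.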
pr_response, + empty_response, + empty_response, + empty_response, + ] + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + main.main() + + call_args = mock_load.call_args[0] + transformed_data = call_args[2] + pr = transformed_data["pull_requests"][0] + assert pr["bug_id"] == 9876543 + + @patch("main.setup_logging") + @patch("main.load_data") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_pagination_through_full_flow( + self, mock_session_class, mock_bq_client, mock_load, mock_setup_logging + ): + """Test pagination through full ETL flow.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + # First page + pr_response_1 = Mock() + pr_response_1.status_code = 200 + pr_response_1.json.return_value = [ + {"number": 1, "title": "PR 1", "state": "open"} + ] + pr_response_1.links = { + "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} + } + + # Second page + pr_response_2 = Mock() + pr_response_2.status_code = 200 + pr_response_2.json.return_value = [ + {"number": 2, "title": "PR 2", "state": "open"} + ] + pr_response_2.links = {} + + empty_response = Mock() + empty_response.status_code = 200 + empty_response.json.return_value = [] + + mock_session.get.side_effect = [ + pr_response_1, + empty_response, + empty_response, + empty_response, + pr_response_2, + empty_response, + empty_response, + empty_response, + ] + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + main.main() + + # Should be called twice (once per chunk/page) + assert mock_load.call_count == 2 From 89d1edb4aaaf6b218ded786ea1edfe9468c1383c Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 18:47:57 -0500 Subject: [PATCH 02/12] Copilot suggested fixes --- TESTING.md | 3 +- main.py | 1 + pytest.ini | 4 +- test_main.py | 374 +++++++++++++++++++++++++++++---------------------- 4 files changed, 214 insertions(+), 168 deletions(-) diff --git a/TESTING.md b/TESTING.md index c0bb5dd..104d401 100644 --- a/TESTING.md +++ b/TESTING.md @@ -228,7 +228,7 @@ mypy main.py --no-strict-optional --ignore-missing-imports ### GitHub Actions Workflow -The `.github/workflows/tests.yml` workflow runs on every push and pull request: +The `.github/workflows/tests.yml` workflow runs on every pull request: **Lint Job:** 1. Runs black (format check) @@ -243,7 +243,6 @@ The `.github/workflows/tests.yml` workflow runs on every push and pull request: ### Workflow Triggers -- Push to `main` or `unit-tests` branch - Pull requests to `main` branch ### Viewing Results diff --git a/main.py b/main.py index db80d03..645f167 100755 --- a/main.py +++ b/main.py @@ -29,6 +29,7 @@ def setup_logging() -> None: level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", handlers=[logging.StreamHandler(sys.stdout)], + force=True, ) diff --git a/pytest.ini b/pytest.ini index d4a601a..33ef84b 100644 --- a/pytest.ini +++ b/pytest.ini @@ -15,9 +15,7 @@ addopts = --cov-report=term-missing --cov-report=html --cov-branch - -# Minimum coverage threshold (can adjust as needed) ---cov-fail-under=80 + --cov-fail-under=80 # Test paths testpaths = . 
diff --git a/test_main.py b/test_main.py index 7165677..400c6d3 100644 --- a/test_main.py +++ b/test_main.py @@ -8,10 +8,9 @@ import logging import os -import sys import time -from datetime import datetime, timezone -from unittest.mock import Mock, MagicMock, patch, call +from datetime import datetime +from unittest.mock import Mock, MagicMock, patch import pytest import requests from google.cloud import bigquery @@ -143,9 +142,7 @@ class TestSleepForRateLimit: @patch("time.time") @patch("time.sleep") - def test_sleep_for_rate_limit_when_remaining_is_zero( - self, mock_sleep, mock_time - ): + def test_sleep_for_rate_limit_when_remaining_is_zero(self, mock_sleep, mock_time): """Test that sleep_for_rate_limit sleeps until reset time.""" mock_time.return_value = 1000 @@ -220,12 +217,12 @@ def test_extract_single_page(self, mock_session): mock_session.get.return_value = mock_response # Mock the extract functions - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) assert len(result) == 1 assert len(result[0]) == 2 @@ -242,9 +239,7 @@ def test_extract_multiple_pages(self, mock_session): {"number": 2, "title": "PR 2"}, ] mock_response_1.links = { - "next": { - "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2" - } + "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} } # Second page response @@ -255,12 +250,12 @@ def test_extract_multiple_pages(self, mock_session): mock_session.get.side_effect = [mock_response_1, mock_response_2] - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) assert len(result) == 2 assert len(result[0]) == 2 @@ -279,16 +274,14 @@ def test_enriches_prs_with_commit_data(self, mock_session): mock_commits = [{"sha": "abc123"}] - with patch( - "main.extract_commits", return_value=mock_commits - ) as mock_extract_commits, patch( - "main.extract_reviewers", return_value=[] - ), patch( - "main.extract_comments", return_value=[] + with ( + patch( + "main.extract_commits", return_value=mock_commits + ) as mock_extract_commits, + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), ): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) assert result[0][0]["commit_data"] == mock_commits mock_extract_commits.assert_called_once() @@ -304,14 +297,14 @@ def test_enriches_prs_with_reviewer_data(self, mock_session): mock_reviewers = [{"id": 789, "state": "APPROVED"}] - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=mock_reviewers - ) as mock_extract_reviewers, patch( - 
"main.extract_comments", return_value=[] + with ( + patch("main.extract_commits", return_value=[]), + patch( + "main.extract_reviewers", return_value=mock_reviewers + ) as mock_extract_reviewers, + patch("main.extract_comments", return_value=[]), ): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) assert result[0][0]["reviewer_data"] == mock_reviewers mock_extract_reviewers.assert_called_once() @@ -327,14 +320,14 @@ def test_enriches_prs_with_comment_data(self, mock_session): mock_comments = [{"id": 456, "body": "Great work!"}] - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch( - "main.extract_comments", return_value=mock_comments - ) as mock_extract_comments: - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch( + "main.extract_comments", return_value=mock_comments + ) as mock_extract_comments, + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) assert result[0][0]["comment_data"] == mock_comments mock_extract_comments.assert_called_once() @@ -350,9 +343,7 @@ def test_handles_rate_limit(self, mock_sleep, mock_session): # Successful response after rate limit mock_response_success = Mock() mock_response_success.status_code = 200 - mock_response_success.json.return_value = [ - {"number": 1, "title": "PR 1"} - ] + mock_response_success.json.return_value = [{"number": 1, "title": "PR 1"}] mock_response_success.links = {} mock_session.get.side_effect = [ @@ -360,12 +351,12 @@ def test_handles_rate_limit(self, mock_sleep, mock_session): mock_response_success, ] - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) mock_sleep.assert_called_once_with(mock_response_rate_limit) assert len(result) == 1 @@ -403,9 +394,7 @@ def test_stops_on_empty_batch(self, mock_session): mock_response_1.status_code = 200 mock_response_1.json.return_value = [{"number": 1}] mock_response_1.links = { - "next": { - "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2" - } + "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} } # Second page empty @@ -416,12 +405,12 @@ def test_stops_on_empty_batch(self, mock_session): mock_session.get.side_effect = [mock_response_1, mock_response_2] - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) # Should only have 1 chunk from first page assert len(result) == 1 @@ -440,12 +429,12 @@ def test_invalid_page_number_handling(self, mock_session): 
mock_session.get.return_value = mock_response_1 - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) # Should stop pagination on invalid page number assert len(result) == 1 @@ -461,9 +450,11 @@ def test_custom_github_api_url(self, mock_session): mock_session.get.return_value = mock_response - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): list( main.extract_pull_requests( mock_session, "mozilla/firefox", github_api_url=custom_url @@ -487,12 +478,12 @@ def test_skips_prs_without_number_field(self, mock_session): mock_session.get.return_value = mock_response - with patch("main.extract_commits", return_value=[]) as mock_commits, patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]) as mock_commits, + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + list(main.extract_pull_requests(mock_session, "mozilla/firefox")) # extract_commits should only be called for PRs with number field assert mock_commits.call_count == 2 @@ -631,7 +622,11 @@ def test_commit_without_sha_field(self, mock_session): commit_detail_2.status_code = 200 commit_detail_2.json.return_value = {"files": []} - mock_session.get.side_effect = [commits_response, commit_detail_1, commit_detail_2] + mock_session.get.side_effect = [ + commits_response, + commit_detail_1, + commit_detail_2, + ] result = main.extract_commits(mock_session, "mozilla/firefox", 123) @@ -1470,7 +1465,9 @@ def test_all_tables_have_target_repository(self): { "sha": "abc", "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, - "files": [{"filename": "test.py", "additions": 1, "deletions": 0}], + "files": [ + {"filename": "test.py", "additions": 1, "deletions": 0} + ], } ], "reviewer_data": [ @@ -1594,7 +1591,12 @@ def test_raises_exception_on_insert_errors(self, mock_bigquery_client): {"index": 0, "errors": ["Insert failed"]} ] - transformed_data = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } with pytest.raises(Exception) as exc_info: main.load_data(mock_bigquery_client, "test_dataset", transformed_data) @@ -1647,7 +1649,9 @@ def test_requires_bigquery_project( ): """Test that BIGQUERY_PROJECT is required.""" with patch.dict( - os.environ, {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_DATASET": "test"}, clear=True + os.environ, + {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_DATASET": "test"}, + clear=True, ): with pytest.raises(SystemExit) as exc_info: main.main() @@ -1662,7 +1666,9 @@ def test_requires_bigquery_dataset( ): """Test that 
BIGQUERY_DATASET is required.""" with patch.dict( - os.environ, {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test"}, clear=True + os.environ, + {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test"}, + clear=True, ): with pytest.raises(SystemExit) as exc_info: main.main() @@ -1676,15 +1682,18 @@ def test_github_token_optional_with_warning( self, mock_session_class, mock_bq_client, mock_setup_logging ): """Test that GITHUB_TOKEN is optional but warns if missing.""" - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])): + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): # Should not raise, but should log warning result = main.main() assert result == 0 @@ -1696,16 +1705,19 @@ def test_splits_github_repos_by_comma( self, mock_session_class, mock_bq_client, mock_setup_logging ): """Test that GITHUB_REPOS is split by comma.""" - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])) as mock_extract: + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, + ): main.main() # Should be called twice (once per repo) @@ -1718,17 +1730,20 @@ def test_honors_github_api_url( self, mock_session_class, mock_bq_client, mock_setup_logging ): """Test that GITHUB_API_URL is honored.""" - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - "GITHUB_API_URL": "https://custom-api.example.com", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])) as mock_extract: + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + "GITHUB_API_URL": "https://custom-api.example.com", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, + ): main.main() call_kwargs = mock_extract.call_args[1] @@ -1741,17 +1756,20 @@ def test_honors_bigquery_emulator_host( self, mock_session_class, mock_bq_client_class, mock_setup_logging ): """Test that BIGQUERY_EMULATOR_HOST is honored.""" - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - "BIGQUERY_EMULATOR_HOST": "http://localhost:9050", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])): + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + "BIGQUERY_EMULATOR_HOST": "http://localhost:9050", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): main.main() # Verify BigQuery client was created with emulator settings @@ -1767,16 
+1785,19 @@ def test_creates_session_with_headers( mock_session = MagicMock() mock_session_class.return_value = mock_session - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])): + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): main.main() # Verify session headers were set @@ -1795,16 +1816,19 @@ def test_sets_authorization_header_with_token( mock_session = MagicMock() mock_session_class.return_value = mock_session - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "test-token-123", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])): + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "test-token-123", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): main.main() # Verify Authorization header was set @@ -1827,7 +1851,12 @@ def test_single_repo_successful_etl( ): """Test successful ETL for single repository.""" mock_extract.return_value = iter([[{"number": 1}]]) - mock_transform.return_value = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } with patch.dict( os.environ, @@ -1863,7 +1892,12 @@ def test_multiple_repos_processing( ): """Test processing multiple repositories.""" mock_extract.return_value = iter([[{"number": 1}]]) - mock_transform.return_value = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } with patch.dict( os.environ, @@ -1898,12 +1932,19 @@ def test_processes_chunks_iteratively( ): """Test that chunks are processed iteratively from generator.""" # Return 3 chunks - mock_extract.return_value = iter([ - [{"number": 1}], - [{"number": 2}], - [{"number": 3}], - ]) - mock_transform.return_value = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + mock_extract.return_value = iter( + [ + [{"number": 1}], + [{"number": 2}], + [{"number": 3}], + ] + ) + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } with patch.dict( os.environ, @@ -1929,16 +1970,19 @@ def test_returns_zero_on_success( self, mock_session_class, mock_bq_client, mock_setup_logging ): """Test that main returns 0 on success.""" - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])): + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), + 
patch("main.extract_pull_requests", return_value=iter([])), + ): result = main.main() assert result == 0 @@ -2014,7 +2058,11 @@ def test_bug_id_extraction_through_pipeline( pr_response = Mock() pr_response.status_code = 200 pr_response.json.return_value = [ - {"number": 1, "title": "Bug 9876543 - Fix critical issue", "state": "closed"} + { + "number": 1, + "title": "Bug 9876543 - Fix critical issue", + "state": "closed", + } ] pr_response.links = {} From 43b13f0dc7fce314fb642a17247c20765fa000fb Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 18:50:58 -0500 Subject: [PATCH 03/12] Fix integretion test --- .github/workflows/tests.yml | 104 ++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 58 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 87e2800..4e4f711 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,74 +2,62 @@ name: Tests and Linting on: pull_request: - branches: [ main ] + branches: [main] jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install black flake8 mypy isort - - - name: Run black - run: black --check main.py test_main.py - - - name: Run isort - run: isort --check-only main.py test_main.py - - - name: Run flake8 - run: flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503 - - - name: Run mypy - run: mypy main.py --no-strict-optional --ignore-missing-imports + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black flake8 mypy isort + - name: Run black + run: black --check main.py test_main.py + - name: Run isort + run: isort --check-only main.py test_main.py + - name: Run flake8 + run: flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503 + - name: Run mypy + run: mypy main.py --no-strict-optional --ignore-missing-imports test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: Run unit tests with coverage - run: | - pytest -m "not integration and not slow" --cov=main --cov-report=term-missing --cov-fail-under=80 - - - name: Run all tests - run: | - pytest --cov=main --cov-report=xml --cov-report=html - - - name: Upload coverage reports - uses: actions/upload-artifact@v4 - with: - name: coverage-reports - path: | - htmlcov/ - coverage.xml + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Run unit tests with coverage + run: | + pytest -m "not integration and not slow" --cov=main --cov-report=term-missing --cov-fail-under=80 + - name: Run all tests + run: | + pytest --cov=main --cov-report=xml --cov-report=html + - name: Upload coverage reports + uses: actions/upload-artifact@v4 + with: + name: coverage-reports + path: | + htmlcov/ + coverage.xml integration-test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - - name: Run integration test with docker-compose - run: | - docker-compose up --build --abort-on-container-exit 
--exit-code-from github-etl - - - name: Cleanup - if: always() - run: docker-compose down -v + - uses: actions/checkout@v4 + - name: Install docker-compose + run: sudo apt update && sudo apt install -y docker-compose + - name: Run integration test with docker-compose + run: docker-compose up --build --abort-on-container-exit --exit-code-from github-etl + - name: Cleanup + if: always() + run: docker-compose down -v From 60426816b2744c77600b798204e95c29bde4e416 Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 18:53:32 -0500 Subject: [PATCH 04/12] Black formatted --- main.py | 15 ++++++++------- test_main.py | 1 - 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/main.py b/main.py index 645f167..00c0d35 100755 --- a/main.py +++ b/main.py @@ -19,7 +19,6 @@ from google.api_core.client_options import ClientOptions from google.auth.credentials import AnonymousCredentials - BUG_RE = re.compile(r"\b(?:bug|b=)\s*#?(\d+)\b", re.I) @@ -325,9 +324,11 @@ def transform_data(raw_data: list[dict], repo: str) -> dict: "bug_id": bug_id, "date_landed": pr.get("merged_at"), "date_approved": None, # This will be filled later - "labels": [label.get("name") for label in pr.get("labels", [])] - if pr.get("labels") - else [], + "labels": ( + [label.get("name") for label in pr.get("labels", [])] + if pr.get("labels") + else [] + ), } # Extract and flatten commit data @@ -387,9 +388,9 @@ def transform_data(raw_data: list[dict], repo: str) -> dict: "date_created": comment.get("created_at"), "author_email": None, # TODO Placeholder for reviewer email extraction logic "author_username": comment.get("user", {}).get("login"), - "character_count": len(comment.get("body", "")) - if comment.get("body") - else 0, + "character_count": ( + len(comment.get("body", "")) if comment.get("body") else 0 + ), "status": None, # TODO } diff --git a/test_main.py b/test_main.py index 400c6d3..210029c 100644 --- a/test_main.py +++ b/test_main.py @@ -17,7 +17,6 @@ import main - # ============================================================================= # FIXTURES # ============================================================================= From 39435822fa9d3324f44a14fff418eab91a64f746 Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 18:55:44 -0500 Subject: [PATCH 05/12] Used isort to fix sorting order --- main.py | 5 +++-- test_main.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 00c0d35..d75abf9 100755 --- a/main.py +++ b/main.py @@ -9,15 +9,16 @@ import logging import os import re -import requests import sys import time from datetime import datetime, timezone from typing import Iterator, Optional from urllib.parse import parse_qs, urlparse -from google.cloud import bigquery + +import requests from google.api_core.client_options import ClientOptions from google.auth.credentials import AnonymousCredentials +from google.cloud import bigquery BUG_RE = re.compile(r"\b(?:bug|b=)\s*#?(\d+)\b", re.I) diff --git a/test_main.py b/test_main.py index 210029c..0850eae 100644 --- a/test_main.py +++ b/test_main.py @@ -10,7 +10,8 @@ import os import time from datetime import datetime -from unittest.mock import Mock, MagicMock, patch +from unittest.mock import MagicMock, Mock, patch + import pytest import requests from google.cloud import bigquery From 483f19b877bc315da78c49134dce76babb4c1f89 Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 20:59:03 -0500 Subject: [PATCH 06/12] Mypy test fixes --- main.py | 23 
++++++++++++++--------- test_main.py | 2 -- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/main.py b/main.py index d75abf9..f76a281 100755 --- a/main.py +++ b/main.py @@ -59,7 +59,7 @@ def extract_pull_requests( # Support custom API URL for mocking/testing api_base = github_api_url or "https://api.github.com" base_url = f"{api_base}/repos/{repo}/pulls" - params = { + params: dict = { "state": "all", "per_page": chunk_size, "sort": "created", @@ -298,7 +298,7 @@ def transform_data(raw_data: list[dict], repo: str) -> dict: logger = logging.getLogger(__name__) logger.info(f"Starting data transformation for {len(raw_data)} PRs") - transformed_data = { + transformed_data: dict = { "pull_requests": [], "commits": [], "reviewers": [], @@ -371,7 +371,8 @@ def transform_data(raw_data: list[dict], repo: str) -> dict: } transformed_data["reviewers"].append(transformed_reviewer) - # If the request is approved then store the date in the date_approved for the pull request + # If the request is approved then store the date in the + # date_approved for the pull request if review.get("state") == "APPROVED": approved_date = review.get("submitted_at") if transformed_pr.get( @@ -422,7 +423,8 @@ def load_data( Args: client: BigQuery client instance dataset_id: BigQuery dataset ID - transformed_data: Dictionary containing tables ('pull_requests', 'commits', 'reviewers', 'comments') mapped to lists of row dictionaries + transformed_data: Dictionary containing tables ('pull_requests', + 'commits', 'reviewers', 'comments') mapped to lists of row dictionaries """ logger = logging.getLogger(__name__) @@ -457,7 +459,8 @@ def load_data( raise Exception(error_msg) logger.info( - f"Data loading completed successfully for table {table} with {len(load_table_data)} rows" + f"Data loading completed successfully for table {table} " + + "with {len(load_table_data)} rows" ) @@ -479,7 +482,8 @@ def main() -> int: github_token = os.environ.get("GITHUB_TOKEN") if not github_token: logger.warning( - "Warning: No token provided. You will hit very low rate limits and private repos won't work." + "Warning: No token provided. You will hit very low rate " + + "limits and private repos won't work." 
) # Read BigQuery configuration @@ -522,9 +526,10 @@ def main() -> int: bigquery_client = bigquery.Client(project=bigquery_project) # Read GitHub repository configuration - github_repos = os.getenv("GITHUB_REPOS") - if github_repos: - github_repos = github_repos.split(",") + github_repos = [] + github_repos_str = os.getenv("GITHUB_REPOS") + if github_repos_str: + github_repos = github_repos_str.split(",") else: raise SystemExit( "Environment variable GITHUB_REPOS is required (format: 'owner/repo,owner/repo')" diff --git a/test_main.py b/test_main.py index 0850eae..0e60118 100644 --- a/test_main.py +++ b/test_main.py @@ -8,8 +8,6 @@ import logging import os -import time -from datetime import datetime from unittest.mock import MagicMock, Mock, patch import pytest From af0db81174248598f680ca5c1444da7bf4634573 Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 21:45:06 -0500 Subject: [PATCH 07/12] types-requests --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 8ede7d4..39c369e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ black>=24.0.0 flake8>=7.0.0 mypy>=1.8.0 isort>=5.13.0 +types-requests==2.32.4.20260107 From 9b3ba755e18ba0fdc8b3d29b4e5f58078b992d17 Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 21:50:26 -0500 Subject: [PATCH 08/12] More types-requests fixes --- .github/workflows/tests.yml | 4 +++- requirements.txt | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4e4f711..dda2014 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -23,7 +23,9 @@ jobs: - name: Run flake8 run: flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503 - name: Run mypy - run: mypy main.py --no-strict-optional --ignore-missing-imports + run: | + pip install types-requests + mypy main.py --no-strict-optional --ignore-missing-imports test: runs-on: ubuntu-latest diff --git a/requirements.txt b/requirements.txt index 39c369e..8ede7d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,3 @@ black>=24.0.0 flake8>=7.0.0 mypy>=1.8.0 isort>=5.13.0 -types-requests==2.32.4.20260107 From eaf389b4497ebcb3367724e65148a8d4a3c7351b Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 22:11:34 -0500 Subject: [PATCH 09/12] Fixed typo in f-string --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index f76a281..ec2f482 100755 --- a/main.py +++ b/main.py @@ -460,7 +460,7 @@ def load_data( logger.info( f"Data loading completed successfully for table {table} " - + "with {len(load_table_data)} rows" + + f"with {len(load_table_data)} rows" ) From 7caae207a05b059b2795a6ed422f573e0197798e Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Thu, 22 Jan 2026 16:35:31 -0500 Subject: [PATCH 10/12] Copilot fixes --- .github/workflows/tests.yml | 6 ++--- TESTING.md | 53 +++++++++++++++++++------------------ pytest.ini | 2 +- requirements.txt | 1 + 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index dda2014..513a509 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -15,7 +15,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install black flake8 mypy isort + pip install -r requirements.txt - name: Run black run: black --check main.py test_main.py - name: Run isort @@ -23,9 +23,7 
@@ jobs: - name: Run flake8 run: flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503 - name: Run mypy - run: | - pip install types-requests - mypy main.py --no-strict-optional --ignore-missing-imports + run: mypy main.py --no-strict-optional --ignore-missing-imports test: runs-on: ubuntu-latest diff --git a/TESTING.md b/TESTING.md index 104d401..c6a541c 100644 --- a/TESTING.md +++ b/TESTING.md @@ -19,22 +19,22 @@ unit tests, integration tests, Docker testing, linting, and CI/CD workflows. ## Unit Testing The test suite in `test_main.py` provides comprehensive coverage for all functions in `main.py`. -We have **95 unit tests** covering 9 functions with 80%+ code coverage requirement. +We have unit tests covering 9 functions with 80%+ code coverage requirement. ### Test Structure Tests are organized into 10 test classes: -1. **TestSetupLogging** (1 test) - Logging configuration -2. **TestSleepForRateLimit** (4 tests) - Rate limit handling -3. **TestExtractPullRequests** (14 tests) - PR extraction with pagination and enrichment -4. **TestExtractCommits** (9 tests) - Commit and file extraction -5. **TestExtractReviewers** (6 tests) - Reviewer extraction -6. **TestExtractComments** (7 tests) - Comment extraction (uses /issues endpoint) -7. **TestTransformData** (26 tests) - Data transformation for all 4 BigQuery tables -8. **TestLoadData** (8 tests) - BigQuery data loading -9. **TestMain** (17 tests) - Main ETL orchestration -10. **TestIntegration** (3 tests) - End-to-end integration tests (marked with `@pytest.mark.integration`) +1. **TestSetupLogging** - Logging configuration +2. **TestSleepForRateLimit** - Rate limit handling +3. **TestExtractPullRequests** - PR extraction with pagination and enrichment +4. **TestExtractCommits** - Commit and file extraction +5. **TestExtractReviewers** - Reviewer extraction +6. **TestExtractComments** - Comment extraction (uses /issues endpoint) +7. **TestTransformData** - Data transformation for all 4 BigQuery tables +8. **TestLoadData** - BigQuery data loading +9. **TestMain** - Main ETL orchestration +10. 
**TestIntegration** - End-to-end integration tests (marked with `@pytest.mark.integration`) ### Fixtures @@ -51,17 +51,17 @@ Reusable fixtures are defined at the top of `test_main.py`: ### Function Coverage -| Function | Tests | Coverage Target | Key Test Areas | -|----------|-------|-----------------|----------------| -| `setup_logging()` | 1 | 100% | Logger configuration | -| `sleep_for_rate_limit()` | 4 | 100% | Rate limit sleep logic, edge cases | -| `extract_pull_requests()` | 14 | 90%+ | Pagination, rate limits, enrichment, error handling | -| `extract_commits()` | 9 | 85%+ | Commit/file fetching, rate limits, errors | -| `extract_reviewers()` | 6 | 85%+ | Reviewer states, rate limits, errors | -| `extract_comments()` | 7 | 85%+ | Comment fetching (via /issues), rate limits | -| `transform_data()` | 26 | 95%+ | Bug ID extraction, 4 tables, field mapping | -| `load_data()` | 8 | 90%+ | BigQuery insertion, snapshot dates, errors | -| `main()` | 17 | 85%+ | Env vars, orchestration, chunking | +| Function | Coverage Target | Key Test Areas | +|----------|------------------|----------------| +| `setup_logging()` | 100% | Logger configuration | +| `sleep_for_rate_limit()` | 100% | Rate limit sleep logic, edge cases | +| `extract_pull_requests()` | 90%+ | Pagination, rate limits, enrichment, error handling | +| `extract_commits()` | 85%+ | Commit/file fetching, rate limits, errors | +| `extract_reviewers()` | 85%+ | Reviewer states, rate limits, errors | +| `extract_comments()` | 85%+ | Comment fetching (via /issues), rate limits | +| `transform_data()` | 95%+ | Bug ID extraction, 4 tables, field mapping | +| `load_data()` | 90%+ | BigQuery insertion, snapshot dates, errors | +| `main()` | 85%+ | Env vars, orchestration, chunking | **Overall Target: 85-90% coverage** (80% minimum enforced in CI) @@ -318,8 +318,8 @@ docker-compose down - 9050 (BigQuery API) - 9060 (Discovery/Admin API) - **Configuration**: Uses `data.yml` to define the schema -- **Project**: test-project -- **Dataset**: test_dataset +- **Project**: test +- **Dataset**: github_etl - **Table**: pull_requests ### ETL Service @@ -328,8 +328,9 @@ The ETL service is configured via environment variables in `docker-compose.yml`: ```yaml environment: - GITHUB_REPOS: "mozilla/firefox" - GITHUB_API_URL: "http://mock-github-api:5000" # Points to mock API + GITHUB_REPOS: "mozilla-firefox/firefox" + GITHUB_TOKEN: "" # Not needed for mock API + GITHUB_API_URL: "http://mock-github-api:5000" BIGQUERY_PROJECT: "test" BIGQUERY_DATASET: "github_etl" BIGQUERY_EMULATOR_HOST: "http://bigquery-emulator:9050" diff --git a/pytest.ini b/pytest.ini index 33ef84b..d553b45 100644 --- a/pytest.ini +++ b/pytest.ini @@ -34,7 +34,7 @@ log_cli_date_format = %Y-%m-%d %H:%M:%S # Coverage options [coverage:run] -source = . 
+source = main omit = test_*.py .venv/* diff --git a/requirements.txt b/requirements.txt index 8ede7d4..e1e65e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ black>=24.0.0 flake8>=7.0.0 mypy>=1.8.0 isort>=5.13.0 +types-requests>=2.32.4 From 3b9260d4996ea15f66c041f92af707298e0757c2 Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Fri, 23 Jan 2026 18:16:04 -0500 Subject: [PATCH 11/12] Fixed review comments --- .github/workflows/tests.yml | 22 +- Dockerfile | 4 +- Dockerfile.mock | 2 +- README.md | 2 +- TESTING.md | 2 +- main.py | 4 +- pyproject.toml | 129 ++ pytest.ini | 47 - requirements.txt | 340 +++- test_formatting.py | 16 + test_main.py | 3456 +++++++++++++++++------------------ 11 files changed, 2199 insertions(+), 1825 deletions(-) create mode 100644 pyproject.toml delete mode 100644 pytest.ini create mode 100644 test_formatting.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 513a509..5480c08 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -5,33 +5,13 @@ on: branches: [main] jobs: - lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - name: Run black - run: black --check main.py test_main.py - - name: Run isort - run: isort --check-only main.py test_main.py - - name: Run flake8 - run: flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503 - - name: Run mypy - run: mypy main.py --no-strict-optional --ignore-missing-imports - test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.14.2" - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/Dockerfile b/Dockerfile index 5608295..bec1ed8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Use the latest stable Python image -FROM python:3.11-slim +FROM python:3.14.2-slim # Set environment variables ENV PYTHONDONTWRITEBYTECODE=1 \ @@ -34,4 +34,4 @@ RUN chown -R app:app /app USER app # Set the default command -CMD ["python", "main.py"] \ No newline at end of file +CMD ["python", "main.py"] diff --git a/Dockerfile.mock b/Dockerfile.mock index 1098382..cf46078 100644 --- a/Dockerfile.mock +++ b/Dockerfile.mock @@ -1,5 +1,5 @@ # Dockerfile for mock GitHub API service -FROM python:3.11-slim +FROM python:3.14.2-slim WORKDIR /app diff --git a/README.md b/README.md index 80a3afe..570bacb 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ docker run --rm \ ### Container Specifications -- **Base Image**: `python:3.11-slim` (latest stable Python) +- **Base Image**: `python:3.14.2-slim` (latest stable Python) - **User**: `app` (uid: 1000, gid: 1000) - **Working Directory**: `/app` - **Ownership**: All files in `/app` are owned by the `app` user diff --git a/TESTING.md b/TESTING.md index c6a541c..6901d2f 100644 --- a/TESTING.md +++ b/TESTING.md @@ -604,7 +604,7 @@ If coverage is below 80%: ### Tests Pass Locally But Fail in CI -- Check Python version (must be 3.11) +- Check Python version (must be 3.14) - Verify all dependencies are in `requirements.txt` - Look for environment-specific issues diff --git a/main.py b/main.py index ec2f482..e6b92b0 100755 --- a/main.py +++ b/main.py @@ -91,7 +91,7 @@ def extract_pull_requests( f"Extracted page {pages} with {len(batch)} PRs (total: {total})" ) - for idx, pr in 
enumerate(batch):
+        for _idx, pr in enumerate(batch):
             pr_number = pr.get("number")
             if not pr_number:
                 continue
@@ -273,7 +273,7 @@ def extract_comments(
     return comments
 
 
-def sleep_for_rate_limit(resp):
+def sleep_for_rate_limit(resp: requests.Response) -> None:
     """Sleep until rate limit resets."""
     remaining = int(resp.headers.get("X-RateLimit-Remaining", 1))
     reset = int(resp.headers.get("X-RateLimit-Reset", 0))
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..198886d
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,129 @@
+[project]
+name = "github-etl"
+version = "0.1.0"
+description = "ETL script to extract data from Mozilla Organization Firefox repositories on GitHub and load them into BigQuery"
+readme = "README.md"
+requires-python = ">=3.14"
+license = {text = "MPL-2.0"}
+authors = [
+    {name = "Mozilla", email = "dev-platform@lists.mozilla.org"}
+]
+keywords = ["etl", "github", "bigquery", "mozilla"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: Mozilla Public License 2.0 (MPL-2.0)",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.14",
+]
+
+dependencies = [
+    "requests>=2.25.0",
+    "google-cloud-bigquery==3.25.0",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0.0",
+    "pytest-mock>=3.10.0",
+    "pytest-cov>=4.0.0",
+    "ruff>=0.14.14",
+    "black>=24.0.0",
+]
+
+[project.scripts]
+github-etl = "main:main"
+
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools]
+py-modules = ["main"]
+
+# Ruff configuration
+[tool.ruff]
+line-length = 88
+exclude = [
+    ".cache",
+    ".git",
+    ".hg",
+    "__pycache__",
+]
+
+[tool.ruff.lint]
+select = ["C", "E", "F", "W", "B", "B9", "I", "ANN"]
+ignore = [
+    "B006",
+    "B904",
+    "C901",
+    "E203",
+    "E501",
+    "ANN002",  # Missing type annotation for *args
+    "ANN003",  # Missing type annotation for **kwargs
+    "ANN202",  # Missing return type annotation for private function
+]
+
+[tool.ruff.lint.isort]
+split-on-trailing-comma = true
+
+[tool.ruff.lint.flake8-annotations]
+suppress-none-returning = true
+
+[tool.ruff.lint.per-file-ignores]
+"**/*/tests/*" = ["ANN"]
+"**/*/conftest.py" = ["ANN"]
+
+# Black configuration
+[tool.black]
+line-length = 88
+target-version = ['py314']
+
+# Pytest configuration
+[tool.pytest.ini_options]
+testpaths = ["."]
+python_files = ["test_*.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
+addopts = [
+    "-v",
+    "--strict-markers",
+    "--tb=short",
+    "--cov=main",
+    "--cov-report=term-missing",
+    "--cov-report=html",
+    "--cov-branch",
+    "--cov-fail-under=80",
+]
+markers = [
+    "unit: Unit tests for individual functions",
+    "integration: Integration tests that test multiple components",
+    "slow: Tests that take longer to run",
+]
+log_cli = false
+log_cli_level = "INFO"
+log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s"
+log_cli_date_format = "%Y-%m-%d %H:%M:%S"
+
+# Coverage configuration
+[tool.coverage.run]
+source = ["main"]
+omit = [
+    "test_*.py",
+    ".venv/*",
+    "venv/*",
+    "*/site-packages/*",
+]
+
+[tool.coverage.report]
+precision = 2
+show_missing = true
+skip_covered = false
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "if __name__ == .__main__.:",
+    "if TYPE_CHECKING:",
+]
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index d553b45..0000000
--- a/pytest.ini
+++ /dev/null
@@ -1,47 +0,0 @@
-[pytest]
-# Pytest
configuration for GitHub ETL project - -# Test discovery patterns -python_files = test_*.py -python_classes = Test* -python_functions = test_* - -# Output options -addopts = - -v - --strict-markers - --tb=short - --cov=main - --cov-report=term-missing - --cov-report=html - --cov-branch - --cov-fail-under=80 - -# Test paths -testpaths = . - -# Markers for organizing tests -markers = - unit: Unit tests for individual functions - integration: Integration tests that test multiple components - slow: Tests that take longer to run - -# Logging -log_cli = false -log_cli_level = INFO -log_cli_format = %(asctime)s [%(levelname)8s] %(message)s -log_cli_date_format = %Y-%m-%d %H:%M:%S - -# Coverage options -[coverage:run] -source = main -omit = - test_*.py - .venv/* - venv/* - */site-packages/* - -[coverage:report] -precision = 2 -show_missing = true -skip_covered = false diff --git a/requirements.txt b/requirements.txt index e1e65e0..d487f50 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,325 @@ -# Essential dependencies for GitHub ETL -requests>=2.25.0 -google-cloud-bigquery==3.25.0 - -# Testing dependencies -pytest>=7.0.0 -pytest-mock>=3.10.0 -pytest-cov>=4.0.0 - -# Linting and formatting tools -black>=24.0.0 -flake8>=7.0.0 -mypy>=1.8.0 -isort>=5.13.0 -types-requests>=2.32.4 +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --generate-hashes pyproject.toml +# +certifi==2026.1.4 \ + --hash=sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c \ + --hash=sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120 + # via requests +charset-normalizer==3.4.4 \ + --hash=sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad \ + --hash=sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93 \ + --hash=sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394 \ + --hash=sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89 \ + --hash=sha256:0f04b14ffe5fdc8c4933862d8306109a2c51e0704acfa35d51598eb45a1e89fc \ + --hash=sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86 \ + --hash=sha256:194f08cbb32dc406d6e1aea671a68be0823673db2832b38405deba2fb0d88f63 \ + --hash=sha256:1bee1e43c28aa63cb16e5c14e582580546b08e535299b8b6158a7c9c768a1f3d \ + --hash=sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f \ + --hash=sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8 \ + --hash=sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0 \ + --hash=sha256:2677acec1a2f8ef614c6888b5b4ae4060cc184174a938ed4e8ef690e15d3e505 \ + --hash=sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161 \ + --hash=sha256:2aaba3b0819274cc41757a1da876f810a3e4d7b6eb25699253a4effef9e8e4af \ + --hash=sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152 \ + --hash=sha256:2c9d3c380143a1fedbff95a312aa798578371eb29da42106a29019368a475318 \ + --hash=sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72 \ + --hash=sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4 \ + --hash=sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e \ + --hash=sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3 \ + --hash=sha256:44c2a8734b333e0578090c4cd6b16f275e07aa6614ca8715e6c038e865e70576 \ + --hash=sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c \ + 
--hash=sha256:4902828217069c3c5c71094537a8e623f5d097858ac6ca8252f7b4d10b7560f1 \ + --hash=sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8 \ + --hash=sha256:4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1 \ + --hash=sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2 \ + --hash=sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44 \ + --hash=sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26 \ + --hash=sha256:5947809c8a2417be3267efc979c47d76a079758166f7d43ef5ae8e9f92751f88 \ + --hash=sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016 \ + --hash=sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede \ + --hash=sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf \ + --hash=sha256:5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a \ + --hash=sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc \ + --hash=sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0 \ + --hash=sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84 \ + --hash=sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db \ + --hash=sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1 \ + --hash=sha256:6aee717dcfead04c6eb1ce3bd29ac1e22663cdea57f943c87d1eab9a025438d7 \ + --hash=sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed \ + --hash=sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8 \ + --hash=sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133 \ + --hash=sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e \ + --hash=sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef \ + --hash=sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14 \ + --hash=sha256:778d2e08eda00f4256d7f672ca9fef386071c9202f5e4607920b86d7803387f2 \ + --hash=sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0 \ + --hash=sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d \ + --hash=sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828 \ + --hash=sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f \ + --hash=sha256:7c308f7e26e4363d79df40ca5b2be1c6ba9f02bdbccfed5abddb7859a6ce72cf \ + --hash=sha256:7fa17817dc5625de8a027cb8b26d9fefa3ea28c8253929b8d6649e705d2835b6 \ + --hash=sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328 \ + --hash=sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090 \ + --hash=sha256:837c2ce8c5a65a2035be9b3569c684358dfbf109fd3b6969630a87535495ceaa \ + --hash=sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381 \ + --hash=sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c \ + --hash=sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb \ + --hash=sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc \ + --hash=sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a \ + --hash=sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec \ + --hash=sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc \ + --hash=sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac \ + --hash=sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e \ + 
--hash=sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313 \ + --hash=sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569 \ + --hash=sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3 \ + --hash=sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d \ + --hash=sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525 \ + --hash=sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894 \ + --hash=sha256:a8bf8d0f749c5757af2142fe7903a9df1d2e8aa3841559b2bad34b08d0e2bcf3 \ + --hash=sha256:a9768c477b9d7bd54bc0c86dbaebdec6f03306675526c9927c0e8a04e8f94af9 \ + --hash=sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a \ + --hash=sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9 \ + --hash=sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14 \ + --hash=sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25 \ + --hash=sha256:b5d84d37db046c5ca74ee7bb47dd6cbc13f80665fdde3e8040bdd3fb015ecb50 \ + --hash=sha256:b7cf1017d601aa35e6bb650b6ad28652c9cd78ee6caff19f3c28d03e1c80acbf \ + --hash=sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1 \ + --hash=sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3 \ + --hash=sha256:c4ef880e27901b6cc782f1b95f82da9313c0eb95c3af699103088fa0ac3ce9ac \ + --hash=sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e \ + --hash=sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815 \ + --hash=sha256:cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c \ + --hash=sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6 \ + --hash=sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6 \ + --hash=sha256:cd09d08005f958f370f539f186d10aec3377d55b9eeb0d796025d4886119d76e \ + --hash=sha256:cd4b7ca9984e5e7985c12bc60a6f173f3c958eae74f3ef6624bb6b26e2abbae4 \ + --hash=sha256:ce8a0633f41a967713a59c4139d29110c07e826d131a316b50ce11b1d79b4f84 \ + --hash=sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69 \ + --hash=sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15 \ + --hash=sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191 \ + --hash=sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0 \ + --hash=sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897 \ + --hash=sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd \ + --hash=sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2 \ + --hash=sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794 \ + --hash=sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d \ + --hash=sha256:e912091979546adf63357d7e2ccff9b44f026c075aeaf25a52d0e95ad2281074 \ + --hash=sha256:eaabd426fe94daf8fd157c32e571c85cb12e66692f15516a83a03264b08d06c3 \ + --hash=sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224 \ + --hash=sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838 \ + --hash=sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a \ + --hash=sha256:f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d \ + --hash=sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d \ + --hash=sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f \ + 
--hash=sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8 \ + --hash=sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490 \ + --hash=sha256:f8e160feb2aed042cd657a72acc0b481212ed28b1b9a95c0cee1621b524e1966 \ + --hash=sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9 \ + --hash=sha256:fa09f53c465e532f4d3db095e0c55b615f010ad81803d383195b6b5ca6cbf5f3 \ + --hash=sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e \ + --hash=sha256:fd44c878ea55ba351104cb93cc85e74916eb8fa440ca7903e57575e97394f608 + # via requests +google-api-core[grpc]==2.29.0 \ + --hash=sha256:84181be0f8e6b04006df75ddfe728f24489f0af57c96a529ff7cf45bc28797f7 \ + --hash=sha256:d30bc60980daa36e314b5d5a3e5958b0200cb44ca8fa1be2b614e932b75a3ea9 + # via + # google-cloud-bigquery + # google-cloud-core +google-auth==2.47.0 \ + --hash=sha256:833229070a9dfee1a353ae9877dcd2dec069a8281a4e72e72f77d4a70ff945da \ + --hash=sha256:c516d68336bfde7cf0da26aab674a36fedcf04b37ac4edd59c597178760c3498 + # via + # google-api-core + # google-cloud-bigquery + # google-cloud-core +google-cloud-bigquery==3.25.0 \ + --hash=sha256:5b2aff3205a854481117436836ae1403f11f2594e6810a98886afd57eda28509 \ + --hash=sha256:7f0c371bc74d2a7fb74dacbc00ac0f90c8c2bec2289b51dd6685a275873b1ce9 + # via github-etl (pyproject.toml) +google-cloud-core==2.5.0 \ + --hash=sha256:67d977b41ae6c7211ee830c7912e41003ea8194bff15ae7d72fd6f51e57acabc \ + --hash=sha256:7c1b7ef5c92311717bd05301aa1a91ffbc565673d3b0b4163a52d8413a186963 + # via google-cloud-bigquery +google-crc32c==1.8.0 \ + --hash=sha256:014a7e68d623e9a4222d663931febc3033c5c7c9730785727de2a81f87d5bab8 \ + --hash=sha256:01f126a5cfddc378290de52095e2c7052be2ba7656a9f0caf4bcd1bfb1833f8a \ + --hash=sha256:0470b8c3d73b5f4e3300165498e4cf25221c7eb37f1159e221d1825b6df8a7ff \ + --hash=sha256:119fcd90c57c89f30040b47c211acee231b25a45d225e3225294386f5d258288 \ + --hash=sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411 \ + --hash=sha256:17446feb05abddc187e5441a45971b8394ea4c1b6efd88ab0af393fd9e0a156a \ + --hash=sha256:19b40d637a54cb71e0829179f6cb41835f0fbd9e8eb60552152a8b52c36cbe15 \ + --hash=sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb \ + --hash=sha256:3b9776774b24ba76831609ffbabce8cdf6fa2bd5e9df37b594221c7e333a81fa \ + --hash=sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962 \ + --hash=sha256:3d488e98b18809f5e322978d4506373599c0c13e6c5ad13e53bb44758e18d215 \ + --hash=sha256:3ebb04528e83b2634857f43f9bb8ef5b2bbe7f10f140daeb01b58f972d04736b \ + --hash=sha256:450dc98429d3e33ed2926fc99ee81001928d63460f8538f21a5d6060912a8e27 \ + --hash=sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113 \ + --hash=sha256:57a50a9035b75643996fbf224d6661e386c7162d1dfdab9bc4ca790947d1007f \ + --hash=sha256:61f58b28e0b21fcb249a8247ad0db2e64114e201e2e9b4200af020f3b6242c9f \ + --hash=sha256:6f35aaffc8ccd81ba3162443fabb920e65b1f20ab1952a31b13173a67811467d \ + --hash=sha256:71734788a88f551fbd6a97be9668a0020698e07b2bf5b3aa26a36c10cdfb27b2 \ + --hash=sha256:864abafe7d6e2c4c66395c1eb0fe12dc891879769b52a3d56499612ca93b6092 \ + --hash=sha256:86cfc00fe45a0ac7359e5214a1704e51a99e757d0272554874f419f79838c5f7 \ + --hash=sha256:87b0072c4ecc9505cfa16ee734b00cd7721d20a0f595be4d40d3d21b41f65ae2 \ + --hash=sha256:87fa445064e7db928226b2e6f0d5304ab4cd0339e664a4e9a25029f384d9bb93 \ + --hash=sha256:89c17d53d75562edfff86679244830599ee0a48efc216200691de8b02ab6b2b8 \ + 
--hash=sha256:8b3f68782f3cbd1bce027e48768293072813469af6a61a86f6bb4977a4380f21 \ + --hash=sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79 \ + --hash=sha256:b0d1a7afc6e8e4635564ba8aa5c0548e3173e41b6384d7711a9123165f582de2 \ + --hash=sha256:ba6aba18daf4d36ad4412feede6221414692f44d17e5428bdd81ad3fc1eee5dc \ + --hash=sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454 \ + --hash=sha256:d511b3153e7011a27ab6ee6bb3a5404a55b994dc1a7322c0b87b29606d9790e2 \ + --hash=sha256:db3fe8eaf0612fc8b20fa21a5f25bd785bc3cd5be69f8f3412b0ac2ffd49e733 \ + --hash=sha256:e6584b12cb06796d285d09e33f63309a09368b9d806a551d8036a4207ea43697 \ + --hash=sha256:f4b51844ef67d6cf2e9425983274da75f18b1597bb2c998e1c0a0e8d46f8f651 \ + --hash=sha256:f639065ea2042d5c034bf258a9f085eaa7af0cd250667c0635a3118e8f92c69c + # via google-resumable-media +google-resumable-media==2.8.0 \ + --hash=sha256:dd14a116af303845a8d932ddae161a26e86cc229645bc98b39f026f9b1717582 \ + --hash=sha256:f1157ed8b46994d60a1bc432544db62352043113684d4e030ee02e77ebe9a1ae + # via google-cloud-bigquery +googleapis-common-protos==1.72.0 \ + --hash=sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038 \ + --hash=sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5 + # via + # google-api-core + # grpcio-status +grpcio==1.76.0 \ + --hash=sha256:035d90bc79eaa4bed83f524331d55e35820725c9fbb00ffa1904d5550ed7ede3 \ + --hash=sha256:04bbe1bfe3a68bbfd4e52402ab7d4eb59d72d02647ae2042204326cf4bbad280 \ + --hash=sha256:063065249d9e7e0782d03d2bca50787f53bd0fb89a67de9a7b521c4a01f1989b \ + --hash=sha256:06c3d6b076e7b593905d04fdba6a0525711b3466f43b3400266f04ff735de0cd \ + --hash=sha256:08caea849a9d3c71a542827d6df9d5a69067b0a1efbea8a855633ff5d9571465 \ + --hash=sha256:0aaa82d0813fd4c8e589fac9b65d7dd88702555f702fb10417f96e2a2a6d4c0f \ + --hash=sha256:0b7604868b38c1bfd5cf72d768aedd7db41d78cb6a4a18585e33fb0f9f2363fd \ + --hash=sha256:0c37db8606c258e2ee0c56b78c62fc9dee0e901b5dbdcf816c2dd4ad652b8b0c \ + --hash=sha256:1c9b93f79f48b03ada57ea24725d83a30284a012ec27eab2cf7e50a550cbbbcc \ + --hash=sha256:2107b0c024d1b35f4083f11245c0e23846ae64d02f40b2b226684840260ed054 \ + --hash=sha256:2229ae655ec4e8999599469559e97630185fdd53ae1e8997d147b7c9b2b72cba \ + --hash=sha256:25a18e9810fbc7e7f03ec2516addc116a957f8cbb8cbc95ccc80faa072743d03 \ + --hash=sha256:26ef06c73eb53267c2b319f43e6634c7556ea37672029241a056629af27c10e2 \ + --hash=sha256:2e1743fbd7f5fa713a1b0a8ac8ebabf0ec980b5d8809ec358d488e273b9cf02a \ + --hash=sha256:32483fe2aab2c3794101c2a159070584e5db11d0aa091b2c0ea9c4fc43d0d749 \ + --hash=sha256:3bf0f392c0b806905ed174dcd8bdd5e418a40d5567a05615a030a5aeddea692d \ + --hash=sha256:3e2a27c89eb9ac3d81ec8835e12414d73536c6e620355d65102503064a4ed6eb \ + --hash=sha256:40ad3afe81676fd9ec6d9d406eda00933f218038433980aa19d401490e46ecde \ + --hash=sha256:4215d3a102bd95e2e11b5395c78562967959824156af11fa93d18fdd18050990 \ + --hash=sha256:45d59a649a82df5718fd9527ce775fd66d1af35e6d31abdcdc906a49c6822958 \ + --hash=sha256:45e0111e73f43f735d70786557dc38141185072d7ff8dc1829d6a77ac1471468 \ + --hash=sha256:479496325ce554792dba6548fae3df31a72cef7bad71ca2e12b0e58f9b336bfc \ + --hash=sha256:490fa6d203992c47c7b9e4a9d39003a0c2bcc1c9aa3c058730884bbbb0ee9f09 \ + --hash=sha256:49ce47231818806067aea3324d4bf13825b658ad662d3b25fada0bdad9b8a6af \ + --hash=sha256:4baf3cbe2f0be3289eb68ac8ae771156971848bb8aaff60bad42005539431980 \ + --hash=sha256:522175aba7af9113c48ec10cc471b9b9bd4f6ceb36aeb4544a8e2c80ed9d252d \ + 
--hash=sha256:5e8571632780e08526f118f74170ad8d50fb0a48c23a746bef2a6ebade3abd6f \ + --hash=sha256:615ba64c208aaceb5ec83bfdce7728b80bfeb8be97562944836a7a0a9647d882 \ + --hash=sha256:61f69297cba3950a524f61c7c8ee12e55c486cb5f7db47ff9dcee33da6f0d3ae \ + --hash=sha256:65a20de41e85648e00305c1bb09a3598f840422e522277641145a32d42dcefcc \ + --hash=sha256:6a15c17af8839b6801d554263c546c69c4d7718ad4321e3166175b37eaacca77 \ + --hash=sha256:747fa73efa9b8b1488a95d0ba1039c8e2dca0f741612d80415b1e1c560febf4e \ + --hash=sha256:7be78388d6da1a25c0d5ec506523db58b18be22d9c37d8d3a32c08be4987bd73 \ + --hash=sha256:81fd9652b37b36f16138611c7e884eb82e0cec137c40d3ef7c3f9b3ed00f6ed8 \ + --hash=sha256:83d57312a58dcfe2a3a0f9d1389b299438909a02db60e2f2ea2ae2d8034909d3 \ + --hash=sha256:8843114c0cfce61b40ad48df65abcfc00d4dba82eae8718fab5352390848c5da \ + --hash=sha256:8cc3309d8e08fd79089e13ed4819d0af72aa935dd8f435a195fd152796752ff2 \ + --hash=sha256:8ebe63ee5f8fa4296b1b8cfc743f870d10e902ca18afc65c68cf46fd39bb0783 \ + --hash=sha256:8eddfb4d203a237da6f3cc8a540dad0517d274b5a1e9e636fd8d2c79b5c1d397 \ + --hash=sha256:922fa70ba549fce362d2e2871ab542082d66e2aaf0c19480ea453905b01f384e \ + --hash=sha256:931091142fd8cc14edccc0845a79248bc155425eee9a98b2db2ea4f00a235a42 \ + --hash=sha256:971fd5a1d6e62e00d945423a567e42eb1fa678ba89072832185ca836a94daaa6 \ + --hash=sha256:980a846182ce88c4f2f7e2c22c56aefd515daeb36149d1c897f83cf57999e0b6 \ + --hash=sha256:9d9adda641db7207e800a7f089068f6f645959f2df27e870ee81d44701dd9db3 \ + --hash=sha256:9f8f757bebaaea112c00dba718fc0d3260052ce714e25804a03f93f5d1c6cc11 \ + --hash=sha256:a6ae758eb08088d36812dd5d9af7a9859c05b1e0f714470ea243694b49278e7b \ + --hash=sha256:a8c2cf1209497cf659a667d7dea88985e834c24b7c3b605e6254cbb5076d985c \ + --hash=sha256:acab0277c40eff7143c2323190ea57b9ee5fd353d8190ee9652369fae735668a \ + --hash=sha256:b331680e46239e090f5b3cead313cc772f6caa7d0fc8de349337563125361a4a \ + --hash=sha256:c088e7a90b6017307f423efbb9d1ba97a22aa2170876223f9709e9d1de0b5347 \ + --hash=sha256:d099566accf23d21037f18a2a63d323075bebace807742e4b0ac210971d4dd70 \ + --hash=sha256:d388087771c837cdb6515539f43b9d4bf0b0f23593a24054ac16f7a960be16f4 \ + --hash=sha256:dcfe41187da8992c5f40aa8c5ec086fa3672834d2be57a32384c08d5a05b4c00 \ + --hash=sha256:e6d1db20594d9daba22f90da738b1a0441a7427552cc6e2e3d1297aeddc00378 \ + --hash=sha256:ebea5cc3aa8ea72e04df9913492f9a96d9348db876f9dda3ad729cfedf7ac416 \ + --hash=sha256:ebebf83299b0cb1721a8859ea98f3a77811e35dce7609c5c963b9ad90728f886 \ + --hash=sha256:f0e34c2079d47ae9f6188211db9e777c619a21d4faba6977774e8fa43b085e48 \ + --hash=sha256:f92f88e6c033db65a5ae3d97905c8fea9c725b63e28d5a75cb73b49bda5024d8 \ + --hash=sha256:f9f7bd5faab55f47231ad8dba7787866b69f5e93bc306e3915606779bbfb4ba8 \ + --hash=sha256:fd5ef5932f6475c436c4a55e4336ebbe47bd3272be04964a03d316bbf4afbcbc \ + --hash=sha256:ff8a59ea85a1f2191a0ffcc61298c571bc566332f82e5f5be1b83c9d8e668a62 + # via + # google-api-core + # grpcio-status +grpcio-status==1.76.0 \ + --hash=sha256:25fcbfec74c15d1a1cb5da3fab8ee9672852dc16a5a9eeb5baf7d7a9952943cd \ + --hash=sha256:380568794055a8efbbd8871162df92012e0228a5f6dffaf57f2a00c534103b18 + # via google-api-core +idna==3.11 \ + --hash=sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea \ + --hash=sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902 + # via requests +packaging==26.0 \ + --hash=sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4 \ + --hash=sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529 + # via 
google-cloud-bigquery
+proto-plus==1.27.0 \
+    --hash=sha256:1baa7f81cf0f8acb8bc1f6d085008ba4171eaf669629d1b6d1673b21ed1c0a82 \
+    --hash=sha256:873af56dd0d7e91836aee871e5799e1c6f1bda86ac9a983e0bb9f0c266a568c4
+    # via google-api-core
+protobuf==6.33.4 \
+    --hash=sha256:0f12ddbf96912690c3582f9dffb55530ef32015ad8e678cd494312bd78314c4f \
+    --hash=sha256:1fe3730068fcf2e595816a6c34fe66eeedd37d51d0400b72fabc848811fdc1bc \
+    --hash=sha256:2fe67f6c014c84f655ee06f6f66213f9254b3a8b6bda6cda0ccd4232c73c06f0 \
+    --hash=sha256:3df850c2f8db9934de4cf8f9152f8dc2558f49f298f37f90c517e8e5c84c30e9 \
+    --hash=sha256:757c978f82e74d75cba88eddec479df9b99a42b31193313b75e492c06a51764e \
+    --hash=sha256:8f11ffae31ec67fc2554c2ef891dcb561dae9a2a3ed941f9e134c2db06657dbc \
+    --hash=sha256:918966612c8232fc6c24c78e1cd89784307f5814ad7506c308ee3cf86662850d \
+    --hash=sha256:955478a89559fa4568f5a81dce77260eabc5c686f9e8366219ebd30debf06aa6 \
+    --hash=sha256:c7c64f259c618f0bef7bee042075e390debbf9682334be2b67408ec7c1c09ee6 \
+    --hash=sha256:dc2e61bca3b10470c1912d166fe0af67bfc20eb55971dcef8dfa48ce14f0ed91
+    # via
+    #   google-api-core
+    #   googleapis-common-protos
+    #   grpcio-status
+    #   proto-plus
+pyasn1==0.6.2 \
+    --hash=sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf \
+    --hash=sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b
+    # via
+    #   pyasn1-modules
+    #   rsa
+pyasn1-modules==0.4.2 \
+    --hash=sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a \
+    --hash=sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6
+    # via google-auth
+python-dateutil==2.9.0.post0 \
+    --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \
+    --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427
+    # via google-cloud-bigquery
+requests==2.32.5 \
+    --hash=sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6 \
+    --hash=sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf
+    # via
+    #   github-etl (pyproject.toml)
+    #   google-api-core
+    #   google-cloud-bigquery
+rsa==4.9.1 \
+    --hash=sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762 \
+    --hash=sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75
+    # via google-auth
+six==1.17.0 \
+    --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \
+    --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81
+    # via python-dateutil
+typing-extensions==4.15.0 \
+    --hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \
+    --hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548
+    # via grpcio
+urllib3==2.6.3 \
+    --hash=sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed \
+    --hash=sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4
+    # via requests
diff --git a/test_formatting.py b/test_formatting.py
new file mode 100644
index 0000000..c92e534
--- /dev/null
+++ b/test_formatting.py
@@ -0,0 +1,16 @@
+"""
+Code Style Tests.
+"""
+
+import subprocess
+
+
+def test_black():
+    cmd = ("black", "--diff", "main.py")
+    output = subprocess.check_output(cmd)
+    assert not output, "The Python code does not adhere to the project style."
+
+
+def test_ruff():
+    cmd = ("ruff", "check", "main.py", "--target-version", "py314")
+    assert subprocess.call(cmd) == 0, "ruff check reported issues in main.py."
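The two style tests above only report pass/fail: `check_output` raises on a non-zero exit, and the assertion messages carry no detail about what actually failed. As a rough sketch (not part of this patch; the function name and message are invented for illustration), a variant that captures ruff's report makes a red CI run self-explanatory:

```python
import subprocess


# Hypothetical companion to test_formatting.py: capture ruff's report so a
# failing test shows the actual findings instead of just an exit status.
def test_ruff_verbose():
    result = subprocess.run(
        ("ruff", "check", "main.py", "--target-version", "py314"),
        capture_output=True,  # keep stdout/stderr instead of streaming them
        text=True,  # decode bytes to str
    )
    assert result.returncode == 0, f"ruff reported issues:\n{result.stdout}"
```

These checks can also be run on their own with `pytest test_formatting.py`, though the coverage options in `addopts` (notably `--cov-fail-under=80`) still apply to such a partial run.
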
diff --git a/test_main.py b/test_main.py index 0e60118..0d38ac3 100644 --- a/test_main.py +++ b/test_main.py @@ -116,1325 +116,839 @@ def mock_comment_response(): # ============================================================================= -class TestSetupLogging: - """Tests for setup_logging function.""" - def test_setup_logging_configures_logger(self): - """Test that setup_logging configures the root logger correctly.""" - main.setup_logging() - - root_logger = logging.getLogger() - assert root_logger.level == logging.INFO - assert len(root_logger.handlers) > 0 - - # Check that at least one handler is a StreamHandler - has_stream_handler = any( - isinstance(handler, logging.StreamHandler) - for handler in root_logger.handlers - ) - assert has_stream_handler - - -class TestSleepForRateLimit: - """Tests for sleep_for_rate_limit function.""" - - @patch("time.time") - @patch("time.sleep") - def test_sleep_for_rate_limit_when_remaining_is_zero(self, mock_sleep, mock_time): - """Test that sleep_for_rate_limit sleeps until reset time.""" - mock_time.return_value = 1000 - - mock_response = Mock() - mock_response.headers = { - "X-RateLimit-Remaining": "0", - "X-RateLimit-Reset": "1120", # 120 seconds from now - } +# ============================================================================= +# TESTS FOR SETUP_LOGGING +# ============================================================================= - main.sleep_for_rate_limit(mock_response) - mock_sleep.assert_called_once_with(120) +def test_setup_logging(): + """Test that setup_logging configures logging correctly.""" + main.setup_logging() - @patch("time.time") - @patch("time.sleep") - def test_sleep_for_rate_limit_when_reset_already_passed( - self, mock_sleep, mock_time - ): - """Test that sleep_for_rate_limit doesn't sleep negative time.""" - mock_time.return_value = 2000 + root_logger = logging.getLogger() + assert root_logger.level == logging.INFO + assert len(root_logger.handlers) > 0 - mock_response = Mock() - mock_response.headers = { - "X-RateLimit-Remaining": "0", - "X-RateLimit-Reset": "1500", # Already passed - } + # Check that at least one handler is a StreamHandler + has_stream_handler = any( + isinstance(handler, logging.StreamHandler) + for handler in root_logger.handlers + ) + assert has_stream_handler - main.sleep_for_rate_limit(mock_response) - # Should sleep for 0 seconds (max of 0 and negative value) - mock_sleep.assert_called_once_with(0) - @patch("time.sleep") - def test_sleep_for_rate_limit_when_remaining_not_zero(self, mock_sleep): - """Test that sleep_for_rate_limit doesn't sleep when remaining > 0.""" - mock_response = Mock() - mock_response.headers = { - "X-RateLimit-Remaining": "5", - "X-RateLimit-Reset": "1500", - } +# ============================================================================= +# TESTS FOR SLEEP_FOR_RATE_LIMIT +# ============================================================================= - main.sleep_for_rate_limit(mock_response) - # Should not sleep when remaining > 0 - mock_sleep.assert_not_called() +@patch("time.time") +@patch("time.sleep") +def test_sleep_for_rate_limit_calculates_wait_time(mock_sleep, mock_time): + """Test that sleep_for_rate_limit calculates correct wait time.""" + mock_time.return_value = 1000 - @patch("time.sleep") - def test_sleep_for_rate_limit_with_missing_headers(self, mock_sleep): - """Test sleep_for_rate_limit with missing rate limit headers.""" - mock_response = Mock() - mock_response.headers = {} + mock_response = Mock() + mock_response.headers = { + 
"X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": "1120", # 120 seconds from now + } - main.sleep_for_rate_limit(mock_response) + main.sleep_for_rate_limit(mock_response) - # Should not sleep when headers are missing (defaults to remaining=1) - mock_sleep.assert_not_called() + mock_sleep.assert_called_once_with(120) -class TestExtractPullRequests: - """Tests for extract_pull_requests function.""" +@patch("time.time") +@patch("time.sleep") +def test_sleep_for_rate_limit_when_reset_already_passed(mock_sleep, mock_time): + """Test that sleep_for_rate_limit doesn't sleep negative time.""" + mock_time.return_value = 2000 - def test_extract_single_page(self, mock_session): - """Test extracting data from a single page of results.""" - mock_response = Mock() - mock_response.status_code = 200 - mock_response.json.return_value = [ - {"number": 1, "title": "PR 1"}, - {"number": 2, "title": "PR 2"}, - ] - mock_response.links = {} - - mock_session.get.return_value = mock_response - - # Mock the extract functions - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - assert len(result) == 1 - assert len(result[0]) == 2 - assert result[0][0]["number"] == 1 - assert result[0][1]["number"] == 2 - - def test_extract_multiple_pages(self, mock_session): - """Test extracting data across multiple pages with pagination.""" - # First page response - mock_response_1 = Mock() - mock_response_1.status_code = 200 - mock_response_1.json.return_value = [ - {"number": 1, "title": "PR 1"}, - {"number": 2, "title": "PR 2"}, - ] - mock_response_1.links = { - "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} - } + mock_response = Mock() + mock_response.headers = { + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": "1500", # Already passed + } - # Second page response - mock_response_2 = Mock() - mock_response_2.status_code = 200 - mock_response_2.json.return_value = [{"number": 3, "title": "PR 3"}] - mock_response_2.links = {} - - mock_session.get.side_effect = [mock_response_1, mock_response_2] - - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - assert len(result) == 2 - assert len(result[0]) == 2 - assert len(result[1]) == 1 - assert result[0][0]["number"] == 1 - assert result[1][0]["number"] == 3 - - def test_enriches_prs_with_commit_data(self, mock_session): - """Test that PRs are enriched with commit data.""" - mock_response = Mock() - mock_response.status_code = 200 - mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] - mock_response.links = {} - - mock_session.get.return_value = mock_response - - mock_commits = [{"sha": "abc123"}] - - with ( - patch( - "main.extract_commits", return_value=mock_commits - ) as mock_extract_commits, - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - assert result[0][0]["commit_data"] == mock_commits - mock_extract_commits.assert_called_once() - - def test_enriches_prs_with_reviewer_data(self, mock_session): - """Test that PRs are enriched with reviewer data.""" - mock_response = Mock() - mock_response.status_code = 200 
- mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] - mock_response.links = {} - - mock_session.get.return_value = mock_response - - mock_reviewers = [{"id": 789, "state": "APPROVED"}] - - with ( - patch("main.extract_commits", return_value=[]), - patch( - "main.extract_reviewers", return_value=mock_reviewers - ) as mock_extract_reviewers, - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - assert result[0][0]["reviewer_data"] == mock_reviewers - mock_extract_reviewers.assert_called_once() - - def test_enriches_prs_with_comment_data(self, mock_session): - """Test that PRs are enriched with comment data.""" - mock_response = Mock() - mock_response.status_code = 200 - mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] - mock_response.links = {} - - mock_session.get.return_value = mock_response - - mock_comments = [{"id": 456, "body": "Great work!"}] - - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch( - "main.extract_comments", return_value=mock_comments - ) as mock_extract_comments, - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - assert result[0][0]["comment_data"] == mock_comments - mock_extract_comments.assert_called_once() - - @patch("main.sleep_for_rate_limit") - def test_handles_rate_limit(self, mock_sleep, mock_session): - """Test that extract_pull_requests handles rate limiting correctly.""" - # Rate limit response - mock_response_rate_limit = Mock() - mock_response_rate_limit.status_code = 403 - mock_response_rate_limit.headers = {"X-RateLimit-Remaining": "0"} - - # Successful response after rate limit - mock_response_success = Mock() - mock_response_success.status_code = 200 - mock_response_success.json.return_value = [{"number": 1, "title": "PR 1"}] - mock_response_success.links = {} - - mock_session.get.side_effect = [ - mock_response_rate_limit, - mock_response_success, - ] + main.sleep_for_rate_limit(mock_response) - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + # Should sleep for 0 seconds (max of 0 and negative value) + mock_sleep.assert_called_once_with(0) - mock_sleep.assert_called_once_with(mock_response_rate_limit) - assert len(result) == 1 - def test_handles_api_error_404(self, mock_session): - """Test that extract_pull_requests raises SystemExit on 404.""" - mock_response = Mock() - mock_response.status_code = 404 - mock_response.text = "Not Found" +@patch("time.sleep") +def test_sleep_for_rate_limit_when_remaining_not_zero(mock_sleep): + """Test that sleep_for_rate_limit doesn't sleep when remaining > 0.""" + mock_response = Mock() + mock_response.headers = { + "X-RateLimit-Remaining": "5", + "X-RateLimit-Reset": "1500", + } - mock_session.get.return_value = mock_response + main.sleep_for_rate_limit(mock_response) - with pytest.raises(SystemExit) as exc_info: - list(main.extract_pull_requests(mock_session, "mozilla/nonexistent")) + # Should not sleep when remaining > 0 + mock_sleep.assert_not_called() - assert "GitHub API error 404" in str(exc_info.value) - def test_handles_api_error_500(self, mock_session): - """Test that extract_pull_requests raises SystemExit on 500.""" - mock_response = Mock() - mock_response.status_code = 500 - mock_response.text = 
"Internal Server Error" +@patch("time.sleep") +def test_sleep_for_rate_limit_with_missing_headers(mock_sleep): + """Test sleep_for_rate_limit with missing rate limit headers.""" + mock_response = Mock() + mock_response.headers = {} - mock_session.get.return_value = mock_response + main.sleep_for_rate_limit(mock_response) - with pytest.raises(SystemExit) as exc_info: - list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - assert "GitHub API error 500" in str(exc_info.value) - - def test_stops_on_empty_batch(self, mock_session): - """Test that extraction stops when an empty batch is returned.""" - # First page with data - mock_response_1 = Mock() - mock_response_1.status_code = 200 - mock_response_1.json.return_value = [{"number": 1}] - mock_response_1.links = { - "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} - } + # Should not sleep when headers are missing (defaults to remaining=1) + mock_sleep.assert_not_called() - # Second page empty - mock_response_2 = Mock() - mock_response_2.status_code = 200 - mock_response_2.json.return_value = [] - mock_response_2.links = {} - - mock_session.get.side_effect = [mock_response_1, mock_response_2] - - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - # Should only have 1 chunk from first page - assert len(result) == 1 - assert len(result[0]) == 1 - - def test_invalid_page_number_handling(self, mock_session): - """Test handling of invalid page number in pagination.""" - mock_response_1 = Mock() - mock_response_1.status_code = 200 - mock_response_1.json.return_value = [{"number": 1}] - mock_response_1.links = { - "next": { - "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=invalid" - } - } - mock_session.get.return_value = mock_response_1 - - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - # Should stop pagination on invalid page number - assert len(result) == 1 - - def test_custom_github_api_url(self, mock_session): - """Test using custom GitHub API URL.""" - custom_url = "https://mock-github.example.com" - - mock_response = Mock() - mock_response.status_code = 200 - mock_response.json.return_value = [{"number": 1}] - mock_response.links = {} - - mock_session.get.return_value = mock_response - - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - list( - main.extract_pull_requests( - mock_session, "mozilla/firefox", github_api_url=custom_url - ) - ) - # Verify custom URL was used - call_args = mock_session.get.call_args - assert custom_url in call_args[0][0] - - def test_skips_prs_without_number_field(self, mock_session): - """Test that PRs without 'number' field are skipped.""" - mock_response = Mock() - mock_response.status_code = 200 - mock_response.json.return_value = [ - {"number": 1, "title": "PR 1"}, - {"title": "PR without number"}, # Missing number field - {"number": 2, "title": "PR 2"}, - ] - mock_response.links = {} +# ============================================================================= +# TESTS FOR EXTRACT_PULL_REQUESTS +# 
============================================================================= - mock_session.get.return_value = mock_response - with ( - patch("main.extract_commits", return_value=[]) as mock_commits, - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - list(main.extract_pull_requests(mock_session, "mozilla/firefox")) +def test_extract_pull_requests_basic(mock_session): + """Test basic extraction of pull requests.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + {"number": 1, "title": "PR 1"}, + {"number": 2, "title": "PR 2"}, + ] + mock_response.links = {} - # extract_commits should only be called for PRs with number field - assert mock_commits.call_count == 2 + mock_session.get.return_value = mock_response + # Mock the extract functions + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert len(result) == 1 + assert len(result[0]) == 2 + assert result[0][0]["number"] == 1 + assert result[0][1]["number"] == 2 + +def test_extract_multiple_pages(mock_session): + """Test extracting data across multiple pages with pagination.""" + # First page response + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [ + {"number": 1, "title": "PR 1"}, + {"number": 2, "title": "PR 2"}, + ] + mock_response_1.links = { + "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} + } -class TestExtractCommits: - """Tests for extract_commits function.""" + # Second page response + mock_response_2 = Mock() + mock_response_2.status_code = 200 + mock_response_2.json.return_value = [{"number": 3, "title": "PR 3"}] + mock_response_2.links = {} - def test_fetch_commits_with_files(self, mock_session): - """Test fetching commits with files for a PR.""" - # Mock commits list response - commits_response = Mock() - commits_response.status_code = 200 - commits_response.json.return_value = [ - {"sha": "abc123"}, - {"sha": "def456"}, - ] + mock_session.get.side_effect = [mock_response_1, mock_response_2] - # Mock individual commit responses - commit_detail_1 = Mock() - commit_detail_1.status_code = 200 - commit_detail_1.json.return_value = { - "sha": "abc123", - "files": [{"filename": "file1.py", "additions": 10}], - } + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert len(result) == 2 + assert len(result[0]) == 2 + assert len(result[1]) == 1 + assert result[0][0]["number"] == 1 + assert result[1][0]["number"] == 3 + +def test_enriches_prs_with_commit_data(mock_session): + """Test that PRs are enriched with commit data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + mock_commits = [{"sha": "abc123"}] + + with ( + patch( + "main.extract_commits", return_value=mock_commits + ) as mock_extract_commits, + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - 
commit_detail_2 = Mock() - commit_detail_2.status_code = 200 - commit_detail_2.json.return_value = { - "sha": "def456", - "files": [{"filename": "file2.py", "deletions": 5}], - } + assert result[0][0]["commit_data"] == mock_commits + mock_extract_commits.assert_called_once() - mock_session.get.side_effect = [ - commits_response, - commit_detail_1, - commit_detail_2, - ] +def test_enriches_prs_with_reviewer_data(mock_session): + """Test that PRs are enriched with reviewer data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} - result = main.extract_commits(mock_session, "mozilla/firefox", 123) - - assert len(result) == 2 - assert result[0]["sha"] == "abc123" - assert result[0]["files"][0]["filename"] == "file1.py" - assert result[1]["sha"] == "def456" - assert result[1]["files"][0]["filename"] == "file2.py" - - def test_multiple_files_per_commit(self, mock_session): - """Test handling multiple files in a single commit.""" - commits_response = Mock() - commits_response.status_code = 200 - commits_response.json.return_value = [{"sha": "abc123"}] - - commit_detail = Mock() - commit_detail.status_code = 200 - commit_detail.json.return_value = { - "sha": "abc123", - "files": [ - {"filename": "file1.py", "additions": 10}, - {"filename": "file2.py", "additions": 20}, - {"filename": "file3.py", "deletions": 5}, - ], - } + mock_session.get.return_value = mock_response - mock_session.get.side_effect = [commits_response, commit_detail] + mock_reviewers = [{"id": 789, "state": "APPROVED"}] - result = main.extract_commits(mock_session, "mozilla/firefox", 123) + with ( + patch("main.extract_commits", return_value=[]), + patch( + "main.extract_reviewers", return_value=mock_reviewers + ) as mock_extract_reviewers, + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - assert len(result) == 1 - assert len(result[0]["files"]) == 3 + assert result[0][0]["reviewer_data"] == mock_reviewers + mock_extract_reviewers.assert_called_once() - @patch("main.sleep_for_rate_limit") - def test_rate_limit_on_commits_list(self, mock_sleep, mock_session): - """Test rate limit handling when fetching commits list.""" - # Rate limit response - rate_limit_response = Mock() - rate_limit_response.status_code = 403 - rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} +def test_enriches_prs_with_comment_data(mock_session): + """Test that PRs are enriched with comment data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} - # Success response - success_response = Mock() - success_response.status_code = 200 - success_response.json.return_value = [] + mock_session.get.return_value = mock_response - mock_session.get.side_effect = [rate_limit_response, success_response] + mock_comments = [{"id": 456, "body": "Great work!"}] - result = main.extract_commits(mock_session, "mozilla/firefox", 123) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch( + "main.extract_comments", return_value=mock_comments + ) as mock_extract_comments, + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert result[0][0]["comment_data"] == mock_comments + mock_extract_comments.assert_called_once() + +@patch("main.sleep_for_rate_limit") +def 
test_handles_rate_limit(mock_sleep, mock_session): + """Test that extract_pull_requests handles rate limiting correctly.""" + # Rate limit response + mock_response_rate_limit = Mock() + mock_response_rate_limit.status_code = 403 + mock_response_rate_limit.headers = {"X-RateLimit-Remaining": "0"} + + # Successful response after rate limit + mock_response_success = Mock() + mock_response_success.status_code = 200 + mock_response_success.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response_success.links = {} + + mock_session.get.side_effect = [ + mock_response_rate_limit, + mock_response_success, + ] + + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - mock_sleep.assert_called_once() - assert result == [] + mock_sleep.assert_called_once_with(mock_response_rate_limit) + assert len(result) == 1 - def test_api_error_on_commits_list(self, mock_session): - """Test API error handling when fetching commits list.""" - error_response = Mock() - error_response.status_code = 500 - error_response.text = "Internal Server Error" +def test_handles_api_error_404(mock_session): + """Test that extract_pull_requests raises SystemExit on 404.""" + mock_response = Mock() + mock_response.status_code = 404 + mock_response.text = "Not Found" - mock_session.get.return_value = error_response + mock_session.get.return_value = mock_response - with pytest.raises(SystemExit) as exc_info: - main.extract_commits(mock_session, "mozilla/firefox", 123) + with pytest.raises(SystemExit) as exc_info: + list(main.extract_pull_requests(mock_session, "mozilla/nonexistent")) - assert "GitHub API error 500" in str(exc_info.value) + assert "GitHub API error 404" in str(exc_info.value) - def test_api_error_on_individual_commit(self, mock_session): - """Test API error when fetching individual commit details.""" - commits_response = Mock() - commits_response.status_code = 200 - commits_response.json.return_value = [{"sha": "abc123"}] +def test_handles_api_error_500(mock_session): + """Test that extract_pull_requests raises SystemExit on 500.""" + mock_response = Mock() + mock_response.status_code = 500 + mock_response.text = "Internal Server Error" - commit_error = Mock() - commit_error.status_code = 404 - commit_error.text = "Commit not found" + mock_session.get.return_value = mock_response - mock_session.get.side_effect = [commits_response, commit_error] + with pytest.raises(SystemExit) as exc_info: + list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - with pytest.raises(SystemExit) as exc_info: - main.extract_commits(mock_session, "mozilla/firefox", 123) + assert "GitHub API error 500" in str(exc_info.value) - assert "GitHub API error 404" in str(exc_info.value) +def test_stops_on_empty_batch(mock_session): + """Test that extraction stops when an empty batch is returned.""" + # First page with data + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [{"number": 1}] + mock_response_1.links = { + "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} + } - def test_commit_without_sha_field(self, mock_session): - """Test handling commits without sha field.""" - commits_response = Mock() - commits_response.status_code = 200 - commits_response.json.return_value = [ - {"sha": "abc123"}, - {}, # Missing sha field - ] + # Second page empty + 
mock_response_2 = Mock() + mock_response_2.status_code = 200 + mock_response_2.json.return_value = [] + mock_response_2.links = {} - commit_detail_1 = Mock() - commit_detail_1.status_code = 200 - commit_detail_1.json.return_value = {"sha": "abc123", "files": []} + mock_session.get.side_effect = [mock_response_1, mock_response_2] - commit_detail_2 = Mock() - commit_detail_2.status_code = 200 - commit_detail_2.json.return_value = {"files": []} + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + # Should only have 1 chunk from first page + assert len(result) == 1 + assert len(result[0]) == 1 + +def test_invalid_page_number_handling(mock_session): + """Test handling of invalid page number in pagination.""" + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [{"number": 1}] + mock_response_1.links = { + "next": { + "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=invalid" + } + } - mock_session.get.side_effect = [ - commits_response, - commit_detail_1, - commit_detail_2, - ] + mock_session.get.return_value = mock_response_1 - result = main.extract_commits(mock_session, "mozilla/firefox", 123) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - # Should handle the commit without sha gracefully - assert len(result) == 2 + # Should stop pagination on invalid page number + assert len(result) == 1 - def test_custom_github_api_url(self, mock_session): - """Test using custom GitHub API URL for commits.""" - custom_url = "https://mock-github.example.com" +def test_custom_github_api_url(mock_session): + """Test using custom GitHub API URL.""" + custom_url = "https://mock-github.example.com" - commits_response = Mock() - commits_response.status_code = 200 - commits_response.json.return_value = [] + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1}] + mock_response.links = {} - mock_session.get.return_value = commits_response + mock_session.get.return_value = mock_response - main.extract_commits( - mock_session, "mozilla/firefox", 123, github_api_url=custom_url + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + list( + main.extract_pull_requests( + mock_session, "mozilla/firefox", github_api_url=custom_url + ) ) - call_args = mock_session.get.call_args - assert custom_url in call_args[0][0] - - def test_empty_commits_list(self, mock_session): - """Test handling PR with no commits.""" - commits_response = Mock() - commits_response.status_code = 200 - commits_response.json.return_value = [] - - mock_session.get.return_value = commits_response - - result = main.extract_commits(mock_session, "mozilla/firefox", 123) - - assert result == [] - - -class TestExtractReviewers: - """Tests for extract_reviewers function.""" + # Verify custom URL was used + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] + +def test_skips_prs_without_number_field(mock_session): + """Test that PRs without 'number' field are skipped.""" + mock_response = Mock() + mock_response.status_code 
= 200
+    mock_response.json.return_value = [
+        {"number": 1, "title": "PR 1"},
+        {"title": "PR without number"},  # Missing number field
+        {"number": 2, "title": "PR 2"},
+    ]
+    mock_response.links = {}
+
+    mock_session.get.return_value = mock_response
+
+    with (
+        patch("main.extract_commits", return_value=[]) as mock_commits,
+        patch("main.extract_reviewers", return_value=[]),
+        patch("main.extract_comments", return_value=[]),
+    ):
+        list(main.extract_pull_requests(mock_session, "mozilla/firefox"))
 
-    def test_fetch_reviewers(self, mock_session):
-        """Test fetching reviewers for a PR."""
-        reviewers_response = Mock()
-        reviewers_response.status_code = 200
-        reviewers_response.json.return_value = [
-            {
-                "id": 789,
-                "user": {"login": "reviewer1"},
-                "state": "APPROVED",
-                "submitted_at": "2024-01-01T15:00:00Z",
-            },
-            {
-                "id": 790,
-                "user": {"login": "reviewer2"},
-                "state": "CHANGES_REQUESTED",
-                "submitted_at": "2024-01-01T16:00:00Z",
-            },
-        ]
+    # extract_commits should only be called for PRs with number field
+    assert mock_commits.call_count == 2
 
-        mock_session.get.return_value = reviewers_response
 
-        result = main.extract_reviewers(mock_session, "mozilla/firefox", 123)
 
-        assert len(result) == 2
-        assert result[0]["state"] == "APPROVED"
-        assert result[1]["state"] == "CHANGES_REQUESTED"
+# =============================================================================
+# TESTS FOR EXTRACT_COMMITS
+# =============================================================================
 
-    def test_multiple_review_states(self, mock_session):
-        """Test handling multiple different review states."""
-        reviewers_response = Mock()
-        reviewers_response.status_code = 200
-        reviewers_response.json.return_value = [
-            {"id": 1, "state": "APPROVED", "user": {"login": "user1"}},
-            {"id": 2, "state": "CHANGES_REQUESTED", "user": {"login": "user2"}},
-            {"id": 3, "state": "COMMENTED", "user": {"login": "user3"}},
-            {"id": 4, "state": "DISMISSED", "user": {"login": "user4"}},
-        ]
 
+def test_fetch_commits_with_files(mock_session):
+    """Test fetching commits with files for a PR."""
+    # Mock commits list response
+    commits_response = Mock()
+    commits_response.status_code = 200
+    commits_response.json.return_value = [
+        {"sha": "abc123"},
+        {"sha": "def456"},
+    ]
+
+    # Mock individual commit responses
+    commit_detail_1 = Mock()
+    commit_detail_1.status_code = 200
+    commit_detail_1.json.return_value = {
+        "sha": "abc123",
+        "files": [{"filename": "file1.py", "additions": 10}],
+    }
 
-        mock_session.get.return_value = reviewers_response
+    commit_detail_2 = Mock()
+    commit_detail_2.status_code = 200
+    commit_detail_2.json.return_value = {
+        "sha": "def456",
+        "files": [{"filename": "file2.py", "deletions": 5}],
+    }
 
-        result = main.extract_reviewers(mock_session, "mozilla/firefox", 123)
+    mock_session.get.side_effect = [
+        commits_response,
+        commit_detail_1,
+        commit_detail_2,
+    ]
+
+    result = main.extract_commits(mock_session, "mozilla/firefox", 123)
+
+    assert len(result) == 2
+    assert result[0]["sha"] == "abc123"
+    assert result[0]["files"][0]["filename"] == "file1.py"
+    assert result[1]["sha"] == "def456"
+    assert result[1]["files"][0]["filename"] == "file2.py"
+
+def test_multiple_files_per_commit(mock_session):
+    """Test handling multiple files in a single commit."""
+    commits_response = Mock()
+    commits_response.status_code = 200
+    commits_response.json.return_value = [{"sha": "abc123"}]
+
+    commit_detail = Mock()
+    commit_detail.status_code = 200
+    commit_detail.json.return_value = {
+        "sha": "abc123",
+        "files": [
+            {"filename": "file1.py", "additions": 10},
+            {"filename": "file2.py", "additions": 20},
+            {"filename":
"file3.py", "deletions": 5}, + ], + } - assert len(result) == 4 - states = [r["state"] for r in result] - assert "APPROVED" in states - assert "CHANGES_REQUESTED" in states - assert "COMMENTED" in states + mock_session.get.side_effect = [commits_response, commit_detail] - def test_empty_reviewers_list(self, mock_session): - """Test handling PR with no reviewers.""" - reviewers_response = Mock() - reviewers_response.status_code = 200 - reviewers_response.json.return_value = [] + result = main.extract_commits(mock_session, "mozilla/firefox", 123) - mock_session.get.return_value = reviewers_response + assert len(result) == 1 + assert len(result[0]["files"]) == 3 - result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) +@patch("main.sleep_for_rate_limit") +def test_rate_limit_on_commits_list(mock_sleep, mock_session): + """Test rate limit handling when fetching commits list.""" + # Rate limit response + rate_limit_response = Mock() + rate_limit_response.status_code = 403 + rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} - assert result == [] + # Success response + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = [] - @patch("main.sleep_for_rate_limit") - def test_rate_limit_handling(self, mock_sleep, mock_session): - """Test rate limit handling when fetching reviewers.""" - rate_limit_response = Mock() - rate_limit_response.status_code = 403 - rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} + mock_session.get.side_effect = [rate_limit_response, success_response] - success_response = Mock() - success_response.status_code = 200 - success_response.json.return_value = [] + result = main.extract_commits(mock_session, "mozilla/firefox", 123) - mock_session.get.side_effect = [rate_limit_response, success_response] + mock_sleep.assert_called_once() + assert result == [] - result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) +def test_api_error_on_commits_list(mock_session): + """Test API error handling when fetching commits list.""" + error_response = Mock() + error_response.status_code = 500 + error_response.text = "Internal Server Error" - mock_sleep.assert_called_once() - assert result == [] + mock_session.get.return_value = error_response - def test_api_error(self, mock_session): - """Test API error handling when fetching reviewers.""" - error_response = Mock() - error_response.status_code = 500 - error_response.text = "Internal Server Error" + with pytest.raises(SystemExit) as exc_info: + main.extract_commits(mock_session, "mozilla/firefox", 123) - mock_session.get.return_value = error_response + assert "GitHub API error 500" in str(exc_info.value) - with pytest.raises(SystemExit) as exc_info: - main.extract_reviewers(mock_session, "mozilla/firefox", 123) +def test_api_error_on_individual_commit(mock_session): + """Test API error when fetching individual commit details.""" + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [{"sha": "abc123"}] - assert "GitHub API error 500" in str(exc_info.value) + commit_error = Mock() + commit_error.status_code = 404 + commit_error.text = "Commit not found" - def test_custom_github_api_url(self, mock_session): - """Test using custom GitHub API URL for reviewers.""" - custom_url = "https://mock-github.example.com" + mock_session.get.side_effect = [commits_response, commit_error] - reviewers_response = Mock() - reviewers_response.status_code = 200 - reviewers_response.json.return_value = [] + with 
pytest.raises(SystemExit) as exc_info:
+        main.extract_commits(mock_session, "mozilla/firefox", 123)
-        mock_session.get.return_value = reviewers_response
+
+    assert "GitHub API error 404" in str(exc_info.value)
-        main.extract_reviewers(
-            mock_session, "mozilla/firefox", 123, github_api_url=custom_url
-        )
+
+def test_commit_without_sha_field(mock_session):
+    """Test handling commits without sha field."""
+    commits_response = Mock()
+    commits_response.status_code = 200
+    commits_response.json.return_value = [
+        {"sha": "abc123"},
+        {},  # Missing sha field
+    ]
-        call_args = mock_session.get.call_args
-        assert custom_url in call_args[0][0]
+
+    commit_detail_1 = Mock()
+    commit_detail_1.status_code = 200
+    commit_detail_1.json.return_value = {"sha": "abc123", "files": []}
+    commit_detail_2 = Mock()
+    commit_detail_2.status_code = 200
+    commit_detail_2.json.return_value = {"files": []}
-class TestExtractComments:
-    """Tests for extract_comments function."""
+
+    mock_session.get.side_effect = [
+        commits_response,
+        commit_detail_1,
+        commit_detail_2,
+    ]
-    def test_fetch_comments(self, mock_session):
-        """Test fetching comments for a PR."""
-        comments_response = Mock()
-        comments_response.status_code = 200
-        comments_response.json.return_value = [
-            {
-                "id": 456,
-                "user": {"login": "commenter1"},
-                "body": "This looks good",
-                "created_at": "2024-01-01T14:00:00Z",
-            },
-            {
-                "id": 457,
-                "user": {"login": "commenter2"},
-                "body": "I have concerns",
-                "created_at": "2024-01-01T15:00:00Z",
-            },
-        ]
+
+    result = main.extract_commits(mock_session, "mozilla/firefox", 123)
-        mock_session.get.return_value = comments_response
+
+    # Should handle the commit without sha gracefully
+    assert len(result) == 2
-        result = main.extract_comments(mock_session, "mozilla/firefox", 123)
+
+def test_custom_github_api_url_commits(mock_session):
+    """Test using custom GitHub API URL for commits."""
-        assert len(result) == 2
-        assert result[0]["id"] == 456
-        assert result[1]["id"] == 457
+    custom_url = "https://mock-github.example.com"
-    def test_uses_issues_endpoint(self, mock_session):
-        """Test that comments use /issues endpoint not /pulls."""
-        comments_response = Mock()
-        comments_response.status_code = 200
-        comments_response.json.return_value = []
+
+    commits_response = Mock()
+    commits_response.status_code = 200
+    commits_response.json.return_value = []
-        mock_session.get.return_value = comments_response
+
+    mock_session.get.return_value = commits_response
-        main.extract_comments(mock_session, "mozilla/firefox", 123)
+
+    main.extract_commits(
+        mock_session, "mozilla/firefox", 123, github_api_url=custom_url
+    )
-        call_args = mock_session.get.call_args
-        url = call_args[0][0]
-        assert "/issues/123/comments" in url
-        assert "/pulls/123/comments" not in url
-
-    def test_multiple_comments(self, mock_session):
-        """Test handling multiple comments."""
-        comments_response = Mock()
-        comments_response.status_code = 200
-        comments_response.json.return_value = [
-            {"id": i, "user": {"login": f"user{i}"}, "body": f"Comment {i}"}
-            for i in range(1, 11)
-        ]
+
+    call_args = mock_session.get.call_args
+    assert custom_url in call_args[0][0]
-        mock_session.get.return_value = comments_response
+
+def test_empty_commits_list(mock_session):
+    """Test handling PR with no commits."""
+    commits_response = Mock()
+    commits_response.status_code = 200
+    commits_response.json.return_value = []
-        result = main.extract_comments(mock_session, "mozilla/firefox", 123)
+
+    mock_session.get.return_value = commits_response
+
+    result = main.extract_commits(mock_session, "mozilla/firefox", 123)
-        assert len(result) == 10
+
+    assert result == []
-    def test_empty_comments_list(self, mock_session):
-        """Test handling PR with no comments."""
-        comments_response = Mock()
-        comments_response.status_code = 200
-        comments_response.json.return_value = []
-        mock_session.get.return_value = comments_response
-        result = main.extract_comments(mock_session, "mozilla/firefox", 123)
+
+# =============================================================================
+# TESTS FOR EXTRACT_REVIEWERS
+# =============================================================================
-        assert result == []
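+# NOTE: The review "state" values used below (APPROVED, CHANGES_REQUESTED,
+# COMMENTED, DISMISSED) mirror the states GitHub reports for pull request
+# reviews.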
"mozilla/firefox", 123) - assert len(result) == 10 + assert result == [] - def test_empty_comments_list(self, mock_session): - """Test handling PR with no comments.""" - comments_response = Mock() - comments_response.status_code = 200 - comments_response.json.return_value = [] - mock_session.get.return_value = comments_response - result = main.extract_comments(mock_session, "mozilla/firefox", 123) +# ============================================================================= +# TESTS FOR EXTRACT_REVIEWERS +# ============================================================================= - assert result == [] + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [ + { + "id": 789, + "user": {"login": "reviewer1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + }, + { + "id": 790, + "user": {"login": "reviewer2"}, + "state": "CHANGES_REQUESTED", + "submitted_at": "2024-01-01T16:00:00Z", + }, + ] - @patch("main.sleep_for_rate_limit") - def test_rate_limit_handling(self, mock_sleep, mock_session): - """Test rate limit handling when fetching comments.""" - rate_limit_response = Mock() - rate_limit_response.status_code = 403 - rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} + mock_session.get.return_value = reviewers_response - success_response = Mock() - success_response.status_code = 200 - success_response.json.return_value = [] + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) - mock_session.get.side_effect = [rate_limit_response, success_response] + assert len(result) == 2 + assert result[0]["state"] == "APPROVED" + assert result[1]["state"] == "CHANGES_REQUESTED" - result = main.extract_comments(mock_session, "mozilla/firefox", 123) +def test_multiple_review_states(mock_session): + """Test handling multiple different review states.""" + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [ + {"id": 1, "state": "APPROVED", "user": {"login": "user1"}}, + {"id": 2, "state": "CHANGES_REQUESTED", "user": {"login": "user2"}}, + {"id": 3, "state": "COMMENTED", "user": {"login": "user3"}}, + {"id": 4, "state": "DISMISSED", "user": {"login": "user4"}}, + ] - mock_sleep.assert_called_once() - assert result == [] + mock_session.get.return_value = reviewers_response - def test_api_error(self, mock_session): - """Test API error handling when fetching comments.""" - error_response = Mock() - error_response.status_code = 404 - error_response.text = "Not Found" + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) - mock_session.get.return_value = error_response + assert len(result) == 4 + states = [r["state"] for r in result] + assert "APPROVED" in states + assert "CHANGES_REQUESTED" in states + assert "COMMENTED" in states - with pytest.raises(SystemExit) as exc_info: - main.extract_comments(mock_session, "mozilla/firefox", 123) +def test_empty_reviewers_list(mock_session): + """Test handling PR with no reviewers.""" + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [] - assert "GitHub API error 404" in str(exc_info.value) + mock_session.get.return_value = reviewers_response - def test_custom_github_api_url(self, mock_session): - """Test using custom GitHub API URL for comments.""" - custom_url = "https://mock-github.example.com" + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) - comments_response = Mock() - comments_response.status_code = 
+@patch("main.sleep_for_rate_limit")
+def test_rate_limit_handling_reviewers(mock_sleep, mock_session):
+    """Test rate limit handling when fetching reviewers."""
+    rate_limit_response = Mock()
+    rate_limit_response.status_code = 403
+    rate_limit_response.headers = {"X-RateLimit-Remaining": "0"}
-        call_args = mock_session.get.call_args
-        assert custom_url in call_args[0][0]
+
+    success_response = Mock()
+    success_response.status_code = 200
+    success_response.json.return_value = []
+
+    mock_session.get.side_effect = [rate_limit_response, success_response]
+
+    result = main.extract_reviewers(mock_session, "mozilla/firefox", 123)
-class TestTransformData:
-    """Tests for transform_data function."""
+
+    mock_sleep.assert_called_once()
+    assert result == []
-    def test_basic_pr_transformation(self):
-        """Test basic pull request field mapping."""
-        raw_data = [
-            {
-                "number": 123,
-                "title": "Fix login bug",
-                "state": "closed",
-                "created_at": "2024-01-01T10:00:00Z",
-                "updated_at": "2024-01-02T10:00:00Z",
-                "merged_at": "2024-01-02T12:00:00Z",
-                "labels": [],
-                "commit_data": [],
-                "reviewer_data": [],
-                "comment_data": [],
-            }
-        ]
+
+def test_api_error_reviewers(mock_session):
+    """Test API error handling when fetching reviewers."""
+    error_response = Mock()
+    error_response.status_code = 500
+    error_response.text = "Internal Server Error"
-        result = main.transform_data(raw_data, "mozilla/firefox")
+
+    mock_session.get.return_value = error_response
-        assert len(result["pull_requests"]) == 1
-        pr = result["pull_requests"][0]
-        assert pr["pull_request_id"] == 123
-        assert pr["current_status"] == "closed"
-        assert pr["date_created"] == "2024-01-01T10:00:00Z"
-        assert pr["date_modified"] == "2024-01-02T10:00:00Z"
-        assert pr["date_landed"] == "2024-01-02T12:00:00Z"
-        assert pr["target_repository"] == "mozilla/firefox"
-
-    def test_bug_id_extraction_basic(self):
-        """Test bug ID extraction from PR title."""
-        test_cases = [
-            ("Bug 1234567 - Fix issue", 1234567),
-            ("bug 1234567: Update code", 1234567),
-            ("Fix for bug 7654321", 7654321),
-            ("b=9876543 - Change behavior", 9876543),
-        ]
+
+    with pytest.raises(SystemExit) as exc_info:
+        main.extract_reviewers(mock_session, "mozilla/firefox", 123)
-        for title, expected_bug_id in test_cases:
-            raw_data = [
-                {
-                    "number": 1,
-                    "title": title,
-                    "state": "open",
-                    "labels": [],
-                    "commit_data": [],
-                    "reviewer_data": [],
-                    "comment_data": [],
-                }
-            ]
-
-            result = main.transform_data(raw_data, "mozilla/firefox")
-            assert result["pull_requests"][0]["bug_id"] == expected_bug_id
-
-    def test_bug_id_extraction_with_hash(self):
-        """Test bug ID extraction with # symbol."""
-        raw_data = [
-            {
-                "number": 1,
-                "title": "Bug #1234567 - Fix issue",
-                "state": "open",
-                "labels": [],
-                "commit_data": [],
-                "reviewer_data": [],
-                "comment_data": [],
-            }
-        ]
+
+    assert "GitHub API error 500" in str(exc_info.value)
-        result = main.transform_data(raw_data, "mozilla/firefox")
-        assert result["pull_requests"][0]["bug_id"] == 1234567
+
+def test_custom_github_api_url_reviewers(mock_session):
+    """Test using custom GitHub API URL for reviewers."""
+    custom_url = "https://mock-github.example.com"
-    def test_bug_id_filter_large_numbers(self):
-        """Test that bug IDs >= 100000000 are filtered out."""
-        raw_data = [
-            {
-                "number": 1,
-                "title": "Bug 999999999 - Invalid bug ID",
-                "state": "open",
-                "labels": [],
-                "commit_data": [],
-                "reviewer_data": [],
-                "comment_data": [],
-            }
-        ]
+
+    reviewers_response = Mock()
+    reviewers_response.status_code = 200
+    reviewers_response.json.return_value = []
-        result = main.transform_data(raw_data, "mozilla/firefox")
-        assert result["pull_requests"][0]["bug_id"] is None
+
+    mock_session.get.return_value = reviewers_response
-    def test_bug_id_no_match(self):
-        """Test PR title with no bug ID."""
-        raw_data = [
-            {
-                "number": 1,
-                "title": "Update documentation",
-                "state": "open",
-                "labels": [],
-                "commit_data": [],
-                "reviewer_data": [],
-                "comment_data": [],
-            }
-        ]
+
+    main.extract_reviewers(
+        mock_session, "mozilla/firefox", 123, github_api_url=custom_url
+    )
-        result = main.transform_data(raw_data, "mozilla/firefox")
-        assert result["pull_requests"][0]["bug_id"] is None
+
+    call_args = mock_session.get.call_args
+    assert custom_url in call_args[0][0]
-    def test_labels_extraction(self):
-        """Test labels array extraction."""
-        raw_data = [
-            {
-                "number": 1,
-                "title": "PR with labels",
-                "state": "open",
-                "labels": [
-                    {"name": "bug"},
-                    {"name": "priority-high"},
-                    {"name": "needs-review"},
-                ],
-                "commit_data": [],
-                "reviewer_data": [],
-                "comment_data": [],
-            }
-        ]
-        result = main.transform_data(raw_data, "mozilla/firefox")
-        labels = result["pull_requests"][0]["labels"]
-        assert len(labels) == 3
-        assert "bug" in labels
-        assert "priority-high" in labels
-        assert "needs-review" in labels
-
-    def test_labels_empty_list(self):
-        """Test handling empty labels list."""
-        raw_data = [
-            {
-                "number": 1,
-                "title": "PR without labels",
-                "state": "open",
-                "labels": [],
-                "commit_data": [],
-                "reviewer_data": [],
-                "comment_data": [],
-            }
-        ]
-        result = main.transform_data(raw_data, "mozilla/firefox")
-        assert result["pull_requests"][0]["labels"] == []
+
+# =============================================================================
+# TESTS FOR EXTRACT_COMMENTS
+# =============================================================================
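+# NOTE: PR conversation comments are served by the Issues API
+# (/issues/{number}/comments); /pulls/{number}/comments would return inline
+# review comments instead (see test_uses_issues_endpoint).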
- "commit_data": [], - "reviewer_data": [], - "comment_data": [], - } - ] + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [] - result = main.transform_data(raw_data, "mozilla/firefox") - assert result["pull_requests"][0]["bug_id"] is None + mock_session.get.return_value = reviewers_response - def test_bug_id_no_match(self): - """Test PR title with no bug ID.""" - raw_data = [ - { - "number": 1, - "title": "Update documentation", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [], - "comment_data": [], - } - ] + main.extract_reviewers( + mock_session, "mozilla/firefox", 123, github_api_url=custom_url + ) - result = main.transform_data(raw_data, "mozilla/firefox") - assert result["pull_requests"][0]["bug_id"] is None + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] - def test_labels_extraction(self): - """Test labels array extraction.""" - raw_data = [ - { - "number": 1, - "title": "PR with labels", - "state": "open", - "labels": [ - {"name": "bug"}, - {"name": "priority-high"}, - {"name": "needs-review"}, - ], - "commit_data": [], - "reviewer_data": [], - "comment_data": [], - } - ] - result = main.transform_data(raw_data, "mozilla/firefox") - labels = result["pull_requests"][0]["labels"] - assert len(labels) == 3 - assert "bug" in labels - assert "priority-high" in labels - assert "needs-review" in labels - - def test_labels_empty_list(self): - """Test handling empty labels list.""" - raw_data = [ - { - "number": 1, - "title": "PR without labels", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [], - "comment_data": [], - } - ] - result = main.transform_data(raw_data, "mozilla/firefox") - assert result["pull_requests"][0]["labels"] == [] +# ============================================================================= +# TESTS FOR EXTRACT_COMMENTS +# ============================================================================= - def test_commit_transformation(self): - """Test commit fields mapping.""" - raw_data = [ - { - "number": 123, - "title": "PR with commits", - "state": "open", - "labels": [], - "commit_data": [ - { - "sha": "abc123", - "commit": { - "author": { - "name": "Test Author", - "date": "2024-01-01T12:00:00Z", - } - }, - "files": [ - { - "filename": "src/main.py", - "additions": 10, - "deletions": 5, - } - ], - } - ], - "reviewer_data": [], - "comment_data": [], - } - ] + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [ + { + "id": 456, + "user": {"login": "commenter1"}, + "body": "This looks good", + "created_at": "2024-01-01T14:00:00Z", + }, + { + "id": 457, + "user": {"login": "commenter2"}, + "body": "I have concerns", + "created_at": "2024-01-01T15:00:00Z", + }, + ] - result = main.transform_data(raw_data, "mozilla/firefox") + mock_session.get.return_value = comments_response - assert len(result["commits"]) == 1 - commit = result["commits"][0] - assert commit["pull_request_id"] == 123 - assert commit["target_repository"] == "mozilla/firefox" - assert commit["commit_sha"] == "abc123" - assert commit["date_created"] == "2024-01-01T12:00:00Z" - assert commit["author_username"] == "Test Author" - assert commit["filename"] == "src/main.py" - assert commit["lines_added"] == 10 - assert commit["lines_removed"] == 5 - - def test_commit_file_flattening(self): - """Test that each file becomes a separate row.""" - raw_data = [ - { - "number": 123, - "title": "PR with multiple 
files", - "state": "open", - "labels": [], - "commit_data": [ - { - "sha": "abc123", - "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, - "files": [ - {"filename": "file1.py", "additions": 10, "deletions": 5}, - {"filename": "file2.py", "additions": 20, "deletions": 2}, - {"filename": "file3.py", "additions": 5, "deletions": 15}, - ], - } - ], - "reviewer_data": [], - "comment_data": [], - } - ] + result = main.extract_comments(mock_session, "mozilla/firefox", 123) - result = main.transform_data(raw_data, "mozilla/firefox") + assert len(result) == 2 + assert result[0]["id"] == 456 + assert result[1]["id"] == 457 - # Should have 3 rows in commits table (one per file) - assert len(result["commits"]) == 3 - filenames = [c["filename"] for c in result["commits"]] - assert "file1.py" in filenames - assert "file2.py" in filenames - assert "file3.py" in filenames +def test_uses_issues_endpoint(mock_session): + """Test that comments use /issues endpoint not /pulls.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] - def test_multiple_commits_with_files(self): - """Test multiple commits with multiple files per PR.""" - raw_data = [ - { - "number": 123, - "title": "PR with multiple commits", - "state": "open", - "labels": [], - "commit_data": [ - { - "sha": "commit1", - "commit": {"author": {"name": "Author1", "date": "2024-01-01"}}, - "files": [ - {"filename": "file1.py", "additions": 10, "deletions": 0} - ], - }, - { - "sha": "commit2", - "commit": {"author": {"name": "Author2", "date": "2024-01-02"}}, - "files": [ - {"filename": "file2.py", "additions": 5, "deletions": 2}, - {"filename": "file3.py", "additions": 8, "deletions": 3}, - ], - }, - ], - "reviewer_data": [], - "comment_data": [], - } - ] + mock_session.get.return_value = comments_response - result = main.transform_data(raw_data, "mozilla/firefox") + main.extract_comments(mock_session, "mozilla/firefox", 123) - # Should have 3 rows total (1 file from commit1, 2 files from commit2) - assert len(result["commits"]) == 3 - assert result["commits"][0]["commit_sha"] == "commit1" - assert result["commits"][1]["commit_sha"] == "commit2" - assert result["commits"][2]["commit_sha"] == "commit2" + call_args = mock_session.get.call_args + url = call_args[0][0] + assert "/issues/123/comments" in url + assert "/pulls/123/comments" not in url - def test_reviewer_transformation(self): - """Test reviewer fields mapping.""" - raw_data = [ - { - "number": 123, - "title": "PR with reviewers", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [ - { - "id": 789, - "user": {"login": "reviewer1"}, - "state": "APPROVED", - "submitted_at": "2024-01-01T15:00:00Z", - } - ], - "comment_data": [], - } - ] +def test_multiple_comments(mock_session): + """Test handling multiple comments.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [ + {"id": i, "user": {"login": f"user{i}"}, "body": f"Comment {i}"} + for i in range(1, 11) + ] - result = main.transform_data(raw_data, "mozilla/firefox") + mock_session.get.return_value = comments_response - assert len(result["reviewers"]) == 1 - reviewer = result["reviewers"][0] - assert reviewer["pull_request_id"] == 123 - assert reviewer["target_repository"] == "mozilla/firefox" - assert reviewer["reviewer_username"] == "reviewer1" - assert reviewer["status"] == "APPROVED" - assert reviewer["date_reviewed"] == "2024-01-01T15:00:00Z" + result = 
main.extract_comments(mock_session, "mozilla/firefox", 123) - def test_multiple_review_states(self): - """Test handling multiple review states.""" - raw_data = [ - { - "number": 123, - "title": "PR with multiple reviews", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [ - { - "id": 1, - "user": {"login": "user1"}, - "state": "APPROVED", - "submitted_at": "2024-01-01T15:00:00Z", - }, - { - "id": 2, - "user": {"login": "user2"}, - "state": "CHANGES_REQUESTED", - "submitted_at": "2024-01-01T16:00:00Z", - }, - { - "id": 3, - "user": {"login": "user3"}, - "state": "COMMENTED", - "submitted_at": "2024-01-01T17:00:00Z", - }, - ], - "comment_data": [], - } - ] + assert len(result) == 10 - result = main.transform_data(raw_data, "mozilla/firefox") +def test_empty_comments_list(mock_session): + """Test handling PR with no comments.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] - assert len(result["reviewers"]) == 3 - states = [r["status"] for r in result["reviewers"]] - assert "APPROVED" in states - assert "CHANGES_REQUESTED" in states - assert "COMMENTED" in states + mock_session.get.return_value = comments_response - def test_date_approved_from_earliest_approval(self): - """Test that date_approved is set to earliest APPROVED review.""" - raw_data = [ - { - "number": 123, - "title": "PR with multiple approvals", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [ - { - "id": 1, - "user": {"login": "user1"}, - "state": "APPROVED", - "submitted_at": "2024-01-02T15:00:00Z", - }, - { - "id": 2, - "user": {"login": "user2"}, - "state": "APPROVED", - "submitted_at": "2024-01-01T14:00:00Z", # Earliest - }, - { - "id": 3, - "user": {"login": "user3"}, - "state": "APPROVED", - "submitted_at": "2024-01-03T16:00:00Z", - }, - ], - "comment_data": [], - } - ] + result = main.extract_comments(mock_session, "mozilla/firefox", 123) - result = main.transform_data(raw_data, "mozilla/firefox") + assert result == [] - pr = result["pull_requests"][0] - assert pr["date_approved"] == "2024-01-01T14:00:00Z" +@patch("main.sleep_for_rate_limit") +def test_rate_limit_handling(mock_sleep, mock_session): + """Test rate limit handling when fetching comments.""" + rate_limit_response = Mock() + rate_limit_response.status_code = 403 + rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} - def test_comment_transformation(self): - """Test comment fields mapping.""" - raw_data = [ - { - "number": 123, - "title": "PR with comments", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [], - "comment_data": [ - { - "id": 456, - "user": {"login": "commenter1"}, - "body": "This looks great!", - "created_at": "2024-01-01T14:00:00Z", - "pull_request_review_id": None, - } - ], - } - ] + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = [] - result = main.transform_data(raw_data, "mozilla/firefox") + mock_session.get.side_effect = [rate_limit_response, success_response] - assert len(result["comments"]) == 1 - comment = result["comments"][0] - assert comment["pull_request_id"] == 123 - assert comment["target_repository"] == "mozilla/firefox" - assert comment["comment_id"] == 456 - assert comment["author_username"] == "commenter1" - assert comment["date_created"] == "2024-01-01T14:00:00Z" - assert comment["character_count"] == 17 - - def test_comment_character_count(self): - """Test character count calculation for comments.""" - raw_data = [ - { 
- "number": 123, - "title": "PR", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [], - "comment_data": [ - { - "id": 1, - "user": {"login": "user1"}, - "body": "Short", - "created_at": "2024-01-01", - }, - { - "id": 2, - "user": {"login": "user2"}, - "body": "This is a much longer comment with more text", - "created_at": "2024-01-01", - }, - ], - } - ] + result = main.extract_comments(mock_session, "mozilla/firefox", 123) - result = main.transform_data(raw_data, "mozilla/firefox") + mock_sleep.assert_called_once() + assert result == [] - assert result["comments"][0]["character_count"] == 5 - assert result["comments"][1]["character_count"] == 44 +def test_api_error(mock_session): + """Test API error handling when fetching comments.""" + error_response = Mock() + error_response.status_code = 404 + error_response.text = "Not Found" - def test_comment_status_from_review(self): - """Test that comment status is mapped from review_id_statuses.""" - raw_data = [ - { - "number": 123, - "title": "PR", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [ - { - "id": 789, - "user": {"login": "reviewer"}, - "state": "APPROVED", - "submitted_at": "2024-01-01", - } - ], - "comment_data": [ - { - "id": 456, - "user": {"login": "commenter"}, - "body": "LGTM", - "created_at": "2024-01-01", - "pull_request_review_id": 789, - } - ], - } - ] + mock_session.get.return_value = error_response - result = main.transform_data(raw_data, "mozilla/firefox") + with pytest.raises(SystemExit) as exc_info: + main.extract_comments(mock_session, "mozilla/firefox", 123) - # Comment should have status from the review - assert result["comments"][0]["status"] == "APPROVED" + assert "GitHub API error 404" in str(exc_info.value) - def test_comment_empty_body(self): - """Test handling comments with empty or None body.""" - raw_data = [ - { - "number": 123, - "title": "PR", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [], - "comment_data": [ - { - "id": 1, - "user": {"login": "user1"}, - "body": None, - "created_at": "2024-01-01", - }, - { - "id": 2, - "user": {"login": "user2"}, - "body": "", - "created_at": "2024-01-01", - }, - ], - } - ] +def test_custom_github_api_url(mock_session): + """Test using custom GitHub API URL for comments.""" + custom_url = "https://mock-github.example.com" - result = main.transform_data(raw_data, "mozilla/firefox") + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] - assert result["comments"][0]["character_count"] == 0 - assert result["comments"][1]["character_count"] == 0 + mock_session.get.return_value = comments_response - def test_empty_raw_data(self): - """Test handling empty input list.""" - result = main.transform_data([], "mozilla/firefox") + main.extract_comments( + mock_session, "mozilla/firefox", 123, github_api_url=custom_url + ) - assert result["pull_requests"] == [] - assert result["commits"] == [] - assert result["reviewers"] == [] - assert result["comments"] == [] + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] - def test_pr_without_commits_reviewers_comments(self): - """Test PR with no commits, reviewers, or comments.""" - raw_data = [ - { - "number": 123, - "title": "Minimal PR", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [], - "comment_data": [], - } - ] - result = main.transform_data(raw_data, "mozilla/firefox") - assert len(result["pull_requests"]) == 1 - assert 
len(result["commits"]) == 0 - assert len(result["reviewers"]) == 0 - assert len(result["comments"]) == 0 +# ============================================================================= +# TESTS FOR TRANSFORM_DATA +# ============================================================================= - def test_return_structure(self): - """Test that transform_data returns dict with 4 keys.""" + raw_data = [ + { + "number": 123, + "title": "Fix login bug", + "state": "closed", + "created_at": "2024-01-01T10:00:00Z", + "updated_at": "2024-01-02T10:00:00Z", + "merged_at": "2024-01-02T12:00:00Z", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["pull_requests"]) == 1 + pr = result["pull_requests"][0] + assert pr["pull_request_id"] == 123 + assert pr["current_status"] == "closed" + assert pr["date_created"] == "2024-01-01T10:00:00Z" + assert pr["date_modified"] == "2024-01-02T10:00:00Z" + assert pr["date_landed"] == "2024-01-02T12:00:00Z" + assert pr["target_repository"] == "mozilla/firefox" + +def test_bug_id_extraction_basic(): + """Test bug ID extraction from PR title.""" + test_cases = [ + ("Bug 1234567 - Fix issue", 1234567), + ("bug 1234567: Update code", 1234567), + ("Fix for bug 7654321", 7654321), + ("b=9876543 - Change behavior", 9876543), + ] + + for title, expected_bug_id in test_cases: raw_data = [ { "number": 1, - "title": "Test", + "title": title, "state": "open", "labels": [], "commit_data": [], @@ -1444,638 +958,835 @@ def test_return_structure(self): ] result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] == expected_bug_id + +def test_bug_id_extraction_with_hash(): + """Test bug ID extraction with # symbol.""" + raw_data = [ + { + "number": 1, + "title": "Bug #1234567 - Fix issue", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] == 1234567 + +def test_bug_id_filter_large_numbers(): + """Test that bug IDs >= 100000000 are filtered out.""" + raw_data = [ + { + "number": 1, + "title": "Bug 999999999 - Invalid bug ID", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] is None + +def test_bug_id_no_match(): + """Test PR title with no bug ID.""" + raw_data = [ + { + "number": 1, + "title": "Update documentation", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] is None + +def test_labels_extraction(): + """Test labels array extraction.""" + raw_data = [ + { + "number": 1, + "title": "PR with labels", + "state": "open", + "labels": [ + {"name": "bug"}, + {"name": "priority-high"}, + {"name": "needs-review"}, + ], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + labels = result["pull_requests"][0]["labels"] + assert len(labels) == 3 + assert "bug" in labels + assert "priority-high" in labels + assert "needs-review" in labels + +def test_labels_empty_list(): + """Test handling empty labels list.""" + raw_data = [ + { + "number": 1, + 
"title": "PR without labels", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["labels"] == [] + +def test_commit_transformation(): + """Test commit fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with commits", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc123", + "commit": { + "author": { + "name": "Test Author", + "date": "2024-01-01T12:00:00Z", + } + }, + "files": [ + { + "filename": "src/main.py", + "additions": 10, + "deletions": 5, + } + ], + } + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["commits"]) == 1 + commit = result["commits"][0] + assert commit["pull_request_id"] == 123 + assert commit["target_repository"] == "mozilla/firefox" + assert commit["commit_sha"] == "abc123" + assert commit["date_created"] == "2024-01-01T12:00:00Z" + assert commit["author_username"] == "Test Author" + assert commit["filename"] == "src/main.py" + assert commit["lines_added"] == 10 + assert commit["lines_removed"] == 5 + +def test_commit_file_flattening(): + """Test that each file becomes a separate row.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple files", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc123", + "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, + "files": [ + {"filename": "file1.py", "additions": 10, "deletions": 5}, + {"filename": "file2.py", "additions": 20, "deletions": 2}, + {"filename": "file3.py", "additions": 5, "deletions": 15}, + ], + } + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Should have 3 rows in commits table (one per file) + assert len(result["commits"]) == 3 + filenames = [c["filename"] for c in result["commits"]] + assert "file1.py" in filenames + assert "file2.py" in filenames + assert "file3.py" in filenames + +def test_multiple_commits_with_files(): + """Test multiple commits with multiple files per PR.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple commits", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "commit1", + "commit": {"author": {"name": "Author1", "date": "2024-01-01"}}, + "files": [ + {"filename": "file1.py", "additions": 10, "deletions": 0} + ], + }, + { + "sha": "commit2", + "commit": {"author": {"name": "Author2", "date": "2024-01-02"}}, + "files": [ + {"filename": "file2.py", "additions": 5, "deletions": 2}, + {"filename": "file3.py", "additions": 8, "deletions": 3}, + ], + }, + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Should have 3 rows total (1 file from commit1, 2 files from commit2) + assert len(result["commits"]) == 3 + assert result["commits"][0]["commit_sha"] == "commit1" + assert result["commits"][1]["commit_sha"] == "commit2" + assert result["commits"][2]["commit_sha"] == "commit2" + +def test_reviewer_transformation(): + """Test reviewer fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with reviewers", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 789, + "user": {"login": "reviewer1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + } + ], + "comment_data": [], + } + ] + + result = 
main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["reviewers"]) == 1 + reviewer = result["reviewers"][0] + assert reviewer["pull_request_id"] == 123 + assert reviewer["target_repository"] == "mozilla/firefox" + assert reviewer["reviewer_username"] == "reviewer1" + assert reviewer["status"] == "APPROVED" + assert reviewer["date_reviewed"] == "2024-01-01T15:00:00Z" + +def test_multiple_review_states(): + """Test handling multiple review states.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple reviews", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + }, + { + "id": 2, + "user": {"login": "user2"}, + "state": "CHANGES_REQUESTED", + "submitted_at": "2024-01-01T16:00:00Z", + }, + { + "id": 3, + "user": {"login": "user3"}, + "state": "COMMENTED", + "submitted_at": "2024-01-01T17:00:00Z", + }, + ], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["reviewers"]) == 3 + states = [r["status"] for r in result["reviewers"]] + assert "APPROVED" in states + assert "CHANGES_REQUESTED" in states + assert "COMMENTED" in states + +def test_date_approved_from_earliest_approval(): + """Test that date_approved is set to earliest APPROVED review.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple approvals", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "state": "APPROVED", + "submitted_at": "2024-01-02T15:00:00Z", + }, + { + "id": 2, + "user": {"login": "user2"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T14:00:00Z", # Earliest + }, + { + "id": 3, + "user": {"login": "user3"}, + "state": "APPROVED", + "submitted_at": "2024-01-03T16:00:00Z", + }, + ], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + pr = result["pull_requests"][0] + assert pr["date_approved"] == "2024-01-01T14:00:00Z" + +def test_comment_transformation(): + """Test comment fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with comments", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 456, + "user": {"login": "commenter1"}, + "body": "This looks great!", + "created_at": "2024-01-01T14:00:00Z", + "pull_request_review_id": None, + } + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["comments"]) == 1 + comment = result["comments"][0] + assert comment["pull_request_id"] == 123 + assert comment["target_repository"] == "mozilla/firefox" + assert comment["comment_id"] == 456 + assert comment["author_username"] == "commenter1" + assert comment["date_created"] == "2024-01-01T14:00:00Z" + assert comment["character_count"] == 17 + +def test_comment_character_count(): + """Test character count calculation for comments.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "body": "Short", + "created_at": "2024-01-01", + }, + { + "id": 2, + "user": {"login": "user2"}, + "body": "This is a much longer comment with more text", + "created_at": "2024-01-01", + }, + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert result["comments"][0]["character_count"] == 5 + 
assert result["comments"][1]["character_count"] == 44 + +def test_comment_status_from_review(): + """Test that comment status is mapped from review_id_statuses.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 789, + "user": {"login": "reviewer"}, + "state": "APPROVED", + "submitted_at": "2024-01-01", + } + ], + "comment_data": [ + { + "id": 456, + "user": {"login": "commenter"}, + "body": "LGTM", + "created_at": "2024-01-01", + "pull_request_review_id": 789, + } + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Comment should have status from the review + assert result["comments"][0]["status"] == "APPROVED" + +def test_comment_empty_body(): + """Test handling comments with empty or None body.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "body": None, + "created_at": "2024-01-01", + }, + { + "id": 2, + "user": {"login": "user2"}, + "body": "", + "created_at": "2024-01-01", + }, + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert result["comments"][0]["character_count"] == 0 + assert result["comments"][1]["character_count"] == 0 + +def test_empty_raw_data(): + """Test handling empty input list.""" + result = main.transform_data([], "mozilla/firefox") + + assert result["pull_requests"] == [] + assert result["commits"] == [] + assert result["reviewers"] == [] + assert result["comments"] == [] + +def test_pr_without_commits_reviewers_comments(): + """Test PR with no commits, reviewers, or comments.""" + raw_data = [ + { + "number": 123, + "title": "Minimal PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["pull_requests"]) == 1 + assert len(result["commits"]) == 0 + assert len(result["reviewers"]) == 0 + assert len(result["comments"]) == 0 + +def test_return_structure(): + """Test that transform_data returns dict with 4 keys.""" + raw_data = [ + { + "number": 1, + "title": "Test", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert isinstance(result, dict) + assert "pull_requests" in result + assert "commits" in result + assert "reviewers" in result + assert "comments" in result + +def test_all_tables_have_target_repository(): + """Test that all tables include target_repository field.""" + raw_data = [ + { + "number": 123, + "title": "Test PR", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc", + "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, + "files": [ + {"filename": "test.py", "additions": 1, "deletions": 0} + ], + } + ], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "reviewer"}, + "state": "APPROVED", + "submitted_at": "2024-01-01", + } + ], + "comment_data": [ + { + "id": 2, + "user": {"login": "commenter"}, + "body": "Test", + "created_at": "2024-01-01", + } + ], + } + ] - assert isinstance(result, dict) - assert "pull_requests" in result - assert "commits" in result - assert "reviewers" in result - assert "comments" in result - - def test_all_tables_have_target_repository(self): - """Test that all tables include target_repository field.""" - raw_data = 
[ - { - "number": 123, - "title": "Test PR", - "state": "open", - "labels": [], - "commit_data": [ - { - "sha": "abc", - "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, - "files": [ - {"filename": "test.py", "additions": 1, "deletions": 0} - ], - } - ], - "reviewer_data": [ - { - "id": 1, - "user": {"login": "reviewer"}, - "state": "APPROVED", - "submitted_at": "2024-01-01", - } - ], - "comment_data": [ - { - "id": 2, - "user": {"login": "commenter"}, - "body": "Test", - "created_at": "2024-01-01", - } - ], - } - ] - - result = main.transform_data(raw_data, "mozilla/firefox") + result = main.transform_data(raw_data, "mozilla/firefox") - assert result["pull_requests"][0]["target_repository"] == "mozilla/firefox" - assert result["commits"][0]["target_repository"] == "mozilla/firefox" - assert result["reviewers"][0]["target_repository"] == "mozilla/firefox" - assert result["comments"][0]["target_repository"] == "mozilla/firefox" + assert result["pull_requests"][0]["target_repository"] == "mozilla/firefox" + assert result["commits"][0]["target_repository"] == "mozilla/firefox" + assert result["reviewers"][0]["target_repository"] == "mozilla/firefox" + assert result["comments"][0]["target_repository"] == "mozilla/firefox" -class TestLoadData: - """Tests for load_data function.""" - @patch("main.datetime") - def test_load_all_tables(self, mock_datetime, mock_bigquery_client): - """Test loading all 4 tables to BigQuery.""" - mock_datetime.now.return_value.strftime.return_value = "2024-01-15" +# ============================================================================= +# TESTS FOR LOAD_DATA +# ============================================================================= - transformed_data = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [{"commit_sha": "abc"}], - "reviewers": [{"reviewer_username": "user1"}], - "comments": [{"comment_id": 123}], - } - main.load_data(mock_bigquery_client, "test_dataset", transformed_data) +@patch("main.datetime") +def test_load_data_inserts_all_tables(mock_datetime, mock_bigquery_client): + """Test that load_data inserts all tables correctly.""" + mock_datetime.now.return_value.strftime.return_value = "2024-01-15" - # Should call insert_rows_json 4 times (once per table) - assert mock_bigquery_client.insert_rows_json.call_count == 4 + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [{"commit_sha": "abc"}], + "reviewers": [{"reviewer_username": "user1"}], + "comments": [{"comment_id": 123}], + } - @patch("main.datetime") - def test_adds_snapshot_date(self, mock_datetime, mock_bigquery_client): - """Test that snapshot_date is added to all rows.""" - mock_datetime.now.return_value.strftime.return_value = "2024-01-15" + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) - transformed_data = { - "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], - "commits": [], - "reviewers": [], - "comments": [], - } + # Should call insert_rows_json 4 times (once per table) + assert mock_bigquery_client.insert_rows_json.call_count == 4 - main.load_data(mock_bigquery_client, "test_dataset", transformed_data) +@patch("main.datetime") +def test_adds_snapshot_date(mock_datetime, mock_bigquery_client): + """Test that snapshot_date is added to all rows.""" + mock_datetime.now.return_value.strftime.return_value = "2024-01-15" - call_args = mock_bigquery_client.insert_rows_json.call_args - rows = call_args[0][1] - assert all(row["snapshot_date"] == "2024-01-15" for row in rows) - - def 
test_constructs_correct_table_ref(self, mock_bigquery_client): - """Test that table_ref is constructed correctly.""" - transformed_data = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], - "reviewers": [], - "comments": [], - } + transformed_data = { + "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], + "commits": [], + "reviewers": [], + "comments": [], + } - main.load_data(mock_bigquery_client, "my_dataset", transformed_data) + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) - call_args = mock_bigquery_client.insert_rows_json.call_args - table_ref = call_args[0][0] - assert table_ref == "test-project.my_dataset.pull_requests" + call_args = mock_bigquery_client.insert_rows_json.call_args + rows = call_args[0][1] + assert all(row["snapshot_date"] == "2024-01-15" for row in rows) - def test_empty_transformed_data_skipped(self, mock_bigquery_client): - """Test that empty transformed_data dict is skipped.""" - transformed_data = {} +def test_constructs_correct_table_ref(mock_bigquery_client): + """Test that table_ref is constructed correctly.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } - main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + main.load_data(mock_bigquery_client, "my_dataset", transformed_data) - mock_bigquery_client.insert_rows_json.assert_not_called() + call_args = mock_bigquery_client.insert_rows_json.call_args + table_ref = call_args[0][0] + assert table_ref == "test-project.my_dataset.pull_requests" - def test_skips_empty_tables_individually(self, mock_bigquery_client): - """Test that empty tables are skipped individually.""" - transformed_data = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], # Empty, should be skipped - "reviewers": [], # Empty, should be skipped - "comments": [{"comment_id": 456}], - } +def test_empty_transformed_data_skipped(mock_bigquery_client): + """Test that empty transformed_data dict is skipped.""" + transformed_data = {} - main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) - # Should only call insert_rows_json twice (for PRs and comments) - assert mock_bigquery_client.insert_rows_json.call_count == 2 + mock_bigquery_client.insert_rows_json.assert_not_called() - def test_only_pull_requests_table(self, mock_bigquery_client): - """Test loading only pull_requests table.""" - transformed_data = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], - "reviewers": [], - "comments": [], - } +def test_skips_empty_tables_individually(mock_bigquery_client): + """Test that empty tables are skipped individually.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], # Empty, should be skipped + "reviewers": [], # Empty, should be skipped + "comments": [{"comment_id": 456}], + } - main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) - assert mock_bigquery_client.insert_rows_json.call_count == 1 + # Should only call insert_rows_json twice (for PRs and comments) + assert mock_bigquery_client.insert_rows_json.call_count == 2 - def test_raises_exception_on_insert_errors(self, mock_bigquery_client): - """Test that Exception is raised on BigQuery insert errors.""" - mock_bigquery_client.insert_rows_json.return_value = [ - {"index": 0, "errors": ["Insert failed"]} - ] +def 
test_only_pull_requests_table(mock_bigquery_client): + """Test loading only pull_requests table.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } - transformed_data = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], - "reviewers": [], - "comments": [], - } + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) - with pytest.raises(Exception) as exc_info: - main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + assert mock_bigquery_client.insert_rows_json.call_count == 1 - assert "BigQuery insert errors" in str(exc_info.value) +def test_raises_exception_on_insert_errors(mock_bigquery_client): + """Test that Exception is raised on BigQuery insert errors.""" + mock_bigquery_client.insert_rows_json.return_value = [ + {"index": 0, "errors": ["Insert failed"]} + ] - def test_verifies_client_insert_called_correctly(self, mock_bigquery_client): - """Test that client.insert_rows_json is called with correct arguments.""" - transformed_data = { - "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], - "commits": [], - "reviewers": [], - "comments": [], - } + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } + with pytest.raises(Exception) as exc_info: main.load_data(mock_bigquery_client, "test_dataset", transformed_data) - call_args = mock_bigquery_client.insert_rows_json.call_args - table_ref, rows = call_args[0] + assert "BigQuery insert errors" in str(exc_info.value) - assert "pull_requests" in table_ref - assert len(rows) == 2 +def test_verifies_client_insert_called_correctly(mock_bigquery_client): + """Test that client.insert_rows_json is called with correct arguments.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], + "commits": [], + "reviewers": [], + "comments": [], + } + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) -class TestMain: - """Tests for main function.""" + call_args = mock_bigquery_client.insert_rows_json.call_args + table_ref, rows = call_args[0] - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_requires_github_repos( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that GITHUB_REPOS is required.""" - with patch.dict( - os.environ, - {"BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test"}, - clear=True, - ): - with pytest.raises(SystemExit) as exc_info: - main.main() + assert "pull_requests" in table_ref + assert len(rows) == 2 - assert "GITHUB_REPOS" in str(exc_info.value) - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_requires_bigquery_project( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that BIGQUERY_PROJECT is required.""" - with patch.dict( - os.environ, - {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_DATASET": "test"}, - clear=True, - ): - with pytest.raises(SystemExit) as exc_info: - main.main() - assert "BIGQUERY_PROJECT" in str(exc_info.value) - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_requires_bigquery_dataset( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that BIGQUERY_DATASET is required.""" - with patch.dict( - os.environ, - {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test"}, - clear=True, - ): - with 
pytest.raises(SystemExit) as exc_info: - main.main() +# ============================================================================= +# TESTS FOR MAIN +# ============================================================================= - assert "BIGQUERY_DATASET" in str(exc_info.value) - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_github_token_optional_with_warning( - self, mock_session_class, mock_bq_client, mock_setup_logging +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_requires_github_repos(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that GITHUB_REPOS is required.""" + with patch.dict( + os.environ, + {"BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test"}, + clear=True, ): - """Test that GITHUB_TOKEN is optional but warns if missing.""" - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])), - ): - # Should not raise, but should log warning - result = main.main() - assert result == 0 - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_splits_github_repos_by_comma( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that GITHUB_REPOS is split by comma.""" - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, - ): + with pytest.raises(SystemExit) as exc_info: main.main() - # Should be called twice (once per repo) - assert mock_extract.call_count == 2 + assert "GITHUB_REPOS" in str(exc_info.value) - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_honors_github_api_url( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that GITHUB_API_URL is honored.""" - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - "GITHUB_API_URL": "https://custom-api.example.com", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, - ): - main.main() - - call_kwargs = mock_extract.call_args[1] - assert call_kwargs["github_api_url"] == "https://custom-api.example.com" - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_honors_bigquery_emulator_host( - self, mock_session_class, mock_bq_client_class, mock_setup_logging +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_requires_bigquery_project(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that BIGQUERY_PROJECT is required.""" + with patch.dict( + os.environ, + {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_DATASET": "test"}, + clear=True, ): - """Test that BIGQUERY_EMULATOR_HOST is honored.""" - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - "BIGQUERY_EMULATOR_HOST": "http://localhost:9050", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])), - ): + with 
pytest.raises(SystemExit) as exc_info: main.main() - # Verify BigQuery client was created with emulator settings - mock_bq_client_class.assert_called_once() - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_creates_session_with_headers( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that session is created with Accept and User-Agent headers.""" - mock_session = MagicMock() - mock_session_class.return_value = mock_session + assert "BIGQUERY_PROJECT" in str(exc_info.value) - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])), - ): - main.main() - # Verify session headers were set - assert mock_session.headers.update.called - call_args = mock_session.headers.update.call_args[0][0] - assert "Accept" in call_args - assert "User-Agent" in call_args - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_sets_authorization_header_with_token( - self, mock_session_class, mock_bq_client, mock_setup_logging +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_requires_bigquery_dataset(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that BIGQUERY_DATASET is required.""" + with patch.dict( + os.environ, + {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test"}, + clear=True, ): - """Test that Authorization header is set when token provided.""" - mock_session = MagicMock() - mock_session_class.return_value = mock_session - - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "test-token-123", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])), - ): + with pytest.raises(SystemExit) as exc_info: main.main() - # Verify Authorization header was set - assert mock_session.headers.__setitem__.called - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - @patch("main.extract_pull_requests") - @patch("main.transform_data") - @patch("main.load_data") - def test_single_repo_successful_etl( - self, - mock_load, - mock_transform, - mock_extract, - mock_session_class, - mock_bq_client, - mock_setup_logging, - ): - """Test successful ETL for single repository.""" - mock_extract.return_value = iter([[{"number": 1}]]) - mock_transform.return_value = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], - "reviewers": [], - "comments": [], - } + assert "BIGQUERY_DATASET" in str(exc_info.value) - with patch.dict( +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_github_token_optional_with_warning(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that GITHUB_TOKEN is optional but warns if missing.""" + with ( + patch.dict( os.environ, { "GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", }, clear=True, - ): - result = main.main() - - assert result == 0 - mock_extract.assert_called_once() - mock_transform.assert_called_once() - mock_load.assert_called_once() - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - @patch("main.extract_pull_requests") - 
@patch("main.transform_data") - @patch("main.load_data") - def test_multiple_repos_processing( - self, - mock_load, - mock_transform, - mock_extract, - mock_session_class, - mock_bq_client, - mock_setup_logging, + ), + patch("main.extract_pull_requests", return_value=iter([])), ): - """Test processing multiple repositories.""" - mock_extract.return_value = iter([[{"number": 1}]]) - mock_transform.return_value = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], - "reviewers": [], - "comments": [], - } + # Should not raise, but should log warning + result = main.main() + assert result == 0 - with patch.dict( +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_splits_github_repos_by_comma(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that GITHUB_REPOS is split by comma.""" + with ( + patch.dict( os.environ, { - "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev,mozilla/addons", + "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev", "BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test", "GITHUB_TOKEN": "token", }, clear=True, - ): - result = main.main() - - assert result == 0 - # Should process 3 repositories - assert mock_extract.call_count == 3 - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - @patch("main.extract_pull_requests") - @patch("main.transform_data") - @patch("main.load_data") - def test_processes_chunks_iteratively( - self, - mock_load, - mock_transform, - mock_extract, - mock_session_class, - mock_bq_client, - mock_setup_logging, + ), + patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, ): - """Test that chunks are processed iteratively from generator.""" - # Return 3 chunks - mock_extract.return_value = iter( - [ - [{"number": 1}], - [{"number": 2}], - [{"number": 3}], - ] - ) - mock_transform.return_value = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], - "reviewers": [], - "comments": [], - } - - with patch.dict( + main.main() + + # Should be called twice (once per repo) + assert mock_extract.call_count == 2 + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_honors_github_api_url(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that GITHUB_API_URL is honored.""" + with ( + patch.dict( os.environ, { "GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test", "GITHUB_TOKEN": "token", + "GITHUB_API_URL": "https://custom-api.example.com", }, clear=True, - ): - result = main.main() - - assert result == 0 - # Transform and load should be called 3 times (once per chunk) - assert mock_transform.call_count == 3 - assert mock_load.call_count == 3 - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_returns_zero_on_success( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that main returns 0 on success.""" - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])), - ): - result = main.main() - - assert result == 0 - - -@pytest.mark.integration -class TestIntegration: - """Integration tests that test multiple components together.""" - - @patch("main.setup_logging") - @patch("main.load_data") - @patch("main.bigquery.Client") - 
@patch("requests.Session") - def test_end_to_end_with_mocked_github( - self, mock_session_class, mock_bq_client, mock_load, mock_setup_logging + ), + patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, ): - """Test end-to-end flow with mocked GitHub responses.""" - mock_session = MagicMock() - mock_session_class.return_value = mock_session - - # Mock PR response - pr_response = Mock() - pr_response.status_code = 200 - pr_response.json.return_value = [ - {"number": 1, "title": "Bug 1234567 - Test PR", "state": "open"} - ] - pr_response.links = {} - - # Mock commits, reviewers, comments responses - empty_response = Mock() - empty_response.status_code = 200 - empty_response.json.return_value = [] - - mock_session.get.side_effect = [ - pr_response, - empty_response, - empty_response, - empty_response, - ] - - with patch.dict( + main.main() + + call_kwargs = mock_extract.call_args[1] + assert call_kwargs["github_api_url"] == "https://custom-api.example.com" + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_honors_bigquery_emulator_host(mock_session_class, mock_bq_client_class, mock_setup_logging): + """Test that BIGQUERY_EMULATOR_HOST is honored.""" + with ( + patch.dict( os.environ, { "GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test", "GITHUB_TOKEN": "token", + "BIGQUERY_EMULATOR_HOST": "http://localhost:9050", }, clear=True, - ): - result = main.main() - - assert result == 0 - mock_load.assert_called_once() - - # Verify transformed data structure - call_args = mock_load.call_args[0] - transformed_data = call_args[2] - assert "pull_requests" in transformed_data - assert len(transformed_data["pull_requests"]) == 1 - - @patch("main.setup_logging") - @patch("main.load_data") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_bug_id_extraction_through_pipeline( - self, mock_session_class, mock_bq_client, mock_load, mock_setup_logging + ), + patch("main.extract_pull_requests", return_value=iter([])), ): - """Test bug ID extraction through full pipeline.""" - mock_session = MagicMock() - mock_session_class.return_value = mock_session + main.main() - pr_response = Mock() - pr_response.status_code = 200 - pr_response.json.return_value = [ - { - "number": 1, - "title": "Bug 9876543 - Fix critical issue", - "state": "closed", - } - ] - pr_response.links = {} + # Verify BigQuery client was created with emulator settings + mock_bq_client_class.assert_called_once() - empty_response = Mock() - empty_response.status_code = 200 - empty_response.json.return_value = [] +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_creates_session_with_headers(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that session is created with Accept and User-Agent headers.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session - mock_session.get.side_effect = [ - pr_response, - empty_response, - empty_response, - empty_response, - ] - - with patch.dict( + with ( + patch.dict( os.environ, { "GITHUB_REPOS": "mozilla/firefox", @@ -2084,59 +1795,176 @@ def test_bug_id_extraction_through_pipeline( "GITHUB_TOKEN": "token", }, clear=True, - ): - main.main() + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): + main.main() + + # Verify session headers were set + assert mock_session.headers.update.called + call_args = mock_session.headers.update.call_args[0][0] + assert "Accept" in call_args 
+ assert "User-Agent" in call_args + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_sets_authorization_header_with_token(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that Authorization header is set when token provided.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "test-token-123", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): + main.main() + + # Verify Authorization header was set + assert mock_session.headers.__setitem__.called + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +@patch("main.extract_pull_requests") +@patch("main.transform_data") +@patch("main.load_data") +def test_single_repo_successful_etl( + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, +): + """Test successful ETL for single repository.""" + mock_extract.return_value = iter([[{"number": 1}]]) + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } - call_args = mock_load.call_args[0] - transformed_data = call_args[2] - pr = transformed_data["pull_requests"][0] - assert pr["bug_id"] == 9876543 - - @patch("main.setup_logging") - @patch("main.load_data") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_pagination_through_full_flow( - self, mock_session_class, mock_bq_client, mock_load, mock_setup_logging + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, ): - """Test pagination through full ETL flow.""" - mock_session = MagicMock() - mock_session_class.return_value = mock_session - - # First page - pr_response_1 = Mock() - pr_response_1.status_code = 200 - pr_response_1.json.return_value = [ - {"number": 1, "title": "PR 1", "state": "open"} - ] - pr_response_1.links = { - "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} - } + result = main.main() + + assert result == 0 + mock_extract.assert_called_once() + mock_transform.assert_called_once() + mock_load.assert_called_once() + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +@patch("main.extract_pull_requests") +@patch("main.transform_data") +@patch("main.load_data") +def test_multiple_repos_processing( + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, +): + """Test processing multiple repositories.""" + mock_extract.return_value = iter([[{"number": 1}]]) + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } - # Second page - pr_response_2 = Mock() - pr_response_2.status_code = 200 - pr_response_2.json.return_value = [ - {"number": 2, "title": "PR 2", "state": "open"} - ] - pr_response_2.links = {} - - empty_response = Mock() - empty_response.status_code = 200 - empty_response.json.return_value = [] - - mock_session.get.side_effect = [ - pr_response_1, - empty_response, - empty_response, - empty_response, - pr_response_2, - empty_response, - empty_response, - empty_response, + with patch.dict( + os.environ, + { + 
"GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev,mozilla/addons", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + # Should process 3 repositories + assert mock_extract.call_count == 3 + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +@patch("main.extract_pull_requests") +@patch("main.transform_data") +@patch("main.load_data") +def test_processes_chunks_iteratively( + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, +): + """Test that chunks are processed iteratively from generator.""" + # Return 3 chunks + mock_extract.return_value = iter( + [ + [{"number": 1}], + [{"number": 2}], + [{"number": 3}], ] + ) + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } - with patch.dict( + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + # Transform and load should be called 3 times (once per chunk) + assert mock_transform.call_count == 3 + assert mock_load.call_count == 3 + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_returns_zero_on_success(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that main returns 0 on success.""" + with ( + patch.dict( os.environ, { "GITHUB_REPOS": "mozilla/firefox", @@ -2145,8 +1973,166 @@ def test_pagination_through_full_flow( "GITHUB_TOKEN": "token", }, clear=True, - ): - main.main() + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): + result = main.main() + + assert result == 0 + + +@pytest.mark.integration +@patch("main.setup_logging") +@patch("main.load_data") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_full_etl_flow_transforms_data_correctly(mock_session_class, mock_bq_client, mock_load, mock_setup_logging): + """Test full ETL flow with mocked GitHub responses.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + # Mock PR response + pr_response = Mock() + pr_response.status_code = 200 + pr_response.json.return_value = [ + {"number": 1, "title": "Bug 1234567 - Test PR", "state": "open"} + ] + pr_response.links = {} + + # Mock commits, reviewers, comments responses + empty_response = Mock() + empty_response.status_code = 200 + empty_response.json.return_value = [] + + mock_session.get.side_effect = [ + pr_response, + empty_response, + empty_response, + empty_response, + ] + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + mock_load.assert_called_once() + + # Verify transformed data structure + call_args = mock_load.call_args[0] + transformed_data = call_args[2] + assert "pull_requests" in transformed_data + assert len(transformed_data["pull_requests"]) == 1 + +@patch("main.setup_logging") +@patch("main.load_data") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_bug_id_extraction_through_pipeline(mock_session_class, mock_bq_client, mock_load, mock_setup_logging): + """Test bug ID extraction through full pipeline.""" + mock_session = MagicMock() + 
+    mock_session_class.return_value = mock_session
+
+    pr_response = Mock()
+    pr_response.status_code = 200
+    pr_response.json.return_value = [
+        {
+            "number": 1,
+            "title": "Bug 9876543 - Fix critical issue",
+            "state": "closed",
+        }
+    ]
+    pr_response.links = {}
+
+    empty_response = Mock()
+    empty_response.status_code = 200
+    empty_response.json.return_value = []
+
+    mock_session.get.side_effect = [
+        pr_response,
+        empty_response,
+        empty_response,
+        empty_response,
+    ]
+
+    with patch.dict(
+        os.environ,
+        {
+            "GITHUB_REPOS": "mozilla/firefox",
+            "BIGQUERY_PROJECT": "test",
+            "BIGQUERY_DATASET": "test",
+            "GITHUB_TOKEN": "token",
+        },
+        clear=True,
+    ):
+        main.main()
+
+    call_args = mock_load.call_args[0]
+    transformed_data = call_args[2]
+    pr = transformed_data["pull_requests"][0]
+    assert pr["bug_id"] == 9876543
+
+@patch("main.setup_logging")
+@patch("main.load_data")
+@patch("main.bigquery.Client")
+@patch("requests.Session")
+def test_pagination_through_full_flow(mock_session_class, mock_bq_client, mock_load, mock_setup_logging):
+    """Test pagination through full ETL flow."""
+    mock_session = MagicMock()
+    mock_session_class.return_value = mock_session
+
+    # First page
+    pr_response_1 = Mock()
+    pr_response_1.status_code = 200
+    pr_response_1.json.return_value = [
+        {"number": 1, "title": "PR 1", "state": "open"}
+    ]
+    pr_response_1.links = {
+        "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"}
+    }
+
+    # Second page
+    pr_response_2 = Mock()
+    pr_response_2.status_code = 200
+    pr_response_2.json.return_value = [
+        {"number": 2, "title": "PR 2", "state": "open"}
+    ]
+    pr_response_2.links = {}
+
+    empty_response = Mock()
+    empty_response.status_code = 200
+    empty_response.json.return_value = []
+
+    mock_session.get.side_effect = [
+        pr_response_1,
+        empty_response,
+        empty_response,
+        empty_response,
+        pr_response_2,
+        empty_response,
+        empty_response,
+        empty_response,
+    ]
+
+    with patch.dict(
+        os.environ,
+        {
+            "GITHUB_REPOS": "mozilla/firefox",
+            "BIGQUERY_PROJECT": "test",
+            "BIGQUERY_DATASET": "test",
+            "GITHUB_TOKEN": "token",
+        },
+        clear=True,
+    ):
+        main.main()
-        # Should be called twice (once per chunk/page)
-        assert mock_load.call_count == 2
+    # Should be called twice (once per chunk/page)
+    assert mock_load.call_count == 2

From cb582d3b60a2476725d94aa75db7aad186077747 Mon Sep 17 00:00:00 2001
From: David Lawrence
Date: Fri, 23 Jan 2026 18:20:32 -0500
Subject: [PATCH 12/12] Fixed action to install testing dependencies

---
 .github/workflows/tests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 5480c08..d77e706 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -16,6 +16,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements.txt
+          pip install -e ".[dev]"
       - name: Run unit tests with coverage
         run: |
          pytest -m "not integration and not slow" --cov=main --cov-report=term-missing --cov-fail-under=80
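Note: the `pip install -e ".[dev]"` step assumes the project's pyproject.toml
declares a "dev" optional-dependency group. That file is not shown in this
series, so the following is only an illustrative sketch of such a section;
the group name and package list are assumptions, not part of the patch:

    [project.optional-dependencies]
    # Hypothetical dev extra covering the test tooling the workflow invokes
    # (pytest with coverage support); the real list lives in pyproject.toml.
    dev = [
        "pytest",
        "pytest-cov",
    ]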