From da8cb65cbe8e603ce8ef6972a59140ddeb4393f5 Mon Sep 17 00:00:00 2001
From: David Lawrence
Date: Wed, 21 Jan 2026 16:57:08 -0500
Subject: [PATCH 01/12] feat: Added comprehensive unit testing and github action to run tests on new pull requests
---
 .github/workflows/tests.yml |   75 ++
 TESTING.md                  |  662 ++++++++++++
 pytest.ini                  |   49 +
 requirements.txt            |    6 +
 test_main.py                | 2106 +++++++++++++++++++++++++++++++++++
 5 files changed, 2898 insertions(+)
 create mode 100644 .github/workflows/tests.yml
 create mode 100644 TESTING.md
 create mode 100644 pytest.ini
 create mode 100644 test_main.py

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
new file mode 100644
index 0000000..87e2800
--- /dev/null
+++ b/.github/workflows/tests.yml
@@ -0,0 +1,75 @@
+name: Tests and Linting
+
+on:
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install black flake8 mypy isort
+
+      - name: Run black
+        run: black --check main.py test_main.py
+
+      - name: Run isort
+        run: isort --check-only main.py test_main.py
+
+      - name: Run flake8
+        run: flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503
+
+      - name: Run mypy
+        run: mypy main.py --no-strict-optional --ignore-missing-imports
+
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Run unit tests with coverage
+        run: |
+          pytest -m "not integration and not slow" --cov=main --cov-report=term-missing --cov-fail-under=80
+
+      - name: Run all tests
+        run: |
+          pytest --cov=main --cov-report=xml --cov-report=html
+
+      - name: Upload coverage reports
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-reports
+          path: |
+            htmlcov/
+            coverage.xml
+
+  integration-test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Run integration test with docker-compose
+        run: |
+          docker-compose up --build --abort-on-container-exit --exit-code-from github-etl
+
+      - name: Cleanup
+        if: always()
+        run: docker-compose down -v
diff --git a/TESTING.md b/TESTING.md
new file mode 100644
index 0000000..c0bb5dd
--- /dev/null
+++ b/TESTING.md
@@ -0,0 +1,662 @@
+# Testing Guide for GitHub ETL
+
+This document describes comprehensive testing for the GitHub ETL pipeline, including
+unit tests, integration tests, Docker testing, linting, and CI/CD workflows.
+
+## Table of Contents
+
+1. [Unit Testing](#unit-testing)
+2. [Test Organization](#test-organization)
+3. [Running Tests](#running-tests)
+4. [Code Coverage](#code-coverage)
+5. [Linting and Code Quality](#linting-and-code-quality)
+6. [CI/CD Integration](#cicd-integration)
+7. [Docker Testing](#docker-testing)
+8. [Adding New Tests](#adding-new-tests)
+
+---
+
+## Unit Testing
+
+The test suite in `test_main.py` provides comprehensive coverage for all functions in `main.py`.
+The suite contains **95 tests** (92 unit and 3 integration) covering 9 functions, with a minimum of 80% code coverage enforced in CI.
+
+### Test Structure
+
+Tests are organized into 10 test classes:
+
+1. **TestSetupLogging** (1 test) - Logging configuration
+2. **TestSleepForRateLimit** (4 tests) - Rate limit handling
+3. **TestExtractPullRequests** (14 tests) - PR extraction with pagination and enrichment
+4. **TestExtractCommits** (9 tests) - Commit and file extraction
+5. **TestExtractReviewers** (6 tests) - Reviewer extraction
+6. **TestExtractComments** (7 tests) - Comment extraction (uses /issues endpoint)
+7. **TestTransformData** (26 tests) - Data transformation for all 4 BigQuery tables
+8. **TestLoadData** (8 tests) - BigQuery data loading
+9. **TestMain** (17 tests) - Main ETL orchestration
+10. **TestIntegration** (3 tests) - End-to-end integration tests (marked with `@pytest.mark.integration`)
+
+### Fixtures
+
+Reusable fixtures are defined at the top of `test_main.py`:
+
+- `mock_session` - Mocked `requests.Session`
+- `mock_bigquery_client` - Mocked BigQuery client
+- `mock_pr_response` - Realistic pull request response
+- `mock_commit_response` - Realistic commit with files
+- `mock_reviewer_response` - Realistic reviewer response
+- `mock_comment_response` - Realistic comment response
+
+## Test Organization
+
+### Function Coverage
+
+| Function | Tests | Coverage Target | Key Test Areas |
+|----------|-------|-----------------|----------------|
+| `setup_logging()` | 1 | 100% | Logger configuration |
+| `sleep_for_rate_limit()` | 4 | 100% | Rate limit sleep logic, edge cases |
+| `extract_pull_requests()` | 14 | 90%+ | Pagination, rate limits, enrichment, error handling |
+| `extract_commits()` | 9 | 85%+ | Commit/file fetching, rate limits, errors |
+| `extract_reviewers()` | 6 | 85%+ | Reviewer states, rate limits, errors |
+| `extract_comments()` | 7 | 85%+ | Comment fetching (via /issues), rate limits |
+| `transform_data()` | 26 | 95%+ | Bug ID extraction, 4 tables, field mapping |
+| `load_data()` | 8 | 90%+ | BigQuery insertion, snapshot dates, errors |
+| `main()` | 17 | 85%+ | Env vars, orchestration, chunking |
+
+**Overall Target: 85-90% coverage** (80% minimum enforced in CI)
+
+### Critical Test Cases
+
+#### Bug ID Extraction
+Tests verify the regex pattern matches:
+- `Bug 1234567 - Fix` → 1234567
+- `bug 1234567` → 1234567
+- `b=1234567` → 1234567
+- `Bug #1234567` → 1234567
+- Filters out IDs >= 100000000
+
+#### Data Transformation
+Tests ensure correct transformation for all 4 BigQuery tables:
+- **pull_requests**: PR metadata, bug IDs, labels, date_approved
+- **commits**: Flattened files (one row per file), commit metadata
+- **reviewers**: Review states, date_approved calculation
+- **comments**: Character count, status mapping from reviews
+
+#### Rate Limiting
+Tests verify rate limit handling at all API levels:
+- Pull requests pagination
+- Commit fetching
+- Reviewer fetching
+- Comment fetching
+
+## Running Tests
+
+### All Tests with Coverage
+
+```bash
+pytest
+```
+
+This runs all tests with coverage reporting (configured in `pytest.ini`).
+
+### Fast Unit Tests Only (Skip Integration)
+
+```bash
+pytest -m "not integration and not slow"
+```
+
+Use this for fast feedback during development.
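+
+The `integration` and `slow` markers used by the `-m` filter are the ones registered
+under `markers` in `pytest.ini`. As a minimal sketch (the test name here is
+hypothetical), a test opts into the integration suite like this:
+
+```python
+import pytest
+
+
+@pytest.mark.integration
+def test_full_pipeline_against_emulator():
+    """Selected by `pytest -m integration`; skipped by the fast filter above."""
+    ...
+```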
+
+### Specific Test Class
+
+```bash
+pytest test_main.py::TestTransformData
+```
+
+### Specific Test Function
+
+```bash
+pytest test_main.py::TestTransformData::test_bug_id_extraction_basic -v
+```
+
+### With Verbose Output
+
+```bash
+pytest -v
+```
+
+### With Coverage Report
+
+```bash
+# Terminal report
+pytest --cov=main --cov-report=term-missing
+
+# HTML report
+pytest --cov=main --cov-report=html
+open htmlcov/index.html
+```
+
+### Integration Tests Only
+
+```bash
+pytest -m integration
+```
+
+## Code Coverage
+
+### Coverage Requirements
+
+- **Minimum**: 80% (enforced in CI via `--cov-fail-under=80`)
+- **Target**: 85-90%
+- **Current**: Run `pytest --cov=main` to see current coverage
+
+### Coverage Configuration
+
+Coverage settings are in `pytest.ini`:
+
+```ini
+[pytest]
+addopts =
+    --cov=main
+    --cov-report=term-missing
+    --cov-report=html
+    --cov-branch
+    --cov-fail-under=80
+```
+
+### Viewing Coverage
+
+```bash
+# Generate HTML coverage report
+pytest --cov=main --cov-report=html
+
+# Open in browser
+xdg-open htmlcov/index.html # Linux
+open htmlcov/index.html # macOS
+```
+
+The HTML report shows:
+- Line-by-line coverage
+- Branch coverage
+- Missing lines highlighted
+- Per-file coverage percentages
+
+## Linting and Code Quality
+
+### Available Linters
+
+The project uses these linting tools (defined in `requirements.txt`):
+
+- **black** - Code formatting
+- **isort** - Import sorting
+- **flake8** - Style and syntax checking
+- **mypy** - Static type checking
+
+### Running Linters
+
+```bash
+# Run black (auto-format)
+black main.py test_main.py
+
+# Check formatting without changes
+black --check main.py test_main.py
+
+# Sort imports
+isort main.py test_main.py
+
+# Check import sorting
+isort --check-only main.py test_main.py
+
+# Run flake8
+flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503
+
+# Run mypy
+mypy main.py --no-strict-optional --ignore-missing-imports
+```
+
+### All Linting Checks
+
+```bash
+# Run all linters in sequence
+black --check main.py test_main.py && \
+isort --check-only main.py test_main.py && \
+flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503 && \
+mypy main.py --no-strict-optional --ignore-missing-imports
+```
+
+## CI/CD Integration
+
+### GitHub Actions Workflow
+
+The `.github/workflows/tests.yml` workflow runs on pull requests that target `main`:
+
+**Lint Job:**
+1. Runs black (format check)
+2. Runs isort (import check)
+3. Runs flake8 (style check)
+4. Runs mypy (type check)
+
+**Test Job:**
+1. Runs fast unit tests with 80% coverage threshold
+2. Runs all tests (including integration)
+3. Uploads coverage reports as artifacts
+
+**Integration Test Job:**
+1. Runs the docker-compose environment and fails the job if the ETL container exits non-zero
+
+### Workflow Triggers
+
+- Pull requests targeting the `main` branch
+- To run on pushes as well, extend the `on:` block in `tests.yml`
+
+### Viewing Results
+
+- Check the Actions tab in GitHub
+- Coverage artifacts are uploaded for each run
+- Failed linting or tests block merges when branch protection requires these checks
+
+## Docker Testing
+
+### Overview
+
+The `docker-compose.yml` configuration provides a complete local testing environment with:
+
+1. **Mock GitHub API** - A Flask-based mock service that simulates the GitHub Pull Requests API
+2. **BigQuery Emulator** - A local BigQuery instance for testing data loads
+3. **ETL Service** - The main GitHub ETL application configured to use the mock services
+
+### Quick Start
+
+#### Start all services
+
+```bash
+docker-compose up --build
+```
+
+This will:
+
+- Build and start the mock GitHub API (port 5000)
+- Start the BigQuery emulator (ports 9050, 9060)
+- Build and run the ETL service
+
+The ETL service will automatically:
+
+- Fetch 250 mock pull requests from the mock GitHub API
+- Transform the data
+- Load it into the BigQuery emulator
+
+#### View logs
+
+```bash
+# All services
+docker-compose logs -f
+
+# Specific service
+docker-compose logs -f github-etl
+docker-compose logs -f bigquery-emulator
+docker-compose logs -f mock-github-api
+```
+
+#### Stop services
+
+```bash
+docker-compose down
+```
+
+### Architecture
+
+#### Mock GitHub API Service
+
+- **Port**: 5000
+- **Endpoint**: `http://localhost:5000/repos/{owner}/{repo}/pulls`
+- **Mock data**: Generates 250 sample pull requests with realistic data
+- **Features**:
+  - Pagination support (per_page, page parameters)
+  - Realistic PR data (numbers, titles, states, timestamps, users, etc.)
+  - Mock rate limit headers
+  - No authentication required
+
+#### BigQuery Emulator Service
+
+- **Ports**:
+  - 9050 (BigQuery API)
+  - 9060 (Discovery/Admin API)
+- **Configuration**: Uses `data.yml` to define the schema
+- **Project**: test-project
+- **Dataset**: test_dataset
+- **Table**: pull_requests
+
+#### ETL Service
+
+The ETL service is configured via environment variables in `docker-compose.yml`:
+
+```yaml
+environment:
+  GITHUB_REPOS: "mozilla/firefox"
+  GITHUB_API_URL: "http://mock-github-api:5000" # Points to mock API
+  BIGQUERY_PROJECT: "test"
+  BIGQUERY_DATASET: "github_etl"
+  BIGQUERY_EMULATOR_HOST: "http://bigquery-emulator:9050"
+```
+
+### Customization
+
+#### Using Real GitHub API
+
+To test with the real GitHub API instead of the mock:
+
+1. Set `GITHUB_TOKEN` environment variable
+2. Remove or comment out `GITHUB_API_URL` in docker-compose.yml
+3. Update `depends_on` to not require mock-github-api
+
+```bash
+export GITHUB_TOKEN="your_github_token"
+docker-compose up github-etl bigquery-emulator
+```
+
+#### Adjusting Mock Data
+
+Edit `mock_github_api.py` to customize:
+
+- Total number of PRs (default: 250)
+- PR field values
+- Pagination behavior
+
+#### Modifying BigQuery Schema
+
+Edit `data.yml` to change the table schema. The schema matches the fields
+extracted in `main.py`'s `transform_data()` function.
+
+### Querying the BigQuery Emulator
+
+You can query the BigQuery emulator using the BigQuery Python client:
+
+```python
+from google.cloud import bigquery
+from google.api_core.client_options import ClientOptions
+
+client = bigquery.Client(
+    project="test-project",
+    client_options=ClientOptions(api_endpoint="http://localhost:9050")
+)
+
+query = """
+SELECT pr_number, title, state, user_login
+FROM `test-project.test_dataset.pull_requests`
+LIMIT 10
+"""
+
+for row in client.query(query):
+    print(f"PR #{row.pr_number}: {row.title} - {row.state}")
+```
+
+Or use the `bq` command-line tool with the emulator endpoint.
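+
+When scripting queries like the one above (for example in CI), the emulator may not
+be ready the moment its container starts. The snippet below is a sketch of a simple
+readiness probe, not part of `main.py`; it assumes the emulator is reachable on
+`localhost:9050` as configured in `docker-compose.yml`:
+
+```python
+import time
+
+import requests
+
+
+def wait_for_emulator(endpoint="http://localhost:9050", timeout=30.0):
+    """Poll the emulator endpoint until the port answers or the timeout expires."""
+    deadline = time.time() + timeout
+    while time.time() < deadline:
+        try:
+            # Any HTTP response at all means the server socket is up.
+            requests.get(endpoint, timeout=2)
+            return True
+        except requests.ConnectionError:
+            time.sleep(1)
+    return False
+```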
+
+### Troubleshooting Docker Services
+
+#### Services not starting
+
+Check if ports are already in use:
+
+```bash
+lsof -i :5000 # Mock GitHub API
+lsof -i :9050 # BigQuery emulator
+```
+
+#### ETL fails to connect
+
+Ensure services are healthy:
+
+```bash
+docker-compose ps
+```
+
+Check service logs:
+
+```bash
+docker-compose logs bigquery-emulator
+docker-compose logs mock-github-api
+```
+
+#### Schema mismatch errors
+
+Verify `data.yml` schema matches fields in `main.py:transform_data()`.
+
+### Development Workflow
+
+1. Make changes to `main.py`
+2. Restart the ETL service: `docker-compose restart github-etl`
+3. View logs: `docker-compose logs -f github-etl`
+
+The `main.py` file is mounted as a volume, so changes are reflected without rebuilding.
+
+### Cleanup
+
+Remove all containers and volumes:
+
+```bash
+docker-compose down -v
+```
+
+Remove built images:
+
+```bash
+docker-compose down --rmi all
+```
+
+---
+
+## Adding New Tests
+
+### Testing Patterns
+
+#### 1. Mock External Dependencies
+
+Always mock external API calls and BigQuery operations:
+
+```python
+@patch("requests.Session")
+def test_api_call(mock_session_class):
+    mock_session = MagicMock()
+    mock_session_class.return_value = mock_session
+
+    mock_response = Mock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = [{"id": 1}]
+
+    mock_session.get.return_value = mock_response
+    # Test code here
+```
+
+#### 2. Use Fixtures
+
+Leverage existing fixtures for common test data:
+
+```python
+def test_with_fixtures(mock_session, mock_pr_response):
+    # Use mock_session and mock_pr_response
+    pass
+```
+
+#### 3. Test Edge Cases
+
+Always test:
+- Empty inputs
+- None values
+- Missing fields
+- Rate limits
+- API errors (404, 500, etc.)
+- Boundary conditions
+
+#### 4. Verify Call Arguments
+
+Check that functions are called with correct parameters:
+
+```python
+mock_extract.assert_called_once_with(
+    session=mock_session,
+    repo="mozilla/firefox",
+    github_api_url="https://api.github.com"
+)
+```
+
+### Example: Adding a New Test
+
+```python
+class TestNewFunction:
+    """Tests for new_function."""
+
+    def test_basic_functionality(self, mock_session):
+        """Test basic happy path."""
+        # Arrange
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {"result": "success"}
+        mock_session.get.return_value = mock_response
+
+        # Act
+        result = main.new_function(mock_session, "arg1")
+
+        # Assert
+        assert result == {"result": "success"}
+        mock_session.get.assert_called_once()
+
+    def test_error_handling(self, mock_session):
+        """Test error handling."""
+        mock_response = Mock()
+        mock_response.status_code = 500
+        mock_response.text = "Internal Error"
+        mock_session.get.return_value = mock_response
+
+        with pytest.raises(SystemExit) as exc_info:
+            main.new_function(mock_session, "arg1")
+
+        assert "500" in str(exc_info.value)
+```
+
+### Test Organization Guidelines
+
+1. **Group related tests** in test classes
+2. **Use descriptive names** like `test_handles_rate_limit_on_commits`
+3. **One assertion concept per test** - Test one thing at a time
+4. **Arrange-Act-Assert pattern** - Structure tests clearly
+5. **Add docstrings** to explain what each test verifies
+
+### Mocking Patterns
+
+#### Mocking Time
+
+```python
+@patch("time.time")
+@patch("time.sleep")
+def test_with_time(mock_sleep, mock_time):
+    mock_time.return_value = 1000
+    # Test code
+```
+
+#### Mocking Environment Variables
+
+```python
+with patch.dict(os.environ, {"VAR_NAME": "value"}, clear=True):
+    ...  # test code that reads VAR_NAME
+```
+
+#### Mocking Generators
+
+```python
+mock_extract.return_value = iter([[{"id": 1}], [{"id": 2}]])
+```
+
+### Running Tests During Development
+
+```bash
+# Auto-run tests on file changes (requires pytest-watch)
+pip install pytest-watch
+ptw -- --cov=main -m "not integration"
+```
+
+### Debugging Tests
+
+```bash
+# Drop into debugger on failures
+pytest --pdb
+
+# Show print statements
+pytest -s
+
+# Verbose with full diff
+pytest -vv
+```
+
+### Coverage Tips
+
+If coverage is below 80%:
+
+1. Run `pytest --cov=main --cov-report=term-missing` to see missing lines
+2. Look for untested branches (if/else paths)
+3. Check error handling paths
+4. Verify edge cases are covered
+
+## Resources
+
+- [pytest documentation](https://docs.pytest.org/)
+- [pytest-cov documentation](https://pytest-cov.readthedocs.io/)
+- [unittest.mock documentation](https://docs.python.org/3/library/unittest.mock.html)
+
+## Troubleshooting
+
+### Tests Pass Locally But Fail in CI
+
+- Check Python version (must be 3.11)
+- Verify all dependencies are in `requirements.txt`
+- Look for environment-specific issues
+
+### Coverage Dropped Below 80%
+
+- Run locally: `pytest --cov=main --cov-report=html`
+- Open `htmlcov/index.html` to see uncovered lines
+- Add tests for missing coverage
+
+### Import Errors
+
+- Ensure `PYTHONPATH` includes project root
+- Check that `__init__.py` files exist if needed
+- Verify module names match file names
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..d4a601a
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,49 @@
+[pytest]
+# Pytest configuration for GitHub ETL project
+
+# Test discovery patterns
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+
+# Output options
+addopts =
+    -v
+    --strict-markers
+    --tb=short
+    --cov=main
+    --cov-report=term-missing
+    --cov-report=html
+    --cov-branch
+    --cov-fail-under=80
+
+# --cov-fail-under above sets the minimum coverage threshold (adjust as needed)
+
+# Test paths
+testpaths = .
+
+# Markers for organizing tests
+markers =
+    unit: Unit tests for individual functions
+    integration: Integration tests that test multiple components
+    slow: Tests that take longer to run
+
+# Logging
+log_cli = false
+log_cli_level = INFO
+log_cli_format = %(asctime)s [%(levelname)8s] %(message)s
+log_cli_date_format = %Y-%m-%d %H:%M:%S
+
+# Coverage options (note: coverage.py does not read pytest.ini; move these to .coveragerc or setup.cfg if needed)
+[coverage:run]
+source = .
+omit = + test_*.py + .venv/* + venv/* + */site-packages/* + +[coverage:report] +precision = 2 +show_missing = true +skip_covered = false diff --git a/requirements.txt b/requirements.txt index 008aa8a..8ede7d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,9 @@ google-cloud-bigquery==3.25.0 pytest>=7.0.0 pytest-mock>=3.10.0 pytest-cov>=4.0.0 + +# Linting and formatting tools +black>=24.0.0 +flake8>=7.0.0 +mypy>=1.8.0 +isort>=5.13.0 diff --git a/test_main.py b/test_main.py new file mode 100644 index 0000000..7165677 --- /dev/null +++ b/test_main.py @@ -0,0 +1,2106 @@ +#!/usr/bin/env python3 +""" +Comprehensive test suite for GitHub ETL main.py + +This test suite provides complete coverage for all functions in main.py, +including extraction, transformation, loading, and orchestration logic. +""" + +import logging +import os +import sys +import time +from datetime import datetime, timezone +from unittest.mock import Mock, MagicMock, patch, call +import pytest +import requests +from google.cloud import bigquery + +import main + + +# ============================================================================= +# FIXTURES +# ============================================================================= + + +@pytest.fixture +def mock_session(): + """Provide a mocked requests.Session for testing.""" + session = Mock(spec=requests.Session) + session.headers = {} + return session + + +@pytest.fixture +def mock_bigquery_client(): + """Provide a mocked BigQuery client for testing.""" + client = Mock(spec=bigquery.Client) + client.project = "test-project" + client.insert_rows_json = Mock(return_value=[]) + return client + + +@pytest.fixture +def mock_pr_response(): + """Provide a realistic pull request response for testing.""" + return { + "number": 123, + "title": "Bug 1234567 - Fix login issue", + "state": "closed", + "created_at": "2024-01-01T10:00:00Z", + "updated_at": "2024-01-02T10:00:00Z", + "merged_at": "2024-01-02T10:00:00Z", + "user": {"login": "testuser"}, + "head": {"ref": "fix-branch"}, + "base": {"ref": "main"}, + "labels": [{"name": "bug"}, {"name": "priority-high"}], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + + +@pytest.fixture +def mock_commit_response(): + """Provide a realistic commit response with files.""" + return { + "sha": "abc123def456", + "commit": { + "author": { + "name": "Test Author", + "email": "test@example.com", + "date": "2024-01-01T12:00:00Z", + } + }, + "files": [ + { + "filename": "src/login.py", + "additions": 10, + "deletions": 5, + "changes": 15, + }, + { + "filename": "tests/test_login.py", + "additions": 20, + "deletions": 2, + "changes": 22, + }, + ], + } + + +@pytest.fixture +def mock_reviewer_response(): + """Provide a realistic reviewer response.""" + return { + "id": 789, + "user": {"login": "reviewer1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + "body": "LGTM", + } + + +@pytest.fixture +def mock_comment_response(): + """Provide a realistic comment response.""" + return { + "id": 456, + "user": {"login": "commenter1"}, + "created_at": "2024-01-01T14:00:00Z", + "body": "This looks good to me", + "pull_request_review_id": None, + } + + +# ============================================================================= +# TEST CLASSES +# ============================================================================= + + +class TestSetupLogging: + """Tests for setup_logging function.""" + + def test_setup_logging_configures_logger(self): + """Test that setup_logging configures the root logger 
correctly.""" + main.setup_logging() + + root_logger = logging.getLogger() + assert root_logger.level == logging.INFO + assert len(root_logger.handlers) > 0 + + # Check that at least one handler is a StreamHandler + has_stream_handler = any( + isinstance(handler, logging.StreamHandler) + for handler in root_logger.handlers + ) + assert has_stream_handler + + +class TestSleepForRateLimit: + """Tests for sleep_for_rate_limit function.""" + + @patch("time.time") + @patch("time.sleep") + def test_sleep_for_rate_limit_when_remaining_is_zero( + self, mock_sleep, mock_time + ): + """Test that sleep_for_rate_limit sleeps until reset time.""" + mock_time.return_value = 1000 + + mock_response = Mock() + mock_response.headers = { + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": "1120", # 120 seconds from now + } + + main.sleep_for_rate_limit(mock_response) + + mock_sleep.assert_called_once_with(120) + + @patch("time.time") + @patch("time.sleep") + def test_sleep_for_rate_limit_when_reset_already_passed( + self, mock_sleep, mock_time + ): + """Test that sleep_for_rate_limit doesn't sleep negative time.""" + mock_time.return_value = 2000 + + mock_response = Mock() + mock_response.headers = { + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": "1500", # Already passed + } + + main.sleep_for_rate_limit(mock_response) + + # Should sleep for 0 seconds (max of 0 and negative value) + mock_sleep.assert_called_once_with(0) + + @patch("time.sleep") + def test_sleep_for_rate_limit_when_remaining_not_zero(self, mock_sleep): + """Test that sleep_for_rate_limit doesn't sleep when remaining > 0.""" + mock_response = Mock() + mock_response.headers = { + "X-RateLimit-Remaining": "5", + "X-RateLimit-Reset": "1500", + } + + main.sleep_for_rate_limit(mock_response) + + # Should not sleep when remaining > 0 + mock_sleep.assert_not_called() + + @patch("time.sleep") + def test_sleep_for_rate_limit_with_missing_headers(self, mock_sleep): + """Test sleep_for_rate_limit with missing rate limit headers.""" + mock_response = Mock() + mock_response.headers = {} + + main.sleep_for_rate_limit(mock_response) + + # Should not sleep when headers are missing (defaults to remaining=1) + mock_sleep.assert_not_called() + + +class TestExtractPullRequests: + """Tests for extract_pull_requests function.""" + + def test_extract_single_page(self, mock_session): + """Test extracting data from a single page of results.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + {"number": 1, "title": "PR 1"}, + {"number": 2, "title": "PR 2"}, + ] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + # Mock the extract functions + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + assert len(result) == 1 + assert len(result[0]) == 2 + assert result[0][0]["number"] == 1 + assert result[0][1]["number"] == 2 + + def test_extract_multiple_pages(self, mock_session): + """Test extracting data across multiple pages with pagination.""" + # First page response + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [ + {"number": 1, "title": "PR 1"}, + {"number": 2, "title": "PR 2"}, + ] + mock_response_1.links = { + "next": { + "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2" + } + } + + # Second page response + 
mock_response_2 = Mock() + mock_response_2.status_code = 200 + mock_response_2.json.return_value = [{"number": 3, "title": "PR 3"}] + mock_response_2.links = {} + + mock_session.get.side_effect = [mock_response_1, mock_response_2] + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + assert len(result) == 2 + assert len(result[0]) == 2 + assert len(result[1]) == 1 + assert result[0][0]["number"] == 1 + assert result[1][0]["number"] == 3 + + def test_enriches_prs_with_commit_data(self, mock_session): + """Test that PRs are enriched with commit data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + mock_commits = [{"sha": "abc123"}] + + with patch( + "main.extract_commits", return_value=mock_commits + ) as mock_extract_commits, patch( + "main.extract_reviewers", return_value=[] + ), patch( + "main.extract_comments", return_value=[] + ): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + assert result[0][0]["commit_data"] == mock_commits + mock_extract_commits.assert_called_once() + + def test_enriches_prs_with_reviewer_data(self, mock_session): + """Test that PRs are enriched with reviewer data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + mock_reviewers = [{"id": 789, "state": "APPROVED"}] + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=mock_reviewers + ) as mock_extract_reviewers, patch( + "main.extract_comments", return_value=[] + ): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + assert result[0][0]["reviewer_data"] == mock_reviewers + mock_extract_reviewers.assert_called_once() + + def test_enriches_prs_with_comment_data(self, mock_session): + """Test that PRs are enriched with comment data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + mock_comments = [{"id": 456, "body": "Great work!"}] + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch( + "main.extract_comments", return_value=mock_comments + ) as mock_extract_comments: + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + assert result[0][0]["comment_data"] == mock_comments + mock_extract_comments.assert_called_once() + + @patch("main.sleep_for_rate_limit") + def test_handles_rate_limit(self, mock_sleep, mock_session): + """Test that extract_pull_requests handles rate limiting correctly.""" + # Rate limit response + mock_response_rate_limit = Mock() + mock_response_rate_limit.status_code = 403 + mock_response_rate_limit.headers = {"X-RateLimit-Remaining": "0"} + + # Successful response after rate limit + mock_response_success = Mock() + mock_response_success.status_code = 200 + mock_response_success.json.return_value = [ + {"number": 1, "title": "PR 1"} + ] + mock_response_success.links = {} + + mock_session.get.side_effect = [ 
+ mock_response_rate_limit, + mock_response_success, + ] + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + mock_sleep.assert_called_once_with(mock_response_rate_limit) + assert len(result) == 1 + + def test_handles_api_error_404(self, mock_session): + """Test that extract_pull_requests raises SystemExit on 404.""" + mock_response = Mock() + mock_response.status_code = 404 + mock_response.text = "Not Found" + + mock_session.get.return_value = mock_response + + with pytest.raises(SystemExit) as exc_info: + list(main.extract_pull_requests(mock_session, "mozilla/nonexistent")) + + assert "GitHub API error 404" in str(exc_info.value) + + def test_handles_api_error_500(self, mock_session): + """Test that extract_pull_requests raises SystemExit on 500.""" + mock_response = Mock() + mock_response.status_code = 500 + mock_response.text = "Internal Server Error" + + mock_session.get.return_value = mock_response + + with pytest.raises(SystemExit) as exc_info: + list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert "GitHub API error 500" in str(exc_info.value) + + def test_stops_on_empty_batch(self, mock_session): + """Test that extraction stops when an empty batch is returned.""" + # First page with data + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [{"number": 1}] + mock_response_1.links = { + "next": { + "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2" + } + } + + # Second page empty + mock_response_2 = Mock() + mock_response_2.status_code = 200 + mock_response_2.json.return_value = [] + mock_response_2.links = {} + + mock_session.get.side_effect = [mock_response_1, mock_response_2] + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + # Should only have 1 chunk from first page + assert len(result) == 1 + assert len(result[0]) == 1 + + def test_invalid_page_number_handling(self, mock_session): + """Test handling of invalid page number in pagination.""" + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [{"number": 1}] + mock_response_1.links = { + "next": { + "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=invalid" + } + } + + mock_session.get.return_value = mock_response_1 + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + # Should stop pagination on invalid page number + assert len(result) == 1 + + def test_custom_github_api_url(self, mock_session): + """Test using custom GitHub API URL.""" + custom_url = "https://mock-github.example.com" + + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + with patch("main.extract_commits", return_value=[]), patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + list( + main.extract_pull_requests( + mock_session, "mozilla/firefox", 
github_api_url=custom_url + ) + ) + + # Verify custom URL was used + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] + + def test_skips_prs_without_number_field(self, mock_session): + """Test that PRs without 'number' field are skipped.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + {"number": 1, "title": "PR 1"}, + {"title": "PR without number"}, # Missing number field + {"number": 2, "title": "PR 2"}, + ] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + with patch("main.extract_commits", return_value=[]) as mock_commits, patch( + "main.extract_reviewers", return_value=[] + ), patch("main.extract_comments", return_value=[]): + result = list( + main.extract_pull_requests(mock_session, "mozilla/firefox") + ) + + # extract_commits should only be called for PRs with number field + assert mock_commits.call_count == 2 + + +class TestExtractCommits: + """Tests for extract_commits function.""" + + def test_fetch_commits_with_files(self, mock_session): + """Test fetching commits with files for a PR.""" + # Mock commits list response + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [ + {"sha": "abc123"}, + {"sha": "def456"}, + ] + + # Mock individual commit responses + commit_detail_1 = Mock() + commit_detail_1.status_code = 200 + commit_detail_1.json.return_value = { + "sha": "abc123", + "files": [{"filename": "file1.py", "additions": 10}], + } + + commit_detail_2 = Mock() + commit_detail_2.status_code = 200 + commit_detail_2.json.return_value = { + "sha": "def456", + "files": [{"filename": "file2.py", "deletions": 5}], + } + + mock_session.get.side_effect = [ + commits_response, + commit_detail_1, + commit_detail_2, + ] + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert len(result) == 2 + assert result[0]["sha"] == "abc123" + assert result[0]["files"][0]["filename"] == "file1.py" + assert result[1]["sha"] == "def456" + assert result[1]["files"][0]["filename"] == "file2.py" + + def test_multiple_files_per_commit(self, mock_session): + """Test handling multiple files in a single commit.""" + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [{"sha": "abc123"}] + + commit_detail = Mock() + commit_detail.status_code = 200 + commit_detail.json.return_value = { + "sha": "abc123", + "files": [ + {"filename": "file1.py", "additions": 10}, + {"filename": "file2.py", "additions": 20}, + {"filename": "file3.py", "deletions": 5}, + ], + } + + mock_session.get.side_effect = [commits_response, commit_detail] + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert len(result) == 1 + assert len(result[0]["files"]) == 3 + + @patch("main.sleep_for_rate_limit") + def test_rate_limit_on_commits_list(self, mock_sleep, mock_session): + """Test rate limit handling when fetching commits list.""" + # Rate limit response + rate_limit_response = Mock() + rate_limit_response.status_code = 403 + rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} + + # Success response + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = [] + + mock_session.get.side_effect = [rate_limit_response, success_response] + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + mock_sleep.assert_called_once() + assert result == [] + + def test_api_error_on_commits_list(self, mock_session): + 
"""Test API error handling when fetching commits list.""" + error_response = Mock() + error_response.status_code = 500 + error_response.text = "Internal Server Error" + + mock_session.get.return_value = error_response + + with pytest.raises(SystemExit) as exc_info: + main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert "GitHub API error 500" in str(exc_info.value) + + def test_api_error_on_individual_commit(self, mock_session): + """Test API error when fetching individual commit details.""" + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [{"sha": "abc123"}] + + commit_error = Mock() + commit_error.status_code = 404 + commit_error.text = "Commit not found" + + mock_session.get.side_effect = [commits_response, commit_error] + + with pytest.raises(SystemExit) as exc_info: + main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert "GitHub API error 404" in str(exc_info.value) + + def test_commit_without_sha_field(self, mock_session): + """Test handling commits without sha field.""" + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [ + {"sha": "abc123"}, + {}, # Missing sha field + ] + + commit_detail_1 = Mock() + commit_detail_1.status_code = 200 + commit_detail_1.json.return_value = {"sha": "abc123", "files": []} + + commit_detail_2 = Mock() + commit_detail_2.status_code = 200 + commit_detail_2.json.return_value = {"files": []} + + mock_session.get.side_effect = [commits_response, commit_detail_1, commit_detail_2] + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + # Should handle the commit without sha gracefully + assert len(result) == 2 + + def test_custom_github_api_url(self, mock_session): + """Test using custom GitHub API URL for commits.""" + custom_url = "https://mock-github.example.com" + + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [] + + mock_session.get.return_value = commits_response + + main.extract_commits( + mock_session, "mozilla/firefox", 123, github_api_url=custom_url + ) + + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] + + def test_empty_commits_list(self, mock_session): + """Test handling PR with no commits.""" + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [] + + mock_session.get.return_value = commits_response + + result = main.extract_commits(mock_session, "mozilla/firefox", 123) + + assert result == [] + + +class TestExtractReviewers: + """Tests for extract_reviewers function.""" + + def test_fetch_reviewers(self, mock_session): + """Test fetching reviewers for a PR.""" + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [ + { + "id": 789, + "user": {"login": "reviewer1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + }, + { + "id": 790, + "user": {"login": "reviewer2"}, + "state": "CHANGES_REQUESTED", + "submitted_at": "2024-01-01T16:00:00Z", + }, + ] + + mock_session.get.return_value = reviewers_response + + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + assert len(result) == 2 + assert result[0]["state"] == "APPROVED" + assert result[1]["state"] == "CHANGES_REQUESTED" + + def test_multiple_review_states(self, mock_session): + """Test handling multiple different review states.""" + reviewers_response = Mock() + reviewers_response.status_code = 200 + 
reviewers_response.json.return_value = [ + {"id": 1, "state": "APPROVED", "user": {"login": "user1"}}, + {"id": 2, "state": "CHANGES_REQUESTED", "user": {"login": "user2"}}, + {"id": 3, "state": "COMMENTED", "user": {"login": "user3"}}, + {"id": 4, "state": "DISMISSED", "user": {"login": "user4"}}, + ] + + mock_session.get.return_value = reviewers_response + + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + assert len(result) == 4 + states = [r["state"] for r in result] + assert "APPROVED" in states + assert "CHANGES_REQUESTED" in states + assert "COMMENTED" in states + + def test_empty_reviewers_list(self, mock_session): + """Test handling PR with no reviewers.""" + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [] + + mock_session.get.return_value = reviewers_response + + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + assert result == [] + + @patch("main.sleep_for_rate_limit") + def test_rate_limit_handling(self, mock_sleep, mock_session): + """Test rate limit handling when fetching reviewers.""" + rate_limit_response = Mock() + rate_limit_response.status_code = 403 + rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} + + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = [] + + mock_session.get.side_effect = [rate_limit_response, success_response] + + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + mock_sleep.assert_called_once() + assert result == [] + + def test_api_error(self, mock_session): + """Test API error handling when fetching reviewers.""" + error_response = Mock() + error_response.status_code = 500 + error_response.text = "Internal Server Error" + + mock_session.get.return_value = error_response + + with pytest.raises(SystemExit) as exc_info: + main.extract_reviewers(mock_session, "mozilla/firefox", 123) + + assert "GitHub API error 500" in str(exc_info.value) + + def test_custom_github_api_url(self, mock_session): + """Test using custom GitHub API URL for reviewers.""" + custom_url = "https://mock-github.example.com" + + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [] + + mock_session.get.return_value = reviewers_response + + main.extract_reviewers( + mock_session, "mozilla/firefox", 123, github_api_url=custom_url + ) + + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] + + +class TestExtractComments: + """Tests for extract_comments function.""" + + def test_fetch_comments(self, mock_session): + """Test fetching comments for a PR.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [ + { + "id": 456, + "user": {"login": "commenter1"}, + "body": "This looks good", + "created_at": "2024-01-01T14:00:00Z", + }, + { + "id": 457, + "user": {"login": "commenter2"}, + "body": "I have concerns", + "created_at": "2024-01-01T15:00:00Z", + }, + ] + + mock_session.get.return_value = comments_response + + result = main.extract_comments(mock_session, "mozilla/firefox", 123) + + assert len(result) == 2 + assert result[0]["id"] == 456 + assert result[1]["id"] == 457 + + def test_uses_issues_endpoint(self, mock_session): + """Test that comments use /issues endpoint not /pulls.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] + + mock_session.get.return_value = comments_response + + 
main.extract_comments(mock_session, "mozilla/firefox", 123) + + call_args = mock_session.get.call_args + url = call_args[0][0] + assert "/issues/123/comments" in url + assert "/pulls/123/comments" not in url + + def test_multiple_comments(self, mock_session): + """Test handling multiple comments.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [ + {"id": i, "user": {"login": f"user{i}"}, "body": f"Comment {i}"} + for i in range(1, 11) + ] + + mock_session.get.return_value = comments_response + + result = main.extract_comments(mock_session, "mozilla/firefox", 123) + + assert len(result) == 10 + + def test_empty_comments_list(self, mock_session): + """Test handling PR with no comments.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] + + mock_session.get.return_value = comments_response + + result = main.extract_comments(mock_session, "mozilla/firefox", 123) + + assert result == [] + + @patch("main.sleep_for_rate_limit") + def test_rate_limit_handling(self, mock_sleep, mock_session): + """Test rate limit handling when fetching comments.""" + rate_limit_response = Mock() + rate_limit_response.status_code = 403 + rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} + + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = [] + + mock_session.get.side_effect = [rate_limit_response, success_response] + + result = main.extract_comments(mock_session, "mozilla/firefox", 123) + + mock_sleep.assert_called_once() + assert result == [] + + def test_api_error(self, mock_session): + """Test API error handling when fetching comments.""" + error_response = Mock() + error_response.status_code = 404 + error_response.text = "Not Found" + + mock_session.get.return_value = error_response + + with pytest.raises(SystemExit) as exc_info: + main.extract_comments(mock_session, "mozilla/firefox", 123) + + assert "GitHub API error 404" in str(exc_info.value) + + def test_custom_github_api_url(self, mock_session): + """Test using custom GitHub API URL for comments.""" + custom_url = "https://mock-github.example.com" + + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] + + mock_session.get.return_value = comments_response + + main.extract_comments( + mock_session, "mozilla/firefox", 123, github_api_url=custom_url + ) + + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] + + +class TestTransformData: + """Tests for transform_data function.""" + + def test_basic_pr_transformation(self): + """Test basic pull request field mapping.""" + raw_data = [ + { + "number": 123, + "title": "Fix login bug", + "state": "closed", + "created_at": "2024-01-01T10:00:00Z", + "updated_at": "2024-01-02T10:00:00Z", + "merged_at": "2024-01-02T12:00:00Z", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["pull_requests"]) == 1 + pr = result["pull_requests"][0] + assert pr["pull_request_id"] == 123 + assert pr["current_status"] == "closed" + assert pr["date_created"] == "2024-01-01T10:00:00Z" + assert pr["date_modified"] == "2024-01-02T10:00:00Z" + assert pr["date_landed"] == "2024-01-02T12:00:00Z" + assert pr["target_repository"] == "mozilla/firefox" + + def test_bug_id_extraction_basic(self): + """Test bug ID extraction from PR title.""" + test_cases = [ + 
("Bug 1234567 - Fix issue", 1234567), + ("bug 1234567: Update code", 1234567), + ("Fix for bug 7654321", 7654321), + ("b=9876543 - Change behavior", 9876543), + ] + + for title, expected_bug_id in test_cases: + raw_data = [ + { + "number": 1, + "title": title, + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] == expected_bug_id + + def test_bug_id_extraction_with_hash(self): + """Test bug ID extraction with # symbol.""" + raw_data = [ + { + "number": 1, + "title": "Bug #1234567 - Fix issue", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] == 1234567 + + def test_bug_id_filter_large_numbers(self): + """Test that bug IDs >= 100000000 are filtered out.""" + raw_data = [ + { + "number": 1, + "title": "Bug 999999999 - Invalid bug ID", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] is None + + def test_bug_id_no_match(self): + """Test PR title with no bug ID.""" + raw_data = [ + { + "number": 1, + "title": "Update documentation", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] is None + + def test_labels_extraction(self): + """Test labels array extraction.""" + raw_data = [ + { + "number": 1, + "title": "PR with labels", + "state": "open", + "labels": [ + {"name": "bug"}, + {"name": "priority-high"}, + {"name": "needs-review"}, + ], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + labels = result["pull_requests"][0]["labels"] + assert len(labels) == 3 + assert "bug" in labels + assert "priority-high" in labels + assert "needs-review" in labels + + def test_labels_empty_list(self): + """Test handling empty labels list.""" + raw_data = [ + { + "number": 1, + "title": "PR without labels", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["labels"] == [] + + def test_commit_transformation(self): + """Test commit fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with commits", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc123", + "commit": { + "author": { + "name": "Test Author", + "date": "2024-01-01T12:00:00Z", + } + }, + "files": [ + { + "filename": "src/main.py", + "additions": 10, + "deletions": 5, + } + ], + } + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["commits"]) == 1 + commit = result["commits"][0] + assert commit["pull_request_id"] == 123 + assert commit["target_repository"] == "mozilla/firefox" + assert commit["commit_sha"] == "abc123" + assert commit["date_created"] == "2024-01-01T12:00:00Z" + assert commit["author_username"] == "Test Author" + assert commit["filename"] == "src/main.py" + assert commit["lines_added"] == 10 + assert 
commit["lines_removed"] == 5 + + def test_commit_file_flattening(self): + """Test that each file becomes a separate row.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple files", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc123", + "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, + "files": [ + {"filename": "file1.py", "additions": 10, "deletions": 5}, + {"filename": "file2.py", "additions": 20, "deletions": 2}, + {"filename": "file3.py", "additions": 5, "deletions": 15}, + ], + } + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Should have 3 rows in commits table (one per file) + assert len(result["commits"]) == 3 + filenames = [c["filename"] for c in result["commits"]] + assert "file1.py" in filenames + assert "file2.py" in filenames + assert "file3.py" in filenames + + def test_multiple_commits_with_files(self): + """Test multiple commits with multiple files per PR.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple commits", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "commit1", + "commit": {"author": {"name": "Author1", "date": "2024-01-01"}}, + "files": [ + {"filename": "file1.py", "additions": 10, "deletions": 0} + ], + }, + { + "sha": "commit2", + "commit": {"author": {"name": "Author2", "date": "2024-01-02"}}, + "files": [ + {"filename": "file2.py", "additions": 5, "deletions": 2}, + {"filename": "file3.py", "additions": 8, "deletions": 3}, + ], + }, + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Should have 3 rows total (1 file from commit1, 2 files from commit2) + assert len(result["commits"]) == 3 + assert result["commits"][0]["commit_sha"] == "commit1" + assert result["commits"][1]["commit_sha"] == "commit2" + assert result["commits"][2]["commit_sha"] == "commit2" + + def test_reviewer_transformation(self): + """Test reviewer fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with reviewers", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 789, + "user": {"login": "reviewer1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + } + ], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["reviewers"]) == 1 + reviewer = result["reviewers"][0] + assert reviewer["pull_request_id"] == 123 + assert reviewer["target_repository"] == "mozilla/firefox" + assert reviewer["reviewer_username"] == "reviewer1" + assert reviewer["status"] == "APPROVED" + assert reviewer["date_reviewed"] == "2024-01-01T15:00:00Z" + + def test_multiple_review_states(self): + """Test handling multiple review states.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple reviews", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + }, + { + "id": 2, + "user": {"login": "user2"}, + "state": "CHANGES_REQUESTED", + "submitted_at": "2024-01-01T16:00:00Z", + }, + { + "id": 3, + "user": {"login": "user3"}, + "state": "COMMENTED", + "submitted_at": "2024-01-01T17:00:00Z", + }, + ], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["reviewers"]) == 3 + states = [r["status"] for r in result["reviewers"]] + assert "APPROVED" in 
states + assert "CHANGES_REQUESTED" in states + assert "COMMENTED" in states + + def test_date_approved_from_earliest_approval(self): + """Test that date_approved is set to earliest APPROVED review.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple approvals", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "state": "APPROVED", + "submitted_at": "2024-01-02T15:00:00Z", + }, + { + "id": 2, + "user": {"login": "user2"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T14:00:00Z", # Earliest + }, + { + "id": 3, + "user": {"login": "user3"}, + "state": "APPROVED", + "submitted_at": "2024-01-03T16:00:00Z", + }, + ], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + pr = result["pull_requests"][0] + assert pr["date_approved"] == "2024-01-01T14:00:00Z" + + def test_comment_transformation(self): + """Test comment fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with comments", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 456, + "user": {"login": "commenter1"}, + "body": "This looks great!", + "created_at": "2024-01-01T14:00:00Z", + "pull_request_review_id": None, + } + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["comments"]) == 1 + comment = result["comments"][0] + assert comment["pull_request_id"] == 123 + assert comment["target_repository"] == "mozilla/firefox" + assert comment["comment_id"] == 456 + assert comment["author_username"] == "commenter1" + assert comment["date_created"] == "2024-01-01T14:00:00Z" + assert comment["character_count"] == 17 + + def test_comment_character_count(self): + """Test character count calculation for comments.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "body": "Short", + "created_at": "2024-01-01", + }, + { + "id": 2, + "user": {"login": "user2"}, + "body": "This is a much longer comment with more text", + "created_at": "2024-01-01", + }, + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert result["comments"][0]["character_count"] == 5 + assert result["comments"][1]["character_count"] == 44 + + def test_comment_status_from_review(self): + """Test that comment status is mapped from review_id_statuses.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 789, + "user": {"login": "reviewer"}, + "state": "APPROVED", + "submitted_at": "2024-01-01", + } + ], + "comment_data": [ + { + "id": 456, + "user": {"login": "commenter"}, + "body": "LGTM", + "created_at": "2024-01-01", + "pull_request_review_id": 789, + } + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Comment should have status from the review + assert result["comments"][0]["status"] == "APPROVED" + + def test_comment_empty_body(self): + """Test handling comments with empty or None body.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "body": None, + "created_at": "2024-01-01", + }, + { + "id": 2, + "user": {"login": "user2"}, + "body": "", + "created_at": "2024-01-01", + 
}, + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert result["comments"][0]["character_count"] == 0 + assert result["comments"][1]["character_count"] == 0 + + def test_empty_raw_data(self): + """Test handling empty input list.""" + result = main.transform_data([], "mozilla/firefox") + + assert result["pull_requests"] == [] + assert result["commits"] == [] + assert result["reviewers"] == [] + assert result["comments"] == [] + + def test_pr_without_commits_reviewers_comments(self): + """Test PR with no commits, reviewers, or comments.""" + raw_data = [ + { + "number": 123, + "title": "Minimal PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["pull_requests"]) == 1 + assert len(result["commits"]) == 0 + assert len(result["reviewers"]) == 0 + assert len(result["comments"]) == 0 + + def test_return_structure(self): + """Test that transform_data returns dict with 4 keys.""" + raw_data = [ + { + "number": 1, + "title": "Test", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert isinstance(result, dict) + assert "pull_requests" in result + assert "commits" in result + assert "reviewers" in result + assert "comments" in result + + def test_all_tables_have_target_repository(self): + """Test that all tables include target_repository field.""" + raw_data = [ + { + "number": 123, + "title": "Test PR", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc", + "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, + "files": [{"filename": "test.py", "additions": 1, "deletions": 0}], + } + ], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "reviewer"}, + "state": "APPROVED", + "submitted_at": "2024-01-01", + } + ], + "comment_data": [ + { + "id": 2, + "user": {"login": "commenter"}, + "body": "Test", + "created_at": "2024-01-01", + } + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert result["pull_requests"][0]["target_repository"] == "mozilla/firefox" + assert result["commits"][0]["target_repository"] == "mozilla/firefox" + assert result["reviewers"][0]["target_repository"] == "mozilla/firefox" + assert result["comments"][0]["target_repository"] == "mozilla/firefox" + + +class TestLoadData: + """Tests for load_data function.""" + + @patch("main.datetime") + def test_load_all_tables(self, mock_datetime, mock_bigquery_client): + """Test loading all 4 tables to BigQuery.""" + mock_datetime.now.return_value.strftime.return_value = "2024-01-15" + + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [{"commit_sha": "abc"}], + "reviewers": [{"reviewer_username": "user1"}], + "comments": [{"comment_id": 123}], + } + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + # Should call insert_rows_json 4 times (once per table) + assert mock_bigquery_client.insert_rows_json.call_count == 4 + + @patch("main.datetime") + def test_adds_snapshot_date(self, mock_datetime, mock_bigquery_client): + """Test that snapshot_date is added to all rows.""" + mock_datetime.now.return_value.strftime.return_value = "2024-01-15" + + transformed_data = { + "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], + "commits": [], + "reviewers": [], + "comments": [], + } + + 
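        # load_data is expected to stamp the mocked current date onto every
        # row before insertion; the assertion below checks each row.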
main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + call_args = mock_bigquery_client.insert_rows_json.call_args + rows = call_args[0][1] + assert all(row["snapshot_date"] == "2024-01-15" for row in rows) + + def test_constructs_correct_table_ref(self, mock_bigquery_client): + """Test that table_ref is constructed correctly.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } + + main.load_data(mock_bigquery_client, "my_dataset", transformed_data) + + call_args = mock_bigquery_client.insert_rows_json.call_args + table_ref = call_args[0][0] + assert table_ref == "test-project.my_dataset.pull_requests" + + def test_empty_transformed_data_skipped(self, mock_bigquery_client): + """Test that empty transformed_data dict is skipped.""" + transformed_data = {} + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + mock_bigquery_client.insert_rows_json.assert_not_called() + + def test_skips_empty_tables_individually(self, mock_bigquery_client): + """Test that empty tables are skipped individually.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], # Empty, should be skipped + "reviewers": [], # Empty, should be skipped + "comments": [{"comment_id": 456}], + } + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + # Should only call insert_rows_json twice (for PRs and comments) + assert mock_bigquery_client.insert_rows_json.call_count == 2 + + def test_only_pull_requests_table(self, mock_bigquery_client): + """Test loading only pull_requests table.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + assert mock_bigquery_client.insert_rows_json.call_count == 1 + + def test_raises_exception_on_insert_errors(self, mock_bigquery_client): + """Test that Exception is raised on BigQuery insert errors.""" + mock_bigquery_client.insert_rows_json.return_value = [ + {"index": 0, "errors": ["Insert failed"]} + ] + + transformed_data = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + + with pytest.raises(Exception) as exc_info: + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + assert "BigQuery insert errors" in str(exc_info.value) + + def test_verifies_client_insert_called_correctly(self, mock_bigquery_client): + """Test that client.insert_rows_json is called with correct arguments.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], + "commits": [], + "reviewers": [], + "comments": [], + } + + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + + call_args = mock_bigquery_client.insert_rows_json.call_args + table_ref, rows = call_args[0] + + assert "pull_requests" in table_ref + assert len(rows) == 2 + + +class TestMain: + """Tests for main function.""" + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_requires_github_repos( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that GITHUB_REPOS is required.""" + with patch.dict( + os.environ, + {"BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test"}, + clear=True, + ): + with pytest.raises(SystemExit) as exc_info: + main.main() + + assert "GITHUB_REPOS" in str(exc_info.value) + + @patch("main.setup_logging") + 
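    # Stacked @patch decorators apply bottom-up, so the innermost patch
    # (requests.Session) is passed to the test as the first mock argument.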
@patch("main.bigquery.Client") + @patch("requests.Session") + def test_requires_bigquery_project( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that BIGQUERY_PROJECT is required.""" + with patch.dict( + os.environ, {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_DATASET": "test"}, clear=True + ): + with pytest.raises(SystemExit) as exc_info: + main.main() + + assert "BIGQUERY_PROJECT" in str(exc_info.value) + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_requires_bigquery_dataset( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that BIGQUERY_DATASET is required.""" + with patch.dict( + os.environ, {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test"}, clear=True + ): + with pytest.raises(SystemExit) as exc_info: + main.main() + + assert "BIGQUERY_DATASET" in str(exc_info.value) + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_github_token_optional_with_warning( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that GITHUB_TOKEN is optional but warns if missing.""" + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])): + # Should not raise, but should log warning + result = main.main() + assert result == 0 + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_splits_github_repos_by_comma( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that GITHUB_REPOS is split by comma.""" + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])) as mock_extract: + main.main() + + # Should be called twice (once per repo) + assert mock_extract.call_count == 2 + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_honors_github_api_url( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that GITHUB_API_URL is honored.""" + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + "GITHUB_API_URL": "https://custom-api.example.com", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])) as mock_extract: + main.main() + + call_kwargs = mock_extract.call_args[1] + assert call_kwargs["github_api_url"] == "https://custom-api.example.com" + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_honors_bigquery_emulator_host( + self, mock_session_class, mock_bq_client_class, mock_setup_logging + ): + """Test that BIGQUERY_EMULATOR_HOST is honored.""" + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + "BIGQUERY_EMULATOR_HOST": "http://localhost:9050", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])): + main.main() + + # Verify BigQuery client was created with emulator settings + mock_bq_client_class.assert_called_once() + + @patch("main.setup_logging") + 
@patch("main.bigquery.Client") + @patch("requests.Session") + def test_creates_session_with_headers( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that session is created with Accept and User-Agent headers.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])): + main.main() + + # Verify session headers were set + assert mock_session.headers.update.called + call_args = mock_session.headers.update.call_args[0][0] + assert "Accept" in call_args + assert "User-Agent" in call_args + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_sets_authorization_header_with_token( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that Authorization header is set when token provided.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "test-token-123", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])): + main.main() + + # Verify Authorization header was set + assert mock_session.headers.__setitem__.called + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + @patch("main.extract_pull_requests") + @patch("main.transform_data") + @patch("main.load_data") + def test_single_repo_successful_etl( + self, + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, + ): + """Test successful ETL for single repository.""" + mock_extract.return_value = iter([[{"number": 1}]]) + mock_transform.return_value = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + mock_extract.assert_called_once() + mock_transform.assert_called_once() + mock_load.assert_called_once() + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + @patch("main.extract_pull_requests") + @patch("main.transform_data") + @patch("main.load_data") + def test_multiple_repos_processing( + self, + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, + ): + """Test processing multiple repositories.""" + mock_extract.return_value = iter([[{"number": 1}]]) + mock_transform.return_value = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev,mozilla/addons", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + # Should process 3 repositories + assert mock_extract.call_count == 3 + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + @patch("main.extract_pull_requests") + @patch("main.transform_data") + @patch("main.load_data") + def 
test_processes_chunks_iteratively( + self, + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, + ): + """Test that chunks are processed iteratively from generator.""" + # Return 3 chunks + mock_extract.return_value = iter([ + [{"number": 1}], + [{"number": 2}], + [{"number": 3}], + ]) + mock_transform.return_value = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + # Transform and load should be called 3 times (once per chunk) + assert mock_transform.call_count == 3 + assert mock_load.call_count == 3 + + @patch("main.setup_logging") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_returns_zero_on_success( + self, mock_session_class, mock_bq_client, mock_setup_logging + ): + """Test that main returns 0 on success.""" + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), patch("main.extract_pull_requests", return_value=iter([])): + result = main.main() + + assert result == 0 + + +@pytest.mark.integration +class TestIntegration: + """Integration tests that test multiple components together.""" + + @patch("main.setup_logging") + @patch("main.load_data") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_end_to_end_with_mocked_github( + self, mock_session_class, mock_bq_client, mock_load, mock_setup_logging + ): + """Test end-to-end flow with mocked GitHub responses.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + # Mock PR response + pr_response = Mock() + pr_response.status_code = 200 + pr_response.json.return_value = [ + {"number": 1, "title": "Bug 1234567 - Test PR", "state": "open"} + ] + pr_response.links = {} + + # Mock commits, reviewers, comments responses + empty_response = Mock() + empty_response.status_code = 200 + empty_response.json.return_value = [] + + mock_session.get.side_effect = [ + pr_response, + empty_response, + empty_response, + empty_response, + ] + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + mock_load.assert_called_once() + + # Verify transformed data structure + call_args = mock_load.call_args[0] + transformed_data = call_args[2] + assert "pull_requests" in transformed_data + assert len(transformed_data["pull_requests"]) == 1 + + @patch("main.setup_logging") + @patch("main.load_data") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_bug_id_extraction_through_pipeline( + self, mock_session_class, mock_bq_client, mock_load, mock_setup_logging + ): + """Test bug ID extraction through full pipeline.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + pr_response = Mock() + pr_response.status_code = 200 + pr_response.json.return_value = [ + {"number": 1, "title": "Bug 9876543 - Fix critical issue", "state": "closed"} + ] + pr_response.links = {} + + empty_response = Mock() + empty_response.status_code = 200 + empty_response.json.return_value = [] + + mock_session.get.side_effect = [ + 
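            # Ordering mirrors the extraction flow: one page of PRs, then the
            # commits, reviewers, and comments requests for that single PR.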
pr_response, + empty_response, + empty_response, + empty_response, + ] + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + main.main() + + call_args = mock_load.call_args[0] + transformed_data = call_args[2] + pr = transformed_data["pull_requests"][0] + assert pr["bug_id"] == 9876543 + + @patch("main.setup_logging") + @patch("main.load_data") + @patch("main.bigquery.Client") + @patch("requests.Session") + def test_pagination_through_full_flow( + self, mock_session_class, mock_bq_client, mock_load, mock_setup_logging + ): + """Test pagination through full ETL flow.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + # First page + pr_response_1 = Mock() + pr_response_1.status_code = 200 + pr_response_1.json.return_value = [ + {"number": 1, "title": "PR 1", "state": "open"} + ] + pr_response_1.links = { + "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} + } + + # Second page + pr_response_2 = Mock() + pr_response_2.status_code = 200 + pr_response_2.json.return_value = [ + {"number": 2, "title": "PR 2", "state": "open"} + ] + pr_response_2.links = {} + + empty_response = Mock() + empty_response.status_code = 200 + empty_response.json.return_value = [] + + mock_session.get.side_effect = [ + pr_response_1, + empty_response, + empty_response, + empty_response, + pr_response_2, + empty_response, + empty_response, + empty_response, + ] + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + main.main() + + # Should be called twice (once per chunk/page) + assert mock_load.call_count == 2 From 89d1edb4aaaf6b218ded786ea1edfe9468c1383c Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 18:47:57 -0500 Subject: [PATCH 02/12] Copilot suggested fixes --- TESTING.md | 3 +- main.py | 1 + pytest.ini | 4 +- test_main.py | 374 +++++++++++++++++++++++++++++---------------------- 4 files changed, 214 insertions(+), 168 deletions(-) diff --git a/TESTING.md b/TESTING.md index c0bb5dd..104d401 100644 --- a/TESTING.md +++ b/TESTING.md @@ -228,7 +228,7 @@ mypy main.py --no-strict-optional --ignore-missing-imports ### GitHub Actions Workflow -The `.github/workflows/tests.yml` workflow runs on every push and pull request: +The `.github/workflows/tests.yml` workflow runs on every pull request: **Lint Job:** 1. Runs black (format check) @@ -243,7 +243,6 @@ The `.github/workflows/tests.yml` workflow runs on every push and pull request: ### Workflow Triggers -- Push to `main` or `unit-tests` branch - Pull requests to `main` branch ### Viewing Results diff --git a/main.py b/main.py index db80d03..645f167 100755 --- a/main.py +++ b/main.py @@ -29,6 +29,7 @@ def setup_logging() -> None: level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", handlers=[logging.StreamHandler(sys.stdout)], + force=True, ) diff --git a/pytest.ini b/pytest.ini index d4a601a..33ef84b 100644 --- a/pytest.ini +++ b/pytest.ini @@ -15,9 +15,7 @@ addopts = --cov-report=term-missing --cov-report=html --cov-branch - -# Minimum coverage threshold (can adjust as needed) ---cov-fail-under=80 + --cov-fail-under=80 # Test paths testpaths = . 
diff --git a/test_main.py b/test_main.py index 7165677..400c6d3 100644 --- a/test_main.py +++ b/test_main.py @@ -8,10 +8,9 @@ import logging import os -import sys import time -from datetime import datetime, timezone -from unittest.mock import Mock, MagicMock, patch, call +from datetime import datetime +from unittest.mock import Mock, MagicMock, patch import pytest import requests from google.cloud import bigquery @@ -143,9 +142,7 @@ class TestSleepForRateLimit: @patch("time.time") @patch("time.sleep") - def test_sleep_for_rate_limit_when_remaining_is_zero( - self, mock_sleep, mock_time - ): + def test_sleep_for_rate_limit_when_remaining_is_zero(self, mock_sleep, mock_time): """Test that sleep_for_rate_limit sleeps until reset time.""" mock_time.return_value = 1000 @@ -220,12 +217,12 @@ def test_extract_single_page(self, mock_session): mock_session.get.return_value = mock_response # Mock the extract functions - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) assert len(result) == 1 assert len(result[0]) == 2 @@ -242,9 +239,7 @@ def test_extract_multiple_pages(self, mock_session): {"number": 2, "title": "PR 2"}, ] mock_response_1.links = { - "next": { - "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2" - } + "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} } # Second page response @@ -255,12 +250,12 @@ def test_extract_multiple_pages(self, mock_session): mock_session.get.side_effect = [mock_response_1, mock_response_2] - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) assert len(result) == 2 assert len(result[0]) == 2 @@ -279,16 +274,14 @@ def test_enriches_prs_with_commit_data(self, mock_session): mock_commits = [{"sha": "abc123"}] - with patch( - "main.extract_commits", return_value=mock_commits - ) as mock_extract_commits, patch( - "main.extract_reviewers", return_value=[] - ), patch( - "main.extract_comments", return_value=[] + with ( + patch( + "main.extract_commits", return_value=mock_commits + ) as mock_extract_commits, + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), ): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) assert result[0][0]["commit_data"] == mock_commits mock_extract_commits.assert_called_once() @@ -304,14 +297,14 @@ def test_enriches_prs_with_reviewer_data(self, mock_session): mock_reviewers = [{"id": 789, "state": "APPROVED"}] - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=mock_reviewers - ) as mock_extract_reviewers, patch( - 
"main.extract_comments", return_value=[] + with ( + patch("main.extract_commits", return_value=[]), + patch( + "main.extract_reviewers", return_value=mock_reviewers + ) as mock_extract_reviewers, + patch("main.extract_comments", return_value=[]), ): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) assert result[0][0]["reviewer_data"] == mock_reviewers mock_extract_reviewers.assert_called_once() @@ -327,14 +320,14 @@ def test_enriches_prs_with_comment_data(self, mock_session): mock_comments = [{"id": 456, "body": "Great work!"}] - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch( - "main.extract_comments", return_value=mock_comments - ) as mock_extract_comments: - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch( + "main.extract_comments", return_value=mock_comments + ) as mock_extract_comments, + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) assert result[0][0]["comment_data"] == mock_comments mock_extract_comments.assert_called_once() @@ -350,9 +343,7 @@ def test_handles_rate_limit(self, mock_sleep, mock_session): # Successful response after rate limit mock_response_success = Mock() mock_response_success.status_code = 200 - mock_response_success.json.return_value = [ - {"number": 1, "title": "PR 1"} - ] + mock_response_success.json.return_value = [{"number": 1, "title": "PR 1"}] mock_response_success.links = {} mock_session.get.side_effect = [ @@ -360,12 +351,12 @@ def test_handles_rate_limit(self, mock_sleep, mock_session): mock_response_success, ] - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) mock_sleep.assert_called_once_with(mock_response_rate_limit) assert len(result) == 1 @@ -403,9 +394,7 @@ def test_stops_on_empty_batch(self, mock_session): mock_response_1.status_code = 200 mock_response_1.json.return_value = [{"number": 1}] mock_response_1.links = { - "next": { - "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2" - } + "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} } # Second page empty @@ -416,12 +405,12 @@ def test_stops_on_empty_batch(self, mock_session): mock_session.get.side_effect = [mock_response_1, mock_response_2] - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) # Should only have 1 chunk from first page assert len(result) == 1 @@ -440,12 +429,12 @@ def test_invalid_page_number_handling(self, mock_session): 
mock_session.get.return_value = mock_response_1 - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) # Should stop pagination on invalid page number assert len(result) == 1 @@ -461,9 +450,11 @@ def test_custom_github_api_url(self, mock_session): mock_session.get.return_value = mock_response - with patch("main.extract_commits", return_value=[]), patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): list( main.extract_pull_requests( mock_session, "mozilla/firefox", github_api_url=custom_url @@ -487,12 +478,12 @@ def test_skips_prs_without_number_field(self, mock_session): mock_session.get.return_value = mock_response - with patch("main.extract_commits", return_value=[]) as mock_commits, patch( - "main.extract_reviewers", return_value=[] - ), patch("main.extract_comments", return_value=[]): - result = list( - main.extract_pull_requests(mock_session, "mozilla/firefox") - ) + with ( + patch("main.extract_commits", return_value=[]) as mock_commits, + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + list(main.extract_pull_requests(mock_session, "mozilla/firefox")) # extract_commits should only be called for PRs with number field assert mock_commits.call_count == 2 @@ -631,7 +622,11 @@ def test_commit_without_sha_field(self, mock_session): commit_detail_2.status_code = 200 commit_detail_2.json.return_value = {"files": []} - mock_session.get.side_effect = [commits_response, commit_detail_1, commit_detail_2] + mock_session.get.side_effect = [ + commits_response, + commit_detail_1, + commit_detail_2, + ] result = main.extract_commits(mock_session, "mozilla/firefox", 123) @@ -1470,7 +1465,9 @@ def test_all_tables_have_target_repository(self): { "sha": "abc", "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, - "files": [{"filename": "test.py", "additions": 1, "deletions": 0}], + "files": [ + {"filename": "test.py", "additions": 1, "deletions": 0} + ], } ], "reviewer_data": [ @@ -1594,7 +1591,12 @@ def test_raises_exception_on_insert_errors(self, mock_bigquery_client): {"index": 0, "errors": ["Insert failed"]} ] - transformed_data = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } with pytest.raises(Exception) as exc_info: main.load_data(mock_bigquery_client, "test_dataset", transformed_data) @@ -1647,7 +1649,9 @@ def test_requires_bigquery_project( ): """Test that BIGQUERY_PROJECT is required.""" with patch.dict( - os.environ, {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_DATASET": "test"}, clear=True + os.environ, + {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_DATASET": "test"}, + clear=True, ): with pytest.raises(SystemExit) as exc_info: main.main() @@ -1662,7 +1666,9 @@ def test_requires_bigquery_dataset( ): """Test that 
BIGQUERY_DATASET is required.""" with patch.dict( - os.environ, {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test"}, clear=True + os.environ, + {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test"}, + clear=True, ): with pytest.raises(SystemExit) as exc_info: main.main() @@ -1676,15 +1682,18 @@ def test_github_token_optional_with_warning( self, mock_session_class, mock_bq_client, mock_setup_logging ): """Test that GITHUB_TOKEN is optional but warns if missing.""" - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])): + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): # Should not raise, but should log warning result = main.main() assert result == 0 @@ -1696,16 +1705,19 @@ def test_splits_github_repos_by_comma( self, mock_session_class, mock_bq_client, mock_setup_logging ): """Test that GITHUB_REPOS is split by comma.""" - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])) as mock_extract: + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, + ): main.main() # Should be called twice (once per repo) @@ -1718,17 +1730,20 @@ def test_honors_github_api_url( self, mock_session_class, mock_bq_client, mock_setup_logging ): """Test that GITHUB_API_URL is honored.""" - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - "GITHUB_API_URL": "https://custom-api.example.com", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])) as mock_extract: + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + "GITHUB_API_URL": "https://custom-api.example.com", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, + ): main.main() call_kwargs = mock_extract.call_args[1] @@ -1741,17 +1756,20 @@ def test_honors_bigquery_emulator_host( self, mock_session_class, mock_bq_client_class, mock_setup_logging ): """Test that BIGQUERY_EMULATOR_HOST is honored.""" - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - "BIGQUERY_EMULATOR_HOST": "http://localhost:9050", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])): + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + "BIGQUERY_EMULATOR_HOST": "http://localhost:9050", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): main.main() # Verify BigQuery client was created with emulator settings @@ -1767,16 
+1785,19 @@ def test_creates_session_with_headers( mock_session = MagicMock() mock_session_class.return_value = mock_session - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])): + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): main.main() # Verify session headers were set @@ -1795,16 +1816,19 @@ def test_sets_authorization_header_with_token( mock_session = MagicMock() mock_session_class.return_value = mock_session - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "test-token-123", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])): + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "test-token-123", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): main.main() # Verify Authorization header was set @@ -1827,7 +1851,12 @@ def test_single_repo_successful_etl( ): """Test successful ETL for single repository.""" mock_extract.return_value = iter([[{"number": 1}]]) - mock_transform.return_value = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } with patch.dict( os.environ, @@ -1863,7 +1892,12 @@ def test_multiple_repos_processing( ): """Test processing multiple repositories.""" mock_extract.return_value = iter([[{"number": 1}]]) - mock_transform.return_value = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } with patch.dict( os.environ, @@ -1898,12 +1932,19 @@ def test_processes_chunks_iteratively( ): """Test that chunks are processed iteratively from generator.""" # Return 3 chunks - mock_extract.return_value = iter([ - [{"number": 1}], - [{"number": 2}], - [{"number": 3}], - ]) - mock_transform.return_value = {"pull_requests": [{"pull_request_id": 1}], "commits": [], "reviewers": [], "comments": []} + mock_extract.return_value = iter( + [ + [{"number": 1}], + [{"number": 2}], + [{"number": 3}], + ] + ) + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } with patch.dict( os.environ, @@ -1929,16 +1970,19 @@ def test_returns_zero_on_success( self, mock_session_class, mock_bq_client, mock_setup_logging ): """Test that main returns 0 on success.""" - with patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - }, - clear=True, - ), patch("main.extract_pull_requests", return_value=iter([])): + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ), + 
patch("main.extract_pull_requests", return_value=iter([])), + ): result = main.main() assert result == 0 @@ -2014,7 +2058,11 @@ def test_bug_id_extraction_through_pipeline( pr_response = Mock() pr_response.status_code = 200 pr_response.json.return_value = [ - {"number": 1, "title": "Bug 9876543 - Fix critical issue", "state": "closed"} + { + "number": 1, + "title": "Bug 9876543 - Fix critical issue", + "state": "closed", + } ] pr_response.links = {} From 43b13f0dc7fce314fb642a17247c20765fa000fb Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 18:50:58 -0500 Subject: [PATCH 03/12] Fix integretion test --- .github/workflows/tests.yml | 104 ++++++++++++++++-------------------- 1 file changed, 46 insertions(+), 58 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 87e2800..4e4f711 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -2,74 +2,62 @@ name: Tests and Linting on: pull_request: - branches: [ main ] + branches: [main] jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install black flake8 mypy isort - - - name: Run black - run: black --check main.py test_main.py - - - name: Run isort - run: isort --check-only main.py test_main.py - - - name: Run flake8 - run: flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503 - - - name: Run mypy - run: mypy main.py --no-strict-optional --ignore-missing-imports + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install black flake8 mypy isort + - name: Run black + run: black --check main.py test_main.py + - name: Run isort + run: isort --check-only main.py test_main.py + - name: Run flake8 + run: flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503 + - name: Run mypy + run: mypy main.py --no-strict-optional --ignore-missing-imports test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - - name: Run unit tests with coverage - run: | - pytest -m "not integration and not slow" --cov=main --cov-report=term-missing --cov-fail-under=80 - - - name: Run all tests - run: | - pytest --cov=main --cov-report=xml --cov-report=html - - - name: Upload coverage reports - uses: actions/upload-artifact@v4 - with: - name: coverage-reports - path: | - htmlcov/ - coverage.xml + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Run unit tests with coverage + run: | + pytest -m "not integration and not slow" --cov=main --cov-report=term-missing --cov-fail-under=80 + - name: Run all tests + run: | + pytest --cov=main --cov-report=xml --cov-report=html + - name: Upload coverage reports + uses: actions/upload-artifact@v4 + with: + name: coverage-reports + path: | + htmlcov/ + coverage.xml integration-test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - - name: Run integration test with docker-compose - run: | - docker-compose up --build --abort-on-container-exit 
--exit-code-from github-etl - - - name: Cleanup - if: always() - run: docker-compose down -v + - uses: actions/checkout@v4 + - name: Install docker-compose + run: sudo apt update && sudo apt install -y docker-compose + - name: Run integration test with docker-compose + run: docker-compose up --build --abort-on-container-exit --exit-code-from github-etl + - name: Cleanup + if: always() + run: docker-compose down -v From 60426816b2744c77600b798204e95c29bde4e416 Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 18:53:32 -0500 Subject: [PATCH 04/12] Black formatted --- main.py | 15 ++++++++------- test_main.py | 1 - 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/main.py b/main.py index 645f167..00c0d35 100755 --- a/main.py +++ b/main.py @@ -19,7 +19,6 @@ from google.api_core.client_options import ClientOptions from google.auth.credentials import AnonymousCredentials - BUG_RE = re.compile(r"\b(?:bug|b=)\s*#?(\d+)\b", re.I) @@ -325,9 +324,11 @@ def transform_data(raw_data: list[dict], repo: str) -> dict: "bug_id": bug_id, "date_landed": pr.get("merged_at"), "date_approved": None, # This will be filled later - "labels": [label.get("name") for label in pr.get("labels", [])] - if pr.get("labels") - else [], + "labels": ( + [label.get("name") for label in pr.get("labels", [])] + if pr.get("labels") + else [] + ), } # Extract and flatten commit data @@ -387,9 +388,9 @@ def transform_data(raw_data: list[dict], repo: str) -> dict: "date_created": comment.get("created_at"), "author_email": None, # TODO Placeholder for reviewer email extraction logic "author_username": comment.get("user", {}).get("login"), - "character_count": len(comment.get("body", "")) - if comment.get("body") - else 0, + "character_count": ( + len(comment.get("body", "")) if comment.get("body") else 0 + ), "status": None, # TODO } diff --git a/test_main.py b/test_main.py index 400c6d3..210029c 100644 --- a/test_main.py +++ b/test_main.py @@ -17,7 +17,6 @@ import main - # ============================================================================= # FIXTURES # ============================================================================= From 39435822fa9d3324f44a14fff418eab91a64f746 Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 18:55:44 -0500 Subject: [PATCH 05/12] Used isort to fix sorting order --- main.py | 5 +++-- test_main.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 00c0d35..d75abf9 100755 --- a/main.py +++ b/main.py @@ -9,15 +9,16 @@ import logging import os import re -import requests import sys import time from datetime import datetime, timezone from typing import Iterator, Optional from urllib.parse import parse_qs, urlparse -from google.cloud import bigquery + +import requests from google.api_core.client_options import ClientOptions from google.auth.credentials import AnonymousCredentials +from google.cloud import bigquery BUG_RE = re.compile(r"\b(?:bug|b=)\s*#?(\d+)\b", re.I) diff --git a/test_main.py b/test_main.py index 210029c..0850eae 100644 --- a/test_main.py +++ b/test_main.py @@ -10,7 +10,8 @@ import os import time from datetime import datetime -from unittest.mock import Mock, MagicMock, patch +from unittest.mock import MagicMock, Mock, patch + import pytest import requests from google.cloud import bigquery From 483f19b877bc315da78c49134dce76babb4c1f89 Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 20:59:03 -0500 Subject: [PATCH 06/12] Mypy test fixes --- main.py | 23 
++++++++++++++--------- test_main.py | 2 -- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/main.py b/main.py index d75abf9..f76a281 100755 --- a/main.py +++ b/main.py @@ -59,7 +59,7 @@ def extract_pull_requests( # Support custom API URL for mocking/testing api_base = github_api_url or "https://api.github.com" base_url = f"{api_base}/repos/{repo}/pulls" - params = { + params: dict = { "state": "all", "per_page": chunk_size, "sort": "created", @@ -298,7 +298,7 @@ def transform_data(raw_data: list[dict], repo: str) -> dict: logger = logging.getLogger(__name__) logger.info(f"Starting data transformation for {len(raw_data)} PRs") - transformed_data = { + transformed_data: dict = { "pull_requests": [], "commits": [], "reviewers": [], @@ -371,7 +371,8 @@ def transform_data(raw_data: list[dict], repo: str) -> dict: } transformed_data["reviewers"].append(transformed_reviewer) - # If the request is approved then store the date in the date_approved for the pull request + # If the request is approved then store the date in the + # date_approved for the pull request if review.get("state") == "APPROVED": approved_date = review.get("submitted_at") if transformed_pr.get( @@ -422,7 +423,8 @@ def load_data( Args: client: BigQuery client instance dataset_id: BigQuery dataset ID - transformed_data: Dictionary containing tables ('pull_requests', 'commits', 'reviewers', 'comments') mapped to lists of row dictionaries + transformed_data: Dictionary containing tables ('pull_requests', + 'commits', 'reviewers', 'comments') mapped to lists of row dictionaries """ logger = logging.getLogger(__name__) @@ -457,7 +459,8 @@ def load_data( raise Exception(error_msg) logger.info( - f"Data loading completed successfully for table {table} with {len(load_table_data)} rows" + f"Data loading completed successfully for table {table} " + + "with {len(load_table_data)} rows" ) @@ -479,7 +482,8 @@ def main() -> int: github_token = os.environ.get("GITHUB_TOKEN") if not github_token: logger.warning( - "Warning: No token provided. You will hit very low rate limits and private repos won't work." + "Warning: No token provided. You will hit very low rate " + + "limits and private repos won't work." 
) # Read BigQuery configuration @@ -522,9 +526,10 @@ def main() -> int: bigquery_client = bigquery.Client(project=bigquery_project) # Read GitHub repository configuration - github_repos = os.getenv("GITHUB_REPOS") - if github_repos: - github_repos = github_repos.split(",") + github_repos = [] + github_repos_str = os.getenv("GITHUB_REPOS") + if github_repos_str: + github_repos = github_repos_str.split(",") else: raise SystemExit( "Environment variable GITHUB_REPOS is required (format: 'owner/repo,owner/repo')" diff --git a/test_main.py b/test_main.py index 0850eae..0e60118 100644 --- a/test_main.py +++ b/test_main.py @@ -8,8 +8,6 @@ import logging import os -import time -from datetime import datetime from unittest.mock import MagicMock, Mock, patch import pytest From af0db81174248598f680ca5c1444da7bf4634573 Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 21:45:06 -0500 Subject: [PATCH 07/12] types-requests --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 8ede7d4..39c369e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ black>=24.0.0 flake8>=7.0.0 mypy>=1.8.0 isort>=5.13.0 +types-requests==2.32.4.20260107 From 9b3ba755e18ba0fdc8b3d29b4e5f58078b992d17 Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 21:50:26 -0500 Subject: [PATCH 08/12] More types-requests fixes --- .github/workflows/tests.yml | 4 +++- requirements.txt | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4e4f711..dda2014 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -23,7 +23,9 @@ jobs: - name: Run flake8 run: flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503 - name: Run mypy - run: mypy main.py --no-strict-optional --ignore-missing-imports + run: | + pip install types-requests + mypy main.py --no-strict-optional --ignore-missing-imports test: runs-on: ubuntu-latest diff --git a/requirements.txt b/requirements.txt index 39c369e..8ede7d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,3 @@ black>=24.0.0 flake8>=7.0.0 mypy>=1.8.0 isort>=5.13.0 -types-requests==2.32.4.20260107 From eaf389b4497ebcb3367724e65148a8d4a3c7351b Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Wed, 21 Jan 2026 22:11:34 -0500 Subject: [PATCH 09/12] Fixed typo in f-string --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index f76a281..ec2f482 100755 --- a/main.py +++ b/main.py @@ -460,7 +460,7 @@ def load_data( logger.info( f"Data loading completed successfully for table {table} " - + "with {len(load_table_data)} rows" + + f"with {len(load_table_data)} rows" ) From 7caae207a05b059b2795a6ed422f573e0197798e Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Thu, 22 Jan 2026 16:35:31 -0500 Subject: [PATCH 10/12] Copilot fixes --- .github/workflows/tests.yml | 6 ++--- TESTING.md | 53 +++++++++++++++++++------------------ pytest.ini | 2 +- requirements.txt | 1 + 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index dda2014..513a509 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -15,7 +15,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install black flake8 mypy isort + pip install -r requirements.txt - name: Run black run: black --check main.py test_main.py - name: Run isort @@ -23,9 +23,7 
@@ jobs: - name: Run flake8 run: flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503 - name: Run mypy - run: | - pip install types-requests - mypy main.py --no-strict-optional --ignore-missing-imports + run: mypy main.py --no-strict-optional --ignore-missing-imports test: runs-on: ubuntu-latest diff --git a/TESTING.md b/TESTING.md index 104d401..c6a541c 100644 --- a/TESTING.md +++ b/TESTING.md @@ -19,22 +19,22 @@ unit tests, integration tests, Docker testing, linting, and CI/CD workflows. ## Unit Testing The test suite in `test_main.py` provides comprehensive coverage for all functions in `main.py`. -We have **95 unit tests** covering 9 functions with 80%+ code coverage requirement. +We have unit tests covering 9 functions with 80%+ code coverage requirement. ### Test Structure Tests are organized into 10 test classes: -1. **TestSetupLogging** (1 test) - Logging configuration -2. **TestSleepForRateLimit** (4 tests) - Rate limit handling -3. **TestExtractPullRequests** (14 tests) - PR extraction with pagination and enrichment -4. **TestExtractCommits** (9 tests) - Commit and file extraction -5. **TestExtractReviewers** (6 tests) - Reviewer extraction -6. **TestExtractComments** (7 tests) - Comment extraction (uses /issues endpoint) -7. **TestTransformData** (26 tests) - Data transformation for all 4 BigQuery tables -8. **TestLoadData** (8 tests) - BigQuery data loading -9. **TestMain** (17 tests) - Main ETL orchestration -10. **TestIntegration** (3 tests) - End-to-end integration tests (marked with `@pytest.mark.integration`) +1. **TestSetupLogging** - Logging configuration +2. **TestSleepForRateLimit** - Rate limit handling +3. **TestExtractPullRequests** - PR extraction with pagination and enrichment +4. **TestExtractCommits** - Commit and file extraction +5. **TestExtractReviewers** - Reviewer extraction +6. **TestExtractComments** - Comment extraction (uses /issues endpoint) +7. **TestTransformData** - Data transformation for all 4 BigQuery tables +8. **TestLoadData** - BigQuery data loading +9. **TestMain** - Main ETL orchestration +10. 
**TestIntegration** - End-to-end integration tests (marked with `@pytest.mark.integration`) ### Fixtures @@ -51,17 +51,17 @@ Reusable fixtures are defined at the top of `test_main.py`: ### Function Coverage -| Function | Tests | Coverage Target | Key Test Areas | -|----------|-------|-----------------|----------------| -| `setup_logging()` | 1 | 100% | Logger configuration | -| `sleep_for_rate_limit()` | 4 | 100% | Rate limit sleep logic, edge cases | -| `extract_pull_requests()` | 14 | 90%+ | Pagination, rate limits, enrichment, error handling | -| `extract_commits()` | 9 | 85%+ | Commit/file fetching, rate limits, errors | -| `extract_reviewers()` | 6 | 85%+ | Reviewer states, rate limits, errors | -| `extract_comments()` | 7 | 85%+ | Comment fetching (via /issues), rate limits | -| `transform_data()` | 26 | 95%+ | Bug ID extraction, 4 tables, field mapping | -| `load_data()` | 8 | 90%+ | BigQuery insertion, snapshot dates, errors | -| `main()` | 17 | 85%+ | Env vars, orchestration, chunking | +| Function | Coverage Target | Key Test Areas | +|----------|------------------|----------------| +| `setup_logging()` | 100% | Logger configuration | +| `sleep_for_rate_limit()` | 100% | Rate limit sleep logic, edge cases | +| `extract_pull_requests()` | 90%+ | Pagination, rate limits, enrichment, error handling | +| `extract_commits()` | 85%+ | Commit/file fetching, rate limits, errors | +| `extract_reviewers()` | 85%+ | Reviewer states, rate limits, errors | +| `extract_comments()` | 85%+ | Comment fetching (via /issues), rate limits | +| `transform_data()` | 95%+ | Bug ID extraction, 4 tables, field mapping | +| `load_data()` | 90%+ | BigQuery insertion, snapshot dates, errors | +| `main()` | 85%+ | Env vars, orchestration, chunking | **Overall Target: 85-90% coverage** (80% minimum enforced in CI) @@ -318,8 +318,8 @@ docker-compose down - 9050 (BigQuery API) - 9060 (Discovery/Admin API) - **Configuration**: Uses `data.yml` to define the schema -- **Project**: test-project -- **Dataset**: test_dataset +- **Project**: test +- **Dataset**: github_etl - **Table**: pull_requests ### ETL Service @@ -328,8 +328,9 @@ The ETL service is configured via environment variables in `docker-compose.yml`: ```yaml environment: - GITHUB_REPOS: "mozilla/firefox" - GITHUB_API_URL: "http://mock-github-api:5000" # Points to mock API + GITHUB_REPOS: "mozilla-firefox/firefox" + GITHUB_TOKEN: "" # Not needed for mock API + GITHUB_API_URL: "http://mock-github-api:5000" BIGQUERY_PROJECT: "test" BIGQUERY_DATASET: "github_etl" BIGQUERY_EMULATOR_HOST: "http://bigquery-emulator:9050" diff --git a/pytest.ini b/pytest.ini index 33ef84b..d553b45 100644 --- a/pytest.ini +++ b/pytest.ini @@ -34,7 +34,7 @@ log_cli_date_format = %Y-%m-%d %H:%M:%S # Coverage options [coverage:run] -source = . 
+source = main omit = test_*.py .venv/* diff --git a/requirements.txt b/requirements.txt index 8ede7d4..e1e65e0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ black>=24.0.0 flake8>=7.0.0 mypy>=1.8.0 isort>=5.13.0 +types-requests>=2.32.4 From 3b9260d4996ea15f66c041f92af707298e0757c2 Mon Sep 17 00:00:00 2001 From: David Lawrence Date: Fri, 23 Jan 2026 18:16:04 -0500 Subject: [PATCH 11/12] Fixed review comments --- .github/workflows/tests.yml | 22 +- Dockerfile | 4 +- Dockerfile.mock | 2 +- README.md | 2 +- TESTING.md | 2 +- main.py | 4 +- pyproject.toml | 129 ++ pytest.ini | 47 - requirements.txt | 340 +++- test_formatting.py | 16 + test_main.py | 3456 +++++++++++++++++------------------ 11 files changed, 2199 insertions(+), 1825 deletions(-) create mode 100644 pyproject.toml delete mode 100644 pytest.ini create mode 100644 test_formatting.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 513a509..5480c08 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -5,33 +5,13 @@ on: branches: [main] jobs: - lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r requirements.txt - - name: Run black - run: black --check main.py test_main.py - - name: Run isort - run: isort --check-only main.py test_main.py - - name: Run flake8 - run: flake8 main.py test_main.py --max-line-length=100 --extend-ignore=E203,W503 - - name: Run mypy - run: mypy main.py --no-strict-optional --ignore-missing-imports - test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.14.2" - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/Dockerfile b/Dockerfile index 5608295..bec1ed8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Use the latest stable Python image -FROM python:3.11-slim +FROM python:3.14.2-slim # Set environment variables ENV PYTHONDONTWRITEBYTECODE=1 \ @@ -34,4 +34,4 @@ RUN chown -R app:app /app USER app # Set the default command -CMD ["python", "main.py"] \ No newline at end of file +CMD ["python", "main.py"] diff --git a/Dockerfile.mock b/Dockerfile.mock index 1098382..cf46078 100644 --- a/Dockerfile.mock +++ b/Dockerfile.mock @@ -1,5 +1,5 @@ # Dockerfile for mock GitHub API service -FROM python:3.11-slim +FROM python:3.14.2-slim WORKDIR /app diff --git a/README.md b/README.md index 80a3afe..570bacb 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ docker run --rm \ ### Container Specifications -- **Base Image**: `python:3.11-slim` (latest stable Python) +- **Base Image**: `python:3.14.2-slim` (latest stable Python) - **User**: `app` (uid: 1000, gid: 1000) - **Working Directory**: `/app` - **Ownership**: All files in `/app` are owned by the `app` user diff --git a/TESTING.md b/TESTING.md index c6a541c..6901d2f 100644 --- a/TESTING.md +++ b/TESTING.md @@ -604,7 +604,7 @@ If coverage is below 80%: ### Tests Pass Locally But Fail in CI -- Check Python version (must be 3.11) +- Check Python version (must be 3.14) - Verify all dependencies are in `requirements.txt` - Look for environment-specific issues diff --git a/main.py b/main.py index ec2f482..e6b92b0 100755 --- a/main.py +++ b/main.py @@ -91,7 +91,7 @@ def extract_pull_requests( f"Extracted page {pages} with {len(batch)} PRs (total: {total})" ) - for idx, pr in 
enumerate(batch):
+        for _idx, pr in enumerate(batch):
             pr_number = pr.get("number")
             if not pr_number:
                 continue
@@ -273,7 +273,7 @@ def extract_comments(
     return comments
 
 
-def sleep_for_rate_limit(resp):
+def sleep_for_rate_limit(resp: requests.Response) -> None:
     """Sleep until rate limit resets."""
     remaining = int(resp.headers.get("X-RateLimit-Remaining", 1))
     reset = int(resp.headers.get("X-RateLimit-Reset", 0))
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..198886d
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,129 @@
+[project]
+name = "github-etl"
+version = "0.1.0"
+description = "ETL script to extract data from Mozilla Organization Firefox repositories on GitHub and load them into BigQuery"
+readme = "README.md"
+requires-python = ">=3.14"
+license = {text = "MPL-2.0"}
+authors = [
+    {name = "Mozilla", email = "dev-platform@lists.mozilla.org"}
+]
+keywords = ["etl", "github", "bigquery", "mozilla"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: Mozilla Public License 2.0 (MPL-2.0)",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.14",
+]
+
+dependencies = [
+    "requests>=2.25.0",
+    "google-cloud-bigquery==3.25.0",
+]
+
+[project.optional-dependencies]
+dev = [
+    "pytest>=7.0.0",
+    "pytest-mock>=3.10.0",
+    "pytest-cov>=4.0.0",
+    "ruff>=0.14.14",
+    "black>=24.0.0",
+]
+
+[project.scripts]
+github-etl = "main:main"
+
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools]
+py-modules = ["main"]
+
+# Ruff configuration
+[tool.ruff]
+line-length = 88
+exclude = [
+    ".cache",
+    ".git",
+    ".hg",
+    "__pycache__",
+]
+
+[tool.ruff.lint]
+select = ["C", "E", "F", "W", "B", "B9", "I", "ANN"]
+ignore = [
+    "B006",
+    "B904",
+    "C901",
+    "E203",
+    "E501",
+    "ANN002",  # Missing type annotation for *args
+    "ANN003",  # Missing type annotation for **kwargs
+    "ANN202",  # Missing return type annotation for private function
+]
+
+[tool.ruff.lint.isort]
+split-on-trailing-comma = true
+
+[tool.ruff.lint.flake8-annotations]
+suppress-none-returning = true
+
+[tool.ruff.lint.per-file-ignores]
+"**/*/tests/*" = ["ANN"]
+"**/*/conftest.py" = ["ANN"]
+
+# Black configuration
+[tool.black]
+line-length = 88
+target-version = ['py314']
+
+# Pytest configuration
+[tool.pytest.ini_options]
+testpaths = ["."]
+python_files = ["test_*.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
+addopts = [
+    "-v",
+    "--strict-markers",
+    "--tb=short",
+    "--cov=main",
+    "--cov-report=term-missing",
+    "--cov-report=html",
+    "--cov-branch",
+    "--cov-fail-under=80",
+]
+markers = [
+    "unit: Unit tests for individual functions",
+    "integration: Integration tests that test multiple components",
+    "slow: Tests that take longer to run",
+]
+log_cli = false
+log_cli_level = "INFO"
+log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s"
+log_cli_date_format = "%Y-%m-%d %H:%M:%S"
+
+# Coverage configuration
+[tool.coverage.run]
+source = ["main"]
+omit = [
+    "test_*.py",
+    ".venv/*",
+    "venv/*",
+    "*/site-packages/*",
+]
+
+[tool.coverage.report]
+precision = 2
+show_missing = true
+skip_covered = false
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "if __name__ == .__main__.:",
+    "if TYPE_CHECKING:",
+]
diff --git a/pytest.ini b/pytest.ini
deleted file mode 100644
index d553b45..0000000
--- a/pytest.ini
+++ /dev/null
@@ -1,47 +0,0 @@
-[pytest]
-# Pytest
configuration for GitHub ETL project - -# Test discovery patterns -python_files = test_*.py -python_classes = Test* -python_functions = test_* - -# Output options -addopts = - -v - --strict-markers - --tb=short - --cov=main - --cov-report=term-missing - --cov-report=html - --cov-branch - --cov-fail-under=80 - -# Test paths -testpaths = . - -# Markers for organizing tests -markers = - unit: Unit tests for individual functions - integration: Integration tests that test multiple components - slow: Tests that take longer to run - -# Logging -log_cli = false -log_cli_level = INFO -log_cli_format = %(asctime)s [%(levelname)8s] %(message)s -log_cli_date_format = %Y-%m-%d %H:%M:%S - -# Coverage options -[coverage:run] -source = main -omit = - test_*.py - .venv/* - venv/* - */site-packages/* - -[coverage:report] -precision = 2 -show_missing = true -skip_covered = false diff --git a/requirements.txt b/requirements.txt index e1e65e0..d487f50 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,325 @@ -# Essential dependencies for GitHub ETL -requests>=2.25.0 -google-cloud-bigquery==3.25.0 - -# Testing dependencies -pytest>=7.0.0 -pytest-mock>=3.10.0 -pytest-cov>=4.0.0 - -# Linting and formatting tools -black>=24.0.0 -flake8>=7.0.0 -mypy>=1.8.0 -isort>=5.13.0 -types-requests>=2.32.4 +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --generate-hashes pyproject.toml +# +certifi==2026.1.4 \ + --hash=sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c \ + --hash=sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120 + # via requests +charset-normalizer==3.4.4 \ + --hash=sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad \ + --hash=sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93 \ + --hash=sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394 \ + --hash=sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89 \ + --hash=sha256:0f04b14ffe5fdc8c4933862d8306109a2c51e0704acfa35d51598eb45a1e89fc \ + --hash=sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86 \ + --hash=sha256:194f08cbb32dc406d6e1aea671a68be0823673db2832b38405deba2fb0d88f63 \ + --hash=sha256:1bee1e43c28aa63cb16e5c14e582580546b08e535299b8b6158a7c9c768a1f3d \ + --hash=sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f \ + --hash=sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8 \ + --hash=sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0 \ + --hash=sha256:2677acec1a2f8ef614c6888b5b4ae4060cc184174a938ed4e8ef690e15d3e505 \ + --hash=sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161 \ + --hash=sha256:2aaba3b0819274cc41757a1da876f810a3e4d7b6eb25699253a4effef9e8e4af \ + --hash=sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152 \ + --hash=sha256:2c9d3c380143a1fedbff95a312aa798578371eb29da42106a29019368a475318 \ + --hash=sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72 \ + --hash=sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4 \ + --hash=sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e \ + --hash=sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3 \ + --hash=sha256:44c2a8734b333e0578090c4cd6b16f275e07aa6614ca8715e6c038e865e70576 \ + --hash=sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c \ + 
--hash=sha256:4902828217069c3c5c71094537a8e623f5d097858ac6ca8252f7b4d10b7560f1 \ + --hash=sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8 \ + --hash=sha256:4fe7859a4e3e8457458e2ff592f15ccb02f3da787fcd31e0183879c3ad4692a1 \ + --hash=sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2 \ + --hash=sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44 \ + --hash=sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26 \ + --hash=sha256:5947809c8a2417be3267efc979c47d76a079758166f7d43ef5ae8e9f92751f88 \ + --hash=sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016 \ + --hash=sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede \ + --hash=sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf \ + --hash=sha256:5cb4d72eea50c8868f5288b7f7f33ed276118325c1dfd3957089f6b519e1382a \ + --hash=sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc \ + --hash=sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0 \ + --hash=sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84 \ + --hash=sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db \ + --hash=sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1 \ + --hash=sha256:6aee717dcfead04c6eb1ce3bd29ac1e22663cdea57f943c87d1eab9a025438d7 \ + --hash=sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed \ + --hash=sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8 \ + --hash=sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133 \ + --hash=sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e \ + --hash=sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef \ + --hash=sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14 \ + --hash=sha256:778d2e08eda00f4256d7f672ca9fef386071c9202f5e4607920b86d7803387f2 \ + --hash=sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0 \ + --hash=sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d \ + --hash=sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828 \ + --hash=sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f \ + --hash=sha256:7c308f7e26e4363d79df40ca5b2be1c6ba9f02bdbccfed5abddb7859a6ce72cf \ + --hash=sha256:7fa17817dc5625de8a027cb8b26d9fefa3ea28c8253929b8d6649e705d2835b6 \ + --hash=sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328 \ + --hash=sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090 \ + --hash=sha256:837c2ce8c5a65a2035be9b3569c684358dfbf109fd3b6969630a87535495ceaa \ + --hash=sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381 \ + --hash=sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c \ + --hash=sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb \ + --hash=sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc \ + --hash=sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a \ + --hash=sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec \ + --hash=sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc \ + --hash=sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac \ + --hash=sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e \ + 
--hash=sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313 \ + --hash=sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569 \ + --hash=sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3 \ + --hash=sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d \ + --hash=sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525 \ + --hash=sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894 \ + --hash=sha256:a8bf8d0f749c5757af2142fe7903a9df1d2e8aa3841559b2bad34b08d0e2bcf3 \ + --hash=sha256:a9768c477b9d7bd54bc0c86dbaebdec6f03306675526c9927c0e8a04e8f94af9 \ + --hash=sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a \ + --hash=sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9 \ + --hash=sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14 \ + --hash=sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25 \ + --hash=sha256:b5d84d37db046c5ca74ee7bb47dd6cbc13f80665fdde3e8040bdd3fb015ecb50 \ + --hash=sha256:b7cf1017d601aa35e6bb650b6ad28652c9cd78ee6caff19f3c28d03e1c80acbf \ + --hash=sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1 \ + --hash=sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3 \ + --hash=sha256:c4ef880e27901b6cc782f1b95f82da9313c0eb95c3af699103088fa0ac3ce9ac \ + --hash=sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e \ + --hash=sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815 \ + --hash=sha256:cb01158d8b88ee68f15949894ccc6712278243d95f344770fa7593fa2d94410c \ + --hash=sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6 \ + --hash=sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6 \ + --hash=sha256:cd09d08005f958f370f539f186d10aec3377d55b9eeb0d796025d4886119d76e \ + --hash=sha256:cd4b7ca9984e5e7985c12bc60a6f173f3c958eae74f3ef6624bb6b26e2abbae4 \ + --hash=sha256:ce8a0633f41a967713a59c4139d29110c07e826d131a316b50ce11b1d79b4f84 \ + --hash=sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69 \ + --hash=sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15 \ + --hash=sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191 \ + --hash=sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0 \ + --hash=sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897 \ + --hash=sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd \ + --hash=sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2 \ + --hash=sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794 \ + --hash=sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d \ + --hash=sha256:e912091979546adf63357d7e2ccff9b44f026c075aeaf25a52d0e95ad2281074 \ + --hash=sha256:eaabd426fe94daf8fd157c32e571c85cb12e66692f15516a83a03264b08d06c3 \ + --hash=sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224 \ + --hash=sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838 \ + --hash=sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a \ + --hash=sha256:f155a433c2ec037d4e8df17d18922c3a0d9b3232a396690f17175d2946f0218d \ + --hash=sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d \ + --hash=sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f \ + 
--hash=sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8 \ + --hash=sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490 \ + --hash=sha256:f8e160feb2aed042cd657a72acc0b481212ed28b1b9a95c0cee1621b524e1966 \ + --hash=sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9 \ + --hash=sha256:fa09f53c465e532f4d3db095e0c55b615f010ad81803d383195b6b5ca6cbf5f3 \ + --hash=sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e \ + --hash=sha256:fd44c878ea55ba351104cb93cc85e74916eb8fa440ca7903e57575e97394f608 + # via requests +google-api-core[grpc]==2.29.0 \ + --hash=sha256:84181be0f8e6b04006df75ddfe728f24489f0af57c96a529ff7cf45bc28797f7 \ + --hash=sha256:d30bc60980daa36e314b5d5a3e5958b0200cb44ca8fa1be2b614e932b75a3ea9 + # via + # google-cloud-bigquery + # google-cloud-core +google-auth==2.47.0 \ + --hash=sha256:833229070a9dfee1a353ae9877dcd2dec069a8281a4e72e72f77d4a70ff945da \ + --hash=sha256:c516d68336bfde7cf0da26aab674a36fedcf04b37ac4edd59c597178760c3498 + # via + # google-api-core + # google-cloud-bigquery + # google-cloud-core +google-cloud-bigquery==3.25.0 \ + --hash=sha256:5b2aff3205a854481117436836ae1403f11f2594e6810a98886afd57eda28509 \ + --hash=sha256:7f0c371bc74d2a7fb74dacbc00ac0f90c8c2bec2289b51dd6685a275873b1ce9 + # via github-etl (pyproject.toml) +google-cloud-core==2.5.0 \ + --hash=sha256:67d977b41ae6c7211ee830c7912e41003ea8194bff15ae7d72fd6f51e57acabc \ + --hash=sha256:7c1b7ef5c92311717bd05301aa1a91ffbc565673d3b0b4163a52d8413a186963 + # via google-cloud-bigquery +google-crc32c==1.8.0 \ + --hash=sha256:014a7e68d623e9a4222d663931febc3033c5c7c9730785727de2a81f87d5bab8 \ + --hash=sha256:01f126a5cfddc378290de52095e2c7052be2ba7656a9f0caf4bcd1bfb1833f8a \ + --hash=sha256:0470b8c3d73b5f4e3300165498e4cf25221c7eb37f1159e221d1825b6df8a7ff \ + --hash=sha256:119fcd90c57c89f30040b47c211acee231b25a45d225e3225294386f5d258288 \ + --hash=sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411 \ + --hash=sha256:17446feb05abddc187e5441a45971b8394ea4c1b6efd88ab0af393fd9e0a156a \ + --hash=sha256:19b40d637a54cb71e0829179f6cb41835f0fbd9e8eb60552152a8b52c36cbe15 \ + --hash=sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb \ + --hash=sha256:3b9776774b24ba76831609ffbabce8cdf6fa2bd5e9df37b594221c7e333a81fa \ + --hash=sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962 \ + --hash=sha256:3d488e98b18809f5e322978d4506373599c0c13e6c5ad13e53bb44758e18d215 \ + --hash=sha256:3ebb04528e83b2634857f43f9bb8ef5b2bbe7f10f140daeb01b58f972d04736b \ + --hash=sha256:450dc98429d3e33ed2926fc99ee81001928d63460f8538f21a5d6060912a8e27 \ + --hash=sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113 \ + --hash=sha256:57a50a9035b75643996fbf224d6661e386c7162d1dfdab9bc4ca790947d1007f \ + --hash=sha256:61f58b28e0b21fcb249a8247ad0db2e64114e201e2e9b4200af020f3b6242c9f \ + --hash=sha256:6f35aaffc8ccd81ba3162443fabb920e65b1f20ab1952a31b13173a67811467d \ + --hash=sha256:71734788a88f551fbd6a97be9668a0020698e07b2bf5b3aa26a36c10cdfb27b2 \ + --hash=sha256:864abafe7d6e2c4c66395c1eb0fe12dc891879769b52a3d56499612ca93b6092 \ + --hash=sha256:86cfc00fe45a0ac7359e5214a1704e51a99e757d0272554874f419f79838c5f7 \ + --hash=sha256:87b0072c4ecc9505cfa16ee734b00cd7721d20a0f595be4d40d3d21b41f65ae2 \ + --hash=sha256:87fa445064e7db928226b2e6f0d5304ab4cd0339e664a4e9a25029f384d9bb93 \ + --hash=sha256:89c17d53d75562edfff86679244830599ee0a48efc216200691de8b02ab6b2b8 \ + 
--hash=sha256:8b3f68782f3cbd1bce027e48768293072813469af6a61a86f6bb4977a4380f21 \ + --hash=sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79 \ + --hash=sha256:b0d1a7afc6e8e4635564ba8aa5c0548e3173e41b6384d7711a9123165f582de2 \ + --hash=sha256:ba6aba18daf4d36ad4412feede6221414692f44d17e5428bdd81ad3fc1eee5dc \ + --hash=sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454 \ + --hash=sha256:d511b3153e7011a27ab6ee6bb3a5404a55b994dc1a7322c0b87b29606d9790e2 \ + --hash=sha256:db3fe8eaf0612fc8b20fa21a5f25bd785bc3cd5be69f8f3412b0ac2ffd49e733 \ + --hash=sha256:e6584b12cb06796d285d09e33f63309a09368b9d806a551d8036a4207ea43697 \ + --hash=sha256:f4b51844ef67d6cf2e9425983274da75f18b1597bb2c998e1c0a0e8d46f8f651 \ + --hash=sha256:f639065ea2042d5c034bf258a9f085eaa7af0cd250667c0635a3118e8f92c69c + # via google-resumable-media +google-resumable-media==2.8.0 \ + --hash=sha256:dd14a116af303845a8d932ddae161a26e86cc229645bc98b39f026f9b1717582 \ + --hash=sha256:f1157ed8b46994d60a1bc432544db62352043113684d4e030ee02e77ebe9a1ae + # via google-cloud-bigquery +googleapis-common-protos==1.72.0 \ + --hash=sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038 \ + --hash=sha256:e55a601c1b32b52d7a3e65f43563e2aa61bcd737998ee672ac9b951cd49319f5 + # via + # google-api-core + # grpcio-status +grpcio==1.76.0 \ + --hash=sha256:035d90bc79eaa4bed83f524331d55e35820725c9fbb00ffa1904d5550ed7ede3 \ + --hash=sha256:04bbe1bfe3a68bbfd4e52402ab7d4eb59d72d02647ae2042204326cf4bbad280 \ + --hash=sha256:063065249d9e7e0782d03d2bca50787f53bd0fb89a67de9a7b521c4a01f1989b \ + --hash=sha256:06c3d6b076e7b593905d04fdba6a0525711b3466f43b3400266f04ff735de0cd \ + --hash=sha256:08caea849a9d3c71a542827d6df9d5a69067b0a1efbea8a855633ff5d9571465 \ + --hash=sha256:0aaa82d0813fd4c8e589fac9b65d7dd88702555f702fb10417f96e2a2a6d4c0f \ + --hash=sha256:0b7604868b38c1bfd5cf72d768aedd7db41d78cb6a4a18585e33fb0f9f2363fd \ + --hash=sha256:0c37db8606c258e2ee0c56b78c62fc9dee0e901b5dbdcf816c2dd4ad652b8b0c \ + --hash=sha256:1c9b93f79f48b03ada57ea24725d83a30284a012ec27eab2cf7e50a550cbbbcc \ + --hash=sha256:2107b0c024d1b35f4083f11245c0e23846ae64d02f40b2b226684840260ed054 \ + --hash=sha256:2229ae655ec4e8999599469559e97630185fdd53ae1e8997d147b7c9b2b72cba \ + --hash=sha256:25a18e9810fbc7e7f03ec2516addc116a957f8cbb8cbc95ccc80faa072743d03 \ + --hash=sha256:26ef06c73eb53267c2b319f43e6634c7556ea37672029241a056629af27c10e2 \ + --hash=sha256:2e1743fbd7f5fa713a1b0a8ac8ebabf0ec980b5d8809ec358d488e273b9cf02a \ + --hash=sha256:32483fe2aab2c3794101c2a159070584e5db11d0aa091b2c0ea9c4fc43d0d749 \ + --hash=sha256:3bf0f392c0b806905ed174dcd8bdd5e418a40d5567a05615a030a5aeddea692d \ + --hash=sha256:3e2a27c89eb9ac3d81ec8835e12414d73536c6e620355d65102503064a4ed6eb \ + --hash=sha256:40ad3afe81676fd9ec6d9d406eda00933f218038433980aa19d401490e46ecde \ + --hash=sha256:4215d3a102bd95e2e11b5395c78562967959824156af11fa93d18fdd18050990 \ + --hash=sha256:45d59a649a82df5718fd9527ce775fd66d1af35e6d31abdcdc906a49c6822958 \ + --hash=sha256:45e0111e73f43f735d70786557dc38141185072d7ff8dc1829d6a77ac1471468 \ + --hash=sha256:479496325ce554792dba6548fae3df31a72cef7bad71ca2e12b0e58f9b336bfc \ + --hash=sha256:490fa6d203992c47c7b9e4a9d39003a0c2bcc1c9aa3c058730884bbbb0ee9f09 \ + --hash=sha256:49ce47231818806067aea3324d4bf13825b658ad662d3b25fada0bdad9b8a6af \ + --hash=sha256:4baf3cbe2f0be3289eb68ac8ae771156971848bb8aaff60bad42005539431980 \ + --hash=sha256:522175aba7af9113c48ec10cc471b9b9bd4f6ceb36aeb4544a8e2c80ed9d252d \ + 
--hash=sha256:5e8571632780e08526f118f74170ad8d50fb0a48c23a746bef2a6ebade3abd6f \ + --hash=sha256:615ba64c208aaceb5ec83bfdce7728b80bfeb8be97562944836a7a0a9647d882 \ + --hash=sha256:61f69297cba3950a524f61c7c8ee12e55c486cb5f7db47ff9dcee33da6f0d3ae \ + --hash=sha256:65a20de41e85648e00305c1bb09a3598f840422e522277641145a32d42dcefcc \ + --hash=sha256:6a15c17af8839b6801d554263c546c69c4d7718ad4321e3166175b37eaacca77 \ + --hash=sha256:747fa73efa9b8b1488a95d0ba1039c8e2dca0f741612d80415b1e1c560febf4e \ + --hash=sha256:7be78388d6da1a25c0d5ec506523db58b18be22d9c37d8d3a32c08be4987bd73 \ + --hash=sha256:81fd9652b37b36f16138611c7e884eb82e0cec137c40d3ef7c3f9b3ed00f6ed8 \ + --hash=sha256:83d57312a58dcfe2a3a0f9d1389b299438909a02db60e2f2ea2ae2d8034909d3 \ + --hash=sha256:8843114c0cfce61b40ad48df65abcfc00d4dba82eae8718fab5352390848c5da \ + --hash=sha256:8cc3309d8e08fd79089e13ed4819d0af72aa935dd8f435a195fd152796752ff2 \ + --hash=sha256:8ebe63ee5f8fa4296b1b8cfc743f870d10e902ca18afc65c68cf46fd39bb0783 \ + --hash=sha256:8eddfb4d203a237da6f3cc8a540dad0517d274b5a1e9e636fd8d2c79b5c1d397 \ + --hash=sha256:922fa70ba549fce362d2e2871ab542082d66e2aaf0c19480ea453905b01f384e \ + --hash=sha256:931091142fd8cc14edccc0845a79248bc155425eee9a98b2db2ea4f00a235a42 \ + --hash=sha256:971fd5a1d6e62e00d945423a567e42eb1fa678ba89072832185ca836a94daaa6 \ + --hash=sha256:980a846182ce88c4f2f7e2c22c56aefd515daeb36149d1c897f83cf57999e0b6 \ + --hash=sha256:9d9adda641db7207e800a7f089068f6f645959f2df27e870ee81d44701dd9db3 \ + --hash=sha256:9f8f757bebaaea112c00dba718fc0d3260052ce714e25804a03f93f5d1c6cc11 \ + --hash=sha256:a6ae758eb08088d36812dd5d9af7a9859c05b1e0f714470ea243694b49278e7b \ + --hash=sha256:a8c2cf1209497cf659a667d7dea88985e834c24b7c3b605e6254cbb5076d985c \ + --hash=sha256:acab0277c40eff7143c2323190ea57b9ee5fd353d8190ee9652369fae735668a \ + --hash=sha256:b331680e46239e090f5b3cead313cc772f6caa7d0fc8de349337563125361a4a \ + --hash=sha256:c088e7a90b6017307f423efbb9d1ba97a22aa2170876223f9709e9d1de0b5347 \ + --hash=sha256:d099566accf23d21037f18a2a63d323075bebace807742e4b0ac210971d4dd70 \ + --hash=sha256:d388087771c837cdb6515539f43b9d4bf0b0f23593a24054ac16f7a960be16f4 \ + --hash=sha256:dcfe41187da8992c5f40aa8c5ec086fa3672834d2be57a32384c08d5a05b4c00 \ + --hash=sha256:e6d1db20594d9daba22f90da738b1a0441a7427552cc6e2e3d1297aeddc00378 \ + --hash=sha256:ebea5cc3aa8ea72e04df9913492f9a96d9348db876f9dda3ad729cfedf7ac416 \ + --hash=sha256:ebebf83299b0cb1721a8859ea98f3a77811e35dce7609c5c963b9ad90728f886 \ + --hash=sha256:f0e34c2079d47ae9f6188211db9e777c619a21d4faba6977774e8fa43b085e48 \ + --hash=sha256:f92f88e6c033db65a5ae3d97905c8fea9c725b63e28d5a75cb73b49bda5024d8 \ + --hash=sha256:f9f7bd5faab55f47231ad8dba7787866b69f5e93bc306e3915606779bbfb4ba8 \ + --hash=sha256:fd5ef5932f6475c436c4a55e4336ebbe47bd3272be04964a03d316bbf4afbcbc \ + --hash=sha256:ff8a59ea85a1f2191a0ffcc61298c571bc566332f82e5f5be1b83c9d8e668a62 + # via + # google-api-core + # grpcio-status +grpcio-status==1.76.0 \ + --hash=sha256:25fcbfec74c15d1a1cb5da3fab8ee9672852dc16a5a9eeb5baf7d7a9952943cd \ + --hash=sha256:380568794055a8efbbd8871162df92012e0228a5f6dffaf57f2a00c534103b18 + # via google-api-core +idna==3.11 \ + --hash=sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea \ + --hash=sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902 + # via requests +packaging==26.0 \ + --hash=sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4 \ + --hash=sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529 + # via 
google-cloud-bigquery
+proto-plus==1.27.0 \
+    --hash=sha256:1baa7f81cf0f8acb8bc1f6d085008ba4171eaf669629d1b6d1673b21ed1c0a82 \
+    --hash=sha256:873af56dd0d7e91836aee871e5799e1c6f1bda86ac9a983e0bb9f0c266a568c4
+    # via google-api-core
+protobuf==6.33.4 \
+    --hash=sha256:0f12ddbf96912690c3582f9dffb55530ef32015ad8e678cd494312bd78314c4f \
+    --hash=sha256:1fe3730068fcf2e595816a6c34fe66eeedd37d51d0400b72fabc848811fdc1bc \
+    --hash=sha256:2fe67f6c014c84f655ee06f6f66213f9254b3a8b6bda6cda0ccd4232c73c06f0 \
+    --hash=sha256:3df850c2f8db9934de4cf8f9152f8dc2558f49f298f37f90c517e8e5c84c30e9 \
+    --hash=sha256:757c978f82e74d75cba88eddec479df9b99a42b31193313b75e492c06a51764e \
+    --hash=sha256:8f11ffae31ec67fc2554c2ef891dcb561dae9a2a3ed941f9e134c2db06657dbc \
+    --hash=sha256:918966612c8232fc6c24c78e1cd89784307f5814ad7506c308ee3cf86662850d \
+    --hash=sha256:955478a89559fa4568f5a81dce77260eabc5c686f9e8366219ebd30debf06aa6 \
+    --hash=sha256:c7c64f259c618f0bef7bee042075e390debbf9682334be2b67408ec7c1c09ee6 \
+    --hash=sha256:dc2e61bca3b10470c1912d166fe0af67bfc20eb55971dcef8dfa48ce14f0ed91
+    # via
+    #   google-api-core
+    #   googleapis-common-protos
+    #   grpcio-status
+    #   proto-plus
+pyasn1==0.6.2 \
+    --hash=sha256:1eb26d860996a18e9b6ed05e7aae0e9fc21619fcee6af91cca9bad4fbea224bf \
+    --hash=sha256:9b59a2b25ba7e4f8197db7686c09fb33e658b98339fadb826e9512629017833b
+    # via
+    #   pyasn1-modules
+    #   rsa
+pyasn1-modules==0.4.2 \
+    --hash=sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a \
+    --hash=sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6
+    # via google-auth
+python-dateutil==2.9.0.post0 \
+    --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \
+    --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427
+    # via google-cloud-bigquery
+requests==2.32.5 \
+    --hash=sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6 \
+    --hash=sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf
+    # via
+    #   github-etl (pyproject.toml)
+    #   google-api-core
+    #   google-cloud-bigquery
+rsa==4.9.1 \
+    --hash=sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762 \
+    --hash=sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75
+    # via google-auth
+six==1.17.0 \
+    --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \
+    --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81
+    # via python-dateutil
+typing-extensions==4.15.0 \
+    --hash=sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466 \
+    --hash=sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548
+    # via grpcio
+urllib3==2.6.3 \
+    --hash=sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed \
+    --hash=sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4
+    # via requests
diff --git a/test_formatting.py b/test_formatting.py
new file mode 100644
index 0000000..c92e534
--- /dev/null
+++ b/test_formatting.py
@@ -0,0 +1,16 @@
+"""
+Code Style Tests.
+"""
+
+import subprocess
+
+
+def test_black():
+    cmd = ("black", "--diff", "main.py")
+    output = subprocess.check_output(cmd)
+    assert not output, "The Python code does not adhere to the project style."
+
+
+def test_ruff():
+    cmd = ("ruff", "check", "main.py", "--target-version", "py314")
+    assert subprocess.call(cmd) == 0, "ruff check reported issues in main.py."
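The two style tests above only report pass/fail: `check_output` raises on a non-zero exit, and the assertion messages carry no detail about what actually failed. As a rough sketch (not part of this patch; the function name and message are invented for illustration), a variant that captures ruff's report makes a red CI run self-explanatory:

```python
import subprocess


# Hypothetical companion to test_formatting.py: capture ruff's report so a
# failing test shows the actual findings instead of just an exit status.
def test_ruff_verbose():
    result = subprocess.run(
        ("ruff", "check", "main.py", "--target-version", "py314"),
        capture_output=True,  # keep stdout/stderr instead of streaming them
        text=True,  # decode bytes to str
    )
    assert result.returncode == 0, f"ruff reported issues:\n{result.stdout}"
```

These checks can also be run on their own with `pytest test_formatting.py`, though the coverage options in `addopts` (notably `--cov-fail-under=80`) still apply to such a partial run.
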
diff --git a/test_main.py b/test_main.py index 0e60118..0d38ac3 100644 --- a/test_main.py +++ b/test_main.py @@ -116,1325 +116,839 @@ def mock_comment_response(): # ============================================================================= -class TestSetupLogging: - """Tests for setup_logging function.""" - def test_setup_logging_configures_logger(self): - """Test that setup_logging configures the root logger correctly.""" - main.setup_logging() - - root_logger = logging.getLogger() - assert root_logger.level == logging.INFO - assert len(root_logger.handlers) > 0 - - # Check that at least one handler is a StreamHandler - has_stream_handler = any( - isinstance(handler, logging.StreamHandler) - for handler in root_logger.handlers - ) - assert has_stream_handler - - -class TestSleepForRateLimit: - """Tests for sleep_for_rate_limit function.""" - - @patch("time.time") - @patch("time.sleep") - def test_sleep_for_rate_limit_when_remaining_is_zero(self, mock_sleep, mock_time): - """Test that sleep_for_rate_limit sleeps until reset time.""" - mock_time.return_value = 1000 - - mock_response = Mock() - mock_response.headers = { - "X-RateLimit-Remaining": "0", - "X-RateLimit-Reset": "1120", # 120 seconds from now - } +# ============================================================================= +# TESTS FOR SETUP_LOGGING +# ============================================================================= - main.sleep_for_rate_limit(mock_response) - mock_sleep.assert_called_once_with(120) +def test_setup_logging(): + """Test that setup_logging configures logging correctly.""" + main.setup_logging() - @patch("time.time") - @patch("time.sleep") - def test_sleep_for_rate_limit_when_reset_already_passed( - self, mock_sleep, mock_time - ): - """Test that sleep_for_rate_limit doesn't sleep negative time.""" - mock_time.return_value = 2000 + root_logger = logging.getLogger() + assert root_logger.level == logging.INFO + assert len(root_logger.handlers) > 0 - mock_response = Mock() - mock_response.headers = { - "X-RateLimit-Remaining": "0", - "X-RateLimit-Reset": "1500", # Already passed - } + # Check that at least one handler is a StreamHandler + has_stream_handler = any( + isinstance(handler, logging.StreamHandler) + for handler in root_logger.handlers + ) + assert has_stream_handler - main.sleep_for_rate_limit(mock_response) - # Should sleep for 0 seconds (max of 0 and negative value) - mock_sleep.assert_called_once_with(0) - @patch("time.sleep") - def test_sleep_for_rate_limit_when_remaining_not_zero(self, mock_sleep): - """Test that sleep_for_rate_limit doesn't sleep when remaining > 0.""" - mock_response = Mock() - mock_response.headers = { - "X-RateLimit-Remaining": "5", - "X-RateLimit-Reset": "1500", - } +# ============================================================================= +# TESTS FOR SLEEP_FOR_RATE_LIMIT +# ============================================================================= - main.sleep_for_rate_limit(mock_response) - # Should not sleep when remaining > 0 - mock_sleep.assert_not_called() +@patch("time.time") +@patch("time.sleep") +def test_sleep_for_rate_limit_calculates_wait_time(mock_sleep, mock_time): + """Test that sleep_for_rate_limit calculates correct wait time.""" + mock_time.return_value = 1000 - @patch("time.sleep") - def test_sleep_for_rate_limit_with_missing_headers(self, mock_sleep): - """Test sleep_for_rate_limit with missing rate limit headers.""" - mock_response = Mock() - mock_response.headers = {} + mock_response = Mock() + mock_response.headers = { + 
"X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": "1120", # 120 seconds from now + } - main.sleep_for_rate_limit(mock_response) + main.sleep_for_rate_limit(mock_response) - # Should not sleep when headers are missing (defaults to remaining=1) - mock_sleep.assert_not_called() + mock_sleep.assert_called_once_with(120) -class TestExtractPullRequests: - """Tests for extract_pull_requests function.""" +@patch("time.time") +@patch("time.sleep") +def test_sleep_for_rate_limit_when_reset_already_passed(mock_sleep, mock_time): + """Test that sleep_for_rate_limit doesn't sleep negative time.""" + mock_time.return_value = 2000 - def test_extract_single_page(self, mock_session): - """Test extracting data from a single page of results.""" - mock_response = Mock() - mock_response.status_code = 200 - mock_response.json.return_value = [ - {"number": 1, "title": "PR 1"}, - {"number": 2, "title": "PR 2"}, - ] - mock_response.links = {} - - mock_session.get.return_value = mock_response - - # Mock the extract functions - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - assert len(result) == 1 - assert len(result[0]) == 2 - assert result[0][0]["number"] == 1 - assert result[0][1]["number"] == 2 - - def test_extract_multiple_pages(self, mock_session): - """Test extracting data across multiple pages with pagination.""" - # First page response - mock_response_1 = Mock() - mock_response_1.status_code = 200 - mock_response_1.json.return_value = [ - {"number": 1, "title": "PR 1"}, - {"number": 2, "title": "PR 2"}, - ] - mock_response_1.links = { - "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} - } + mock_response = Mock() + mock_response.headers = { + "X-RateLimit-Remaining": "0", + "X-RateLimit-Reset": "1500", # Already passed + } - # Second page response - mock_response_2 = Mock() - mock_response_2.status_code = 200 - mock_response_2.json.return_value = [{"number": 3, "title": "PR 3"}] - mock_response_2.links = {} - - mock_session.get.side_effect = [mock_response_1, mock_response_2] - - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - assert len(result) == 2 - assert len(result[0]) == 2 - assert len(result[1]) == 1 - assert result[0][0]["number"] == 1 - assert result[1][0]["number"] == 3 - - def test_enriches_prs_with_commit_data(self, mock_session): - """Test that PRs are enriched with commit data.""" - mock_response = Mock() - mock_response.status_code = 200 - mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] - mock_response.links = {} - - mock_session.get.return_value = mock_response - - mock_commits = [{"sha": "abc123"}] - - with ( - patch( - "main.extract_commits", return_value=mock_commits - ) as mock_extract_commits, - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - assert result[0][0]["commit_data"] == mock_commits - mock_extract_commits.assert_called_once() - - def test_enriches_prs_with_reviewer_data(self, mock_session): - """Test that PRs are enriched with reviewer data.""" - mock_response = Mock() - mock_response.status_code = 200 
- mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] - mock_response.links = {} - - mock_session.get.return_value = mock_response - - mock_reviewers = [{"id": 789, "state": "APPROVED"}] - - with ( - patch("main.extract_commits", return_value=[]), - patch( - "main.extract_reviewers", return_value=mock_reviewers - ) as mock_extract_reviewers, - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - assert result[0][0]["reviewer_data"] == mock_reviewers - mock_extract_reviewers.assert_called_once() - - def test_enriches_prs_with_comment_data(self, mock_session): - """Test that PRs are enriched with comment data.""" - mock_response = Mock() - mock_response.status_code = 200 - mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] - mock_response.links = {} - - mock_session.get.return_value = mock_response - - mock_comments = [{"id": 456, "body": "Great work!"}] - - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch( - "main.extract_comments", return_value=mock_comments - ) as mock_extract_comments, - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - assert result[0][0]["comment_data"] == mock_comments - mock_extract_comments.assert_called_once() - - @patch("main.sleep_for_rate_limit") - def test_handles_rate_limit(self, mock_sleep, mock_session): - """Test that extract_pull_requests handles rate limiting correctly.""" - # Rate limit response - mock_response_rate_limit = Mock() - mock_response_rate_limit.status_code = 403 - mock_response_rate_limit.headers = {"X-RateLimit-Remaining": "0"} - - # Successful response after rate limit - mock_response_success = Mock() - mock_response_success.status_code = 200 - mock_response_success.json.return_value = [{"number": 1, "title": "PR 1"}] - mock_response_success.links = {} - - mock_session.get.side_effect = [ - mock_response_rate_limit, - mock_response_success, - ] + main.sleep_for_rate_limit(mock_response) - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + # Should sleep for 0 seconds (max of 0 and negative value) + mock_sleep.assert_called_once_with(0) - mock_sleep.assert_called_once_with(mock_response_rate_limit) - assert len(result) == 1 - def test_handles_api_error_404(self, mock_session): - """Test that extract_pull_requests raises SystemExit on 404.""" - mock_response = Mock() - mock_response.status_code = 404 - mock_response.text = "Not Found" +@patch("time.sleep") +def test_sleep_for_rate_limit_when_remaining_not_zero(mock_sleep): + """Test that sleep_for_rate_limit doesn't sleep when remaining > 0.""" + mock_response = Mock() + mock_response.headers = { + "X-RateLimit-Remaining": "5", + "X-RateLimit-Reset": "1500", + } - mock_session.get.return_value = mock_response + main.sleep_for_rate_limit(mock_response) - with pytest.raises(SystemExit) as exc_info: - list(main.extract_pull_requests(mock_session, "mozilla/nonexistent")) + # Should not sleep when remaining > 0 + mock_sleep.assert_not_called() - assert "GitHub API error 404" in str(exc_info.value) - def test_handles_api_error_500(self, mock_session): - """Test that extract_pull_requests raises SystemExit on 500.""" - mock_response = Mock() - mock_response.status_code = 500 - mock_response.text = 
"Internal Server Error" +@patch("time.sleep") +def test_sleep_for_rate_limit_with_missing_headers(mock_sleep): + """Test sleep_for_rate_limit with missing rate limit headers.""" + mock_response = Mock() + mock_response.headers = {} - mock_session.get.return_value = mock_response + main.sleep_for_rate_limit(mock_response) - with pytest.raises(SystemExit) as exc_info: - list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - assert "GitHub API error 500" in str(exc_info.value) - - def test_stops_on_empty_batch(self, mock_session): - """Test that extraction stops when an empty batch is returned.""" - # First page with data - mock_response_1 = Mock() - mock_response_1.status_code = 200 - mock_response_1.json.return_value = [{"number": 1}] - mock_response_1.links = { - "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} - } + # Should not sleep when headers are missing (defaults to remaining=1) + mock_sleep.assert_not_called() - # Second page empty - mock_response_2 = Mock() - mock_response_2.status_code = 200 - mock_response_2.json.return_value = [] - mock_response_2.links = {} - - mock_session.get.side_effect = [mock_response_1, mock_response_2] - - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - # Should only have 1 chunk from first page - assert len(result) == 1 - assert len(result[0]) == 1 - - def test_invalid_page_number_handling(self, mock_session): - """Test handling of invalid page number in pagination.""" - mock_response_1 = Mock() - mock_response_1.status_code = 200 - mock_response_1.json.return_value = [{"number": 1}] - mock_response_1.links = { - "next": { - "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=invalid" - } - } - mock_session.get.return_value = mock_response_1 - - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - - # Should stop pagination on invalid page number - assert len(result) == 1 - - def test_custom_github_api_url(self, mock_session): - """Test using custom GitHub API URL.""" - custom_url = "https://mock-github.example.com" - - mock_response = Mock() - mock_response.status_code = 200 - mock_response.json.return_value = [{"number": 1}] - mock_response.links = {} - - mock_session.get.return_value = mock_response - - with ( - patch("main.extract_commits", return_value=[]), - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - list( - main.extract_pull_requests( - mock_session, "mozilla/firefox", github_api_url=custom_url - ) - ) - # Verify custom URL was used - call_args = mock_session.get.call_args - assert custom_url in call_args[0][0] - - def test_skips_prs_without_number_field(self, mock_session): - """Test that PRs without 'number' field are skipped.""" - mock_response = Mock() - mock_response.status_code = 200 - mock_response.json.return_value = [ - {"number": 1, "title": "PR 1"}, - {"title": "PR without number"}, # Missing number field - {"number": 2, "title": "PR 2"}, - ] - mock_response.links = {} +# ============================================================================= +# TESTS FOR EXTRACT_PULL_REQUESTS +# 
============================================================================= - mock_session.get.return_value = mock_response - with ( - patch("main.extract_commits", return_value=[]) as mock_commits, - patch("main.extract_reviewers", return_value=[]), - patch("main.extract_comments", return_value=[]), - ): - list(main.extract_pull_requests(mock_session, "mozilla/firefox")) +def test_extract_pull_requests_basic(mock_session): + """Test basic extraction of pull requests.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [ + {"number": 1, "title": "PR 1"}, + {"number": 2, "title": "PR 2"}, + ] + mock_response.links = {} - # extract_commits should only be called for PRs with number field - assert mock_commits.call_count == 2 + mock_session.get.return_value = mock_response + # Mock the extract functions + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert len(result) == 1 + assert len(result[0]) == 2 + assert result[0][0]["number"] == 1 + assert result[0][1]["number"] == 2 + +def test_extract_multiple_pages(mock_session): + """Test extracting data across multiple pages with pagination.""" + # First page response + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [ + {"number": 1, "title": "PR 1"}, + {"number": 2, "title": "PR 2"}, + ] + mock_response_1.links = { + "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} + } -class TestExtractCommits: - """Tests for extract_commits function.""" + # Second page response + mock_response_2 = Mock() + mock_response_2.status_code = 200 + mock_response_2.json.return_value = [{"number": 3, "title": "PR 3"}] + mock_response_2.links = {} - def test_fetch_commits_with_files(self, mock_session): - """Test fetching commits with files for a PR.""" - # Mock commits list response - commits_response = Mock() - commits_response.status_code = 200 - commits_response.json.return_value = [ - {"sha": "abc123"}, - {"sha": "def456"}, - ] + mock_session.get.side_effect = [mock_response_1, mock_response_2] - # Mock individual commit responses - commit_detail_1 = Mock() - commit_detail_1.status_code = 200 - commit_detail_1.json.return_value = { - "sha": "abc123", - "files": [{"filename": "file1.py", "additions": 10}], - } + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert len(result) == 2 + assert len(result[0]) == 2 + assert len(result[1]) == 1 + assert result[0][0]["number"] == 1 + assert result[1][0]["number"] == 3 + +def test_enriches_prs_with_commit_data(mock_session): + """Test that PRs are enriched with commit data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} + + mock_session.get.return_value = mock_response + + mock_commits = [{"sha": "abc123"}] + + with ( + patch( + "main.extract_commits", return_value=mock_commits + ) as mock_extract_commits, + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - 
commit_detail_2 = Mock() - commit_detail_2.status_code = 200 - commit_detail_2.json.return_value = { - "sha": "def456", - "files": [{"filename": "file2.py", "deletions": 5}], - } + assert result[0][0]["commit_data"] == mock_commits + mock_extract_commits.assert_called_once() - mock_session.get.side_effect = [ - commits_response, - commit_detail_1, - commit_detail_2, - ] +def test_enriches_prs_with_reviewer_data(mock_session): + """Test that PRs are enriched with reviewer data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} - result = main.extract_commits(mock_session, "mozilla/firefox", 123) - - assert len(result) == 2 - assert result[0]["sha"] == "abc123" - assert result[0]["files"][0]["filename"] == "file1.py" - assert result[1]["sha"] == "def456" - assert result[1]["files"][0]["filename"] == "file2.py" - - def test_multiple_files_per_commit(self, mock_session): - """Test handling multiple files in a single commit.""" - commits_response = Mock() - commits_response.status_code = 200 - commits_response.json.return_value = [{"sha": "abc123"}] - - commit_detail = Mock() - commit_detail.status_code = 200 - commit_detail.json.return_value = { - "sha": "abc123", - "files": [ - {"filename": "file1.py", "additions": 10}, - {"filename": "file2.py", "additions": 20}, - {"filename": "file3.py", "deletions": 5}, - ], - } + mock_session.get.return_value = mock_response - mock_session.get.side_effect = [commits_response, commit_detail] + mock_reviewers = [{"id": 789, "state": "APPROVED"}] - result = main.extract_commits(mock_session, "mozilla/firefox", 123) + with ( + patch("main.extract_commits", return_value=[]), + patch( + "main.extract_reviewers", return_value=mock_reviewers + ) as mock_extract_reviewers, + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - assert len(result) == 1 - assert len(result[0]["files"]) == 3 + assert result[0][0]["reviewer_data"] == mock_reviewers + mock_extract_reviewers.assert_called_once() - @patch("main.sleep_for_rate_limit") - def test_rate_limit_on_commits_list(self, mock_sleep, mock_session): - """Test rate limit handling when fetching commits list.""" - # Rate limit response - rate_limit_response = Mock() - rate_limit_response.status_code = 403 - rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} +def test_enriches_prs_with_comment_data(mock_session): + """Test that PRs are enriched with comment data.""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response.links = {} - # Success response - success_response = Mock() - success_response.status_code = 200 - success_response.json.return_value = [] + mock_session.get.return_value = mock_response - mock_session.get.side_effect = [rate_limit_response, success_response] + mock_comments = [{"id": 456, "body": "Great work!"}] - result = main.extract_commits(mock_session, "mozilla/firefox", 123) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch( + "main.extract_comments", return_value=mock_comments + ) as mock_extract_comments, + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + assert result[0][0]["comment_data"] == mock_comments + mock_extract_comments.assert_called_once() + +@patch("main.sleep_for_rate_limit") +def 
test_handles_rate_limit(mock_sleep, mock_session): + """Test that extract_pull_requests handles rate limiting correctly.""" + # Rate limit response + mock_response_rate_limit = Mock() + mock_response_rate_limit.status_code = 403 + mock_response_rate_limit.headers = {"X-RateLimit-Remaining": "0"} + + # Successful response after rate limit + mock_response_success = Mock() + mock_response_success.status_code = 200 + mock_response_success.json.return_value = [{"number": 1, "title": "PR 1"}] + mock_response_success.links = {} + + mock_session.get.side_effect = [ + mock_response_rate_limit, + mock_response_success, + ] + + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - mock_sleep.assert_called_once() - assert result == [] + mock_sleep.assert_called_once_with(mock_response_rate_limit) + assert len(result) == 1 - def test_api_error_on_commits_list(self, mock_session): - """Test API error handling when fetching commits list.""" - error_response = Mock() - error_response.status_code = 500 - error_response.text = "Internal Server Error" +def test_handles_api_error_404(mock_session): + """Test that extract_pull_requests raises SystemExit on 404.""" + mock_response = Mock() + mock_response.status_code = 404 + mock_response.text = "Not Found" - mock_session.get.return_value = error_response + mock_session.get.return_value = mock_response - with pytest.raises(SystemExit) as exc_info: - main.extract_commits(mock_session, "mozilla/firefox", 123) + with pytest.raises(SystemExit) as exc_info: + list(main.extract_pull_requests(mock_session, "mozilla/nonexistent")) - assert "GitHub API error 500" in str(exc_info.value) + assert "GitHub API error 404" in str(exc_info.value) - def test_api_error_on_individual_commit(self, mock_session): - """Test API error when fetching individual commit details.""" - commits_response = Mock() - commits_response.status_code = 200 - commits_response.json.return_value = [{"sha": "abc123"}] +def test_handles_api_error_500(mock_session): + """Test that extract_pull_requests raises SystemExit on 500.""" + mock_response = Mock() + mock_response.status_code = 500 + mock_response.text = "Internal Server Error" - commit_error = Mock() - commit_error.status_code = 404 - commit_error.text = "Commit not found" + mock_session.get.return_value = mock_response - mock_session.get.side_effect = [commits_response, commit_error] + with pytest.raises(SystemExit) as exc_info: + list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - with pytest.raises(SystemExit) as exc_info: - main.extract_commits(mock_session, "mozilla/firefox", 123) + assert "GitHub API error 500" in str(exc_info.value) - assert "GitHub API error 404" in str(exc_info.value) +def test_stops_on_empty_batch(mock_session): + """Test that extraction stops when an empty batch is returned.""" + # First page with data + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [{"number": 1}] + mock_response_1.links = { + "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} + } - def test_commit_without_sha_field(self, mock_session): - """Test handling commits without sha field.""" - commits_response = Mock() - commits_response.status_code = 200 - commits_response.json.return_value = [ - {"sha": "abc123"}, - {}, # Missing sha field - ] + # Second page empty + 
mock_response_2 = Mock() + mock_response_2.status_code = 200 + mock_response_2.json.return_value = [] + mock_response_2.links = {} - commit_detail_1 = Mock() - commit_detail_1.status_code = 200 - commit_detail_1.json.return_value = {"sha": "abc123", "files": []} + mock_session.get.side_effect = [mock_response_1, mock_response_2] - commit_detail_2 = Mock() - commit_detail_2.status_code = 200 - commit_detail_2.json.return_value = {"files": []} + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) + + # Should only have 1 chunk from first page + assert len(result) == 1 + assert len(result[0]) == 1 + +def test_invalid_page_number_handling(mock_session): + """Test handling of invalid page number in pagination.""" + mock_response_1 = Mock() + mock_response_1.status_code = 200 + mock_response_1.json.return_value = [{"number": 1}] + mock_response_1.links = { + "next": { + "url": "https://api.github.com/repos/mozilla/firefox/pulls?page=invalid" + } + } - mock_session.get.side_effect = [ - commits_response, - commit_detail_1, - commit_detail_2, - ] + mock_session.get.return_value = mock_response_1 - result = main.extract_commits(mock_session, "mozilla/firefox", 123) + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + result = list(main.extract_pull_requests(mock_session, "mozilla/firefox")) - # Should handle the commit without sha gracefully - assert len(result) == 2 + # Should stop pagination on invalid page number + assert len(result) == 1 - def test_custom_github_api_url(self, mock_session): - """Test using custom GitHub API URL for commits.""" - custom_url = "https://mock-github.example.com" +def test_custom_github_api_url(mock_session): + """Test using custom GitHub API URL.""" + custom_url = "https://mock-github.example.com" - commits_response = Mock() - commits_response.status_code = 200 - commits_response.json.return_value = [] + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = [{"number": 1}] + mock_response.links = {} - mock_session.get.return_value = commits_response + mock_session.get.return_value = mock_response - main.extract_commits( - mock_session, "mozilla/firefox", 123, github_api_url=custom_url + with ( + patch("main.extract_commits", return_value=[]), + patch("main.extract_reviewers", return_value=[]), + patch("main.extract_comments", return_value=[]), + ): + list( + main.extract_pull_requests( + mock_session, "mozilla/firefox", github_api_url=custom_url + ) ) - call_args = mock_session.get.call_args - assert custom_url in call_args[0][0] - - def test_empty_commits_list(self, mock_session): - """Test handling PR with no commits.""" - commits_response = Mock() - commits_response.status_code = 200 - commits_response.json.return_value = [] - - mock_session.get.return_value = commits_response - - result = main.extract_commits(mock_session, "mozilla/firefox", 123) - - assert result == [] - - -class TestExtractReviewers: - """Tests for extract_reviewers function.""" + # Verify custom URL was used + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] + +def test_skips_prs_without_number_field(mock_session): + """Test that PRs without 'number' field are skipped.""" + mock_response = Mock() + mock_response.status_code 
= 200
+    mock_response.json.return_value = [
+        {"number": 1, "title": "PR 1"},
+        {"title": "PR without number"},  # Missing number field
+        {"number": 2, "title": "PR 2"},
+    ]
+    mock_response.links = {}
+
+    mock_session.get.return_value = mock_response
+
+    with (
+        patch("main.extract_commits", return_value=[]) as mock_commits,
+        patch("main.extract_reviewers", return_value=[]),
+        patch("main.extract_comments", return_value=[]),
+    ):
+        list(main.extract_pull_requests(mock_session, "mozilla/firefox"))
 
-    def test_fetch_reviewers(self, mock_session):
-        """Test fetching reviewers for a PR."""
-        reviewers_response = Mock()
-        reviewers_response.status_code = 200
-        reviewers_response.json.return_value = [
-            {
-                "id": 789,
-                "user": {"login": "reviewer1"},
-                "state": "APPROVED",
-                "submitted_at": "2024-01-01T15:00:00Z",
-            },
-            {
-                "id": 790,
-                "user": {"login": "reviewer2"},
-                "state": "CHANGES_REQUESTED",
-                "submitted_at": "2024-01-01T16:00:00Z",
-            },
-        ]
+    # extract_commits should only be called for PRs with number field
+    assert mock_commits.call_count == 2
 
-        mock_session.get.return_value = reviewers_response
 
-        result = main.extract_reviewers(mock_session, "mozilla/firefox", 123)
 
-        assert len(result) == 2
-        assert result[0]["state"] == "APPROVED"
-        assert result[1]["state"] == "CHANGES_REQUESTED"
+# =============================================================================
+# TESTS FOR EXTRACT_COMMITS
+# =============================================================================
 
-    def test_multiple_review_states(self, mock_session):
-        """Test handling multiple different review states."""
-        reviewers_response = Mock()
-        reviewers_response.status_code = 200
-        reviewers_response.json.return_value = [
-            {"id": 1, "state": "APPROVED", "user": {"login": "user1"}},
-            {"id": 2, "state": "CHANGES_REQUESTED", "user": {"login": "user2"}},
-            {"id": 3, "state": "COMMENTED", "user": {"login": "user3"}},
-            {"id": 4, "state": "DISMISSED", "user": {"login": "user4"}},
-        ]
 
+def test_fetch_commits_with_files(mock_session):
+    """Test fetching commits with files for a PR."""
+    # Mock commits list response
+    commits_response = Mock()
+    commits_response.status_code = 200
+    commits_response.json.return_value = [
+        {"sha": "abc123"},
+        {"sha": "def456"},
+    ]
+
+    # Mock individual commit responses
+    commit_detail_1 = Mock()
+    commit_detail_1.status_code = 200
+    commit_detail_1.json.return_value = {
+        "sha": "abc123",
+        "files": [{"filename": "file1.py", "additions": 10}],
+    }
 
-        mock_session.get.return_value = reviewers_response
+    commit_detail_2 = Mock()
+    commit_detail_2.status_code = 200
+    commit_detail_2.json.return_value = {
+        "sha": "def456",
+        "files": [{"filename": "file2.py", "deletions": 5}],
+    }
 
-        result = main.extract_reviewers(mock_session, "mozilla/firefox", 123)
+    mock_session.get.side_effect = [
+        commits_response,
+        commit_detail_1,
+        commit_detail_2,
+    ]
+
+    result = main.extract_commits(mock_session, "mozilla/firefox", 123)
+
+    assert len(result) == 2
+    assert result[0]["sha"] == "abc123"
+    assert result[0]["files"][0]["filename"] == "file1.py"
+    assert result[1]["sha"] == "def456"
+    assert result[1]["files"][0]["filename"] == "file2.py"
+
+def test_multiple_files_per_commit(mock_session):
+    """Test handling multiple files in a single commit."""
+    commits_response = Mock()
+    commits_response.status_code = 200
+    commits_response.json.return_value = [{"sha": "abc123"}]
+
+    commit_detail = Mock()
+    commit_detail.status_code = 200
+    commit_detail.json.return_value = {
+        "sha": "abc123",
+        "files": [
+            {"filename": "file1.py", "additions": 10},
+            {"filename": "file2.py", "additions": 20},
+            {"filename":
"file3.py", "deletions": 5}, + ], + } - assert len(result) == 4 - states = [r["state"] for r in result] - assert "APPROVED" in states - assert "CHANGES_REQUESTED" in states - assert "COMMENTED" in states + mock_session.get.side_effect = [commits_response, commit_detail] - def test_empty_reviewers_list(self, mock_session): - """Test handling PR with no reviewers.""" - reviewers_response = Mock() - reviewers_response.status_code = 200 - reviewers_response.json.return_value = [] + result = main.extract_commits(mock_session, "mozilla/firefox", 123) - mock_session.get.return_value = reviewers_response + assert len(result) == 1 + assert len(result[0]["files"]) == 3 - result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) +@patch("main.sleep_for_rate_limit") +def test_rate_limit_on_commits_list(mock_sleep, mock_session): + """Test rate limit handling when fetching commits list.""" + # Rate limit response + rate_limit_response = Mock() + rate_limit_response.status_code = 403 + rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} - assert result == [] + # Success response + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = [] - @patch("main.sleep_for_rate_limit") - def test_rate_limit_handling(self, mock_sleep, mock_session): - """Test rate limit handling when fetching reviewers.""" - rate_limit_response = Mock() - rate_limit_response.status_code = 403 - rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} + mock_session.get.side_effect = [rate_limit_response, success_response] - success_response = Mock() - success_response.status_code = 200 - success_response.json.return_value = [] + result = main.extract_commits(mock_session, "mozilla/firefox", 123) - mock_session.get.side_effect = [rate_limit_response, success_response] + mock_sleep.assert_called_once() + assert result == [] - result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) +def test_api_error_on_commits_list(mock_session): + """Test API error handling when fetching commits list.""" + error_response = Mock() + error_response.status_code = 500 + error_response.text = "Internal Server Error" - mock_sleep.assert_called_once() - assert result == [] + mock_session.get.return_value = error_response - def test_api_error(self, mock_session): - """Test API error handling when fetching reviewers.""" - error_response = Mock() - error_response.status_code = 500 - error_response.text = "Internal Server Error" + with pytest.raises(SystemExit) as exc_info: + main.extract_commits(mock_session, "mozilla/firefox", 123) - mock_session.get.return_value = error_response + assert "GitHub API error 500" in str(exc_info.value) - with pytest.raises(SystemExit) as exc_info: - main.extract_reviewers(mock_session, "mozilla/firefox", 123) +def test_api_error_on_individual_commit(mock_session): + """Test API error when fetching individual commit details.""" + commits_response = Mock() + commits_response.status_code = 200 + commits_response.json.return_value = [{"sha": "abc123"}] - assert "GitHub API error 500" in str(exc_info.value) + commit_error = Mock() + commit_error.status_code = 404 + commit_error.text = "Commit not found" - def test_custom_github_api_url(self, mock_session): - """Test using custom GitHub API URL for reviewers.""" - custom_url = "https://mock-github.example.com" + mock_session.get.side_effect = [commits_response, commit_error] - reviewers_response = Mock() - reviewers_response.status_code = 200 - reviewers_response.json.return_value = [] + with 
pytest.raises(SystemExit) as exc_info:
+        main.extract_commits(mock_session, "mozilla/firefox", 123)
-        mock_session.get.return_value = reviewers_response
+
+    assert "GitHub API error 404" in str(exc_info.value)
-        main.extract_reviewers(
-            mock_session, "mozilla/firefox", 123, github_api_url=custom_url
-        )
+
+def test_commit_without_sha_field(mock_session):
+    """Test handling commits without sha field."""
+    commits_response = Mock()
+    commits_response.status_code = 200
+    commits_response.json.return_value = [
+        {"sha": "abc123"},
+        {},  # Missing sha field
+    ]
-        call_args = mock_session.get.call_args
-        assert custom_url in call_args[0][0]
+
+    commit_detail_1 = Mock()
+    commit_detail_1.status_code = 200
+    commit_detail_1.json.return_value = {"sha": "abc123", "files": []}
+    commit_detail_2 = Mock()
+    commit_detail_2.status_code = 200
+    commit_detail_2.json.return_value = {"files": []}
-class TestExtractComments:
-    """Tests for extract_comments function."""
+
+    mock_session.get.side_effect = [
+        commits_response,
+        commit_detail_1,
+        commit_detail_2,
+    ]
-    def test_fetch_comments(self, mock_session):
-        """Test fetching comments for a PR."""
-        comments_response = Mock()
-        comments_response.status_code = 200
-        comments_response.json.return_value = [
-            {
-                "id": 456,
-                "user": {"login": "commenter1"},
-                "body": "This looks good",
-                "created_at": "2024-01-01T14:00:00Z",
-            },
-            {
-                "id": 457,
-                "user": {"login": "commenter2"},
-                "body": "I have concerns",
-                "created_at": "2024-01-01T15:00:00Z",
-            },
-        ]
+
+    result = main.extract_commits(mock_session, "mozilla/firefox", 123)
-        mock_session.get.return_value = comments_response
+
+    # Should handle the commit without sha gracefully
+    assert len(result) == 2
-        result = main.extract_comments(mock_session, "mozilla/firefox", 123)
+
+def test_custom_github_api_url_commits(mock_session):
+    """Test using custom GitHub API URL for commits."""
-        assert len(result) == 2
-        assert result[0]["id"] == 456
-        assert result[1]["id"] == 457
+    custom_url = "https://mock-github.example.com"
-    def test_uses_issues_endpoint(self, mock_session):
-        """Test that comments use /issues endpoint not /pulls."""
-        comments_response = Mock()
-        comments_response.status_code = 200
-        comments_response.json.return_value = []
+
+    commits_response = Mock()
+    commits_response.status_code = 200
+    commits_response.json.return_value = []
-        mock_session.get.return_value = comments_response
+
+    mock_session.get.return_value = commits_response
-        main.extract_comments(mock_session, "mozilla/firefox", 123)
+
+    main.extract_commits(
+        mock_session, "mozilla/firefox", 123, github_api_url=custom_url
+    )
-        call_args = mock_session.get.call_args
-        url = call_args[0][0]
-        assert "/issues/123/comments" in url
-        assert "/pulls/123/comments" not in url
-
-    def test_multiple_comments(self, mock_session):
-        """Test handling multiple comments."""
-        comments_response = Mock()
-        comments_response.status_code = 200
-        comments_response.json.return_value = [
-            {"id": i, "user": {"login": f"user{i}"}, "body": f"Comment {i}"}
-            for i in range(1, 11)
-        ]
+
+    call_args = mock_session.get.call_args
+    assert custom_url in call_args[0][0]
-        mock_session.get.return_value = comments_response
+
+def test_empty_commits_list(mock_session):
+    """Test handling PR with no commits."""
+    commits_response = Mock()
+    commits_response.status_code = 200
+    commits_response.json.return_value = []
-        result = main.extract_comments(mock_session, "mozilla/firefox", 123)
+
+    mock_session.get.return_value = commits_response
+
+    result = main.extract_commits(mock_session, "mozilla/firefox", 123)
-        assert len(result) == 10
+
+    assert result == []
-    def test_empty_comments_list(self, mock_session):
-        """Test handling PR with no comments."""
-        comments_response = Mock()
-        comments_response.status_code = 200
-        comments_response.json.return_value = []
-        mock_session.get.return_value = comments_response
-        result = main.extract_comments(mock_session, "mozilla/firefox", 123)
+
+# =============================================================================
+# TESTS FOR EXTRACT_REVIEWERS
+# =============================================================================
-        assert result == []
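+# NOTE: The review "state" values used below (APPROVED, CHANGES_REQUESTED,
+# COMMENTED, DISMISSED) mirror the states GitHub reports for pull request
+# reviews.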
"mozilla/firefox", 123) - assert len(result) == 10 + assert result == [] - def test_empty_comments_list(self, mock_session): - """Test handling PR with no comments.""" - comments_response = Mock() - comments_response.status_code = 200 - comments_response.json.return_value = [] - mock_session.get.return_value = comments_response - result = main.extract_comments(mock_session, "mozilla/firefox", 123) +# ============================================================================= +# TESTS FOR EXTRACT_REVIEWERS +# ============================================================================= - assert result == [] + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [ + { + "id": 789, + "user": {"login": "reviewer1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + }, + { + "id": 790, + "user": {"login": "reviewer2"}, + "state": "CHANGES_REQUESTED", + "submitted_at": "2024-01-01T16:00:00Z", + }, + ] - @patch("main.sleep_for_rate_limit") - def test_rate_limit_handling(self, mock_sleep, mock_session): - """Test rate limit handling when fetching comments.""" - rate_limit_response = Mock() - rate_limit_response.status_code = 403 - rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} + mock_session.get.return_value = reviewers_response - success_response = Mock() - success_response.status_code = 200 - success_response.json.return_value = [] + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) - mock_session.get.side_effect = [rate_limit_response, success_response] + assert len(result) == 2 + assert result[0]["state"] == "APPROVED" + assert result[1]["state"] == "CHANGES_REQUESTED" - result = main.extract_comments(mock_session, "mozilla/firefox", 123) +def test_multiple_review_states(mock_session): + """Test handling multiple different review states.""" + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [ + {"id": 1, "state": "APPROVED", "user": {"login": "user1"}}, + {"id": 2, "state": "CHANGES_REQUESTED", "user": {"login": "user2"}}, + {"id": 3, "state": "COMMENTED", "user": {"login": "user3"}}, + {"id": 4, "state": "DISMISSED", "user": {"login": "user4"}}, + ] - mock_sleep.assert_called_once() - assert result == [] + mock_session.get.return_value = reviewers_response - def test_api_error(self, mock_session): - """Test API error handling when fetching comments.""" - error_response = Mock() - error_response.status_code = 404 - error_response.text = "Not Found" + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) - mock_session.get.return_value = error_response + assert len(result) == 4 + states = [r["state"] for r in result] + assert "APPROVED" in states + assert "CHANGES_REQUESTED" in states + assert "COMMENTED" in states - with pytest.raises(SystemExit) as exc_info: - main.extract_comments(mock_session, "mozilla/firefox", 123) +def test_empty_reviewers_list(mock_session): + """Test handling PR with no reviewers.""" + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [] - assert "GitHub API error 404" in str(exc_info.value) + mock_session.get.return_value = reviewers_response - def test_custom_github_api_url(self, mock_session): - """Test using custom GitHub API URL for comments.""" - custom_url = "https://mock-github.example.com" + result = main.extract_reviewers(mock_session, "mozilla/firefox", 123) - comments_response = Mock() - comments_response.status_code = 
+@patch("main.sleep_for_rate_limit")
+def test_rate_limit_handling_reviewers(mock_sleep, mock_session):
+    """Test rate limit handling when fetching reviewers."""
+    rate_limit_response = Mock()
+    rate_limit_response.status_code = 403
+    rate_limit_response.headers = {"X-RateLimit-Remaining": "0"}
-        call_args = mock_session.get.call_args
-        assert custom_url in call_args[0][0]
+
+    success_response = Mock()
+    success_response.status_code = 200
+    success_response.json.return_value = []
+
+    mock_session.get.side_effect = [rate_limit_response, success_response]
+
+    result = main.extract_reviewers(mock_session, "mozilla/firefox", 123)
-class TestTransformData:
-    """Tests for transform_data function."""
+
+    mock_sleep.assert_called_once()
+    assert result == []
-    def test_basic_pr_transformation(self):
-        """Test basic pull request field mapping."""
-        raw_data = [
-            {
-                "number": 123,
-                "title": "Fix login bug",
-                "state": "closed",
-                "created_at": "2024-01-01T10:00:00Z",
-                "updated_at": "2024-01-02T10:00:00Z",
-                "merged_at": "2024-01-02T12:00:00Z",
-                "labels": [],
-                "commit_data": [],
-                "reviewer_data": [],
-                "comment_data": [],
-            }
-        ]
+
+def test_api_error_reviewers(mock_session):
+    """Test API error handling when fetching reviewers."""
+    error_response = Mock()
+    error_response.status_code = 500
+    error_response.text = "Internal Server Error"
-        result = main.transform_data(raw_data, "mozilla/firefox")
+
+    mock_session.get.return_value = error_response
-        assert len(result["pull_requests"]) == 1
-        pr = result["pull_requests"][0]
-        assert pr["pull_request_id"] == 123
-        assert pr["current_status"] == "closed"
-        assert pr["date_created"] == "2024-01-01T10:00:00Z"
-        assert pr["date_modified"] == "2024-01-02T10:00:00Z"
-        assert pr["date_landed"] == "2024-01-02T12:00:00Z"
-        assert pr["target_repository"] == "mozilla/firefox"
-
-    def test_bug_id_extraction_basic(self):
-        """Test bug ID extraction from PR title."""
-        test_cases = [
-            ("Bug 1234567 - Fix issue", 1234567),
-            ("bug 1234567: Update code", 1234567),
-            ("Fix for bug 7654321", 7654321),
-            ("b=9876543 - Change behavior", 9876543),
-        ]
+
+    with pytest.raises(SystemExit) as exc_info:
+        main.extract_reviewers(mock_session, "mozilla/firefox", 123)
-        for title, expected_bug_id in test_cases:
-            raw_data = [
-                {
-                    "number": 1,
-                    "title": title,
-                    "state": "open",
-                    "labels": [],
-                    "commit_data": [],
-                    "reviewer_data": [],
-                    "comment_data": [],
-                }
-            ]
-
-            result = main.transform_data(raw_data, "mozilla/firefox")
-            assert result["pull_requests"][0]["bug_id"] == expected_bug_id
-
-    def test_bug_id_extraction_with_hash(self):
-        """Test bug ID extraction with # symbol."""
-        raw_data = [
-            {
-                "number": 1,
-                "title": "Bug #1234567 - Fix issue",
-                "state": "open",
-                "labels": [],
-                "commit_data": [],
-                "reviewer_data": [],
-                "comment_data": [],
-            }
-        ]
+
+    assert "GitHub API error 500" in str(exc_info.value)
-        result = main.transform_data(raw_data, "mozilla/firefox")
-        assert result["pull_requests"][0]["bug_id"] == 1234567
+
+def test_custom_github_api_url_reviewers(mock_session):
+    """Test using custom GitHub API URL for reviewers."""
+    custom_url = "https://mock-github.example.com"
-    def test_bug_id_filter_large_numbers(self):
-        """Test that bug IDs >= 100000000 are filtered out."""
-        raw_data = [
-            {
-                "number": 1,
-                "title": "Bug 999999999 - Invalid bug ID",
-                "state": "open",
-                "labels": [],
-                "commit_data": [],
-                "reviewer_data": [],
-                "comment_data": [],
-            }
-        ]
+
+    reviewers_response = Mock()
+    reviewers_response.status_code = 200
+    reviewers_response.json.return_value = []
-        result = main.transform_data(raw_data, "mozilla/firefox")
-        assert result["pull_requests"][0]["bug_id"] is None
+
+    mock_session.get.return_value = reviewers_response
-    def test_bug_id_no_match(self):
-        """Test PR title with no bug ID."""
-        raw_data = [
-            {
-                "number": 1,
-                "title": "Update documentation",
-                "state": "open",
-                "labels": [],
-                "commit_data": [],
-                "reviewer_data": [],
-                "comment_data": [],
-            }
-        ]
+
+    main.extract_reviewers(
+        mock_session, "mozilla/firefox", 123, github_api_url=custom_url
+    )
-        result = main.transform_data(raw_data, "mozilla/firefox")
-        assert result["pull_requests"][0]["bug_id"] is None
+
+    call_args = mock_session.get.call_args
+    assert custom_url in call_args[0][0]
-    def test_labels_extraction(self):
-        """Test labels array extraction."""
-        raw_data = [
-            {
-                "number": 1,
-                "title": "PR with labels",
-                "state": "open",
-                "labels": [
-                    {"name": "bug"},
-                    {"name": "priority-high"},
-                    {"name": "needs-review"},
-                ],
-                "commit_data": [],
-                "reviewer_data": [],
-                "comment_data": [],
-            }
-        ]
-        result = main.transform_data(raw_data, "mozilla/firefox")
-        labels = result["pull_requests"][0]["labels"]
-        assert len(labels) == 3
-        assert "bug" in labels
-        assert "priority-high" in labels
-        assert "needs-review" in labels
-
-    def test_labels_empty_list(self):
-        """Test handling empty labels list."""
-        raw_data = [
-            {
-                "number": 1,
-                "title": "PR without labels",
-                "state": "open",
-                "labels": [],
-                "commit_data": [],
-                "reviewer_data": [],
-                "comment_data": [],
-            }
-        ]
-        result = main.transform_data(raw_data, "mozilla/firefox")
-        assert result["pull_requests"][0]["labels"] == []
+
+# =============================================================================
+# TESTS FOR EXTRACT_COMMENTS
+# =============================================================================
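+# NOTE: PR conversation comments are served by the Issues API
+# (/issues/{number}/comments); /pulls/{number}/comments would return inline
+# review comments instead (see test_uses_issues_endpoint).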
- "commit_data": [], - "reviewer_data": [], - "comment_data": [], - } - ] + reviewers_response = Mock() + reviewers_response.status_code = 200 + reviewers_response.json.return_value = [] - result = main.transform_data(raw_data, "mozilla/firefox") - assert result["pull_requests"][0]["bug_id"] is None + mock_session.get.return_value = reviewers_response - def test_bug_id_no_match(self): - """Test PR title with no bug ID.""" - raw_data = [ - { - "number": 1, - "title": "Update documentation", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [], - "comment_data": [], - } - ] + main.extract_reviewers( + mock_session, "mozilla/firefox", 123, github_api_url=custom_url + ) - result = main.transform_data(raw_data, "mozilla/firefox") - assert result["pull_requests"][0]["bug_id"] is None + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] - def test_labels_extraction(self): - """Test labels array extraction.""" - raw_data = [ - { - "number": 1, - "title": "PR with labels", - "state": "open", - "labels": [ - {"name": "bug"}, - {"name": "priority-high"}, - {"name": "needs-review"}, - ], - "commit_data": [], - "reviewer_data": [], - "comment_data": [], - } - ] - result = main.transform_data(raw_data, "mozilla/firefox") - labels = result["pull_requests"][0]["labels"] - assert len(labels) == 3 - assert "bug" in labels - assert "priority-high" in labels - assert "needs-review" in labels - - def test_labels_empty_list(self): - """Test handling empty labels list.""" - raw_data = [ - { - "number": 1, - "title": "PR without labels", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [], - "comment_data": [], - } - ] - result = main.transform_data(raw_data, "mozilla/firefox") - assert result["pull_requests"][0]["labels"] == [] +# ============================================================================= +# TESTS FOR EXTRACT_COMMENTS +# ============================================================================= - def test_commit_transformation(self): - """Test commit fields mapping.""" - raw_data = [ - { - "number": 123, - "title": "PR with commits", - "state": "open", - "labels": [], - "commit_data": [ - { - "sha": "abc123", - "commit": { - "author": { - "name": "Test Author", - "date": "2024-01-01T12:00:00Z", - } - }, - "files": [ - { - "filename": "src/main.py", - "additions": 10, - "deletions": 5, - } - ], - } - ], - "reviewer_data": [], - "comment_data": [], - } - ] + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [ + { + "id": 456, + "user": {"login": "commenter1"}, + "body": "This looks good", + "created_at": "2024-01-01T14:00:00Z", + }, + { + "id": 457, + "user": {"login": "commenter2"}, + "body": "I have concerns", + "created_at": "2024-01-01T15:00:00Z", + }, + ] - result = main.transform_data(raw_data, "mozilla/firefox") + mock_session.get.return_value = comments_response - assert len(result["commits"]) == 1 - commit = result["commits"][0] - assert commit["pull_request_id"] == 123 - assert commit["target_repository"] == "mozilla/firefox" - assert commit["commit_sha"] == "abc123" - assert commit["date_created"] == "2024-01-01T12:00:00Z" - assert commit["author_username"] == "Test Author" - assert commit["filename"] == "src/main.py" - assert commit["lines_added"] == 10 - assert commit["lines_removed"] == 5 - - def test_commit_file_flattening(self): - """Test that each file becomes a separate row.""" - raw_data = [ - { - "number": 123, - "title": "PR with multiple 
files", - "state": "open", - "labels": [], - "commit_data": [ - { - "sha": "abc123", - "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, - "files": [ - {"filename": "file1.py", "additions": 10, "deletions": 5}, - {"filename": "file2.py", "additions": 20, "deletions": 2}, - {"filename": "file3.py", "additions": 5, "deletions": 15}, - ], - } - ], - "reviewer_data": [], - "comment_data": [], - } - ] + result = main.extract_comments(mock_session, "mozilla/firefox", 123) - result = main.transform_data(raw_data, "mozilla/firefox") + assert len(result) == 2 + assert result[0]["id"] == 456 + assert result[1]["id"] == 457 - # Should have 3 rows in commits table (one per file) - assert len(result["commits"]) == 3 - filenames = [c["filename"] for c in result["commits"]] - assert "file1.py" in filenames - assert "file2.py" in filenames - assert "file3.py" in filenames +def test_uses_issues_endpoint(mock_session): + """Test that comments use /issues endpoint not /pulls.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] - def test_multiple_commits_with_files(self): - """Test multiple commits with multiple files per PR.""" - raw_data = [ - { - "number": 123, - "title": "PR with multiple commits", - "state": "open", - "labels": [], - "commit_data": [ - { - "sha": "commit1", - "commit": {"author": {"name": "Author1", "date": "2024-01-01"}}, - "files": [ - {"filename": "file1.py", "additions": 10, "deletions": 0} - ], - }, - { - "sha": "commit2", - "commit": {"author": {"name": "Author2", "date": "2024-01-02"}}, - "files": [ - {"filename": "file2.py", "additions": 5, "deletions": 2}, - {"filename": "file3.py", "additions": 8, "deletions": 3}, - ], - }, - ], - "reviewer_data": [], - "comment_data": [], - } - ] + mock_session.get.return_value = comments_response - result = main.transform_data(raw_data, "mozilla/firefox") + main.extract_comments(mock_session, "mozilla/firefox", 123) - # Should have 3 rows total (1 file from commit1, 2 files from commit2) - assert len(result["commits"]) == 3 - assert result["commits"][0]["commit_sha"] == "commit1" - assert result["commits"][1]["commit_sha"] == "commit2" - assert result["commits"][2]["commit_sha"] == "commit2" + call_args = mock_session.get.call_args + url = call_args[0][0] + assert "/issues/123/comments" in url + assert "/pulls/123/comments" not in url - def test_reviewer_transformation(self): - """Test reviewer fields mapping.""" - raw_data = [ - { - "number": 123, - "title": "PR with reviewers", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [ - { - "id": 789, - "user": {"login": "reviewer1"}, - "state": "APPROVED", - "submitted_at": "2024-01-01T15:00:00Z", - } - ], - "comment_data": [], - } - ] +def test_multiple_comments(mock_session): + """Test handling multiple comments.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [ + {"id": i, "user": {"login": f"user{i}"}, "body": f"Comment {i}"} + for i in range(1, 11) + ] - result = main.transform_data(raw_data, "mozilla/firefox") + mock_session.get.return_value = comments_response - assert len(result["reviewers"]) == 1 - reviewer = result["reviewers"][0] - assert reviewer["pull_request_id"] == 123 - assert reviewer["target_repository"] == "mozilla/firefox" - assert reviewer["reviewer_username"] == "reviewer1" - assert reviewer["status"] == "APPROVED" - assert reviewer["date_reviewed"] == "2024-01-01T15:00:00Z" + result = 
main.extract_comments(mock_session, "mozilla/firefox", 123) - def test_multiple_review_states(self): - """Test handling multiple review states.""" - raw_data = [ - { - "number": 123, - "title": "PR with multiple reviews", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [ - { - "id": 1, - "user": {"login": "user1"}, - "state": "APPROVED", - "submitted_at": "2024-01-01T15:00:00Z", - }, - { - "id": 2, - "user": {"login": "user2"}, - "state": "CHANGES_REQUESTED", - "submitted_at": "2024-01-01T16:00:00Z", - }, - { - "id": 3, - "user": {"login": "user3"}, - "state": "COMMENTED", - "submitted_at": "2024-01-01T17:00:00Z", - }, - ], - "comment_data": [], - } - ] + assert len(result) == 10 - result = main.transform_data(raw_data, "mozilla/firefox") +def test_empty_comments_list(mock_session): + """Test handling PR with no comments.""" + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] - assert len(result["reviewers"]) == 3 - states = [r["status"] for r in result["reviewers"]] - assert "APPROVED" in states - assert "CHANGES_REQUESTED" in states - assert "COMMENTED" in states + mock_session.get.return_value = comments_response - def test_date_approved_from_earliest_approval(self): - """Test that date_approved is set to earliest APPROVED review.""" - raw_data = [ - { - "number": 123, - "title": "PR with multiple approvals", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [ - { - "id": 1, - "user": {"login": "user1"}, - "state": "APPROVED", - "submitted_at": "2024-01-02T15:00:00Z", - }, - { - "id": 2, - "user": {"login": "user2"}, - "state": "APPROVED", - "submitted_at": "2024-01-01T14:00:00Z", # Earliest - }, - { - "id": 3, - "user": {"login": "user3"}, - "state": "APPROVED", - "submitted_at": "2024-01-03T16:00:00Z", - }, - ], - "comment_data": [], - } - ] + result = main.extract_comments(mock_session, "mozilla/firefox", 123) - result = main.transform_data(raw_data, "mozilla/firefox") + assert result == [] - pr = result["pull_requests"][0] - assert pr["date_approved"] == "2024-01-01T14:00:00Z" +@patch("main.sleep_for_rate_limit") +def test_rate_limit_handling(mock_sleep, mock_session): + """Test rate limit handling when fetching comments.""" + rate_limit_response = Mock() + rate_limit_response.status_code = 403 + rate_limit_response.headers = {"X-RateLimit-Remaining": "0"} - def test_comment_transformation(self): - """Test comment fields mapping.""" - raw_data = [ - { - "number": 123, - "title": "PR with comments", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [], - "comment_data": [ - { - "id": 456, - "user": {"login": "commenter1"}, - "body": "This looks great!", - "created_at": "2024-01-01T14:00:00Z", - "pull_request_review_id": None, - } - ], - } - ] + success_response = Mock() + success_response.status_code = 200 + success_response.json.return_value = [] - result = main.transform_data(raw_data, "mozilla/firefox") + mock_session.get.side_effect = [rate_limit_response, success_response] - assert len(result["comments"]) == 1 - comment = result["comments"][0] - assert comment["pull_request_id"] == 123 - assert comment["target_repository"] == "mozilla/firefox" - assert comment["comment_id"] == 456 - assert comment["author_username"] == "commenter1" - assert comment["date_created"] == "2024-01-01T14:00:00Z" - assert comment["character_count"] == 17 - - def test_comment_character_count(self): - """Test character count calculation for comments.""" - raw_data = [ - { 
- "number": 123, - "title": "PR", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [], - "comment_data": [ - { - "id": 1, - "user": {"login": "user1"}, - "body": "Short", - "created_at": "2024-01-01", - }, - { - "id": 2, - "user": {"login": "user2"}, - "body": "This is a much longer comment with more text", - "created_at": "2024-01-01", - }, - ], - } - ] + result = main.extract_comments(mock_session, "mozilla/firefox", 123) - result = main.transform_data(raw_data, "mozilla/firefox") + mock_sleep.assert_called_once() + assert result == [] - assert result["comments"][0]["character_count"] == 5 - assert result["comments"][1]["character_count"] == 44 +def test_api_error(mock_session): + """Test API error handling when fetching comments.""" + error_response = Mock() + error_response.status_code = 404 + error_response.text = "Not Found" - def test_comment_status_from_review(self): - """Test that comment status is mapped from review_id_statuses.""" - raw_data = [ - { - "number": 123, - "title": "PR", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [ - { - "id": 789, - "user": {"login": "reviewer"}, - "state": "APPROVED", - "submitted_at": "2024-01-01", - } - ], - "comment_data": [ - { - "id": 456, - "user": {"login": "commenter"}, - "body": "LGTM", - "created_at": "2024-01-01", - "pull_request_review_id": 789, - } - ], - } - ] + mock_session.get.return_value = error_response - result = main.transform_data(raw_data, "mozilla/firefox") + with pytest.raises(SystemExit) as exc_info: + main.extract_comments(mock_session, "mozilla/firefox", 123) - # Comment should have status from the review - assert result["comments"][0]["status"] == "APPROVED" + assert "GitHub API error 404" in str(exc_info.value) - def test_comment_empty_body(self): - """Test handling comments with empty or None body.""" - raw_data = [ - { - "number": 123, - "title": "PR", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [], - "comment_data": [ - { - "id": 1, - "user": {"login": "user1"}, - "body": None, - "created_at": "2024-01-01", - }, - { - "id": 2, - "user": {"login": "user2"}, - "body": "", - "created_at": "2024-01-01", - }, - ], - } - ] +def test_custom_github_api_url(mock_session): + """Test using custom GitHub API URL for comments.""" + custom_url = "https://mock-github.example.com" - result = main.transform_data(raw_data, "mozilla/firefox") + comments_response = Mock() + comments_response.status_code = 200 + comments_response.json.return_value = [] - assert result["comments"][0]["character_count"] == 0 - assert result["comments"][1]["character_count"] == 0 + mock_session.get.return_value = comments_response - def test_empty_raw_data(self): - """Test handling empty input list.""" - result = main.transform_data([], "mozilla/firefox") + main.extract_comments( + mock_session, "mozilla/firefox", 123, github_api_url=custom_url + ) - assert result["pull_requests"] == [] - assert result["commits"] == [] - assert result["reviewers"] == [] - assert result["comments"] == [] + call_args = mock_session.get.call_args + assert custom_url in call_args[0][0] - def test_pr_without_commits_reviewers_comments(self): - """Test PR with no commits, reviewers, or comments.""" - raw_data = [ - { - "number": 123, - "title": "Minimal PR", - "state": "open", - "labels": [], - "commit_data": [], - "reviewer_data": [], - "comment_data": [], - } - ] - result = main.transform_data(raw_data, "mozilla/firefox") - assert len(result["pull_requests"]) == 1 - assert 
len(result["commits"]) == 0 - assert len(result["reviewers"]) == 0 - assert len(result["comments"]) == 0 +# ============================================================================= +# TESTS FOR TRANSFORM_DATA +# ============================================================================= - def test_return_structure(self): - """Test that transform_data returns dict with 4 keys.""" + raw_data = [ + { + "number": 123, + "title": "Fix login bug", + "state": "closed", + "created_at": "2024-01-01T10:00:00Z", + "updated_at": "2024-01-02T10:00:00Z", + "merged_at": "2024-01-02T12:00:00Z", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["pull_requests"]) == 1 + pr = result["pull_requests"][0] + assert pr["pull_request_id"] == 123 + assert pr["current_status"] == "closed" + assert pr["date_created"] == "2024-01-01T10:00:00Z" + assert pr["date_modified"] == "2024-01-02T10:00:00Z" + assert pr["date_landed"] == "2024-01-02T12:00:00Z" + assert pr["target_repository"] == "mozilla/firefox" + +def test_bug_id_extraction_basic(): + """Test bug ID extraction from PR title.""" + test_cases = [ + ("Bug 1234567 - Fix issue", 1234567), + ("bug 1234567: Update code", 1234567), + ("Fix for bug 7654321", 7654321), + ("b=9876543 - Change behavior", 9876543), + ] + + for title, expected_bug_id in test_cases: raw_data = [ { "number": 1, - "title": "Test", + "title": title, "state": "open", "labels": [], "commit_data": [], @@ -1444,638 +958,835 @@ def test_return_structure(self): ] result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] == expected_bug_id + +def test_bug_id_extraction_with_hash(): + """Test bug ID extraction with # symbol.""" + raw_data = [ + { + "number": 1, + "title": "Bug #1234567 - Fix issue", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] == 1234567 + +def test_bug_id_filter_large_numbers(): + """Test that bug IDs >= 100000000 are filtered out.""" + raw_data = [ + { + "number": 1, + "title": "Bug 999999999 - Invalid bug ID", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] is None + +def test_bug_id_no_match(): + """Test PR title with no bug ID.""" + raw_data = [ + { + "number": 1, + "title": "Update documentation", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["bug_id"] is None + +def test_labels_extraction(): + """Test labels array extraction.""" + raw_data = [ + { + "number": 1, + "title": "PR with labels", + "state": "open", + "labels": [ + {"name": "bug"}, + {"name": "priority-high"}, + {"name": "needs-review"}, + ], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + labels = result["pull_requests"][0]["labels"] + assert len(labels) == 3 + assert "bug" in labels + assert "priority-high" in labels + assert "needs-review" in labels + +def test_labels_empty_list(): + """Test handling empty labels list.""" + raw_data = [ + { + "number": 1, + 
"title": "PR without labels", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + assert result["pull_requests"][0]["labels"] == [] + +def test_commit_transformation(): + """Test commit fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with commits", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc123", + "commit": { + "author": { + "name": "Test Author", + "date": "2024-01-01T12:00:00Z", + } + }, + "files": [ + { + "filename": "src/main.py", + "additions": 10, + "deletions": 5, + } + ], + } + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["commits"]) == 1 + commit = result["commits"][0] + assert commit["pull_request_id"] == 123 + assert commit["target_repository"] == "mozilla/firefox" + assert commit["commit_sha"] == "abc123" + assert commit["date_created"] == "2024-01-01T12:00:00Z" + assert commit["author_username"] == "Test Author" + assert commit["filename"] == "src/main.py" + assert commit["lines_added"] == 10 + assert commit["lines_removed"] == 5 + +def test_commit_file_flattening(): + """Test that each file becomes a separate row.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple files", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc123", + "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, + "files": [ + {"filename": "file1.py", "additions": 10, "deletions": 5}, + {"filename": "file2.py", "additions": 20, "deletions": 2}, + {"filename": "file3.py", "additions": 5, "deletions": 15}, + ], + } + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Should have 3 rows in commits table (one per file) + assert len(result["commits"]) == 3 + filenames = [c["filename"] for c in result["commits"]] + assert "file1.py" in filenames + assert "file2.py" in filenames + assert "file3.py" in filenames + +def test_multiple_commits_with_files(): + """Test multiple commits with multiple files per PR.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple commits", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "commit1", + "commit": {"author": {"name": "Author1", "date": "2024-01-01"}}, + "files": [ + {"filename": "file1.py", "additions": 10, "deletions": 0} + ], + }, + { + "sha": "commit2", + "commit": {"author": {"name": "Author2", "date": "2024-01-02"}}, + "files": [ + {"filename": "file2.py", "additions": 5, "deletions": 2}, + {"filename": "file3.py", "additions": 8, "deletions": 3}, + ], + }, + ], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Should have 3 rows total (1 file from commit1, 2 files from commit2) + assert len(result["commits"]) == 3 + assert result["commits"][0]["commit_sha"] == "commit1" + assert result["commits"][1]["commit_sha"] == "commit2" + assert result["commits"][2]["commit_sha"] == "commit2" + +def test_reviewer_transformation(): + """Test reviewer fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with reviewers", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 789, + "user": {"login": "reviewer1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + } + ], + "comment_data": [], + } + ] + + result = 
main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["reviewers"]) == 1 + reviewer = result["reviewers"][0] + assert reviewer["pull_request_id"] == 123 + assert reviewer["target_repository"] == "mozilla/firefox" + assert reviewer["reviewer_username"] == "reviewer1" + assert reviewer["status"] == "APPROVED" + assert reviewer["date_reviewed"] == "2024-01-01T15:00:00Z" + +def test_multiple_review_states(): + """Test handling multiple review states.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple reviews", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T15:00:00Z", + }, + { + "id": 2, + "user": {"login": "user2"}, + "state": "CHANGES_REQUESTED", + "submitted_at": "2024-01-01T16:00:00Z", + }, + { + "id": 3, + "user": {"login": "user3"}, + "state": "COMMENTED", + "submitted_at": "2024-01-01T17:00:00Z", + }, + ], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["reviewers"]) == 3 + states = [r["status"] for r in result["reviewers"]] + assert "APPROVED" in states + assert "CHANGES_REQUESTED" in states + assert "COMMENTED" in states + +def test_date_approved_from_earliest_approval(): + """Test that date_approved is set to earliest APPROVED review.""" + raw_data = [ + { + "number": 123, + "title": "PR with multiple approvals", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "state": "APPROVED", + "submitted_at": "2024-01-02T15:00:00Z", + }, + { + "id": 2, + "user": {"login": "user2"}, + "state": "APPROVED", + "submitted_at": "2024-01-01T14:00:00Z", # Earliest + }, + { + "id": 3, + "user": {"login": "user3"}, + "state": "APPROVED", + "submitted_at": "2024-01-03T16:00:00Z", + }, + ], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + pr = result["pull_requests"][0] + assert pr["date_approved"] == "2024-01-01T14:00:00Z" + +def test_comment_transformation(): + """Test comment fields mapping.""" + raw_data = [ + { + "number": 123, + "title": "PR with comments", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 456, + "user": {"login": "commenter1"}, + "body": "This looks great!", + "created_at": "2024-01-01T14:00:00Z", + "pull_request_review_id": None, + } + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["comments"]) == 1 + comment = result["comments"][0] + assert comment["pull_request_id"] == 123 + assert comment["target_repository"] == "mozilla/firefox" + assert comment["comment_id"] == 456 + assert comment["author_username"] == "commenter1" + assert comment["date_created"] == "2024-01-01T14:00:00Z" + assert comment["character_count"] == 17 + +def test_comment_character_count(): + """Test character count calculation for comments.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "body": "Short", + "created_at": "2024-01-01", + }, + { + "id": 2, + "user": {"login": "user2"}, + "body": "This is a much longer comment with more text", + "created_at": "2024-01-01", + }, + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert result["comments"][0]["character_count"] == 5 + 
assert result["comments"][1]["character_count"] == 44 + +def test_comment_status_from_review(): + """Test that comment status is mapped from review_id_statuses.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [ + { + "id": 789, + "user": {"login": "reviewer"}, + "state": "APPROVED", + "submitted_at": "2024-01-01", + } + ], + "comment_data": [ + { + "id": 456, + "user": {"login": "commenter"}, + "body": "LGTM", + "created_at": "2024-01-01", + "pull_request_review_id": 789, + } + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + # Comment should have status from the review + assert result["comments"][0]["status"] == "APPROVED" + +def test_comment_empty_body(): + """Test handling comments with empty or None body.""" + raw_data = [ + { + "number": 123, + "title": "PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [ + { + "id": 1, + "user": {"login": "user1"}, + "body": None, + "created_at": "2024-01-01", + }, + { + "id": 2, + "user": {"login": "user2"}, + "body": "", + "created_at": "2024-01-01", + }, + ], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert result["comments"][0]["character_count"] == 0 + assert result["comments"][1]["character_count"] == 0 + +def test_empty_raw_data(): + """Test handling empty input list.""" + result = main.transform_data([], "mozilla/firefox") + + assert result["pull_requests"] == [] + assert result["commits"] == [] + assert result["reviewers"] == [] + assert result["comments"] == [] + +def test_pr_without_commits_reviewers_comments(): + """Test PR with no commits, reviewers, or comments.""" + raw_data = [ + { + "number": 123, + "title": "Minimal PR", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert len(result["pull_requests"]) == 1 + assert len(result["commits"]) == 0 + assert len(result["reviewers"]) == 0 + assert len(result["comments"]) == 0 + +def test_return_structure(): + """Test that transform_data returns dict with 4 keys.""" + raw_data = [ + { + "number": 1, + "title": "Test", + "state": "open", + "labels": [], + "commit_data": [], + "reviewer_data": [], + "comment_data": [], + } + ] + + result = main.transform_data(raw_data, "mozilla/firefox") + + assert isinstance(result, dict) + assert "pull_requests" in result + assert "commits" in result + assert "reviewers" in result + assert "comments" in result + +def test_all_tables_have_target_repository(): + """Test that all tables include target_repository field.""" + raw_data = [ + { + "number": 123, + "title": "Test PR", + "state": "open", + "labels": [], + "commit_data": [ + { + "sha": "abc", + "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, + "files": [ + {"filename": "test.py", "additions": 1, "deletions": 0} + ], + } + ], + "reviewer_data": [ + { + "id": 1, + "user": {"login": "reviewer"}, + "state": "APPROVED", + "submitted_at": "2024-01-01", + } + ], + "comment_data": [ + { + "id": 2, + "user": {"login": "commenter"}, + "body": "Test", + "created_at": "2024-01-01", + } + ], + } + ] - assert isinstance(result, dict) - assert "pull_requests" in result - assert "commits" in result - assert "reviewers" in result - assert "comments" in result - - def test_all_tables_have_target_repository(self): - """Test that all tables include target_repository field.""" - raw_data = 
[ - { - "number": 123, - "title": "Test PR", - "state": "open", - "labels": [], - "commit_data": [ - { - "sha": "abc", - "commit": {"author": {"name": "Author", "date": "2024-01-01"}}, - "files": [ - {"filename": "test.py", "additions": 1, "deletions": 0} - ], - } - ], - "reviewer_data": [ - { - "id": 1, - "user": {"login": "reviewer"}, - "state": "APPROVED", - "submitted_at": "2024-01-01", - } - ], - "comment_data": [ - { - "id": 2, - "user": {"login": "commenter"}, - "body": "Test", - "created_at": "2024-01-01", - } - ], - } - ] - - result = main.transform_data(raw_data, "mozilla/firefox") + result = main.transform_data(raw_data, "mozilla/firefox") - assert result["pull_requests"][0]["target_repository"] == "mozilla/firefox" - assert result["commits"][0]["target_repository"] == "mozilla/firefox" - assert result["reviewers"][0]["target_repository"] == "mozilla/firefox" - assert result["comments"][0]["target_repository"] == "mozilla/firefox" + assert result["pull_requests"][0]["target_repository"] == "mozilla/firefox" + assert result["commits"][0]["target_repository"] == "mozilla/firefox" + assert result["reviewers"][0]["target_repository"] == "mozilla/firefox" + assert result["comments"][0]["target_repository"] == "mozilla/firefox" -class TestLoadData: - """Tests for load_data function.""" - @patch("main.datetime") - def test_load_all_tables(self, mock_datetime, mock_bigquery_client): - """Test loading all 4 tables to BigQuery.""" - mock_datetime.now.return_value.strftime.return_value = "2024-01-15" +# ============================================================================= +# TESTS FOR LOAD_DATA +# ============================================================================= - transformed_data = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [{"commit_sha": "abc"}], - "reviewers": [{"reviewer_username": "user1"}], - "comments": [{"comment_id": 123}], - } - main.load_data(mock_bigquery_client, "test_dataset", transformed_data) +@patch("main.datetime") +def test_load_data_inserts_all_tables(mock_datetime, mock_bigquery_client): + """Test that load_data inserts all tables correctly.""" + mock_datetime.now.return_value.strftime.return_value = "2024-01-15" - # Should call insert_rows_json 4 times (once per table) - assert mock_bigquery_client.insert_rows_json.call_count == 4 + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [{"commit_sha": "abc"}], + "reviewers": [{"reviewer_username": "user1"}], + "comments": [{"comment_id": 123}], + } - @patch("main.datetime") - def test_adds_snapshot_date(self, mock_datetime, mock_bigquery_client): - """Test that snapshot_date is added to all rows.""" - mock_datetime.now.return_value.strftime.return_value = "2024-01-15" + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) - transformed_data = { - "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], - "commits": [], - "reviewers": [], - "comments": [], - } + # Should call insert_rows_json 4 times (once per table) + assert mock_bigquery_client.insert_rows_json.call_count == 4 - main.load_data(mock_bigquery_client, "test_dataset", transformed_data) +@patch("main.datetime") +def test_adds_snapshot_date(mock_datetime, mock_bigquery_client): + """Test that snapshot_date is added to all rows.""" + mock_datetime.now.return_value.strftime.return_value = "2024-01-15" - call_args = mock_bigquery_client.insert_rows_json.call_args - rows = call_args[0][1] - assert all(row["snapshot_date"] == "2024-01-15" for row in rows) - - def 
test_constructs_correct_table_ref(self, mock_bigquery_client): - """Test that table_ref is constructed correctly.""" - transformed_data = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], - "reviewers": [], - "comments": [], - } + transformed_data = { + "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], + "commits": [], + "reviewers": [], + "comments": [], + } - main.load_data(mock_bigquery_client, "my_dataset", transformed_data) + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) - call_args = mock_bigquery_client.insert_rows_json.call_args - table_ref = call_args[0][0] - assert table_ref == "test-project.my_dataset.pull_requests" + call_args = mock_bigquery_client.insert_rows_json.call_args + rows = call_args[0][1] + assert all(row["snapshot_date"] == "2024-01-15" for row in rows) - def test_empty_transformed_data_skipped(self, mock_bigquery_client): - """Test that empty transformed_data dict is skipped.""" - transformed_data = {} +def test_constructs_correct_table_ref(mock_bigquery_client): + """Test that table_ref is constructed correctly.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } - main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + main.load_data(mock_bigquery_client, "my_dataset", transformed_data) - mock_bigquery_client.insert_rows_json.assert_not_called() + call_args = mock_bigquery_client.insert_rows_json.call_args + table_ref = call_args[0][0] + assert table_ref == "test-project.my_dataset.pull_requests" - def test_skips_empty_tables_individually(self, mock_bigquery_client): - """Test that empty tables are skipped individually.""" - transformed_data = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], # Empty, should be skipped - "reviewers": [], # Empty, should be skipped - "comments": [{"comment_id": 456}], - } +def test_empty_transformed_data_skipped(mock_bigquery_client): + """Test that empty transformed_data dict is skipped.""" + transformed_data = {} - main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) - # Should only call insert_rows_json twice (for PRs and comments) - assert mock_bigquery_client.insert_rows_json.call_count == 2 + mock_bigquery_client.insert_rows_json.assert_not_called() - def test_only_pull_requests_table(self, mock_bigquery_client): - """Test loading only pull_requests table.""" - transformed_data = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], - "reviewers": [], - "comments": [], - } +def test_skips_empty_tables_individually(mock_bigquery_client): + """Test that empty tables are skipped individually.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], # Empty, should be skipped + "reviewers": [], # Empty, should be skipped + "comments": [{"comment_id": 456}], + } - main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) - assert mock_bigquery_client.insert_rows_json.call_count == 1 + # Should only call insert_rows_json twice (for PRs and comments) + assert mock_bigquery_client.insert_rows_json.call_count == 2 - def test_raises_exception_on_insert_errors(self, mock_bigquery_client): - """Test that Exception is raised on BigQuery insert errors.""" - mock_bigquery_client.insert_rows_json.return_value = [ - {"index": 0, "errors": ["Insert failed"]} - ] +def 
test_only_pull_requests_table(mock_bigquery_client): + """Test loading only pull_requests table.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } - transformed_data = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], - "reviewers": [], - "comments": [], - } + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) - with pytest.raises(Exception) as exc_info: - main.load_data(mock_bigquery_client, "test_dataset", transformed_data) + assert mock_bigquery_client.insert_rows_json.call_count == 1 - assert "BigQuery insert errors" in str(exc_info.value) +def test_raises_exception_on_insert_errors(mock_bigquery_client): + """Test that Exception is raised on BigQuery insert errors.""" + mock_bigquery_client.insert_rows_json.return_value = [ + {"index": 0, "errors": ["Insert failed"]} + ] - def test_verifies_client_insert_called_correctly(self, mock_bigquery_client): - """Test that client.insert_rows_json is called with correct arguments.""" - transformed_data = { - "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], - "commits": [], - "reviewers": [], - "comments": [], - } + transformed_data = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } + with pytest.raises(Exception) as exc_info: main.load_data(mock_bigquery_client, "test_dataset", transformed_data) - call_args = mock_bigquery_client.insert_rows_json.call_args - table_ref, rows = call_args[0] + assert "BigQuery insert errors" in str(exc_info.value) - assert "pull_requests" in table_ref - assert len(rows) == 2 +def test_verifies_client_insert_called_correctly(mock_bigquery_client): + """Test that client.insert_rows_json is called with correct arguments.""" + transformed_data = { + "pull_requests": [{"pull_request_id": 1}, {"pull_request_id": 2}], + "commits": [], + "reviewers": [], + "comments": [], + } + main.load_data(mock_bigquery_client, "test_dataset", transformed_data) -class TestMain: - """Tests for main function.""" + call_args = mock_bigquery_client.insert_rows_json.call_args + table_ref, rows = call_args[0] - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_requires_github_repos( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that GITHUB_REPOS is required.""" - with patch.dict( - os.environ, - {"BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test"}, - clear=True, - ): - with pytest.raises(SystemExit) as exc_info: - main.main() + assert "pull_requests" in table_ref + assert len(rows) == 2 - assert "GITHUB_REPOS" in str(exc_info.value) - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_requires_bigquery_project( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that BIGQUERY_PROJECT is required.""" - with patch.dict( - os.environ, - {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_DATASET": "test"}, - clear=True, - ): - with pytest.raises(SystemExit) as exc_info: - main.main() - assert "BIGQUERY_PROJECT" in str(exc_info.value) - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_requires_bigquery_dataset( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that BIGQUERY_DATASET is required.""" - with patch.dict( - os.environ, - {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test"}, - clear=True, - ): - with 
pytest.raises(SystemExit) as exc_info: - main.main() +# ============================================================================= +# TESTS FOR MAIN +# ============================================================================= - assert "BIGQUERY_DATASET" in str(exc_info.value) - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_github_token_optional_with_warning( - self, mock_session_class, mock_bq_client, mock_setup_logging +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_requires_github_repos(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that GITHUB_REPOS is required.""" + with patch.dict( + os.environ, + {"BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test"}, + clear=True, ): - """Test that GITHUB_TOKEN is optional but warns if missing.""" - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])), - ): - # Should not raise, but should log warning - result = main.main() - assert result == 0 - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_splits_github_repos_by_comma( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that GITHUB_REPOS is split by comma.""" - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, - ): + with pytest.raises(SystemExit) as exc_info: main.main() - # Should be called twice (once per repo) - assert mock_extract.call_count == 2 + assert "GITHUB_REPOS" in str(exc_info.value) - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_honors_github_api_url( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that GITHUB_API_URL is honored.""" - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - "GITHUB_API_URL": "https://custom-api.example.com", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, - ): - main.main() - - call_kwargs = mock_extract.call_args[1] - assert call_kwargs["github_api_url"] == "https://custom-api.example.com" - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_honors_bigquery_emulator_host( - self, mock_session_class, mock_bq_client_class, mock_setup_logging +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_requires_bigquery_project(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that BIGQUERY_PROJECT is required.""" + with patch.dict( + os.environ, + {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_DATASET": "test"}, + clear=True, ): - """Test that BIGQUERY_EMULATOR_HOST is honored.""" - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - "BIGQUERY_EMULATOR_HOST": "http://localhost:9050", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])), - ): + with 
pytest.raises(SystemExit) as exc_info: main.main() - # Verify BigQuery client was created with emulator settings - mock_bq_client_class.assert_called_once() - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_creates_session_with_headers( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that session is created with Accept and User-Agent headers.""" - mock_session = MagicMock() - mock_session_class.return_value = mock_session + assert "BIGQUERY_PROJECT" in str(exc_info.value) - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])), - ): - main.main() - # Verify session headers were set - assert mock_session.headers.update.called - call_args = mock_session.headers.update.call_args[0][0] - assert "Accept" in call_args - assert "User-Agent" in call_args - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_sets_authorization_header_with_token( - self, mock_session_class, mock_bq_client, mock_setup_logging +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_requires_bigquery_dataset(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that BIGQUERY_DATASET is required.""" + with patch.dict( + os.environ, + {"GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test"}, + clear=True, ): - """Test that Authorization header is set when token provided.""" - mock_session = MagicMock() - mock_session_class.return_value = mock_session - - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "test-token-123", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])), - ): + with pytest.raises(SystemExit) as exc_info: main.main() - # Verify Authorization header was set - assert mock_session.headers.__setitem__.called - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - @patch("main.extract_pull_requests") - @patch("main.transform_data") - @patch("main.load_data") - def test_single_repo_successful_etl( - self, - mock_load, - mock_transform, - mock_extract, - mock_session_class, - mock_bq_client, - mock_setup_logging, - ): - """Test successful ETL for single repository.""" - mock_extract.return_value = iter([[{"number": 1}]]) - mock_transform.return_value = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], - "reviewers": [], - "comments": [], - } + assert "BIGQUERY_DATASET" in str(exc_info.value) - with patch.dict( +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_github_token_optional_with_warning(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that GITHUB_TOKEN is optional but warns if missing.""" + with ( + patch.dict( os.environ, { "GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", }, clear=True, - ): - result = main.main() - - assert result == 0 - mock_extract.assert_called_once() - mock_transform.assert_called_once() - mock_load.assert_called_once() - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - @patch("main.extract_pull_requests") - 
@patch("main.transform_data") - @patch("main.load_data") - def test_multiple_repos_processing( - self, - mock_load, - mock_transform, - mock_extract, - mock_session_class, - mock_bq_client, - mock_setup_logging, + ), + patch("main.extract_pull_requests", return_value=iter([])), ): - """Test processing multiple repositories.""" - mock_extract.return_value = iter([[{"number": 1}]]) - mock_transform.return_value = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], - "reviewers": [], - "comments": [], - } + # Should not raise, but should log warning + result = main.main() + assert result == 0 - with patch.dict( +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_splits_github_repos_by_comma(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that GITHUB_REPOS is split by comma.""" + with ( + patch.dict( os.environ, { - "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev,mozilla/addons", + "GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev", "BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test", "GITHUB_TOKEN": "token", }, clear=True, - ): - result = main.main() - - assert result == 0 - # Should process 3 repositories - assert mock_extract.call_count == 3 - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - @patch("main.extract_pull_requests") - @patch("main.transform_data") - @patch("main.load_data") - def test_processes_chunks_iteratively( - self, - mock_load, - mock_transform, - mock_extract, - mock_session_class, - mock_bq_client, - mock_setup_logging, + ), + patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, ): - """Test that chunks are processed iteratively from generator.""" - # Return 3 chunks - mock_extract.return_value = iter( - [ - [{"number": 1}], - [{"number": 2}], - [{"number": 3}], - ] - ) - mock_transform.return_value = { - "pull_requests": [{"pull_request_id": 1}], - "commits": [], - "reviewers": [], - "comments": [], - } - - with patch.dict( + main.main() + + # Should be called twice (once per repo) + assert mock_extract.call_count == 2 + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_honors_github_api_url(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that GITHUB_API_URL is honored.""" + with ( + patch.dict( os.environ, { "GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test", "GITHUB_TOKEN": "token", + "GITHUB_API_URL": "https://custom-api.example.com", }, clear=True, - ): - result = main.main() - - assert result == 0 - # Transform and load should be called 3 times (once per chunk) - assert mock_transform.call_count == 3 - assert mock_load.call_count == 3 - - @patch("main.setup_logging") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_returns_zero_on_success( - self, mock_session_class, mock_bq_client, mock_setup_logging - ): - """Test that main returns 0 on success.""" - with ( - patch.dict( - os.environ, - { - "GITHUB_REPOS": "mozilla/firefox", - "BIGQUERY_PROJECT": "test", - "BIGQUERY_DATASET": "test", - "GITHUB_TOKEN": "token", - }, - clear=True, - ), - patch("main.extract_pull_requests", return_value=iter([])), - ): - result = main.main() - - assert result == 0 - - -@pytest.mark.integration -class TestIntegration: - """Integration tests that test multiple components together.""" - - @patch("main.setup_logging") - @patch("main.load_data") - @patch("main.bigquery.Client") - 
@patch("requests.Session") - def test_end_to_end_with_mocked_github( - self, mock_session_class, mock_bq_client, mock_load, mock_setup_logging + ), + patch("main.extract_pull_requests", return_value=iter([])) as mock_extract, ): - """Test end-to-end flow with mocked GitHub responses.""" - mock_session = MagicMock() - mock_session_class.return_value = mock_session - - # Mock PR response - pr_response = Mock() - pr_response.status_code = 200 - pr_response.json.return_value = [ - {"number": 1, "title": "Bug 1234567 - Test PR", "state": "open"} - ] - pr_response.links = {} - - # Mock commits, reviewers, comments responses - empty_response = Mock() - empty_response.status_code = 200 - empty_response.json.return_value = [] - - mock_session.get.side_effect = [ - pr_response, - empty_response, - empty_response, - empty_response, - ] - - with patch.dict( + main.main() + + call_kwargs = mock_extract.call_args[1] + assert call_kwargs["github_api_url"] == "https://custom-api.example.com" + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_honors_bigquery_emulator_host(mock_session_class, mock_bq_client_class, mock_setup_logging): + """Test that BIGQUERY_EMULATOR_HOST is honored.""" + with ( + patch.dict( os.environ, { "GITHUB_REPOS": "mozilla/firefox", "BIGQUERY_PROJECT": "test", "BIGQUERY_DATASET": "test", "GITHUB_TOKEN": "token", + "BIGQUERY_EMULATOR_HOST": "http://localhost:9050", }, clear=True, - ): - result = main.main() - - assert result == 0 - mock_load.assert_called_once() - - # Verify transformed data structure - call_args = mock_load.call_args[0] - transformed_data = call_args[2] - assert "pull_requests" in transformed_data - assert len(transformed_data["pull_requests"]) == 1 - - @patch("main.setup_logging") - @patch("main.load_data") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_bug_id_extraction_through_pipeline( - self, mock_session_class, mock_bq_client, mock_load, mock_setup_logging + ), + patch("main.extract_pull_requests", return_value=iter([])), ): - """Test bug ID extraction through full pipeline.""" - mock_session = MagicMock() - mock_session_class.return_value = mock_session + main.main() - pr_response = Mock() - pr_response.status_code = 200 - pr_response.json.return_value = [ - { - "number": 1, - "title": "Bug 9876543 - Fix critical issue", - "state": "closed", - } - ] - pr_response.links = {} + # Verify BigQuery client was created with emulator settings + mock_bq_client_class.assert_called_once() - empty_response = Mock() - empty_response.status_code = 200 - empty_response.json.return_value = [] +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_creates_session_with_headers(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that session is created with Accept and User-Agent headers.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session - mock_session.get.side_effect = [ - pr_response, - empty_response, - empty_response, - empty_response, - ] - - with patch.dict( + with ( + patch.dict( os.environ, { "GITHUB_REPOS": "mozilla/firefox", @@ -2084,59 +1795,176 @@ def test_bug_id_extraction_through_pipeline( "GITHUB_TOKEN": "token", }, clear=True, - ): - main.main() + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): + main.main() + + # Verify session headers were set + assert mock_session.headers.update.called + call_args = mock_session.headers.update.call_args[0][0] + assert "Accept" in call_args 
+ assert "User-Agent" in call_args + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_sets_authorization_header_with_token(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that Authorization header is set when token provided.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + with ( + patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "test-token-123", + }, + clear=True, + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): + main.main() + + # Verify Authorization header was set + assert mock_session.headers.__setitem__.called + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +@patch("main.extract_pull_requests") +@patch("main.transform_data") +@patch("main.load_data") +def test_single_repo_successful_etl( + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, +): + """Test successful ETL for single repository.""" + mock_extract.return_value = iter([[{"number": 1}]]) + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } - call_args = mock_load.call_args[0] - transformed_data = call_args[2] - pr = transformed_data["pull_requests"][0] - assert pr["bug_id"] == 9876543 - - @patch("main.setup_logging") - @patch("main.load_data") - @patch("main.bigquery.Client") - @patch("requests.Session") - def test_pagination_through_full_flow( - self, mock_session_class, mock_bq_client, mock_load, mock_setup_logging + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, ): - """Test pagination through full ETL flow.""" - mock_session = MagicMock() - mock_session_class.return_value = mock_session - - # First page - pr_response_1 = Mock() - pr_response_1.status_code = 200 - pr_response_1.json.return_value = [ - {"number": 1, "title": "PR 1", "state": "open"} - ] - pr_response_1.links = { - "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"} - } + result = main.main() + + assert result == 0 + mock_extract.assert_called_once() + mock_transform.assert_called_once() + mock_load.assert_called_once() + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +@patch("main.extract_pull_requests") +@patch("main.transform_data") +@patch("main.load_data") +def test_multiple_repos_processing( + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, +): + """Test processing multiple repositories.""" + mock_extract.return_value = iter([[{"number": 1}]]) + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } - # Second page - pr_response_2 = Mock() - pr_response_2.status_code = 200 - pr_response_2.json.return_value = [ - {"number": 2, "title": "PR 2", "state": "open"} - ] - pr_response_2.links = {} - - empty_response = Mock() - empty_response.status_code = 200 - empty_response.json.return_value = [] - - mock_session.get.side_effect = [ - pr_response_1, - empty_response, - empty_response, - empty_response, - pr_response_2, - empty_response, - empty_response, - empty_response, + with patch.dict( + os.environ, + { + 
"GITHUB_REPOS": "mozilla/firefox,mozilla/gecko-dev,mozilla/addons", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + # Should process 3 repositories + assert mock_extract.call_count == 3 + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +@patch("main.extract_pull_requests") +@patch("main.transform_data") +@patch("main.load_data") +def test_processes_chunks_iteratively( + mock_load, + mock_transform, + mock_extract, + mock_session_class, + mock_bq_client, + mock_setup_logging, +): + """Test that chunks are processed iteratively from generator.""" + # Return 3 chunks + mock_extract.return_value = iter( + [ + [{"number": 1}], + [{"number": 2}], + [{"number": 3}], ] + ) + mock_transform.return_value = { + "pull_requests": [{"pull_request_id": 1}], + "commits": [], + "reviewers": [], + "comments": [], + } - with patch.dict( + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + # Transform and load should be called 3 times (once per chunk) + assert mock_transform.call_count == 3 + assert mock_load.call_count == 3 + +@patch("main.setup_logging") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_returns_zero_on_success(mock_session_class, mock_bq_client, mock_setup_logging): + """Test that main returns 0 on success.""" + with ( + patch.dict( os.environ, { "GITHUB_REPOS": "mozilla/firefox", @@ -2145,8 +1973,166 @@ def test_pagination_through_full_flow( "GITHUB_TOKEN": "token", }, clear=True, - ): - main.main() + ), + patch("main.extract_pull_requests", return_value=iter([])), + ): + result = main.main() + + assert result == 0 + + +@pytest.mark.integration +@patch("main.setup_logging") +@patch("main.load_data") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_full_etl_flow_transforms_data_correctly(mock_session_class, mock_bq_client, mock_load, mock_setup_logging): + """Test full ETL flow with mocked GitHub responses.""" + mock_session = MagicMock() + mock_session_class.return_value = mock_session + + # Mock PR response + pr_response = Mock() + pr_response.status_code = 200 + pr_response.json.return_value = [ + {"number": 1, "title": "Bug 1234567 - Test PR", "state": "open"} + ] + pr_response.links = {} + + # Mock commits, reviewers, comments responses + empty_response = Mock() + empty_response.status_code = 200 + empty_response.json.return_value = [] + + mock_session.get.side_effect = [ + pr_response, + empty_response, + empty_response, + empty_response, + ] + + with patch.dict( + os.environ, + { + "GITHUB_REPOS": "mozilla/firefox", + "BIGQUERY_PROJECT": "test", + "BIGQUERY_DATASET": "test", + "GITHUB_TOKEN": "token", + }, + clear=True, + ): + result = main.main() + + assert result == 0 + mock_load.assert_called_once() + + # Verify transformed data structure + call_args = mock_load.call_args[0] + transformed_data = call_args[2] + assert "pull_requests" in transformed_data + assert len(transformed_data["pull_requests"]) == 1 + +@patch("main.setup_logging") +@patch("main.load_data") +@patch("main.bigquery.Client") +@patch("requests.Session") +def test_bug_id_extraction_through_pipeline(mock_session_class, mock_bq_client, mock_load, mock_setup_logging): + """Test bug ID extraction through full pipeline.""" + mock_session = MagicMock() + 
+    mock_session_class.return_value = mock_session
+
+    pr_response = Mock()
+    pr_response.status_code = 200
+    pr_response.json.return_value = [
+        {
+            "number": 1,
+            "title": "Bug 9876543 - Fix critical issue",
+            "state": "closed",
+        }
+    ]
+    pr_response.links = {}
+
+    empty_response = Mock()
+    empty_response.status_code = 200
+    empty_response.json.return_value = []
+
+    mock_session.get.side_effect = [
+        pr_response,
+        empty_response,
+        empty_response,
+        empty_response,
+    ]
+
+    with patch.dict(
+        os.environ,
+        {
+            "GITHUB_REPOS": "mozilla/firefox",
+            "BIGQUERY_PROJECT": "test",
+            "BIGQUERY_DATASET": "test",
+            "GITHUB_TOKEN": "token",
+        },
+        clear=True,
+    ):
+        main.main()
+
+    call_args = mock_load.call_args[0]
+    transformed_data = call_args[2]
+    pr = transformed_data["pull_requests"][0]
+    assert pr["bug_id"] == 9876543
+
+@patch("main.setup_logging")
+@patch("main.load_data")
+@patch("main.bigquery.Client")
+@patch("requests.Session")
+def test_pagination_through_full_flow(mock_session_class, mock_bq_client, mock_load, mock_setup_logging):
+    """Test pagination through full ETL flow."""
+    mock_session = MagicMock()
+    mock_session_class.return_value = mock_session
+
+    # First page
+    pr_response_1 = Mock()
+    pr_response_1.status_code = 200
+    pr_response_1.json.return_value = [
+        {"number": 1, "title": "PR 1", "state": "open"}
+    ]
+    pr_response_1.links = {
+        "next": {"url": "https://api.github.com/repos/mozilla/firefox/pulls?page=2"}
+    }
+
+    # Second page
+    pr_response_2 = Mock()
+    pr_response_2.status_code = 200
+    pr_response_2.json.return_value = [
+        {"number": 2, "title": "PR 2", "state": "open"}
+    ]
+    pr_response_2.links = {}
+
+    empty_response = Mock()
+    empty_response.status_code = 200
+    empty_response.json.return_value = []
+
+    mock_session.get.side_effect = [
+        pr_response_1,
+        empty_response,
+        empty_response,
+        empty_response,
+        pr_response_2,
+        empty_response,
+        empty_response,
+        empty_response,
+    ]
+
+    with patch.dict(
+        os.environ,
+        {
+            "GITHUB_REPOS": "mozilla/firefox",
+            "BIGQUERY_PROJECT": "test",
+            "BIGQUERY_DATASET": "test",
+            "GITHUB_TOKEN": "token",
+        },
+        clear=True,
+    ):
+        main.main()
-        # Should be called twice (once per chunk/page)
-        assert mock_load.call_count == 2
+    # Should be called twice (once per chunk/page)
+    assert mock_load.call_count == 2

From cb582d3b60a2476725d94aa75db7aad186077747 Mon Sep 17 00:00:00 2001
From: David Lawrence
Date: Fri, 23 Jan 2026 18:20:32 -0500
Subject: [PATCH 12/12] Fixed action to install testing dependencies

---
 .github/workflows/tests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 5480c08..d77e706 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -16,6 +16,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements.txt
+          pip install -e ".[dev]"
       - name: Run unit tests with coverage
         run: |
          pytest -m "not integration and not slow" --cov=main --cov-report=term-missing --cov-fail-under=80
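Note: the `pip install -e ".[dev]"` step assumes the project's pyproject.toml
declares a "dev" optional-dependency group. That file is not shown in this
series, so the following is only an illustrative sketch of such a section;
the group name and package list are assumptions, not part of the patch:

    [project.optional-dependencies]
    # Hypothetical dev extra covering the test tooling the workflow invokes
    # (pytest with coverage support); the real list lives in pyproject.toml.
    dev = [
        "pytest",
        "pytest-cov",
    ]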