# Documentation Database Health Check #172
# This file contains hidden or bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open
# the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters.
# Scheduled health check for the documentation database.
#
# Flow: check out the repo, install the check-tools dependencies with uv,
# authenticate to Google Cloud, download documentation.zip from Google Drive,
# extract documentation.db, sanity-check it with sqlite3, run the health check
# in check-tools/main.py, and upload the JSON logs as a build artifact.
name: Documentation Database Health Check

on:
  schedule:
    # Run daily at 2:00 AM UTC
    - cron: '0 2 * * *'
  workflow_dispatch:  # Allow manual triggering

# Required secrets:
#   GOOGLE_DRIVE_FILE_ID: The file ID from your Google Drive URL
#     Current file: https://drive.google.com/file/d/1r7KSauTJdMnfthq-L2m9q4UjkREm4Byz/view?usp=sharing
#     File ID: 1r7KSauTJdMnfthq-L2m9q4UjkREm4Byz
#     NOTE(review): the ID is written here in plain text, so keeping it in a
#     secret makes it configurable but not confidential - confirm this is
#     intended.
#   GCP_WIF_PROVIDER: Workload Identity Federation provider name
#     Format: projects/PROJECT_NUMBER/locations/global/workloadIdentityPools/POOL_ID/providers/PROVIDER_ID
#   GCP_WIF_SERVICE_ACCOUNT: Service account email for WIF
#     Format: SERVICE_ACCOUNT_NAME@PROJECT_ID.iam.gserviceaccount.com
# Set in: Repository Settings > Secrets and variables > Actions

jobs:
  health-check:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      # Lets the job request an OIDC token, which the Workload Identity
      # Federation auth step below exchanges for Google credentials.
      id-token: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python with uv
        uses: astral-sh/setup-uv@v3
        with:
          version: "latest"

      - name: Install dependencies
        run: |
          cd check-tools
          uv sync

      - name: Authenticate to Google Cloud using Workload Identity Federation
        id: auth
        uses: google-github-actions/auth@v2
        with:
          workload_identity_provider: ${{ secrets.GCP_WIF_PROVIDER }}
          service_account: ${{ secrets.GCP_WIF_SERVICE_ACCOUNT }}
          # NOTE(review): the auth action documents access_token_scopes as
          # only applying when token_format is "access_token". No token_format
          # is set here, so these scopes may have no effect - confirm.
          access_token_scopes: |
            https://www.googleapis.com/auth/drive.readonly
            https://www.googleapis.com/auth/drive.file

      - name: Download database from Google Drive
        env:
          # Secrets are handed to the script through the environment rather
          # than being interpolated into the script text, so their values are
          # never parsed as shell code.
          FILE_ID: ${{ secrets.GOOGLE_DRIVE_FILE_ID }}
          SERVICE_ACCOUNT_EMAIL: ${{ secrets.GCP_WIF_SERVICE_ACCOUNT }}
        run: |
          # Download the latest documentation.db from Google Drive using Google Drive API
          echo "Downloading documentation.db from Google Drive using Drive API..."

          # The Google Drive file ID comes from the FILE_ID environment
          # variable, which is populated from the repository secret.
          if [ -z "$FILE_ID" ]; then
            echo "Error: GOOGLE_DRIVE_FILE_ID secret not set"
            echo "Please set the GOOGLE_DRIVE_FILE_ID secret in your repository settings"
            echo "The file ID can be extracted from a Google Drive URL like:"
            echo "https://drive.google.com/file/d/FILE_ID_HERE/view?usp=sharing"
            exit 1
          fi

          echo "Attempting to download file with ID: ${FILE_ID} using Google Drive API"

          # Debug: Show authentication status
          echo "Authentication status:"
          echo "GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-'Not set'}"
          echo "GCP_PROJECT: ${GCP_PROJECT:-'Not set'}"
          echo "Service Account Email: ${SERVICE_ACCOUNT_EMAIL}"
          echo "File ID: ${FILE_ID}"

          # Run the Python download script
          cd check-tools
          uv run python download_database.py "$FILE_ID" "../documentation.zip"
          cd ..

          # Verify the download was successful
          if [ ! -f "documentation.zip" ]; then
            echo "Error: Downloaded zip file does not exist"
            exit 1
          fi

          # Additional verification (GNU stat first, BSD stat as fallback)
          FILE_SIZE=$(stat -c%s "documentation.zip" 2>/dev/null || stat -f%z "documentation.zip" 2>/dev/null || echo "0")
          echo "Download verification: documentation.zip ($FILE_SIZE bytes)"

          # Verify it's a zip file (warning only; extraction is the hard check)
          if ! file documentation.zip | grep -q "Zip\|ZIP"; then
            echo "Warning: Downloaded file may not be a valid zip file"
            echo "File type: $(file documentation.zip)"
          fi

      - name: Extract database from zip file
        run: |
          echo "Extracting database from zip file..."

          # Install unzip only if it is not already available
          if ! command -v unzip > /dev/null 2>&1; then
            sudo apt-get update -qq && sudo apt-get install -y unzip
          fi

          # Extract the zip file
          if ! unzip -o documentation.zip; then
            echo "Error: Failed to extract zip file"
            exit 1
          fi

          # Look for the database file in the extracted contents
          if [ -f "documentation.db" ]; then
            echo "Found documentation.db in root directory"
          else
            # A quoted glob is never expanded by the shell, so iterate over
            # the unquoted pattern and take the first match that is a file.
            DB_CANDIDATE=""
            for candidate in */documentation.db; do
              if [ -f "$candidate" ]; then
                DB_CANDIDATE="$candidate"
                break
              fi
            done

            if [ -n "$DB_CANDIDATE" ]; then
              echo "Found documentation.db in subdirectory, moving to root"
              mv "$DB_CANDIDATE" .
            else
              echo "Error: No documentation.db found in extracted files"
              echo "Contents of extracted files:"
              ls -la
              exit 1
            fi
          fi

          # Verify the extracted database file
          if [ ! -f "documentation.db" ]; then
            echo "Error: documentation.db not found after extraction"
            exit 1
          fi

          DB_SIZE=$(stat -c%s "documentation.db" 2>/dev/null || stat -f%z "documentation.db" 2>/dev/null || echo "0")
          echo "Successfully extracted documentation.db ($DB_SIZE bytes)"

          # Clean up zip file to save space
          rm -f documentation.zip
          echo "Cleaned up zip file"

      - name: Validate downloaded database
        run: |
          echo "Validating downloaded database..."

          # Make sure the sqlite3 CLI exists, so that a missing tool is not
          # misreported below as an invalid database.
          if ! command -v sqlite3 > /dev/null 2>&1; then
            sudo apt-get update -qq && sudo apt-get install -y sqlite3
          fi

          # Check if it's a valid SQLite database
          if ! sqlite3 documentation.db "SELECT 1;" > /dev/null 2>&1; then
            echo "Error: Downloaded file is not a valid SQLite database"
            exit 1
          fi

          # Check if it has the expected tables
          TABLES=$(sqlite3 documentation.db ".tables" | tr -d '\n')
          echo "Found tables: $TABLES"

          # Check for at least one expected table (warning only)
          if ! echo "$TABLES" | grep -q "Content\|Tooltips"; then
            echo "Warning: Database may not have expected schema (no Content or Tooltips tables found)"
          fi

          echo "Database validation completed successfully"

      - name: Run database health check
        run: |
          cd check-tools
          uv run python main.py ../documentation.db

      - name: Upload health check logs
        # Run even when an earlier step failed, so the logs are still
        # available when the health check itself exits non-zero.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: health-check-logs-${{ github.run_number }}
          path: |
            check-tools/db_health_log_*.json
          retention-days: 30