Skip to content

Documentation Database Health Check #172

Documentation Database Health Check

Documentation Database Health Check #172

# Health check of the documentation database: runs nightly and on demand.
name: Documentation Database Health Check
on:
  # Allow manual triggering
  workflow_dispatch:
  schedule:
    # Run daily at 2:00 AM UTC
    - cron: "0 2 * * *"
# Required secrets:
#   GOOGLE_DRIVE_FILE_ID: The file ID from your Google Drive URL
#     Current file: https://drive.google.com/file/d/1r7KSauTJdMnfthq-L2m9q4UjkREm4Byz/view?usp=sharing
#     File ID: 1r7KSauTJdMnfthq-L2m9q4UjkREm4Byz
#   GCP_WIF_PROVIDER: Workload Identity Federation provider name
#     Format: projects/PROJECT_NUMBER/locations/global/workloadIdentityPools/POOL_ID/providers/PROVIDER_ID
#   GCP_WIF_SERVICE_ACCOUNT: Service account email for WIF
#     Format: SERVICE_ACCOUNT_NAME@PROJECT_ID.iam.gserviceaccount.com
# Set these in: Repository Settings > Secrets and variables > Actions
jobs:
  health-check:
    # Token permissions granted to this job:
    #   id-token: write - allows requesting a GitHub OIDC token, which the
    #                     Workload Identity Federation auth step relies on
    #   contents: read  - read-only access to the repository contents
    permissions:
      id-token: write
      contents: read
    runs-on: ubuntu-latest
    steps:
# Check out the repository (the later steps work inside its check-tools directory).
- uses: actions/checkout@v4
  name: Checkout repository
# Install the uv tool used by every `uv ...` command below.
- uses: astral-sh/setup-uv@v3
  name: Set up Python with uv
  with:
    version: latest
# Sync the check-tools project environment with uv.
- name: Install dependencies
  working-directory: check-tools
  run: uv sync
# Authenticate to Google Cloud through Workload Identity Federation; both the
# provider and the service account are read from repository secrets.
- name: Authenticate to Google Cloud using Workload Identity Federation
  uses: google-github-actions/auth@v2
  id: auth
  with:
    service_account: ${{ secrets.GCP_WIF_SERVICE_ACCOUNT }}
    workload_identity_provider: ${{ secrets.GCP_WIF_PROVIDER }}
    # NOTE(review): the action's documentation describes these scopes as
    # applying only when `token_format: access_token` is set, which this
    # step does not do - confirm whether they have any effect here.
    access_token_scopes: |
      https://www.googleapis.com/auth/drive.readonly
      https://www.googleapis.com/auth/drive.file
# Download the zipped database from Google Drive to ./documentation.zip and
# run basic sanity checks on the downloaded file.
- name: Download database from Google Drive
  env:
    # Secrets are passed to the script as environment variables rather than
    # being expanded into the script text by the workflow expression engine,
    # so a value containing quotes or `$` cannot change the shell code.
    FILE_ID: ${{ secrets.GOOGLE_DRIVE_FILE_ID }}
    SERVICE_ACCOUNT_EMAIL: ${{ secrets.GCP_WIF_SERVICE_ACCOUNT }}
  run: |
    # Download the latest documentation.db from Google Drive using Google Drive API
    echo "Downloading documentation.db from Google Drive using Drive API..."
    # FILE_ID is supplied from the GOOGLE_DRIVE_FILE_ID secret via this step's env
    if [ -z "${FILE_ID:-}" ]; then
      echo "Error: GOOGLE_DRIVE_FILE_ID secret not set"
      echo "Please set the GOOGLE_DRIVE_FILE_ID secret in your repository settings"
      echo "The file ID can be extracted from a Google Drive URL like:"
      echo "https://drive.google.com/file/d/FILE_ID_HERE/view?usp=sharing"
      exit 1
    fi
    echo "Attempting to download file with ID: ${FILE_ID} using Google Drive API"
    # Debug: Show authentication status
    echo "Authentication status:"
    echo "GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-'Not set'}"
    echo "GCP_PROJECT: ${GCP_PROJECT:-'Not set'}"
    echo "Service Account Email: ${SERVICE_ACCOUNT_EMAIL}"
    echo "File ID: ${FILE_ID}"
    # Run the Python download script (it writes the zip one level up, in the
    # workspace root)
    cd check-tools
    uv run python download_database.py "$FILE_ID" "../documentation.zip"
    cd ..
    # Verify the download was successful
    if [ ! -f "documentation.zip" ]; then
      echo "Error: Downloaded zip file does not exist"
      exit 1
    fi
    # Additional verification: report the size (GNU stat first, BSD stat as
    # a fallback, "0" if neither works)
    FILE_SIZE=$(stat -c%s "documentation.zip" 2>/dev/null || stat -f%z "documentation.zip" 2>/dev/null || echo "0")
    echo "Download verification: documentation.zip ($FILE_SIZE bytes)"
    # Verify it's a zip file; this is only a warning, so the step still succeeds
    if ! file documentation.zip | grep -q "Zip\|ZIP"; then
      echo "Warning: Downloaded file may not be a valid zip file"
      echo "File type: $(file documentation.zip)"
    fi
# Unpack documentation.zip and make sure documentation.db ends up in the
# workspace root, whether the archive stores it at top level or inside a
# single subdirectory.
- name: Extract database from zip file
  run: |
    echo "Extracting database from zip file..."
    # Install unzip only if it is not already available
    if ! command -v unzip > /dev/null 2>&1; then
      sudo apt-get update -qq && sudo apt-get install -y unzip
    fi
    # Extract the zip file
    if ! unzip -o documentation.zip; then
      echo "Error: Failed to extract zip file"
      exit 1
    fi
    # Look for the database file in the extracted contents
    if [ -f "documentation.db" ]; then
      echo "Found documentation.db in root directory"
    else
      # A quoted glob is never expanded, so `[ -f "*/documentation.db" ]`
      # can never be true. Iterate over the unquoted pattern instead; when
      # nothing matches, the literal pattern fails the -f test and DB_PATH
      # stays empty.
      DB_PATH=""
      for candidate in */documentation.db; do
        if [ -f "$candidate" ]; then
          DB_PATH="$candidate"
          break
        fi
      done
      if [ -n "$DB_PATH" ]; then
        echo "Found documentation.db in subdirectory, moving to root"
        mv "$DB_PATH" .
      else
        echo "Error: No documentation.db found in extracted files"
        echo "Contents of extracted files:"
        ls -la
        exit 1
      fi
    fi
    # Verify the extracted database file
    if [ ! -f "documentation.db" ]; then
      echo "Error: documentation.db not found after extraction"
      exit 1
    fi
    DB_SIZE=$(stat -c%s "documentation.db" 2>/dev/null || stat -f%z "documentation.db" 2>/dev/null || echo "0")
    echo "Successfully extracted documentation.db ($DB_SIZE bytes)"
    # Clean up zip file to save space
    rm -f documentation.zip
    echo "Cleaned up zip file"
# Confirm that documentation.db is a readable SQLite database and report
# which tables it contains.
- name: Validate downloaded database
  run: |
    echo "Validating downloaded database..."
    # Check if it's a valid SQLite database. Querying sqlite_master makes
    # SQLite read the file header and schema; a constant query such as
    # "SELECT 1;" references no table and can succeed without the file
    # being read at all.
    if ! sqlite3 documentation.db "SELECT count(*) FROM sqlite_master;" > /dev/null 2>&1; then
      echo "Error: Downloaded file is not a valid SQLite database"
      exit 1
    fi
    # Collect the table names on one line. Newlines are turned into spaces
    # (not deleted) so names on adjacent rows of the .tables output are not
    # glued together.
    TABLES=$(sqlite3 documentation.db ".tables" | tr '\n' ' ')
    echo "Found tables: $TABLES"
    # Check for at least one expected table; a miss is only a warning
    if ! echo "$TABLES" | grep -q "Content\|Tooltips"; then
      echo "Warning: Database may not have expected schema (no Content or Tooltips tables found)"
    fi
    echo "Database validation completed successfully"
# Run the check-tools entry point against the database in the workspace root.
- name: Run database health check
  working-directory: check-tools
  run: uv run python main.py ../documentation.db
# Publish the JSON health-check logs as a build artifact.
- name: Upload health check logs
  # Without a condition this step is skipped as soon as any earlier step
  # fails, which is exactly when the logs are needed. Run it unless the
  # workflow run was cancelled.
  if: ${{ !cancelled() }}
  uses: actions/upload-artifact@v4
  with:
    # The run number keeps artifact names unique per workflow run.
    name: health-check-logs-${{ github.run_number }}
    path: |
      check-tools/db_health_log_*.json
    retention-days: 30