From 6269a704c5109c7bbe4998517bad2f52589fb4a4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 22:01:23 +0000 Subject: [PATCH 1/3] Initial plan From 62f2b9322aba84a83f6b2bdf828af01a54f12655 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 22:07:55 +0000 Subject: [PATCH 2/3] Add comprehensive service health checks and .env validation Co-authored-by: Sulstice <11812946+Sulstice@users.noreply.github.com> --- Dockerfile.unified | 107 +++++++++++++++++++++++++- docker-compose.unified.yml | 6 +- readme.md | 74 +++++++++++++++++- start_services.sh | 137 +++++++++++++++++++++++++++++++++- validate_env.sh | 149 +++++++++++++++++++++++++++++++++++++ 5 files changed, 462 insertions(+), 11 deletions(-) create mode 100755 validate_env.sh diff --git a/Dockerfile.unified b/Dockerfile.unified index 78c9d4c..8f3c6e0 100644 --- a/Dockerfile.unified +++ b/Dockerfile.unified @@ -127,11 +127,97 @@ RUN echo '#!/bin/bash' > /app/oxigraph/oxigraph_server && \ echo 'exit 1' >> /app/oxigraph/oxigraph_server && \ chmod +x /app/oxigraph/oxigraph_server +# Create healthcheck script +RUN cat > /app/healthcheck.sh << 'EOF' +#!/bin/bash +# Healthcheck script to verify all services are running + +# Check if supervisor is running +if ! pgrep supervisord > /dev/null; then + echo "ERROR: supervisord is not running" + exit 1 +fi + +# Check if supervisor socket is accessible +if ! 
supervisorctl pid > /dev/null 2>&1; then + echo "ERROR: Cannot connect to supervisor" + exit 1 +fi + +# Check each service status (per-service query; only the state column is inspected) +for service in api_tokenmanager query_service ml_service; do + status=$(supervisorctl status "$service" 2>/dev/null | awk '{print $2}') + if [ "$status" != "RUNNING" ]; then + echo "ERROR: Service $service is not running (status: $status)" + echo "Check logs at: /var/log/supervisor/${service}.err.log" + exit 1 + fi +done + +# Check if services are listening on their ports (each probe is time-bounded so the healthcheck cannot hang) +# Give services time to bind to ports if they just started +sleep 2 + +# Check API Token Manager (port 8000) +if ! curl -s -f --max-time 2 http://localhost:8000/ > /dev/null 2>&1; then + echo "WARNING: API Token Manager (port 8000) is not responding" +fi + +# Check Query Service (port 8010) +if ! curl -s -f --max-time 2 http://localhost:8010/api/ > /dev/null 2>&1; then + echo "WARNING: Query Service (port 8010) is not responding" +fi + +# Check ML Service (port 8007) +if ! curl -s -f --max-time 2 http://localhost:8007/api/ > /dev/null 2>&1; then + echo "WARNING: ML Service (port 8007) is not responding" +fi + +# If we got here, at least supervisor and services are running (HTTP probe failures above are warnings only) +exit 0 +EOF + +RUN chmod +x /app/healthcheck.sh + # Create startup script RUN cat > /app/start.sh << 'EOF' #!/bin/bash set -e +echo "==========================================" +echo "BrainKB Unified Container Startup" +echo "==========================================" +echo "" + +# Check critical environment variables +echo "Checking environment configuration..." 
+MISSING_VARS=() + +# Check JWT/Database configuration +if [ -z "$JWT_POSTGRES_DATABASE_USER" ] && [ -z "$DB_USER" ]; then + MISSING_VARS+=("JWT_POSTGRES_DATABASE_USER or DB_USER") +fi +if [ -z "$JWT_POSTGRES_DATABASE_PASSWORD" ] && [ -z "$DB_PASSWORD" ]; then + MISSING_VARS+=("JWT_POSTGRES_DATABASE_PASSWORD or DB_PASSWORD") +fi +if [ -z "$QUERY_SERVICE_JWT_SECRET_KEY" ]; then + MISSING_VARS+=("QUERY_SERVICE_JWT_SECRET_KEY") +fi +if [ -z "$ML_SERVICE_JWT_SECRET_KEY" ]; then + MISSING_VARS+=("ML_SERVICE_JWT_SECRET_KEY") +fi + +if [ ${#MISSING_VARS[@]} -gt 0 ]; then + echo "WARNING: Missing critical environment variables:" + for var in "${MISSING_VARS[@]}"; do + echo " - $var" + done + echo "" + echo "Services may fail to start without these variables." + echo "Please ensure your .env file is properly configured." + echo "" +fi + # Note: Oxigraph runs as a separate service in docker-compose # Ensure supervisor socket directory exists and is writable @@ -198,7 +284,26 @@ mkdir -p /var/run chmod 755 /var/run # Start supervisor -echo "Starting all services..." +echo "" +echo "Starting all services via supervisord..." 
+echo "==========================================" +echo "" +echo "Services being started:" +echo " - API Token Manager (port 8000)" +echo " - Query Service (port 8010)" +echo " - ML Service (port 8007)" +echo "" +echo "Logs are available at:" +echo " - /var/log/supervisor/api_tokenmanager.err.log" +echo " - /var/log/supervisor/query_service.err.log" +echo " - /var/log/supervisor/ml_service.err.log" +echo "" +echo "From host, view logs with:" +echo " docker exec brainkb-unified tail -f /var/log/supervisor/query_service.err.log" +echo "" +echo "==========================================" +echo "" + # Use our config file that includes socket configuration exec /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf EOF diff --git a/docker-compose.unified.yml b/docker-compose.unified.yml index 1b831da..c53b94b 100644 --- a/docker-compose.unified.yml +++ b/docker-compose.unified.yml @@ -51,11 +51,11 @@ services: cpus: '2.0' # Reserve 2 CPUs minimum memory: 4G healthcheck: - test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:8000/ || exit 1"] + test: ["CMD-SHELL", "/app/healthcheck.sh"] interval: 30s timeout: 10s - retries: 3 - start_period: 60s + retries: 5 + start_period: 90s # Oxigraph SPARQL Database (separate service) # Note: Oxigraph is not directly exposed - access through oxigraph-nginx service diff --git a/readme.md b/readme.md index 1c0446c..b814845 100644 --- a/readme.md +++ b/readme.md @@ -21,14 +21,35 @@ BrainKB serves as a knowledge base platform that provides scientists worldwide w #### 1. Setup Environment variables -**Important**: Change default passwords in `.env` for security. +**Important**: You MUST create a `.env` file and change default passwords for security. ```bash # Copy the environment template cp env.template .env -# Edit .env with your configuration (make sure to change passwords!) 
-nano .env # or use your preferred editor +# Edit .env with your configuration +nano .env # or use your preferred editor (vim, code, etc.) +``` + +**Required changes in `.env`:** +- `POSTGRES_PASSWORD` - Change from default +- `DB_PASSWORD` - Must match POSTGRES_PASSWORD +- `JWT_POSTGRES_DATABASE_PASSWORD` - Must match POSTGRES_PASSWORD +- `DJANGO_SUPERUSER_PASSWORD` - Set a secure password for Django admin +- `OXIGRAPH_PASSWORD` - Change from default +- `GRAPHDATABASE_PASSWORD` - Must match OXIGRAPH_PASSWORD +- `QUERY_SERVICE_JWT_SECRET_KEY` - Set a secure random string +- `ML_SERVICE_JWT_SECRET_KEY` - Set a secure random string + +**Optional configuration:** +- `MONGO_DB_URL` - Required if using ML Service with MongoDB +- `OLLAMA_API_ENDPOINT` - Set to `http://host.docker.internal:11434` when using Docker +- Other service-specific settings as documented in `env.template` + +**Validate your configuration:** +```bash +# Optional but recommended - validate your .env file +./validate_env.sh ``` #### 2. Start Services @@ -54,6 +75,53 @@ Once started, services are accessible at: ### Troubleshooting +#### Services not accessible (localhost:8007, localhost:8010) + +If you cannot access Query Service (port 8010) or ML Service (port 8007) after starting: + +1. **Check if .env file exists and has valid configuration:** + ```bash + # Ensure .env file exists + ls -la .env + + # Verify passwords are not default values + grep "your_secure_password_change_this" .env + # If this finds matches, update those passwords! + ``` + +2. **Check service status inside the container:** + ```bash + # View all service statuses + docker exec brainkb-unified supervisorctl status + + # If services show as EXITED or FATAL, check logs: + docker exec brainkb-unified tail -n 50 /var/log/supervisor/query_service.err.log + docker exec brainkb-unified tail -n 50 /var/log/supervisor/ml_service.err.log + ``` + +3. 
**Restart individual services:** + ```bash + # Restart query service + ./start_services.sh query-service restart + + # Restart ML service + ./start_services.sh ml-service restart + ``` + +4. **Check container health:** + ```bash + # View container health status + docker ps --format "table {{.Names}}\t{{.Status}}" + + # View container logs + docker logs brainkb-unified + ``` + +5. **Common issues:** + - **Database connection failures**: Ensure PostgreSQL passwords match across POSTGRES_PASSWORD, DB_PASSWORD, and JWT_POSTGRES_DATABASE_PASSWORD + - **Missing environment variables**: Check logs for "environment variable not set" errors + - **Port conflicts**: Ensure ports 8000, 8007, 8010 are not already in use by other applications + If you encounter Docker mount errors or issues with file sharing, please refer to the [Troubleshooting section in LOCAL_DEPLOYMENT.md](LOCAL_DEPLOYMENT.md#troubleshooting). diff --git a/start_services.sh b/start_services.sh index 33bdac5..61ecff7 100755 --- a/start_services.sh +++ b/start_services.sh @@ -43,6 +43,38 @@ SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "$SCRIPT_DIR" +# Check if .env file exists, if not provide helpful error message +if [ ! -f ".env" ]; then + echo "ERROR: .env file not found!" + echo "" + echo "Please create a .env file from the template:" + echo " cp env.template .env" + echo "" + echo "Then edit .env with your configuration:" + echo " - Change all default passwords (POSTGRES_PASSWORD, DJANGO_SUPERUSER_PASSWORD, etc.)" + echo " - Configure Ollama settings if needed" + echo " - Review other settings as needed" + echo "" + echo "You can validate your configuration with:" + echo " ./validate_env.sh" + echo "" + echo "See env.template for all available options and documentation." 
+ exit 1 +fi + +# Validate .env file (only for 'up' commands, skip for other operations) +# This helps catch configuration issues early +if [[ "$1" == "up" ]] || [[ "$1" == "" ]]; then + if [ -f "./validate_env.sh" ]; then + # Run validation but don't exit on warnings (only on errors) + if ! ./validate_env.sh; then + echo "" + echo "Please fix the configuration errors above before starting services." + exit 1 + fi + fi +fi + # Load environment variables from .env file FIRST # This ensures the hook script has access to the correct environment variables if [ -f ".env" ]; then @@ -177,11 +209,95 @@ if ! docker network inspect brainkb-network >/dev/null 2>&1; then echo "Creating docker network external - brainkb-network" docker network create brainkb-network fi + # Function to setup and start Ollama (for automatic setup on 'up' commands) setup_ollama() { handle_ollama up } +# Function to check if services are accessible after startup +check_services_health() { + local unified_container="brainkb-unified" + + # Wait a bit for services to start + echo "" + echo "Waiting for services to start..." + sleep 5 + + # Check if container is running + if ! docker ps --format '{{.Names}}' | grep -q "^${unified_container}$"; then + echo "WARNING: Container '${unified_container}' is not running" + return 1 + fi + + echo "Checking service health..." + + # Check supervisor status; declared separately so $? is not masked by 'local' (exit 3 = some program not RUNNING, listing still valid) + local supervisor_status supervisor_rc=0 + supervisor_status=$(docker exec "${unified_container}" supervisorctl status 2>&1) || supervisor_rc=$? + if [ "$supervisor_rc" -ne 0 ] && [ "$supervisor_rc" -ne 3 ]; then + echo "WARNING: Cannot connect to supervisor in '${unified_container}'" + echo "The container may still be starting up. 
Please wait a moment and check:" + echo " docker logs ${unified_container}" + return 1 + fi + + # Parse supervisor status and show service states + echo "" + echo "Service Status:" + echo "---------------" + echo "$supervisor_status" | while IFS= read -r line; do + service_name=$(echo "$line" | awk '{print $1}') + service_state=$(echo "$line" | awk '{print $2}') + + if [ "$service_state" = "RUNNING" ]; then + echo " ✓ $service_name: $service_state" + else + echo " ✗ $service_name: $service_state" + if [ "$service_name" = "query_service" ] || [ "$service_name" = "ml_service" ] || [ "$service_name" = "api_tokenmanager" ]; then + echo " To view logs: docker exec ${unified_container} tail -n 50 /var/log/supervisor/${service_name}.err.log" + fi + fi + done + + # Check if services are accessible via HTTP + echo "" + echo "Checking HTTP endpoints..." + echo "--------------------------" + + # API Token Manager (port 8000) + if curl -s -f http://localhost:8000/ > /dev/null 2>&1; then + echo " ✓ API Token Manager: http://localhost:8000/ (accessible)" + else + echo " ✗ API Token Manager: http://localhost:8000/ (not responding yet)" + echo " This service may still be starting. Check logs:" + echo " docker exec ${unified_container} tail -n 50 /var/log/supervisor/api_tokenmanager.err.log" + fi + + # Query Service (port 8010) + if curl -s -f http://localhost:8010/api/ > /dev/null 2>&1; then + echo " ✓ Query Service: http://localhost:8010/ (accessible)" + else + echo " ✗ Query Service: http://localhost:8010/ (not responding yet)" + echo " This service may still be starting. Check logs:" + echo " docker exec ${unified_container} tail -n 50 /var/log/supervisor/query_service.err.log" + fi + + # ML Service (port 8007) + if curl -s -f http://localhost:8007/api/ > /dev/null 2>&1; then + echo " ✓ ML Service: http://localhost:8007/ (accessible)" + else + echo " ✗ ML Service: http://localhost:8007/ (not responding yet)" + echo " This service may still be starting. 
Check logs:" + echo " docker exec ${unified_container} tail -n 50 /var/log/supervisor/ml_service.err.log" + fi + + echo "" + echo "Note: Services may take 30-90 seconds to fully start." + echo "If services are not accessible after 2 minutes, check the logs using the commands above." + echo "" +} + # If no arguments provided, default to "up -d" if [ $# -eq 0 ]; then set -- up -d @@ -447,15 +563,28 @@ fi # Detect which docker-compose command is available # Try docker compose (plugin, newer) first, then docker-compose (standalone, older) +DOCKER_COMPOSE_CMD="" if docker compose version >/dev/null 2>&1; then - # Use docker compose (plugin version) - exec docker compose "${FINAL_ARGS[@]}" + DOCKER_COMPOSE_CMD="docker compose" elif command -v docker-compose >/dev/null 2>&1; then - # Use docker-compose (standalone version) - exec docker-compose "${FINAL_ARGS[@]}" + DOCKER_COMPOSE_CMD="docker-compose" else echo " Error: Neither 'docker compose' nor 'docker-compose' found in PATH" echo " Please install Docker Compose: https://docs.docker.com/compose/install/" exit 1 fi +# Execute docker-compose command +$DOCKER_COMPOSE_CMD "${FINAL_ARGS[@]}" +COMPOSE_EXIT_CODE=$? + +# If this was an 'up' command and it succeeded, check service health +if [ $COMPOSE_EXIT_CODE -eq 0 ]; then + # Check if this was an 'up' command (not for individual services) + if [[ " ${FINAL_ARGS[@]} " =~ " up " ]] && [ -z "$SERVICE_NAME" ]; then + check_services_health + fi +fi + +exit $COMPOSE_EXIT_CODE + diff --git a/validate_env.sh b/validate_env.sh new file mode 100755 index 0000000..36fed41 --- /dev/null +++ b/validate_env.sh @@ -0,0 +1,149 @@ +#!/bin/bash +# Validation script for .env file +# This script checks if critical environment variables are set correctly + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +echo "Validating .env configuration..." +echo "" + +# Check if .env exists +if [ ! -f ".env" ]; then + echo "❌ ERROR: .env file not found!" 
+ echo "" + echo "Please create it from the template:" + echo " cp env.template .env" + echo "" + exit 1 +fi + +# Load environment variables (explicit ./ so bash never searches PATH for ".env"; parse errors are tolerated on purpose) +source ./.env 2>/dev/null || true + +ERRORS=0 +WARNINGS=0 + +# Check critical passwords (each template placeholder left unchanged counts as an error) +echo "Checking passwords..." +if [ "$POSTGRES_PASSWORD" = "your_secure_password_change_this" ]; then + echo " ❌ POSTGRES_PASSWORD is still set to default value" + ERRORS=$((ERRORS + 1)) +fi + +if [ "$DB_PASSWORD" = "your_secure_password_change_this" ]; then + echo " ❌ DB_PASSWORD is still set to default value" + ERRORS=$((ERRORS + 1)) +fi + +if [ "$JWT_POSTGRES_DATABASE_PASSWORD" = "your_secure_password_change_this" ]; then + echo " ❌ JWT_POSTGRES_DATABASE_PASSWORD is still set to default value" + ERRORS=$((ERRORS + 1)) +fi + +if [ "$DJANGO_SUPERUSER_PASSWORD" = "your_secure_password_change_this" ]; then + echo " ❌ DJANGO_SUPERUSER_PASSWORD is still set to default value" + ERRORS=$((ERRORS + 1)) +fi + +if [ "$OXIGRAPH_PASSWORD" = "your_oxigraph_password_change_this" ]; then + echo " ❌ OXIGRAPH_PASSWORD is still set to default value" + ERRORS=$((ERRORS + 1)) +fi + +if [ "$GRAPHDATABASE_PASSWORD" = "your_oxigraph_password_change_this" ]; then + echo " ❌ GRAPHDATABASE_PASSWORD is still set to default value" + ERRORS=$((ERRORS + 1)) +fi + +# Check password consistency +if [ "$POSTGRES_PASSWORD" != "$DB_PASSWORD" ]; then + echo " ❌ POSTGRES_PASSWORD and DB_PASSWORD do not match" + ERRORS=$((ERRORS + 1)) +fi + +if [ "$POSTGRES_PASSWORD" != "$JWT_POSTGRES_DATABASE_PASSWORD" ]; then + echo " ❌ POSTGRES_PASSWORD and JWT_POSTGRES_DATABASE_PASSWORD do not match" + ERRORS=$((ERRORS + 1)) +fi + +if [ "$OXIGRAPH_PASSWORD" != "$GRAPHDATABASE_PASSWORD" ]; then + echo " ❌ OXIGRAPH_PASSWORD and GRAPHDATABASE_PASSWORD do not match" + ERRORS=$((ERRORS + 1)) +fi + +# Check JWT secret keys +if [ "$QUERY_SERVICE_JWT_SECRET_KEY" = "your-query-service-jwt-secret-key-change-this-in-production" ]; then + echo " ❌ QUERY_SERVICE_JWT_SECRET_KEY is still set to default 
value" + ERRORS=$((ERRORS + 1)) +fi + +if [ "$ML_SERVICE_JWT_SECRET_KEY" = "your-ml-service-jwt-secret-key-change-this-in-production" ]; then + echo " ❌ ML_SERVICE_JWT_SECRET_KEY is still set to default value" + ERRORS=$((ERRORS + 1)) +fi + +# Check required environment variables +echo "" +echo "Checking required variables..." +REQUIRED_VARS=( + "POSTGRES_USER" + "POSTGRES_PASSWORD" + "POSTGRES_DB" + "DB_USER" + "DB_PASSWORD" + "DB_NAME" + "JWT_POSTGRES_DATABASE_USER" + "JWT_POSTGRES_DATABASE_PASSWORD" + "JWT_POSTGRES_DATABASE_NAME" + "QUERY_SERVICE_JWT_SECRET_KEY" + "ML_SERVICE_JWT_SECRET_KEY" +) + +for var in "${REQUIRED_VARS[@]}"; do + if [ -z "${!var}" ]; then + echo " ❌ $var is not set" + ERRORS=$((ERRORS + 1)) + fi +done + +# Check optional but recommended variables +echo "" +echo "Checking optional variables..." +OPTIONAL_VARS=( + "MONGO_DB_URL" + "OLLAMA_API_ENDPOINT" + "GROBID_SERVER_URL_OR_EXTERNAL_SERVICE" +) + +for var in "${OPTIONAL_VARS[@]}"; do + if [ -z "${!var}" ]; then + echo " ⚠️ $var is not set (optional, some features may not work)" + WARNINGS=$((WARNINGS + 1)) + fi +done + +# Summary +echo "" +echo "==========================================" +if [ $ERRORS -eq 0 ]; then + echo "✅ Validation passed!" + if [ $WARNINGS -gt 0 ]; then + echo "⚠️ $WARNINGS warnings (optional features may not work)" + fi + echo "" + echo "You can now start the services:" + echo " ./start_services.sh" + echo "" + exit 0 +else + echo "❌ Validation failed with $ERRORS error(s)" + if [ $WARNINGS -gt 0 ]; then + echo "⚠️ $WARNINGS warning(s)" + fi + echo "" + echo "Please fix the errors above before starting services." + echo "Edit your .env file to update the configuration." 
+ echo "" + exit 1 +fi From d76baedba754206f3e3f4be1acf1ff2b85bb0a89 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 22:11:42 +0000 Subject: [PATCH 3/3] Address code review feedback - improve safety and maintainability Co-authored-by: Sulstice <11812946+Sulstice@users.noreply.github.com> --- start_services.sh | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/start_services.sh b/start_services.sh index 61ecff7..480e721 100755 --- a/start_services.sh +++ b/start_services.sh @@ -219,6 +219,11 @@ setup_ollama() { check_services_health() { local unified_container="brainkb-unified" + # API endpoint constants + local API_TOKEN_ENDPOINT="http://localhost:8000/" + local QUERY_SERVICE_ENDPOINT="http://localhost:8010/api/" + local ML_SERVICE_ENDPOINT="http://localhost:8007/api/" + # Wait a bit for services to start echo "" echo "Waiting for services to start..." @@ -266,16 +271,16 @@ check_services_health() { echo "--------------------------" # API Token Manager (port 8000) - if curl -s -f http://localhost:8000/ > /dev/null 2>&1; then - echo " ✓ API Token Manager: http://localhost:8000/ (accessible)" + if curl -s -f "$API_TOKEN_ENDPOINT" > /dev/null 2>&1; then + echo " ✓ API Token Manager: $API_TOKEN_ENDPOINT (accessible)" else - echo " ✗ API Token Manager: http://localhost:8000/ (not responding yet)" + echo " ✗ API Token Manager: $API_TOKEN_ENDPOINT (not responding yet)" echo " This service may still be starting. 
Check logs:" echo " docker exec ${unified_container} tail -n 50 /var/log/supervisor/api_tokenmanager.err.log" fi # Query Service (port 8010) - if curl -s -f http://localhost:8010/api/ > /dev/null 2>&1; then + if curl -s -f "$QUERY_SERVICE_ENDPOINT" > /dev/null 2>&1; then echo " ✓ Query Service: http://localhost:8010/ (accessible)" else echo " ✗ Query Service: http://localhost:8010/ (not responding yet)" @@ -284,7 +289,7 @@ check_services_health() { fi # ML Service (port 8007) - if curl -s -f http://localhost:8007/api/ > /dev/null 2>&1; then + if curl -s -f "$ML_SERVICE_ENDPOINT" > /dev/null 2>&1; then echo " ✓ ML Service: http://localhost:8007/ (accessible)" else echo " ✗ ML Service: http://localhost:8007/ (not responding yet)" @@ -574,8 +579,9 @@ else exit 1 fi -# Execute docker-compose command -$DOCKER_COMPOSE_CMD "${FINAL_ARGS[@]}" +# Execute docker-compose command without eval: DOCKER_COMPOSE_CMD is set by this script to +# "docker compose" or "docker-compose"; split it into an argv array, then append the arguments verbatim +IFS=' ' read -r -a compose_argv <<< "$DOCKER_COMPOSE_CMD"; "${compose_argv[@]}" "${FINAL_ARGS[@]}" COMPOSE_EXIT_CODE=$? # If this was an 'up' command and it succeeded, check service health