Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions .github/workflows/ci-cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ jobs:
git pull origin main

cat > .env << 'EOF'
DO_HOST_1=${{ secrets.DO_HOST_1 }}
DO_HOST_2=${{ secrets.DO_HOST_2 }}
DO_OBSERVABILITY=${{ secrets.DO_OBSERVABILITY }}
DATABASE_NAME=${{ secrets.DATABASE_NAME }}
DATABASE_HOST=${{ secrets.DATABASE_HOST }}
DATABASE_PORT=${{ secrets.DATABASE_PORT }}
Expand All @@ -119,6 +122,10 @@ jobs:
SMTP_AUTH_PASSWORD=${{ secrets.SMTP_AUTH_PASSWORD }}
ALERT_EMAIL_TO=${{ secrets.ALERT_EMAIL_TO }}
DISCORD_WEBHOOK_URL=${{ secrets.DISCORD_WEBHOOK_URL }}
S3_KEY=${{ secrets.S3_KEY }}
SECRET_S3_KEY=${{ secrets.SECRET_S3_KEY }}
AWS_REGION=${{ secrets.AWS_REGION }}
LOKI_S3_BUCKET=${{ secrets.LOKI_S3_BUCKET }}
EOF

docker build -t url-shortner-app .
Expand Down Expand Up @@ -149,6 +156,9 @@ jobs:
git pull origin main

cat > .env << 'EOF'
DO_HOST_1=${{ secrets.DO_HOST_1 }}
DO_HOST_2=${{ secrets.DO_HOST_2 }}
DO_OBSERVABILITY=${{ secrets.DO_OBSERVABILITY }}
DATABASE_NAME=${{ secrets.DATABASE_NAME }}
DATABASE_HOST=${{ secrets.DATABASE_HOST }}
DATABASE_PORT=${{ secrets.DATABASE_PORT }}
Expand All @@ -163,10 +173,44 @@ jobs:
SMTP_AUTH_PASSWORD=${{ secrets.SMTP_AUTH_PASSWORD }}
ALERT_EMAIL_TO=${{ secrets.ALERT_EMAIL_TO }}
DISCORD_WEBHOOK_URL=${{ secrets.DISCORD_WEBHOOK_URL }}
S3_KEY=${{ secrets.S3_KEY }}
SECRET_S3_KEY=${{ secrets.SECRET_S3_KEY }}
AWS_REGION=${{ secrets.AWS_REGION }}
LOKI_S3_BUCKET=${{ secrets.LOKI_S3_BUCKET }}
EOF

docker build -t url-shortner-app .
docker build -t url-shortner-frontend -f frontend/docker/Dockerfile frontend

docker compose -f docker-compose.prod.yml up -d --remove-orphans
docker compose -f docker-compose.prod.yml restart nginx

# Deploys the observability stack over SSH to the host named by the
# DO_OBSERVABILITY secret: pulls the latest `main`, regenerates `.env` from
# repository secrets, then (re)starts docker-compose.observability.prod.yml.
- name: Deploy observability stack to observability droplet
  uses: appleboy/ssh-action@v1.0.3
  with:
    host: ${{ secrets.DO_OBSERVABILITY }}
    username: root
    key: ${{ secrets.DO_SSH_PRIVATE_KEY }}
    # Upper bound for the whole remote script.
    command_timeout: 30m
    # `set -e` aborts on the first failing command so a failed `git pull`
    # cannot rewrite `.env` and redeploy a stale checkout.
    # The heredoc delimiter is quoted ('EOF') so the remote shell writes the
    # secret values verbatim instead of expanding `$` or backticks in them.
    # `chmod 600` keeps the generated secrets file readable by its owner only.
    script: |
      set -e

      cd ~/MetaHackathon
      git pull origin main

      cat > .env << 'EOF'
      DO_HOST_1=${{ secrets.DO_HOST_1 }}
      DO_HOST_2=${{ secrets.DO_HOST_2 }}
      DO_OBSERVABILITY=${{ secrets.DO_OBSERVABILITY }}
      SMTP_SMARTHOST=${{ secrets.SMTP_SMARTHOST }}
      SMTP_FROM=${{ secrets.SMTP_FROM }}
      SMTP_AUTH_USERNAME=${{ secrets.SMTP_AUTH_USERNAME }}
      SMTP_AUTH_PASSWORD=${{ secrets.SMTP_AUTH_PASSWORD }}
      ALERT_EMAIL_TO=${{ secrets.ALERT_EMAIL_TO }}
      ALERT_SMS_TO=${{ secrets.ALERT_SMS_TO }}
      DISCORD_WEBHOOK_URL=${{ secrets.DISCORD_WEBHOOK_URL }}
      S3_KEY=${{ secrets.S3_KEY }}
      SECRET_S3_KEY=${{ secrets.SECRET_S3_KEY }}
      AWS_REGION=${{ secrets.AWS_REGION }}
      LOKI_S3_BUCKET=${{ secrets.LOKI_S3_BUCKET }}
      EOF
      chmod 600 .env

      docker compose -f docker-compose.observability.prod.yml up -d --remove-orphans
19 changes: 8 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Meta Production Engineering Hackathon

![Python](https://img.shields.io/badge/Python-3.11%2B-3776AB?logo=python&logoColor=white)
![Flask](https://img.shields.io/badge/Backend-Flask-000000?logo=flask&logoColor=white)
![Frontend](https://img.shields.io/badge/Frontend-Next.js-000000?logo=nextdotjs&logoColor=white)
![Infra](https://img.shields.io/badge/Infra-Docker%20%7C%20Nginx-2496ED?logo=docker&logoColor=white)
![Observability](https://img.shields.io/badge/Observability-Prometheus%20%7C%20Grafana%20%7C%20Loki-E6522C)
![Scale Tested](https://img.shields.io/badge/Scale%20Tested-~7000%20Concurrent%20Users-success)

This is the most scalable, most reliable, and most guaranteed-to-wake-up-the-on-call-engineer URL shortener of all time. Brought to you by four students from Canada: two from Waterloo and two from Concordia.

## Quick Links
Expand Down Expand Up @@ -191,17 +198,7 @@ One of the problems we had was with malformed data and we would get a lone error

## Runbooks

### Incident Response

- [Backend Outage](#)
- [Database Issues](#)
- [High Latency](#)

### Operational Tasks

- [Scaling the Backend](#)
- [Backup Procedures](#)
- [Log Access](#)
- [Runbook](docs/RUNBOOK.md)

---

Expand Down
93 changes: 93 additions & 0 deletions docker-compose.observability.prod.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Observability stack: Prometheus, Alertmanager (plus a Discord relay),
# blackbox exporter, OpenTelemetry collector, Loki and Grafana.
# Bind mounts are resolved against ${PWD}, so `docker compose` must be run
# from the repository root.
services:
  prometheus:
    image: prom/prometheus:latest
    restart: always
    ports:
      - "9090:9090"
    volumes:
      - ${PWD}/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - ${PWD}/prometheus/alerts.yml:/etc/prometheus/alerts.yml:ro
      - prometheus-data:/prometheus

  alertmanager:
    image: prom/alertmanager:latest
    restart: always
    # The mounted config is a template: at start-up the *_VALUE placeholders
    # are replaced with the container's environment variables and the result
    # is written to /tmp/alertmanager.yml, which Alertmanager then loads.
    # `$$` is Compose's escape for a literal `$`, so the variables are expanded
    # by the container shell rather than by Compose.
    # NOTE(review): values containing `|`, `&` or `\` are special in a sed
    # replacement and would be mangled; confirm the secrets avoid them.
    entrypoint: /bin/sh
    command:
      - -c
      - |
        sed -e "s|SMTP_SMARTHOST_VALUE|$$SMTP_SMARTHOST|g" \
            -e "s|SMTP_FROM_VALUE|$$SMTP_FROM|g" \
            -e "s|SMTP_AUTH_USERNAME_VALUE|$$SMTP_AUTH_USERNAME|g" \
            -e "s|SMTP_AUTH_PASSWORD_VALUE|$$SMTP_AUTH_PASSWORD|g" \
            -e "s|ALERT_EMAIL_TO_VALUE|$$ALERT_EMAIL_TO|g" \
            -e "s|ALERT_SMS_TO_VALUE|$$ALERT_SMS_TO|g" \
            -e "s|DISCORD_WEBHOOK_URL_VALUE|$$DISCORD_WEBHOOK_URL|g" \
            /etc/alertmanager/alertmanager.tmpl.yml > /tmp/alertmanager.yml \
          && exec /bin/alertmanager --config.file=/tmp/alertmanager.yml
    environment:
      SMTP_SMARTHOST: ${SMTP_SMARTHOST:-localhost:25}
      SMTP_FROM: ${SMTP_FROM:-alerts@example.com}
      SMTP_AUTH_USERNAME: ${SMTP_AUTH_USERNAME:-}
      SMTP_AUTH_PASSWORD: ${SMTP_AUTH_PASSWORD:-}
      ALERT_EMAIL_TO: ${ALERT_EMAIL_TO:-admin@example.com}
      ALERT_SMS_TO: ${ALERT_SMS_TO:-}
      DISCORD_WEBHOOK_URL: ${DISCORD_WEBHOOK_URL:-}
    volumes:
      - ${PWD}/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.tmpl.yml:ro
    ports:
      - "9093:9093"

  alertmanager-discord:
    image: benjojo/alertmanager-discord:latest
    restart: always
    environment:
      DISCORD_WEBHOOK: ${DISCORD_WEBHOOK_URL:-}

  blackbox-exporter:
    image: prom/blackbox-exporter:latest
    restart: always
    volumes:
      - ${PWD}/prometheus/blackbox.yml:/etc/blackbox_exporter/config.yml:ro
    ports:
      - "9115:9115"

  otel:
    image: otel/opentelemetry-collector-contrib:latest
    restart: always
    volumes:
      - ${PWD}/otel/config.yaml:/etc/otelcol-contrib/config.yaml
    ports:
      - "8889:8889"  # Prometheus scrape endpoint (metrics exposed by OTel)
      - "4318:4318"  # OTLP HTTP receiver (apps push to this)

  loki:
    image: grafana/loki:3.0.0
    restart: always
    command: -config.file=/etc/loki/config.yml
    ports:
      - "3100:3100"
    volumes:
      - ${PWD}/loki/config.yml:/etc/loki/config.yml:ro
      - loki-data:/loki

  grafana:
    image: grafana/grafana:latest
    restart: always
    ports:
      - "3000:3000"
    environment:
      # Admin credentials come from the environment / .env file. The fallback
      # is still admin/admin; set both variables before exposing port 3000.
      - GF_SECURITY_ADMIN_USER=${GF_SECURITY_ADMIN_USER:-admin}
      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin}
    volumes:
      - grafana-data:/var/lib/grafana
      - ${PWD}/grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro
      - ${PWD}/grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro
      - ${PWD}/grafana/provisioning/alerting:/etc/grafana/provisioning/alerting:ro
      - ${PWD}/grafana/dashboards:/var/lib/grafana/dashboards:ro

# Named volumes that persist service data across container re-creation.
volumes:
  prometheus-data:
  loki-data:
  grafana-data:
105 changes: 4 additions & 101 deletions docker-compose.prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ services:
DATABASE_SSLMODE: ${DATABASE_SSLMODE:-require}
REDIS_URL: ${REDIS_URL}
SECRET_KEY: ${SECRET_KEY}
LOG_FILE_PATH: /app/logs/app-1.log
OTEL_EXPORTER_OTLP_ENDPOINT: http://otel:4318
LOG_FILE_PATH: /app/logs/app.log
OTEL_EXPORTER_OTLP_ENDPOINT: http://${DO_OBSERVABILITY}:4318
volumes:
- app1_logs:/app/logs
healthcheck:
Expand All @@ -34,8 +34,8 @@ services:
DATABASE_SSLMODE: ${DATABASE_SSLMODE:-require}
REDIS_URL: ${REDIS_URL}
SECRET_KEY: ${SECRET_KEY}
LOG_FILE_PATH: /app/logs/app-2.log
OTEL_EXPORTER_OTLP_ENDPOINT: http://otel:4318
LOG_FILE_PATH: /app/logs/app.log
OTEL_EXPORTER_OTLP_ENDPOINT: http://${DO_OBSERVABILITY}:4318
volumes:
- app2_logs:/app/logs
healthcheck:
Expand All @@ -59,86 +59,6 @@ services:
ports:
- "3000:3000"

prometheus:
image: prom/prometheus:latest
restart: always
ports:
- "9090:9090"
volumes:
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
- ./prometheus/alerts.yml:/etc/prometheus/alerts.yml:ro

alertmanager:
image: prom/alertmanager:latest
restart: always
entrypoint: /bin/sh
command:
- -c
- |
sed -e "s|SMTP_SMARTHOST_VALUE|$$SMTP_SMARTHOST|g" \
-e "s|SMTP_FROM_VALUE|$$SMTP_FROM|g" \
-e "s|SMTP_AUTH_USERNAME_VALUE|$$SMTP_AUTH_USERNAME|g" \
-e "s|SMTP_AUTH_PASSWORD_VALUE|$$SMTP_AUTH_PASSWORD|g" \
-e "s|ALERT_EMAIL_TO_VALUE|$$ALERT_EMAIL_TO|g" \
-e "s|SMTP_AUTH_PASSWORD_VALUE|$$SMTP_AUTH_PASSWORD|g" \
-e "s|ALERT_SMS_TO_VALUE|$$ALERT_SMS_TO|g" \
-e "s|DISCORD_WEBHOOK_URL_VALUE|$$DISCORD_WEBHOOK_URL|g" \
/etc/alertmanager/alertmanager.tmpl.yml > /tmp/alertmanager.yml \
&& exec /bin/alertmanager --config.file=/tmp/alertmanager.yml --cluster.listen-address=""
environment:
SMTP_SMARTHOST: ${SMTP_SMARTHOST:-localhost:25}
SMTP_FROM: ${SMTP_FROM:-alerts@example.com}
SMTP_AUTH_USERNAME: ${SMTP_AUTH_USERNAME:-}
SMTP_AUTH_PASSWORD: ${SMTP_AUTH_PASSWORD:-}
ALERT_EMAIL_TO: ${ALERT_EMAIL_TO:-admin@example.com}
ALERT_SMS_TO: ${ALERT_SMS_TO:-}
DISCORD_WEBHOOK_URL: ${DISCORD_WEBHOOK_URL:-}
volumes:
- ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.tmpl.yml:ro
ports:
- "9093:9093"

alertmanager-discord:
image: benjojo/alertmanager-discord:latest
restart: always
environment:
DISCORD_WEBHOOK: ${DISCORD_WEBHOOK_URL:-}

blackbox-exporter:
image: prom/blackbox-exporter:latest
restart: always
volumes:
- ./prometheus/blackbox.yml:/etc/blackbox_exporter/config.yml:ro
ports:
- "9115:9115"

node-exporter:
image: prom/node-exporter:latest
restart: always
command:
- --no-collector.kernel_hung
ports:
- "9100:9100"

otel:
image: otel/opentelemetry-collector-contrib:latest
restart: always
volumes:
- ./otel/config.yaml:/etc/otelcol-contrib/config.yaml
ports:
- "8889:8889"
- "4318:4318"

loki:
image: grafana/loki:3.0.0
restart: always
command: -config.file=/etc/loki/config.yml
ports:
- "3100:3100"
volumes:
- ./loki/config.yml:/etc/loki/config.yml:ro
- loki-data:/loki

promtail:
image: grafana/promtail:3.0.0
restart: always
Expand All @@ -149,24 +69,7 @@ services:
- /var/lib/docker/containers:/var/lib/docker/containers:ro
- promtail-data:/tmp

grafana:
image: grafana/grafana:latest
restart: always
ports:
- "3001:3000"
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=admin
volumes:
- grafana-data:/var/lib/grafana
- ./grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro
- ./grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro
- ./grafana/provisioning/alerting:/etc/grafana/provisioning/alerting:ro
- ./grafana/dashboards:/var/lib/grafana/dashboards:ro

volumes:
grafana-data:
loki-data:
promtail-data:
app1_logs:
app2_logs:
Loading