Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 6 additions & 16 deletions jobs/monitoring/gatus.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ job "gatus" {
type = "service"

meta {
domain = "status.redbrick.dcu.ie"
domain = "gatus.redbrick.dcu.ie"
}

group "db-web" {
Expand Down Expand Up @@ -193,7 +193,7 @@ endpoints:
url: "https://api.redbrick.dcu.ie"
<<: *defaults_https

- name: Better Timetable
- name: Timetable Sync
group: Services
url: "https://timetable.redbrick.dcu.ie"
<<: *defaults_https
Expand Down Expand Up @@ -264,39 +264,29 @@ endpoints:
url: "https://outline.solarracing.ie"
<<: *defaults_https

- name: Plume (on RB)
group: Other Socs
url: "https://cspp.rb.dcu.ie"
<<: *defaults_https

- name: Amikon Website
group: Other Socs
url: "https://amikon.me"
<<: *defaults_https

- name: Glados
group: Servers
url: "tcp://10.10.0.4:22"
url: "tcp://10.10.10.4:22"
<<: *defaults_tcp

- name: Wheatley
group: Servers
url: "tcp://10.10.0.5:22"
<<: *defaults_tcp

- name: Bastion VM
group: Servers
url: "tcp://136.206.16.50:2269"
url: "tcp://10.10.10.5:22"
<<: *defaults_tcp

- name: Johnson
group: Servers
url: "tcp://10.10.0.7:22"
url: "tcp://10.10.10.80:22"
<<: *defaults_tcp

- name: Chell
group: Servers
url: "tcp://10.10.0.6:22"
url: "tcp://10.10.10.6:22"
<<: *defaults_tcp

- name: Minecraft Vanilla
Expand Down
179 changes: 179 additions & 0 deletions jobs/monitoring/grafana.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
# Grafana served on the domain declared in `meta`, backed by its own Postgres
# instance.
#
# Groups:
#   * "database" - Postgres, registered as the "grafana-db" service.
#   * "web"      - a one-shot prestart task that waits for the database, then
#                  Grafana itself, exposed through Traefik via service tags.
job "grafana" {
  datacenters = ["aperture"]
  type        = "service"

  meta {
    # Public hostname; consumed by the Traefik router rule and by
    # GF_SERVER_ROOT_URL below through NOMAD_META_domain.
    domain = "grafana.redbrick.dcu.ie"
  }

  group "database" {
    count = 1

    network {
      # Dynamic host port mapped to Postgres' port inside the container.
      port "db" {
        to = 5432
      }
    }

    service {
      name = "grafana-db"
      port = "db"

      check {
        name     = "postgres-tcp"
        type     = "tcp"
        port     = "db"
        interval = "10s"
        timeout  = "2s"
      }
    }

    task "db" {
      driver = "docker"

      # Stop Postgres with SIGTERM so it can shut down cleanly; kill_timeout
      # bounds how long Nomad waits before force-killing the container.
      kill_signal    = "SIGTERM"
      kill_timeout   = "30s"
      shutdown_delay = "5s"

      config {
        image = "postgres:17-alpine"
        ports = ["db"]

        # Host-path persistence for the Postgres data directory.
        volumes = [
          "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/postgresql/data"
        ]
      }

      # Database name and credentials are read with `key` and exported as
      # environment variables. The file holds the database password, so it is
      # rendered into the task's secrets/ directory rather than local/.
      template {
        data        = <<EOH
POSTGRES_DB={{ key "grafana/db/name" }}
POSTGRES_USER={{ key "grafana/db/user" }}
POSTGRES_PASSWORD={{ key "grafana/db/password" }}
EOH
        destination = "secrets/db.env"
        env         = true
      }

      resources {
        cpu    = 500
        memory = 512
      }
    }
  }

  group "web" {
    count = 1

    network {
      # Dynamic host port mapped to Grafana's HTTP port inside the container.
      port "http" {
        to = 3000
      }
    }

    service {
      name = "grafana-rb"
      port = "http"

      # Grafana's dedicated health endpoint: unauthenticated, answers directly
      # (no redirect to the login page) and reports database connectivity.
      check {
        type     = "http"
        path     = "/api/health"
        interval = "10s"
        timeout  = "2s"
      }

      # Traefik routing plus a Content-Security-Policy header restricting
      # which origin may embed Grafana in a frame.
      # NOTE(review): the frame-ancestors origin is hard-coded; confirm it is
      # the site that actually embeds these dashboards.
      tags = [
        "traefik.enable=true",
        "traefik.http.routers.grafanarb.rule=Host(`${NOMAD_META_domain}`)",
        "traefik.http.routers.grafanarb.entrypoints=web,websecure",
        "traefik.http.routers.grafanarb.tls=true",
        "traefik.http.routers.grafanarb.middlewares=grafana-embed-headers",
        "traefik.http.middlewares.grafana-embed-headers.headers.contentSecurityPolicy=frame-ancestors https://status.redbrick.dcu.ie",
      ]
    }

    # Runs to completion before the main task starts: loops until a TCP
    # connection to the discovered database address succeeds.
    task "wait-for-db" {
      driver = "docker"

      lifecycle {
        hook    = "prestart"
        sidecar = false
      }

      config {
        image   = "alpine:3.19"
        command = "sh"
        args = [
          "-c",
          "while ! nc -z $DB_HOST $DB_PORT; do echo 'Waiting for DB...'; sleep 1; done; echo 'DB is ready!'"
        ]
      }

      # DB_HOST / DB_PORT come from the "grafana-db" service registration.
      # A change in the rendered values restarts this task so the loop picks
      # up the new address.
      template {
        destination = "local/.env"
        env         = true
        change_mode = "restart"
        data        = <<EOH
{{- range service "grafana-db" }}
DB_HOST={{ .Address }}
DB_PORT={{ .Port }}
{{- end }}
EOH
      }

      resources {
        cpu    = 50
        memory = 64
      }
    }

    task "grafana" {
      driver = "docker"

      config {
        # NOTE(review): no tag is given, unlike the other images in this job;
        # consider pinning a Grafana version so redeploys are reproducible.
        image = "grafana/grafana"
        ports = ["http"]

        # Persistent Grafana state plus the provisioned datasource file
        # rendered by the template further down.
        volumes = [
          "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/grafana",
          "local/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml"
        ]
      }

      # Grafana settings passed as GF_* environment variables. The file holds
      # the database password, so it is rendered into the task's secrets/
      # directory rather than local/.
      template {
        data        = <<EOH
GF_DATABASE_TYPE=postgres
GF_DATABASE_HOST={{ range service "grafana-db" }}{{ .Address }}:{{ .Port }}{{ end }}
GF_DATABASE_NAME={{ key "grafana/db/name" }}
GF_DATABASE_USER={{ key "grafana/db/user" }}
GF_DATABASE_PASSWORD={{ key "grafana/db/password" }}
# GF_FEATURE_TOGGLES_ENABLE=publicDashboards
GF_LOG_LEVEL=info
GF_AUTH_BASIC_ENABLED=true
GF_USERS_ALLOW_SIGN_UP=false
GF_SECURITY_ALLOW_EMBEDDING=true
GF_SERVER_ROOT_URL=https://{{ env "NOMAD_META_domain" }}

EOH
        destination = "secrets/.env"
        env         = true
      }

      # Datasource provisioning file: a single, non-editable default
      # Prometheus datasource whose port is taken from the "prometheus"
      # service registration.
      template {
        data        = <<EOH
apiVersion: 1

datasources:
  - name: Prometheus
    type: prometheus
    access: proxy
    {{- range service "prometheus" }}
    url: http://prometheus.service.consul:{{ .Port }}{{ end }}
    isDefault: true
    editable: false
EOH
        destination = "local/datasources.yml"
      }

      resources {
        cpu    = 500
        memory = 1024
      }
    }
  }
}
74 changes: 74 additions & 0 deletions jobs/monitoring/prometheus.hcl
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# Prometheus server scraping Nomad agents and node-exporter instances that are
# discovered through Consul. Registered as the "prometheus" service.
job "prometheus" {
  datacenters = ["aperture"]
  type        = "service"

  group "prometheus" {
    count = 1

    network {
      # Dynamic host port mapped to Prometheus' HTTP port in the container.
      port "http" {
        to = 9090
      }
    }

    task "prometheus" {
      driver = "docker"

      service {
        name = "prometheus"
        port = "http"

        # Without a check the registration is always reported healthy, so
        # lookups of the service could return an instance that is not serving.
        check {
          name     = "prometheus-healthy"
          type     = "http"
          path     = "/-/healthy"
          interval = "10s"
          timeout  = "2s"
        }
      }

      config {
        # NOTE(review): ":latest" makes redeploys non-reproducible; consider
        # pinning a release tag.
        image = "prom/prometheus:latest"
        ports = ["http"]

        # Only configuration is mounted; no volume is declared for the
        # Prometheus data directory.
        # NOTE(review): confirm that losing metrics history when the
        # container is replaced is acceptable.
        volumes = [
          "local/prometheus.yml:/etc/prometheus/prometheus.yml",
          "local/tokens/:/etc/prometheus/tokens/"
        ]
      }

      # Main Prometheus configuration. Both scrape jobs discover their targets
      # from the Consul catalog at consul.service.consul:8500.
      template {
        destination = "local/prometheus.yml"
        data        = <<EOF
global:
  scrape_interval: 10s
  evaluation_interval: 10s

scrape_configs:
  - job_name: 'nomad_metrics'
    consul_sd_configs:
      - server: 'consul.service.consul:8500'
        services: ['nomad-client', 'nomad']
        tags: ['http']
    metrics_path: /v1/metrics
    params:
      format: ['prometheus']

  - job_name: 'node-exporter'
    consul_sd_configs:
      - server: 'consul.service.consul:8500'
        services: ['node-exporter']
    metrics_path: /metrics
    relabel_configs:
      - source_labels: ['__meta_consul_service']
        target_label: 'job'
        replacement: 'node-exporter'
EOF
      }

      # Token file exposed to the container under /etc/prometheus/tokens/.
      # NOTE(review): no scrape job in the configuration above references this
      # file; confirm it is still needed. If it is, consider rendering it
      # under secrets/ instead of local/.
      template {
        destination = "local/tokens/minio"
        data        = <<EOF
{{ key "prometheus/minio" }}
EOF
      }

      resources {
        cpu    = 500
        memory = 512
      }
    }
  }
}