From fd1fb7b56a92e26db0c749695688d86c2096e0f5 Mon Sep 17 00:00:00 2001 From: wizzdom Date: Wed, 26 Feb 2025 04:20:49 +0000 Subject: [PATCH 1/9] monitoring: add grafana --- jobs/monitoring/grafana.hcl | 95 +++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 jobs/monitoring/grafana.hcl diff --git a/jobs/monitoring/grafana.hcl b/jobs/monitoring/grafana.hcl new file mode 100644 index 0000000..2d76bc1 --- /dev/null +++ b/jobs/monitoring/grafana.hcl @@ -0,0 +1,95 @@ +job "grafana" { + datacenters = ["aperture"] + + type = "service" + + group "monitoring" { + network { + port "http" { + to = 3000 + } + port "db" { + to = 5432 + } + } + + service { + name = "grafana" + port = "http" + + check { + type = "http" + path = "/" + interval = "10s" + timeout = "2s" + } + + tags = [ + "traefik.enable=true", + "traefik.http.routers.grafana.entrypoints=web,websecure", + "traefik.http.routers.grafana.rule=Host(`grafana.redbrick.dcu.ie`)", + "traefik.http.routers.grafana.tls=true", + "traefik.http.routers.grafana.tls.certresolver=lets-encrypt", + ] + } + + task "grafana" { + driver = "docker" + user = "1001:1001" + + env { + GF_AUTH_BASIC_ENABLED = "true" + GF_INSTALL_PLUGINS = "grafana-piechart-panel" + GF_SERVER_ROOT_URL = "https://grafana.redbrick.dcu.ie" + } + + config { + image = "grafana/grafana" + ports = ["http"] + + volumes = [ + "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/grafana" + ] + } + + + template { + data = < Date: Wed, 26 Feb 2025 04:21:12 +0000 Subject: [PATCH 2/9] monitoring: add prometheus --- jobs/monitoring/prometheus.hcl | 87 ++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) create mode 100644 jobs/monitoring/prometheus.hcl diff --git a/jobs/monitoring/prometheus.hcl b/jobs/monitoring/prometheus.hcl new file mode 100644 index 0000000..764673a --- /dev/null +++ b/jobs/monitoring/prometheus.hcl @@ -0,0 +1,87 @@ +job "prometheus" { + datacenters = ["aperture"] + + group "prometheus" { + network { + port "http" { + static = 9090 + } + } + + service { + name = "prometheus" + port = "http" + } + + task "prometheus" { + driver = "docker" + config { + image = "quay.io/prometheus/prometheus" + ports = ["http"] + + volumes = [ + "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/prometheus" + ] + + args = [ + "--config.file=$${NOMAD_TASK_DIR}/prometheus.yml", + "--log.level=info", + "--storage.tsdb.retention.time=90d", + "--storage.tsdb.path=/prometheus", + "--web.console.libraries=/usr/share/prometheus/console_libraries", + "--web.console.templates=/usr/share/prometheus/consoles" + ] + } + + template { + data = < Date: Fri, 28 Feb 2025 01:09:05 +0000 Subject: [PATCH 3/9] Removed Static, added better prom .yml --- jobs/monitoring/prometheus.hcl | 76 ++++++++++++++-------------------- 1 file changed, 30 insertions(+), 46 deletions(-) diff --git a/jobs/monitoring/prometheus.hcl b/jobs/monitoring/prometheus.hcl index 764673a..6ffa3af 100644 --- a/jobs/monitoring/prometheus.hcl +++ b/jobs/monitoring/prometheus.hcl @@ -1,87 +1,71 @@ job "prometheus" { datacenters = ["aperture"] + type = "service" group "prometheus" { + count = 1 + network { port "http" { - static = 9090 + to = 9090 } } - service { - name = "prometheus" - port = "http" - } - task "prometheus" { driver = "docker" + config { - image = "quay.io/prometheus/prometheus" + image = "prom/prometheus:latest" ports = ["http"] volumes = [ - "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/prometheus" - ] - - args = [ - "--config.file=$${NOMAD_TASK_DIR}/prometheus.yml", - "--log.level=info", - "--storage.tsdb.retention.time=90d", - "--storage.tsdb.path=/prometheus", - "--web.console.libraries=/usr/share/prometheus/console_libraries", - "--web.console.templates=/usr/share/prometheus/consoles" + "local/prometheus.yml:/etc/prometheus/prometheus.yml" ] } template { + destination = "local/prometheus.yml" data = < Date: Fri, 28 Feb 2025 01:23:37 +0000 Subject: [PATCH 4/9] traefik: use consul, bump res, add metrics, fix dummy service --- jobs/ingress/traefik.hcl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/jobs/ingress/traefik.hcl b/jobs/ingress/traefik.hcl index 2c830c5..3379f4f 100644 --- a/jobs/ingress/traefik.hcl +++ b/jobs/ingress/traefik.hcl @@ -44,6 +44,10 @@ job "traefik" { "/storage/nomad/traefik/access.log:/access.log", ] } + resources { + cpu = 500 + memory = 1024 + } template { destination = "local/.env" @@ -137,6 +141,12 @@ EOF [tracing] +[metrics] + [metrics.prometheus] + addServicesLabels = true + addRoutersLabels = true + addEntryPointsLabels = true + [accessLog] filePath = "/access.log" EOF @@ -221,6 +231,7 @@ EOF certResolver = "lets-encrypt" {{ end -}} + url = "http://127.0.0.1" # Dummy service - not used EOF } } From 1b54ac306f59c22c23d65d3149c93b54775e9b8e Mon Sep 17 00:00:00 2001 From: Gavin Holahan Date: Fri, 28 Feb 2025 02:12:19 +0000 Subject: [PATCH 5/9] service --- jobs/monitoring/prometheus.hcl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/jobs/monitoring/prometheus.hcl b/jobs/monitoring/prometheus.hcl index 6ffa3af..bd5c882 100644 --- a/jobs/monitoring/prometheus.hcl +++ b/jobs/monitoring/prometheus.hcl @@ -14,6 +14,11 @@ job "prometheus" { task "prometheus" { driver = "docker" + service { + name = "prometheus" + port = "http" + } + config { image = "prom/prometheus:latest" ports = ["http"] @@ -56,8 +61,6 @@ scrape_configs: metrics_path: /v1/metrics params: format: ['prometheus'] - - EOF } From 34a6b5e21e117f2ed9f0d5063a840de43ead7a71 Mon Sep 17 00:00:00 2001 From: wizzdom Date: Fri, 28 Feb 2025 02:40:16 +0000 Subject: [PATCH 6/9] grafana: add dynamic prometheus datasource --- jobs/monitoring/grafana.hcl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/jobs/monitoring/grafana.hcl b/jobs/monitoring/grafana.hcl index 2d76bc1..5a981ae 100644 --- a/jobs/monitoring/grafana.hcl +++ b/jobs/monitoring/grafana.hcl @@ -48,11 +48,11 @@ job "grafana" { ports = ["http"] volumes = [ - "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/grafana" + "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/grafana", + "local/datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml" ] } - template { data = < Date: Tue, 31 Mar 2026 16:58:41 +0100 Subject: [PATCH 7/9] Add node-exporter scrape to prometheus --- jobs/monitoring/prometheus.hcl | 72 ++++++++++++++++++++++------------ 1 file changed, 46 insertions(+), 26 deletions(-) diff --git a/jobs/monitoring/prometheus.hcl b/jobs/monitoring/prometheus.hcl index bd5c882..6601bbf 100644 --- a/jobs/monitoring/prometheus.hcl +++ b/jobs/monitoring/prometheus.hcl @@ -1,6 +1,6 @@ job "prometheus" { datacenters = ["aperture"] - type = "service" + type = "service" group "prometheus" { count = 1 @@ -24,43 +24,64 @@ job "prometheus" { ports = ["http"] volumes = [ - "local/prometheus.yml:/etc/prometheus/prometheus.yml" + "local/prometheus.yml:/etc/prometheus/prometheus.yml", + "local/tokens/:/etc/prometheus/tokens/" ] } template { destination = "local/prometheus.yml" - data = < Date: Mon, 1 Jun 2026 00:47:38 +0100 Subject: [PATCH 8/9] Added monitoring tools currently used + hcl update --- jobs/ingress/traefik.hcl | 508 +++++++++++++++++++++++------- jobs/monitoring/gatus.hcl | 349 ++++++++++++++++++++ jobs/monitoring/grafana.hcl | 155 ++++++--- jobs/monitoring/node-exporter.hcl | 43 +++ jobs/monitoring/prometheus.hcl | 2 +- jobs/monitoring/uptime-kuma.hcl | 48 --- 6 files changed, 897 insertions(+), 208 deletions(-) create mode 100644 jobs/monitoring/gatus.hcl create mode 100644 jobs/monitoring/node-exporter.hcl delete mode 100644 jobs/monitoring/uptime-kuma.hcl diff --git a/jobs/ingress/traefik.hcl b/jobs/ingress/traefik.hcl index 3379f4f..a86ef23 100644 --- a/jobs/ingress/traefik.hcl +++ b/jobs/ingress/traefik.hcl @@ -1,96 +1,182 @@ job "traefik" { datacenters = ["aperture"] - node_pool = "ingress" - type = "service" + type = "system" group "traefik" { network { port "http" { static = 80 } + port "https" { static = 443 } + port "admin" { static = 8080 } - port "ssh" { - static = 22 - } + port "voice-tcp" { static = 4502 } + port "voice-udp" { static = 4503 } + + port "matrix" { + static = 8448 + } + + port "smtp" { + static = 25 + } + + port "submissions" { + static = 465 + } + + port "submission" { + static = 587 + } + + port "imap" { + static = 143 + } + + port "imaps" { + static = 993 + } + + port "pop3" { + static = 110 + } + + port "pop3s" { + static = 995 + } + + port "managesieve" { + static = 4190 + } } service { name = "traefik-http" provider = "nomad" port = "admin" + + check { + name = "traefik-ping" + type = "http" + path = "/ping" + interval = "10s" + timeout = "2s" + } } task "traefik" { driver = "docker" + config { - image = "traefik" + image = "traefik:v3.3" network_mode = "host" volumes = [ - "local/traefik.toml:/etc/traefik/traefik.toml", - "/storage/nomad/traefik/acme/acme.json:/acme.json", - "/storage/nomad/traefik/acme/acme-dns.json:/acme-dns.json", + "local/traefik.toml:/etc/traefik/traefik.toml:ro", + + # Shared dynamic config directory. + # + # The ACME extractor writes: + # /storage/nomad/traefik/dynamic/90-acme-extracted-certs.toml + # + # Nomad overlays this job's generated config as: + # /dynamic/10-generated.toml + # + # Keeping both files directly under /dynamic avoids relying on + # Traefik watching subdirectories. + "/storage/nomad/traefik/dynamic:/dynamic", + "local/dynamic-generated.toml:/dynamic/10-generated.toml:ro", + + # PEM certs extracted from the ACME renewer. + "/storage/nomad/traefik/certs:/certs:ro", + "/storage/nomad/traefik/access.log:/access.log", ] } - resources { - cpu = 500 - memory = 1024 - } - - template { - destination = "local/.env" - env = true - change_mode = "restart" - data = < +# +# Example: +# redirect/redbrick/wiki = https://wiki.redbrick.dcu.ie/ {{ range $pair := tree "redirect/redbrick" }} - [http.middlewares.redirect-{{ trimPrefix "redirect/redbrick/" $pair.Key }}.redirectRegex] - regex = ".*" # match everything - hosts are handled by the router +{{ $name := trimPrefix "redirect/redbrick/" $pair.Key }} + [http.middlewares.redirect-{{ $name }}.redirectRegex] + regex = ".*" replacement = "{{ $pair.Value }}" permanent = true -{{- end }} + +{{ end }} + + # --- Short-link redirects for rb.dcu.ie --- +{{ range $pair := tree "redirect/rb" }} +{{ $name := trimPrefix "redirect/rb/" $pair.Key }} + [http.middlewares.redirect-rb-{{ $name }}.redirectRegex] + regex = ".*" + replacement = "{{ $pair.Value }}" + permanent = true +{{ end }} + +# --------------------------------------------------------------------------- +# HTTP routers +# --------------------------------------------------------------------------- [http.routers] +# HTTP-01 ACME challenge router. +# Public traffic reaches whichever Traefik owns the Keepalived VIP. +# That Traefik forwards the challenge to the active ACME renewer via Consul. +[http.routers.acme-http01] + rule = "PathPrefix(`/.well-known/acme-challenge/`)" + entryPoints = ["web"] + service = "traefik-acme-renewer" + priority = 10000 + [http.routers.webtree] rule = "HostRegexp(`^([a-z0-9_-]+)\\.redbrick\\.dcu\\.ie$`) || ((Host(`redbrick.dcu.ie`) || Host(`www.redbrick.dcu.ie`)) && PathPrefix(`/~`))" entryPoints = ["websecure"] @@ -178,62 +291,225 @@ EOF service = "webtree@consulcatalog" [http.routers.webtree.tls] - certResolver = "rb" -# create routers with middlewares for each redirect +# --- redbrick.dcu.ie short-link redirect routers --- {{ range $pair := tree "redirect/redbrick" }} - [http.routers.{{ trimPrefix "redirect/redbrick/" $pair.Key }}-redirect] - rule = "Host(`{{ trimPrefix "redirect/redbrick/" $pair.Key }}.redbrick.dcu.ie`)" +{{ $name := trimPrefix "redirect/redbrick/" $pair.Key }} + [http.routers.{{ $name }}-redirect] + rule = "Host(`{{ $name }}.redbrick.dcu.ie`)" entryPoints = ["web", "websecure"] - middlewares = ["redirect-{{ trimPrefix "redirect/redbrick/" $pair.Key }}"] - service = "dummy-service" # all routers need a service, this isn't used - [http.routers.{{ trimPrefix "redirect/redbrick/" $pair.Key }}-redirect.tls] -{{- end }} + middlewares = ["redirect-{{ $name }}"] + service = "dummy-service" + priority = 50 + + [http.routers.{{ $name }}-redirect.tls] + +{{ end }} +# --- rb.dcu.ie short-link redirect routers --- +{{ range $pair := tree "redirect/rb" }} +{{ $name := trimPrefix "redirect/rb/" $pair.Key }} + [http.routers.{{ $name }}-rb-redirect] + rule = "Host(`{{ $name }}.rb.dcu.ie`)" + entryPoints = ["web", "websecure"] + middlewares = ["redirect-rb-{{ $name }}"] + service = "dummy-service" + priority = 50 + + [http.routers.{{ $name }}-rb-redirect.tls] + +{{ end }} + +# Default TLS router. +# This exists so Traefik can present an extracted cert for known Redbrick zones +# even when no higher-priority router has matched. [http.routers.tls-default] - rule = "HostRegexp(`{any:.+}.redbrick.dcu.ie`) || HostRegexp(`{any:.+}.rb.dcu.ie`) || HostRegexp(`{any:.+}.redbrick.ie`)" - entryPoints = ["web", "websecure"] - service = "dummy-service" - priority = -1 + rule = "HostRegexp(`{any:.+}.redbrick.dcu.ie`) || HostRegexp(`{any:.+}.rb.dcu.ie`) || HostRegexp(`{any:.+}.redbrick.ie`)" + entryPoints = ["websecure"] + service = "dummy-service" + priority = -1 - [http.routers.tls-default.tls] - certResolver = "rb" + [http.routers.tls-default.tls] - [[http.routers.tls-default.tls.domains]] - main = "redbrick.dcu.ie" - sans = ["*.redbrick.dcu.ie"] +# Extra webtree domains from Consul KV. +{{ $i := 0 -}} +{{- range $pair := tree "webtree/domains" -}} + {{- $i = add $i 1 }} - [[http.routers.tls-default.tls.domains]] - main = "rb.dcu.ie" - sans = ["*.rb.dcu.ie"] +[http.routers.webtree-domain-{{ $i }}] + rule = "Host(`{{ $pair.Key }}`)" + entryPoints = ["websecure"] + service = "webtree@consulcatalog" + priority = 20 - [[http.routers.tls-default.tls.domains]] - main = "redbrick.ie" - sans = ["*.redbrick.ie"] + [http.routers.webtree-domain-{{ $i }}.tls] +{{ end }} + +# --------------------------------------------------------------------------- +# HTTP services +# --------------------------------------------------------------------------- [http.services] + + [http.services.traefik-acme-renewer.loadBalancer] + passHostHeader = true + +{{ range service "traefik-acme-renewer" }} + [[http.services.traefik-acme-renewer.loadBalancer.servers]] + url = "http://{{ .Address }}:{{ .Port }}" +{{ end }} + [http.services.dummy-service.loadBalancer] + [[http.services.dummy-service.loadBalancer.servers]] - url = "http://127.0.0.1" # Dummy service - not used + url = "http://127.0.0.1" -{{ $i := 0 -}} -{{- range $pair := tree "webtree/domains" -}} - {{- $i = add $i 1 -}} +# --------------------------------------------------------------------------- +# TCP routers +# --------------------------------------------------------------------------- - [http.routers.webtree-domain-{{ $i }}] - rule = "Host(`{{ $pair.Key }}`)" - entryPoints = ["web", "websecure"] - service = "webtree@consulcatalog" - priority = 20 +[tcp] + +[tcp.routers] - [http.routers.webtree-domain-{{ $i }}.tls] - certResolver = "lets-encrypt" + [tcp.routers.mail-smtp] + entryPoints = ["smtp"] + rule = "HostSNI(`*`)" + service = "mail-smtp" -{{ end -}} - url = "http://127.0.0.1" # Dummy service - not used + [tcp.routers.mail-submissions] + entryPoints = ["submissions"] + rule = "HostSNI(`*`)" + service = "mail-submissions" + + [tcp.routers.mail-submissions.tls] + passthrough = true + + [tcp.routers.mail-submission] + entryPoints = ["submission"] + rule = "HostSNI(`*`)" + service = "mail-submission" + + [tcp.routers.mail-imap] + entryPoints = ["imap"] + rule = "HostSNI(`*`)" + service = "mail-imap" + + [tcp.routers.mail-imaps] + entryPoints = ["imaps"] + rule = "HostSNI(`*`)" + service = "mail-imaps" + + [tcp.routers.mail-imaps.tls] + passthrough = true + + [tcp.routers.mail-pop3] + entryPoints = ["pop3"] + rule = "HostSNI(`*`)" + service = "mail-pop3" + + [tcp.routers.mail-pop3s] + entryPoints = ["pop3s"] + rule = "HostSNI(`*`)" + service = "mail-pop3s" + + [tcp.routers.mail-pop3s.tls] + passthrough = true + + [tcp.routers.mail-managesieve] + entryPoints = ["managesieve"] + rule = "HostSNI(`*`)" + service = "mail-managesieve" + +# --------------------------------------------------------------------------- +# TCP services +# --------------------------------------------------------------------------- + +# --------------------------------------------------------------------------- +# TCP services +# --------------------------------------------------------------------------- + +[tcp.services] + + [tcp.services.mail-smtp.loadBalancer] + [tcp.services.mail-smtp.loadBalancer.proxyProtocol] + version = 2 + +{{ range service "mailserver-smtp" }} + [[tcp.services.mail-smtp.loadBalancer.servers]] + address = "{{ .Address }}:{{ .Port }}" +{{ end }} + + [tcp.services.mail-submissions.loadBalancer] + [tcp.services.mail-submissions.loadBalancer.proxyProtocol] + version = 2 + +{{ range service "mailserver-submissions" }} + [[tcp.services.mail-submissions.loadBalancer.servers]] + address = "{{ .Address }}:{{ .Port }}" +{{ end }} + + [tcp.services.mail-submission.loadBalancer] + [tcp.services.mail-submission.loadBalancer.proxyProtocol] + version = 2 + +{{ range service "mailserver-submission" }} + [[tcp.services.mail-submission.loadBalancer.servers]] + address = "{{ .Address }}:{{ .Port }}" +{{ end }} + + [tcp.services.mail-imap.loadBalancer] + [tcp.services.mail-imap.loadBalancer.proxyProtocol] + version = 2 + +{{ range service "mailserver-imap" }} + [[tcp.services.mail-imap.loadBalancer.servers]] + address = "{{ .Address }}:{{ .Port }}" +{{ end }} + + [tcp.services.mail-imaps.loadBalancer] + [tcp.services.mail-imaps.loadBalancer.proxyProtocol] + version = 2 + +{{ range service "mailserver-imaps" }} + [[tcp.services.mail-imaps.loadBalancer.servers]] + address = "{{ .Address }}:{{ .Port }}" +{{ end }} + + [tcp.services.mail-pop3.loadBalancer] + [tcp.services.mail-pop3.loadBalancer.proxyProtocol] + version = 2 + +{{ range service "mailserver-pop3" }} + [[tcp.services.mail-pop3.loadBalancer.servers]] + address = "{{ .Address }}:{{ .Port }}" +{{ end }} + + [tcp.services.mail-pop3s.loadBalancer] + [tcp.services.mail-pop3s.loadBalancer.proxyProtocol] + version = 2 + +{{ range service "mailserver-pop3s" }} + [[tcp.services.mail-pop3s.loadBalancer.servers]] + address = "{{ .Address }}:{{ .Port }}" +{{ end }} + + [tcp.services.mail-managesieve.loadBalancer] + [tcp.services.mail-managesieve.loadBalancer.proxyProtocol] + version = 2 + +{{ range service "mailserver-managesieve" }} + [[tcp.services.mail-managesieve.loadBalancer.servers]] + address = "{{ .Address }}:{{ .Port }}" +{{ end }} EOF } + + resources { + cpu = 500 + memory = 512 + } } } -} +} \ No newline at end of file diff --git a/jobs/monitoring/gatus.hcl b/jobs/monitoring/gatus.hcl new file mode 100644 index 0000000..ca8255f --- /dev/null +++ b/jobs/monitoring/gatus.hcl @@ -0,0 +1,349 @@ +job "gatus" { + datacenters = ["aperture"] + type = "service" + + meta { + domain = "gatus.redbrick.dcu.ie" + } + + group "db-web" { + count = 1 + + network { + port "db" { + to = 5432 + } + } + + service { + name = "gatus-db" + port = "db" + + check { + name = "postgres-tcp" + type = "tcp" + port = "db" + interval = "10s" + timeout = "2s" + } + } + + task "db" { + driver = "docker" + + config { + image = "postgres:17-alpine" + ports = ["db"] + + volumes = [ + "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/postgresql/data" + ] + } + + template { + destination = "local/.env" + env = true + data = < 48h" + +endpoints: + # --- All Redbrick Monitors --- + - name: Atlas + group: Services + url: "https://redbrick.dcu.ie" + <<: *defaults_https + + - name: RB Wiki + group: Services + url: "https://wiki.redbrick.dcu.ie" + <<: *defaults_https + + - name: Admin API + group: Services + url: "https://api.redbrick.dcu.ie" + <<: *defaults_https + + - name: Timetable Sync + group: Services + url: "https://timetable.redbrick.dcu.ie" + <<: *defaults_https + + - name: Vaultwarden + group: Services + url: "https://vault.redbrick.dcu.ie" + <<: *defaults_https + + - name: Pastebin + group: Services + url: "https://paste.redbrick.dcu.ie" + <<: *defaults_https + + - name: Plausible + group: Services + url: "https://plausible.redbrick.dcu.ie" + <<: *defaults_https + + - name: RB Docs + group: Services + url: "https://docs.redbrick.dcu.ie" + <<: *defaults_https + + - name: Paperless + group: Services + url: "https://paperless.redbrick.dcu.ie" + <<: *defaults_https + + - name: DCU Clubs and Socs + group: Services + url: "https://dcuclubsandsocs.ie" + <<: *defaults_https + + - name: C&S Room Bookings + group: Services + url: "https://rooms.rb.dcu.ie" + <<: *defaults_https + + - name: Hedgedoc + group: Services + url: "https://md.redbrick.dcu.ie/_health" + <<: *defaults_https + + # --- Other Socs --- + - name: DCU Solar Racing + group: Other Socs + url: "https://solarracing.ie" + <<: *defaults_https + + - name: MPS Site + group: Other Socs + url: "https://dcumps.ie" + <<: *defaults_https + + - name: The Look + group: Other Socs + url: "https://thelookonline.dcu.ie" + <<: *defaults_https + + - name: The College View + group: Other Socs + url: "https://thecollegeview.ie" + <<: *defaults_https + + - name: Solar Racing Outline + group: Other Socs + url: "https://outline.solarracing.ie" + <<: *defaults_https + + - name: Amikon Website + group: Other Socs + url: "https://amikon.me" + <<: *defaults_https + + - name: Glados + group: Servers + url: "tcp://10.10.10.4:22" + <<: *defaults_tcp + + - name: Wheatley + group: Servers + url: "tcp://10.10.10.5:22" + <<: *defaults_tcp + + - name: Johnson + group: Servers + url: "tcp://10.10.10.80:22" + <<: *defaults_tcp + + - name: Chell + group: Servers + url: "tcp://10.10.10.6:22" + <<: *defaults_tcp + + - name: Minecraft Vanilla + group: Game Servers + url: "tcp://vanilla-mc.rb.dcu.ie:25565" + <<: *defaults_tcp + + - name: Discord Shortlink + group: Short Links + url: "https://discord.redbrick.dcu.ie" + <<: *defaults_https + + - name: Github Shortlink + group: Short Links + url: "https://github.redbrick.dcu.ie" + <<: *defaults_https + + - name: Instagram Shortlink + group: Short Links + url: "https://instagram.redbrick.dcu.ie" + <<: *defaults_https + + - name: Youtube Shortlink + group: Short Links + url: "https://youtube.redbrick.dcu.ie" + <<: *defaults_https + + - name: LinkedIn Shortlink + group: Short Links + url: "https://linkedin.redbrick.dcu.ie" + <<: *defaults_https + + - name: Twitch Shortlink + group: Short Links + url: "https://twitch.redbrick.dcu.ie" + <<: *defaults_https + + - name: Tiktok Shortlink + group: Short Links + url: "https://tiktok.redbrick.dcu.ie" + <<: *defaults_https +EOH + } + + resources { + cpu = 500 + memory = 512 + } + } + } +} \ No newline at end of file diff --git a/jobs/monitoring/grafana.hcl b/jobs/monitoring/grafana.hcl index 5a981ae..21c0968 100644 --- a/jobs/monitoring/grafana.hcl +++ b/jobs/monitoring/grafana.hcl @@ -1,20 +1,75 @@ job "grafana" { datacenters = ["aperture"] + type = "service" - type = "service" + meta { + domain = "grafana.redbrick.dcu.ie" + } + + group "database" { + count = 1 - group "monitoring" { network { - port "http" { - to = 3000 - } port "db" { to = 5432 } } service { - name = "grafana" + name = "grafana-db" + port = "db" + + check { + name = "postgres-tcp" + type = "tcp" + port = "db" + interval = "10s" + timeout = "2s" + } + } + + task "db" { + driver = "docker" + kill_signal = "SIGTERM" # SIGTERM instead of SIGKILL so database can shutdown safely + kill_timeout = "30s" + shutdown_delay = "5s" + + config { + image = "postgres:17-alpine" + ports = ["db"] + + volumes = [ + "/storage/nomad/${NOMAD_JOB_NAME}/${NOMAD_TASK_NAME}:/var/lib/postgresql/data" + ] + } + + template { + data = < Date: Mon, 1 Jun 2026 18:10:22 +0100 Subject: [PATCH 9/9] Remove container metrics. This part should be revisited to be added in the future --- jobs/monitoring/prometheus.hcl | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/jobs/monitoring/prometheus.hcl b/jobs/monitoring/prometheus.hcl index 0f6603c..1452e03 100644 --- a/jobs/monitoring/prometheus.hcl +++ b/jobs/monitoring/prometheus.hcl @@ -46,26 +46,6 @@ scrape_configs: params: format: ['prometheus'] - - job_name: 'container-metrics' - consul_sd_configs: - - server: 'consul.service.consul:8500' - tags: ['prometheus.enable=true'] - metrics_path: /metrics - relabel_configs: - - source_labels: ['__meta_consul_service'] - target_label: 'job' - replacement: 'consul-service' - - - source_labels: ['__meta_consul_tags'] - regex: '.*prometheus.path=([^,]+).*' - target_label: '__metrics_path__' - replacement: '/$1' - - - source_labels: ['__meta_consul_tags'] - regex: '.*prometheus.auth.bearer_token=([^,]+).*' - target_label: '__param_bearer_token_file' - replacement: '$1' - - job_name: 'node-exporter' consul_sd_configs: - server: 'consul.service.consul:8500'