Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .changeset/prom-alertmgr-debug-logs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
---
---
8 changes: 7 additions & 1 deletion infra/monitoring/entrypoint.alertmanager.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
#!/bin/sh
# Alertmanager container entrypoint: renders /etc/alertmanager/config.yml from
# its .tmpl template, then replaces this shell with the alertmanager process.
# Environment read: SLACK_WEBHOOK_URL, SLACK_CHANNEL, ALERTMANAGER_EXTERNAL_URL,
# ALERTMANAGER_LOG_LEVEL, ALERTMANAGER_LOG_FORMAT. Extra CLI args are forwarded.

# Replace the literal ${SLACK_WEBHOOK_URL} and ${SLACK_CHANNEL} placeholders in
# the template with the values from the environment. '|' is the sed delimiter,
# so a value containing '|', '&' or '\' would be misinterpreted by sed.
# NOTE(review): there is no `set -e`, so if sed fails (e.g. template missing)
# the script still continues to exec with an empty or partial config.yml.
sed "s|\${SLACK_WEBHOOK_URL}|${SLACK_WEBHOOK_URL}|g;s|\${SLACK_CHANNEL}|${SLACK_CHANNEL}|g" /etc/alertmanager/config.yml.tmpl > /etc/alertmanager/config.yml
# NOTE(review): per the hunk header (-1,3 +1,9; 7 additions, 1 deletion) this
# one-line exec is the pre-change form that the multi-line exec below replaces.
# Read literally as one script, exec never returns, so only this line would run.
exec /bin/alertmanager --config.file=/etc/alertmanager/config.yml --storage.path=/alertmanager ${ALERTMANAGER_EXTERNAL_URL:+--web.external-url="$ALERTMANAGER_EXTERNAL_URL"} "$@"
# Same invocation, one flag per line, with logging flags added:
#   --log.level  defaults to "debug" when ALERTMANAGER_LOG_LEVEL is unset/empty
#   --log.format defaults to "json"  when ALERTMANAGER_LOG_FORMAT is unset/empty
# --web.external-url is emitted only when ALERTMANAGER_EXTERNAL_URL is set and
# non-empty (the :+ expansion yields nothing otherwise).
# NOTE(review): debug as the built-in default is verbose; confirm that is
# intended for every environment rather than opt-in via the env var.
exec /bin/alertmanager \
--config.file=/etc/alertmanager/config.yml \
--storage.path=/alertmanager \
--log.level="${ALERTMANAGER_LOG_LEVEL:-debug}" \
--log.format="${ALERTMANAGER_LOG_FORMAT:-json}" \
${ALERTMANAGER_EXTERNAL_URL:+--web.external-url="$ALERTMANAGER_EXTERNAL_URL"} \
"$@"
9 changes: 8 additions & 1 deletion infra/monitoring/entrypoint.prometheus.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
#!/bin/sh
# Prometheus container entrypoint: renders /etc/prometheus/prometheus.yml from
# its .tmpl template, then replaces this shell with the prometheus process.
# Environment read: PROMETHEUS_LOG_LEVEL, PROMETHEUS_LOG_FORMAT, plus whatever
# variables the template references. Extra CLI args are forwarded via "$@".

# Render the config. envsubst is called without a variable list, so every
# $VAR / ${VAR} reference in the template is expanded from the environment.
# NOTE(review): there is no `set -e`, so a failed envsubst still falls through
# to exec with an empty or partial prometheus.yml.
envsubst < /etc/prometheus/prometheus.yml.tmpl > /etc/prometheus/prometheus.yml
# NOTE(review): per the hunk header (-1,3 +1,10) this one-line exec is the
# pre-change form that the multi-line exec below replaces. Read literally as
# one script, exec never returns, so only this line would run.
exec /bin/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --web.enable-lifecycle --web.enable-remote-write-receiver "$@"
# Same invocation, one flag per line, with logging flags added:
#   --log.level  defaults to "debug" when PROMETHEUS_LOG_LEVEL is unset/empty
#   --log.format defaults to "json"  when PROMETHEUS_LOG_FORMAT is unset/empty
# NOTE(review): debug as the built-in default is verbose; confirm that is
# intended for every environment rather than opt-in via the env var.
exec /bin/prometheus \
--config.file=/etc/prometheus/prometheus.yml \
--storage.tsdb.path=/prometheus \
--web.enable-lifecycle \
--web.enable-remote-write-receiver \
--log.level="${PROMETHEUS_LOG_LEVEL:-debug}" \
--log.format="${PROMETHEUS_LOG_FORMAT:-json}" \
"$@"
141 changes: 141 additions & 0 deletions infra/monitoring/grafana/dashboards/anticapture-v2.json
Original file line number Diff line number Diff line change
Expand Up @@ -943,6 +943,147 @@
]
}
]
},
{
"id": 140,
"type": "row",
"title": "Scrape Diagnostics",
"collapsed": true,
"gridPos": { "x": 0, "y": 113, "w": 24, "h": 1 },
"panels": [
{
"id": 1401,
"title": "up (per job) — 1 = scrape succeeded, 0 = scrape failed",
"type": "timeseries",
"gridPos": { "x": 0, "y": 114, "w": 12, "h": 8 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"fieldConfig": {
"defaults": {
"unit": "short",
"min": 0,
"max": 1,
"color": { "mode": "palette-classic" },
"custom": { "fillOpacity": 10, "lineInterpolation": "stepAfter" }
},
"overrides": []
},
"options": {
"tooltip": { "mode": "multi", "sort": "desc" },
"legend": {
"displayMode": "list",
"placement": "bottom",
"showLegend": true
}
},
"targets": [
{
"expr": "up{job=~\"anticapture-.*\"}",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 1402,
"title": "Scrape flap count (changes in up over 1h)",
"type": "timeseries",
"gridPos": { "x": 12, "y": 114, "w": 12, "h": 8 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"fieldConfig": {
"defaults": {
"unit": "short",
"color": { "mode": "palette-classic" },
"custom": { "fillOpacity": 10 }
},
"overrides": []
},
"options": {
"tooltip": { "mode": "multi", "sort": "desc" },
"legend": {
"displayMode": "list",
"placement": "bottom",
"showLegend": true
}
},
"targets": [
{
"expr": "changes(up{job=~\"anticapture-.*\"}[1h])",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 1403,
"title": "scrape_duration_seconds (threshold = scrape_timeout 12s)",
"type": "timeseries",
"gridPos": { "x": 0, "y": 122, "w": 12, "h": 8 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"fieldConfig": {
"defaults": {
"unit": "s",
"color": { "mode": "palette-classic" },
"thresholds": {
"mode": "absolute",
"steps": [
{ "color": "green", "value": null },
{ "color": "red", "value": 12 }
]
},
"custom": {
"fillOpacity": 5,
"thresholdsStyle": { "mode": "line" }
}
},
"overrides": []
},
"options": {
"tooltip": { "mode": "multi", "sort": "desc" },
"legend": {
"displayMode": "list",
"placement": "bottom",
"showLegend": true
}
},
"targets": [
{
"expr": "scrape_duration_seconds{job=~\"anticapture-.*\"}",
"legendFormat": "{{job}}",
"refId": "A"
}
]
},
{
"id": 1404,
"title": "scrape_samples_scraped (0 = empty / failed response)",
"type": "timeseries",
"gridPos": { "x": 12, "y": 122, "w": 12, "h": 8 },
"datasource": { "type": "prometheus", "uid": "prometheus" },
"fieldConfig": {
"defaults": {
"unit": "short",
"color": { "mode": "palette-classic" },
"custom": { "fillOpacity": 5 }
},
"overrides": []
},
"options": {
"tooltip": { "mode": "multi", "sort": "desc" },
"legend": {
"displayMode": "list",
"placement": "bottom",
"showLegend": true
}
},
"targets": [
{
"expr": "scrape_samples_scraped{job=~\"anticapture-.*\"}",
"legendFormat": "{{job}}",
"refId": "A"
}
]
}
]
}
]
}
4 changes: 4 additions & 0 deletions infra/monitoring/prometheus.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Global scrape settings for this Prometheus config.
global:
# Interval between scrapes of each target.
scrape_interval: 15s
# Per-scrape deadline, set below scrape_interval. The "Scrape Diagnostics"
# dashboard row in anticapture-v2.json hard-codes this same 12s value in the
# scrape_duration_seconds panel title and threshold; change them together.
scrape_timeout: 12s

alerting:
alertmanagers:
- static_configs:
Expand Down
Loading