diff --git a/python-pulumi/src/ptd/grafana_dashboards/alerts_dashboard.json b/python-pulumi/src/ptd/grafana_dashboards/alerts_dashboard.json new file mode 100644 index 0000000..7659f91 --- /dev/null +++ b/python-pulumi/src/ptd/grafana_dashboards/alerts_dashboard.json @@ -0,0 +1,2670 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 14, + "panels": [], + "title": "Cloudwatch", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 325000000, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 314572800 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 16, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "aws_ec2_network_out_average{cluster=~\"$Cluster\", job=\"integrations/cloudwatch\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "EC2 Network Out High", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 410000, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 400000 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "aws_ec2_network_packets_out_average{cluster=~\"$Cluster\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "EC2 Network Packets Out High", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 100, + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 15, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "aws_fsx_used_storage_capacity_average{cluster=~\"$Cluster\"} / aws_fsx_storage_capacity_average{cluster=~\"$Cluster\"} * 100", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "C" + } + ], + "title": "FSx Capacity", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 5, + "panels": [], + "title": "Pods", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(pod) (kube_pod_container_status_terminated_reason{cluster=~\"$Cluster\", reason!=\"Completed\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "PodError", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(phase, pod) (kube_pod_status_phase{phase=~\"Failed|Pending|Unknown\", cluster=~\"$Cluster\"}) > 0", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{pod}}: {{phase}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "PodNotHealthy", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "Shows the delta between expected and ready Deployments in the selected cluster. Any non-zero value satisfies the alert condition.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "kube_deployment_status_replicas{cluster=~\"$Cluster\"} \n - \n kube_deployment_status_replicas_ready{cluster=~\"$Cluster\"} > 0", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{deployment}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "DeploymentReplicasMismatch", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "Shows the delta between expected and ready Statefulsets in the selected cluster. Any non-zero value satisfies the alert condition.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "loki-write" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 26 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "kube_statefulset_status_replicas{cluster=~\"$Cluster\"} \n - \n kube_statefulset_status_replicas_ready{cluster=~\"$Cluster\"}", + "hide": false, + "instant": false, + "legendFormat": "{{statefulset}}", + "range": true, + "refId": "C" + } + ], + "title": "StatefulSetReplicasMismatch", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 5 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 34 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "increase(kube_pod_container_status_restarts_total{cluster=~\"$Cluster\"}[15m])", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{namespace}}:{{pod}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "PodRestarts", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 34 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "count by(pod) (kube_pod_container_status_waiting_reason{reason=\"CrashLoopBackOff\", cluster=~\"$Cluster\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "CrashLoopBackOff", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 42 + }, + "id": 6, + "panels": [], + "title": "Nodes", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 10, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 43 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(node) (kube_node_status_condition{cluster=~\"$Cluster\", condition=\"MemoryPressure\", status=\"true\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "NodeMemoryPressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 10, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 43 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(node) (kube_node_status_condition{cluster=~\"$Cluster\", condition=\"DiskPressure\", status=\"true\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "NodeDiskPressure", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 10, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 51 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(node, condition) (kube_node_status_condition{cluster=~\"$Cluster\", condition!=\"Ready\", status=\"true\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{node}}:{{condition}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Node Not Ready", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 59 + }, + "id": 12, + "panels": [], + "title": "Healthchecks", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "Displays HTTP response codes from the products. Includes both external \"fqdn\" checks and kuberenetes \"internal\" service URL checks. A value of 200 means everything is OK.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 60 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "avg by(ptd_component, check_type, ptd_site) (probe_http_status_code{cluster=~\"$Cluster\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "{{check_type}}:{{ptd_site}}-{{ptd_component}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Healthchecks", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 68 + }, + "id": 18, + "panels": [], + "title": "Applications", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "Loki ingester has experienced WAL disk full failures. This indicates storage issues with the Loki WAL directory.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 0 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 69 + }, + "id": 19, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "loki_ingester_wal_disk_full_failures_total{cluster=~\"$Cluster\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Loki WAL Disk Full Failures", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 77 + }, + "id": 20, + "panels": [], + "title": "Mimir", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "No metrics have been received from workload cluster. This could indicate Alloy is not running, network issues between the workload and control room, or the workload cluster is down.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 1 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 78 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "up{job=\"prometheus.scrape.kube_state_metrics\", cluster=~\"$Cluster\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Workload Metrics Silent", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 86 + }, + "id": 22, + "panels": [], + "title": "RDS", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "RDS instance CPU utilization is above 80% for more than 10 minutes.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 100, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "yellow", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "yellow", + "value": 80 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 87 + }, + "id": 23, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "aws_rds_cpuutilization_average{job=\"integrations/cloudwatch\", cluster=~\"$Cluster\"}", + "format": "time_series", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "RDS CPU Utilization High", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "RDS instance has less than 5 GiB of free storage space remaining.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red" + }, + { + "color": "green", + "value": 5368709120 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "yellow" + }, + { + "color": "green", + "value": 5368709120 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 87 + }, + "id": 24, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "aws_rds_free_storage_space_average{job=\"integrations/cloudwatch\", cluster=~\"$Cluster\"}", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "RDS Free Storage Low", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "RDS instance has less than 512 MiB of freeable memory remaining for more than 10 minutes.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red" + }, + { + "color": "green", + "value": 536870912 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "yellow" + }, + { + "color": "green", + "value": 536870912 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 95 + }, + "id": 25, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "aws_rds_freeable_memory_average{job=\"integrations/cloudwatch\", cluster=~\"$Cluster\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "RDS Freeable Memory Low", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "RDS instance has more than 80 active database connections for more than 5 minutes.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 90, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "yellow", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "yellow", + "value": 80 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 95 + }, + "id": 26, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "aws_rds_database_connections_average{job=\"integrations/cloudwatch\", cluster=~\"$Cluster\"}", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "RDS Database Connections High", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 103 + }, + "id": 27, + "panels": [], + "title": "LoadBalancer", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "Application Load Balancer has more than 10 target 5XX errors for over 5 minutes, indicating backend service failures.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 10 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 10 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 104 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "aws_applicationelb_httpcode_target_5_xx_count_sum{job=\"integrations/cloudwatch\",cluster=~\"$Cluster\"}", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "ALB Target 5XX Errors High", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "Application Load Balancer has unhealthy targets for over 5 minutes, indicating backend service health issues.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 12, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "#EAB839", + "value": 10 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 104 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "aws_applicationelb_un_healthy_host_count_average{job=\"integrations/cloudwatch\",cluster=~\"$Cluster\"}", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "ALB Unhealthy Targets", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "Network Load Balancer has unhealthy targets for over 5 minutes, indicating backend service health issues.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 0 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 112 + }, + "id": 30, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "aws_networkelb_un_healthy_host_count_average{job=\"integrations/cloudwatch\",cluster=~\"$Cluster\"}", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "NLB Unhealthy Targets", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "Application Load Balancer target response time is above 2 seconds for more than 10 minutes, indicating performance degradation.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 2 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Value" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 2 + } + ] + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 112 + }, + "id": 31, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "aws_applicationelb_target_response_time_average{job=\"integrations/cloudwatch\",cluster=~\"$Cluster\"}", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "ALB Response Latency High", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "allValue": "", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "definition": "label_values(kube_pod_status_phase,cluster)", + "hide": 0, + "includeAll": true, + "multi": false, + "name": "Cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_pod_status_phase,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Alert-Based Dashboard", + "uid": "alerts_dashboard", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/python-pulumi/src/ptd/grafana_dashboards/k8s-views-global.json b/python-pulumi/src/ptd/grafana_dashboards/k8s-views-global.json new file mode 100644 index 0000000..652e079 --- /dev/null +++ b/python-pulumi/src/ptd/grafana_dashboards/k8s-views-global.json @@ -0,0 +1,3071 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "panel", + "id": "bargauge", + "name": "Bar gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.3.1" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "This is a modern 'Global View' dashboard for your Kubernetes cluster(s). Made for kube-prometheus-stack and take advantage of the latest Grafana features. GitHub repository: https://github.com/dotdc/grafana-dashboards-kubernetes", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 67, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 77, + "options": { + "displayMode": "lcd", + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "11.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(sum by (instance, cpu) (rate(node_cpu_seconds_total{mode!~\"idle|iowait|steal\", cluster=~\"$cluster\"}[$__rate_interval])))", + "interval": "", + "legendFormat": "Real", + "range": true, + "refId": "Real" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\", cluster=~\"$cluster\"} and on(namespace, pod) max by (namespace, pod) (kube_pod_status_phase{phase=\"Running\", cluster=~\"$cluster\"} == 1)) / sum(machine_cpu_cores{cluster=~\"$cluster\"})", + "hide": false, + "legendFormat": "Requests", + "range": true, + "refId": "Requests" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\", cluster=~\"$cluster\"} and on(namespace, pod) max by (namespace, pod) (kube_pod_status_phase{phase=\"Running\", cluster=~\"$cluster\"} == 1)) / sum(machine_cpu_cores{cluster=~\"$cluster\"})", + "hide": false, + "legendFormat": "Limits", + "range": true, + "refId": "Limits" + } + ], + "title": "Global CPU Usage", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "decimals": 2, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 6, + "y": 1 + }, + "id": 78, + "options": { + "displayMode": "lcd", + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "text": {}, + "valueMode": "color" + }, + "pluginVersion": "11.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(node_memory_MemTotal_bytes{cluster=~\"$cluster\"} - node_memory_MemAvailable_bytes{cluster=~\"$cluster\"}) / sum(node_memory_MemTotal_bytes{cluster=~\"$cluster\"})", + "hide": false, + "interval": "", + "legendFormat": "Real", + "range": true, + "refId": "Real" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\", cluster=~\"$cluster\"} and on(namespace, pod) max by (namespace, pod) (kube_pod_status_phase{phase=\"Running\", cluster=~\"$cluster\"} == 1)) / sum(machine_memory_bytes{cluster=~\"$cluster\"})", + "hide": false, + "legendFormat": "Requests", + "range": true, + "refId": "Requests" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_limits{resource=\"memory\", cluster=~\"$cluster\"} and on(namespace, pod) max by (namespace, pod) (kube_pod_status_phase{phase=\"Running\", cluster=~\"$cluster\"} == 1)) / sum(machine_memory_bytes{cluster=~\"$cluster\"})", + "hide": false, + "legendFormat": "Limits", + "range": true, + "refId": "Limits" + } + ], + "title": "Global RAM Usage", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 12, + "y": 1 + }, + "id": 63, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "count(count by (node) (kube_node_info{cluster=~\"$cluster\"}))", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Nodes", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 10, + "x": 14, + "y": 1 + }, + "id": 52, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "exemplar": true, + "expr": "sum(kube_namespace_labels{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Namespaces", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_pod_container_status_running{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Running Containers", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_pod_status_phase{phase=\"Running\", cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Running Pods", + "refId": "O" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_service_info{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Services", + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_endpoint_info{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Endpoints", + "refId": "D" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_ingress_info{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Ingresses", + "refId": "E" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_deployment_labels{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Deployments", + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_statefulset_labels{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Statefulsets", + "refId": "G" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_daemonset_labels{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Daemonsets", + "refId": "H" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_persistentvolumeclaim_info{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Persistent Volume Claims", + "refId": "I" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_hpa_labels{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Horizontal Pod Autoscalers", + "refId": "J" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_configmap_info{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Configmaps", + "refId": "K" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_secret_info{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Secrets", + "refId": "L" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_networkpolicy_labels{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Network Policies", + "refId": "M" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "exemplar": true, + "expr": "count(count by (node) (kube_node_info{cluster=~\"$cluster\"}))", + "hide": false, + "interval": "", + "legendFormat": "Nodes", + "refId": "N" + } + ], + "title": "Kubernetes Resource Count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 12, + "y": 5 + }, + "id": 59, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "count(kube_namespace_created{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Namespaces", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(255, 255, 255)", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 9 + }, + "id": 37, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(node_cpu_seconds_total{mode!~\"idle|iowait|steal\", cluster=~\"$cluster\"}[$__rate_interval]))", + "interval": "", + "legendFormat": "Real", + "range": true, + "refId": "Real" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\", cluster=~\"$cluster\"} and on(namespace, pod) max by (namespace, pod) (kube_pod_status_phase{phase=\"Running\", cluster=~\"$cluster\"} == 1))", + "hide": false, + "legendFormat": "Requests", + "range": true, + "refId": "Requests" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_limits{resource=\"cpu\", cluster=~\"$cluster\"} and on(namespace, pod) max by (namespace, pod) (kube_pod_status_phase{phase=\"Running\", cluster=~\"$cluster\"} == 1))", + "hide": false, + "legendFormat": "Limits", + "range": true, + "refId": "Limits" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(machine_cpu_cores{cluster=~\"$cluster\"})", + "hide": false, + "legendFormat": "Total", + "range": true, + "refId": "Total" + } + ], + "title": "CPU Usage", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgb(255, 255, 255)", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 9 + }, + "id": 39, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "11.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(node_memory_MemTotal_bytes{cluster=~\"$cluster\"} - node_memory_MemAvailable_bytes{cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "Real", + "range": true, + "refId": "Real" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\", cluster=~\"$cluster\"} and on(namespace, pod) max by (namespace, pod) (kube_pod_status_phase{phase=\"Running\", cluster=~\"$cluster\"} == 1))", + "hide": false, + "legendFormat": "Requests", + "range": true, + "refId": "Requests" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_limits{resource=\"memory\", cluster=~\"$cluster\"} and on(namespace, pod) max by (namespace, pod) (kube_pod_status_phase{phase=\"Running\", cluster=~\"$cluster\"} == 1))", + "hide": false, + "legendFormat": "Limits", + "range": true, + "refId": "Limits" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(machine_memory_bytes{cluster=~\"$cluster\"})", + "hide": false, + "legendFormat": "Total", + "range": true, + "refId": "Total" + } + ], + "title": "RAM Usage", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 12, + "y": 9 + }, + "id": 62, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "11.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "expr": "sum(kube_pod_status_phase{phase=\"Running\", cluster=~\"$cluster\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Running Pods", + "type": "stat" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 71, + "panels": [], + "title": "Resources", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd", + "seriesBy": "last" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "CPU %", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 0.5 + }, + { + "color": "red", + "value": 0.7 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 72, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(sum by (instance, cpu) (rate(node_cpu_seconds_total{mode!~\"idle|iowait|steal\", cluster=~\"$cluster\"}[$__rate_interval])))", + "interval": "$resolution", + "legendFormat": "CPU usage in %", + "range": true, + "refId": "A" + } + ], + "title": "Cluster CPU Utilization", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "CPU usage in %", + "mode": "reduceRow", + "reduce": { + "reducer": "mean" + }, + "replaceFields": true + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-GrYlRd" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "MEMORY", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "scheme", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 0.5 + }, + { + "color": "red", + "value": 0.7 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 55, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max", + "min" + ], + "displayMode": "hidden", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(node_memory_MemTotal_bytes{cluster=~\"$cluster\"} - node_memory_MemAvailable_bytes{cluster=~\"$cluster\"}) / sum(node_memory_MemTotal_bytes{cluster=~\"$cluster\"})", + "interval": "$resolution", + "legendFormat": "Memory usage in %", + "range": true, + "refId": "A" + } + ], + "title": "Cluster Memory Utilization", + "transformations": [ + { + "id": "calculateField", + "options": { + "alias": "Memory usage in %", + "mode": "reduceRow", + "reduce": { + "reducer": "mean" + }, + "replaceFields": true + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "CPU CORES", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 22 + }, + "id": 46, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.4.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(container_cpu_usage_seconds_total{image!=\"\", cluster=~\"$cluster\"}[$__rate_interval])) by (namespace)", + "format": "time_series", + "hide": false, + "interval": "$resolution", + "legendFormat": "{{ namespace }}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Utilization by namespace", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 22 + }, + "id": 50, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{image!=\"\", cluster=~\"$cluster\"}) by (namespace)", + "interval": "$resolution", + "legendFormat": "{{ namespace }}", + "range": true, + "refId": "A" + } + ], + "title": "Memory Utilization by namespace", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "CPU %", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 30 + }, + "id": 54, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "avg(sum by (instance, cpu) (rate(node_cpu_seconds_total{mode!~\"idle|iowait|steal\", cluster=~\"$cluster\"}[$__rate_interval]))) by (instance)", + "interval": "$resolution", + "legendFormat": "{{ node }}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Utilization by instance", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "MEMORY", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 30 + }, + "id": 73, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(node_memory_MemTotal_bytes{cluster=~\"$cluster\"} - node_memory_MemAvailable_bytes{cluster=~\"$cluster\"}) by (instance)", + "hide": false, + "interval": "$resolution", + "legendFormat": "{{ instance }}", + "range": true, + "refId": "A" + } + ], + "title": "Memory Utilization by instance", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "No data is generally a good thing here.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "SECONDS", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 38 + }, + "id": 82, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(container_cpu_cfs_throttled_seconds_total{image!=\"\", cluster=~\"$cluster\"}[$__rate_interval])) by (namespace) > 0", + "interval": "$resolution", + "legendFormat": "{{ namespace }}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Throttled seconds by namespace", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "No data is generally a good thing here.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "NB", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 38 + }, + "id": 83, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(node_cpu_core_throttles_total{cluster=~\"$cluster\"}[$__rate_interval])) by (instance)", + "interval": "$resolution", + "legendFormat": "{{ instance }}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Core Throttled by instance", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 86, + "panels": [], + "title": "Kubernetes", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 84, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(kube_pod_status_qos_class{cluster=~\"$cluster\"}) by (qos_class)", + "interval": "", + "legendFormat": "{{ qos_class }} pods", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(kube_pod_info{cluster=~\"$cluster\"})", + "hide": false, + "legendFormat": "Total pods", + "range": true, + "refId": "B" + } + ], + "title": "Kubernetes Pods QoS classes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 85, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(kube_pod_status_reason{cluster=~\"$cluster\"}) by (reason)", + "interval": "", + "legendFormat": "{{ reason }}", + "range": true, + "refId": "A" + } + ], + "title": "Kubernetes Pods Status Reason", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "No data is generally a good thing here.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 87, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(container_oom_events_total{cluster=~\"$cluster\"}[$__rate_interval])) by (namespace) > 0", + "interval": "", + "legendFormat": "{{ namespace }}", + "range": true, + "refId": "A" + } + ], + "title": "OOM Events by namespace", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "No data is generally a good thing here.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "points", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 88, + "options": { + "legend": { + "calcs": [ + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Max", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(increase(kube_pod_container_status_restarts_total{cluster=~\"$cluster\"}[$__rate_interval])) by (namespace) > 0", + "interval": "", + "legendFormat": "{{ namespace }}", + "range": true, + "refId": "A" + } + ], + "title": "Container Restarts by namespace", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 65 + }, + "id": 69, + "panels": [], + "title": "Network", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "Dropped noisy virtual devices for readability.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "BANDWIDTH", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 66 + }, + "id": 44, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"(veth|azv|lxc).*\", cluster=~\"$cluster\"}[$__rate_interval])) by (device)", + "interval": "$resolution", + "legendFormat": "Received : {{ device }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "- sum(rate(node_network_transmit_bytes_total{device!~\"(veth|azv|lxc).*\", cluster=~\"$cluster\"}[$__rate_interval])) by (device)", + "interval": "$resolution", + "legendFormat": "Transmitted : {{ device }}", + "range": true, + "refId": "B" + } + ], + "title": "Global Network Utilization by device", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "DROPPED PACKETS", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 66 + }, + "id": 53, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(node_network_receive_drop_total{cluster=~\"$cluster\"}[$__rate_interval]))", + "interval": "$resolution", + "legendFormat": "Packets dropped (receive)", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "- sum(rate(node_network_transmit_drop_total{cluster=~\"$cluster\"}[$__rate_interval]))", + "interval": "$resolution", + "legendFormat": "Packets dropped (transmit)", + "range": true, + "refId": "B" + } + ], + "title": "Network Saturation - Packets dropped", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "BANDWIDTH", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 74 + }, + "id": 79, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(container_network_receive_bytes_total{cluster=~\"$cluster\"}[$__rate_interval])) by (namespace)", + "interval": "$resolution", + "legendFormat": "Received : {{ namespace }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "- sum(rate(container_network_transmit_bytes_total{cluster=~\"$cluster\"}[$__rate_interval])) by (namespace)", + "hide": false, + "interval": "$resolution", + "legendFormat": "Transmitted : {{ namespace }}", + "range": true, + "refId": "B" + } + ], + "title": "Network Received by namespace", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "BANDWIDTH", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 74 + }, + "id": 80, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(node_network_receive_bytes_total{cluster=~\"$cluster\"}[$__rate_interval])) by (instance)", + "interval": "$resolution", + "legendFormat": "Received bytes in {{ instance }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "- sum(rate(node_network_transmit_bytes_total{cluster=~\"$cluster\"}[$__rate_interval])) by (instance)", + "hide": false, + "interval": "$resolution", + "legendFormat": "Transmitted bytes in {{ instance }}", + "range": true, + "refId": "B" + } + ], + "title": "Total Network Received (with all virtual devices) by instance", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "Dropped noisy virtual devices for readability.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "BANDWIDTH", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 82 + }, + "id": 56, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"(veth|azv|lxc|lo).*\", cluster=~\"$cluster\"}[$__rate_interval])) by (instance)", + "interval": "$resolution", + "legendFormat": "Received bytes in {{ instance }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "- sum(rate(node_network_transmit_bytes_total{device!~\"(veth|azv|lxc|lo).*\", cluster=~\"$cluster\"}[$__rate_interval])) by (instance)", + "hide": false, + "interval": "$resolution", + "legendFormat": "Transmitted bytes in {{ instance }}", + "range": true, + "refId": "B" + } + ], + "title": "Network Received (without loopback) by instance", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "Dropped noisy virtual devices for readability.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "BANDWIDTH", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 82 + }, + "id": 81, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.3.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(node_network_receive_bytes_total{device=\"lo\", cluster=~\"$cluster\"}[$__rate_interval])) by (instance)", + "interval": "$resolution", + "legendFormat": "Received bytes in {{ instance }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "- sum(rate(node_network_transmit_bytes_total{device=\"lo\", cluster=~\"$cluster\"}[$__rate_interval])) by (instance)", + "hide": false, + "interval": "$resolution", + "legendFormat": "Transmitted bytes in {{ instance }}", + "range": true, + "refId": "B" + } + ], + "title": "Network Received (loopback only) by instance", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "Kubernetes", + "Prometheus" + ], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "definition": "label_values(kube_node_info,cluster)", + "hide": 0, + "includeAll": true, + "multi": false, + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_node_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "1m", + "value": "1m" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "resolution", + "options": [ + { + "selected": false, + "text": "1s", + "value": "1s" + }, + { + "selected": false, + "text": "15s", + "value": "15s" + }, + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": true, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + } + ], + "query": "1s, 15s, 30s, 1m, 3m, 5m", + "queryValue": "", + "skipUrlSync": false, + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Kubernetes Global View", + "uid": "k8s-views-global", + "version": 45, + "weekStart": "" +} \ No newline at end of file diff --git a/python-pulumi/src/ptd/paths.py b/python-pulumi/src/ptd/paths.py index f747d21..e3ee6ab 100644 --- a/python-pulumi/src/ptd/paths.py +++ b/python-pulumi/src/ptd/paths.py @@ -25,6 +25,10 @@ def alerts() -> pathlib.Path: return top() / "python-pulumi" / "src" / "ptd" / "grafana_alerts" +def dashboards() -> pathlib.Path: + return top() / "python-pulumi" / "src" / "ptd" / "grafana_dashboards" + + class Paths: @property def root(self) -> pathlib.Path: diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py b/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py index 7d97f0c..8abb4c1 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py @@ -19,6 +19,7 @@ import ptd.junkdrawer import ptd.oidc import ptd.paths +import ptd.pulumi_resources.lib import ptd.secrecy @@ -1950,6 +1951,9 @@ def with_grafana( # Create alert configmaps for all YAML files in the alerts directory self._create_alert_configmaps(grafana_ns) + # Create dashboard configmaps for all JSON files in the dashboards directory + self._create_dashboard_configmaps(grafana_ns) + # TODO: auth.proxy should be configurable, prod grafana auth will need tighter controls than letting anyone in as an Editor k8s.helm.v3.Release( f"{self.name}-grafana", @@ -2073,7 +2077,12 @@ def with_grafana( "alerts": { "enabled": True, "searchNamespace": "grafana", - } + }, + "dashboards": { + "enabled": True, + "searchNamespace": "grafana", + "label": "grafana_dashboard", + }, }, }, ), @@ -2515,6 +2524,53 @@ def _create_alert_configmaps(self, ns: k8s.core.v1.Namespace): opts=pulumi.ResourceOptions(parent=self, provider=self.provider, depends_on=ns), ) + def _create_dashboard_configmaps(self, ns: k8s.core.v1.Namespace): + """ + Create ConfigMaps for Grafana dashboard provisioning. + + Reads all JSON files from grafana_dashboards/ directory and creates + Kubernetes ConfigMaps with the grafana_dashboard label. The Grafana + sidecar watches for this label and provisions dashboards automatically. + + Dashboard UIDs are enforced to match the filename (without .json extension) + to ensure idempotent updates and prevent duplicate dashboards. + """ + + dashboards_dir = ptd.paths.dashboards() + + for dashboard_file in sorted(dashboards_dir.glob("*.json")): + dashboard_name = dashboard_file.stem + + # Sanitize name for Kubernetes RFC 1123 compliance + k8s_safe_name = ptd.pulumi_resources.lib.sanitize_k8s_name(dashboard_name) + + # Read and parse JSON + try: + with open(dashboard_file) as f: + dashboard_json = json.load(f) + except json.JSONDecodeError as e: + msg = f"Invalid JSON in {dashboard_file}: {e}" + raise ValueError(msg) from e + + # Enforce UID = filename for idempotency + # Set id to null (Grafana provisioning ignores it) + dashboard_json["uid"] = dashboard_name + dashboard_json["id"] = None + + # Convert back to JSON string + dashboard_content = json.dumps(dashboard_json, indent=2) + + k8s.core.v1.ConfigMap( + f"{self.name}-grafana-{k8s_safe_name}-dashboard", + metadata={ + "name": f"grafana-{k8s_safe_name}-dashboard", + "namespace": "grafana", + "labels": {"grafana_dashboard": "1"}, + }, + data={f"{dashboard_name}.json": dashboard_content}, + opts=pulumi.ResourceOptions(parent=self, provider=self.provider, depends_on=ns), + ) + def setup_tailscale_access(self): sg_name = f"{self.sg_prefix}-tailscale" self.eks.vpc_config.apply(lambda config: self._setup_sg_access(sg_name, config.vpc_id)) diff --git a/python-pulumi/src/ptd/pulumi_resources/lib.py b/python-pulumi/src/ptd/pulumi_resources/lib.py index f292ad1..0d5506e 100644 --- a/python-pulumi/src/ptd/pulumi_resources/lib.py +++ b/python-pulumi/src/ptd/pulumi_resources/lib.py @@ -1,7 +1,48 @@ +import re + _AWS_TAG_KEY_MAX_LENGTH = 128 _AWS_TAG_VALUE_MAX_LENGTH = 256 +def sanitize_k8s_name(name: str) -> str: + """Sanitize a name to be RFC 1123 compliant for Kubernetes resources. + + RFC 1123 subdomain rules: + - Must contain only lowercase alphanumeric characters and hyphens + - Must start and end with an alphanumeric character + - Maximum length is 253 characters (not enforced here) + + This function: + 1. Converts to lowercase + 2. Replaces all non-alphanumeric characters with hyphens + 3. Collapses consecutive hyphens into a single hyphen + 4. Strips leading/trailing hyphens + + Raises: + ValueError: If the sanitized name is empty or still RFC 1123 non-compliant + """ + if not name: + msg = "Name cannot be empty" + raise ValueError(msg) + + # Convert to lowercase and replace invalid chars with hyphens + sanitized = re.sub(r"[^a-z0-9-]", "-", name.lower()) + + # Collapse consecutive hyphens into single hyphen + sanitized = re.sub(r"-+", "-", sanitized) + + # Strip leading/trailing hyphens + sanitized = sanitized.strip("-") + + # Validate the result matches RFC 1123 subdomain pattern + # Pattern: must start/end with alphanumeric, can contain hyphens in between + if not sanitized or not re.match(r"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$", sanitized): + msg = f"Name '{name}' cannot be sanitized to RFC 1123 format (result: '{sanitized}')" + raise ValueError(msg) + + return sanitized + + def format_lb_tags(tags: dict[str, str]) -> str: """Format tags as comma-separated key=value pairs for AWS LB Controller annotations. diff --git a/python-pulumi/tests/test_dashboard_configmaps.py b/python-pulumi/tests/test_dashboard_configmaps.py new file mode 100644 index 0000000..57dffaf --- /dev/null +++ b/python-pulumi/tests/test_dashboard_configmaps.py @@ -0,0 +1,171 @@ +"""Tests for Grafana dashboard ConfigMap provisioning in aws_eks_cluster.py""" + +import json +import tempfile +from pathlib import Path + +import pytest + +from ptd.pulumi_resources.lib import sanitize_k8s_name + + +def test_dashboard_name_sanitization_basic(): + """Test basic underscore replacement for dashboard names""" + assert sanitize_k8s_name("alerts_dashboard") == "alerts-dashboard" + + +def test_dashboard_name_sanitization_edge_cases(): + """Test edge cases in dashboard name sanitization""" + # Leading/trailing underscores + assert sanitize_k8s_name("_dashboard_") == "dashboard" + + # Mixed case and special chars + assert sanitize_k8s_name("My_Custom_Dashboard!") == "my-custom-dashboard" + + # Multiple consecutive special chars + assert sanitize_k8s_name("test___dashboard") == "test-dashboard" + + +def test_dashboard_name_sanitization_invalid(): + """Test that invalid dashboard names raise errors""" + with pytest.raises(ValueError, match="cannot be sanitized to RFC 1123 format"): + sanitize_k8s_name("___") # Only underscores + + with pytest.raises(ValueError, match="Name cannot be empty"): + sanitize_k8s_name("") # Empty string + + +def test_dashboard_configmap_structure(): + """Test the expected structure of dashboard ConfigMaps""" + # This is a documentation test showing the expected structure + dashboard_name = "alerts_dashboard" + k8s_safe_name = sanitize_k8s_name(dashboard_name) + + # Expected ConfigMap structure + expected_metadata = { + "name": f"grafana-{k8s_safe_name}-dashboard", + "namespace": "grafana", + "labels": {"grafana_dashboard": "1"}, + } + + # Verify sanitized name is RFC 1123 compliant + assert k8s_safe_name == "alerts-dashboard" + assert expected_metadata["name"] == "grafana-alerts-dashboard-dashboard" + + # Verify the data key uses original dashboard name (with underscore) + expected_data_key = f"{dashboard_name}.json" + assert expected_data_key == "alerts_dashboard.json" + + +def test_dashboard_uid_enforcement(): + """Test that dashboard UID is enforced to match filename""" + dashboard_name = "my_test_dashboard" + + # Simulate the dashboard JSON structure + dashboard_json = { + "title": "My Test Dashboard", + "uid": "old_uid", # This should be overwritten + "id": 123, # This should be set to None + } + + # What the code does: + dashboard_json["uid"] = dashboard_name + dashboard_json["id"] = None + + assert dashboard_json["uid"] == "my_test_dashboard" + assert dashboard_json["id"] is None + + +def test_rfc1123_compliance_validation(): + """Test RFC 1123 subdomain naming rules (simplified version without dots)""" + import re + + # Valid names (alphanumeric and hyphens, start/end with alphanumeric) + valid_names = [ + "a", + "abc", + "a-b", + "abc-123", + "alerts-dashboard", + "123-abc", + "my-dashboard", + ] + + # Simplified pattern for our use case (no dots) + rfc1123_pattern = r"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$" + + for name in valid_names: + assert re.match(rfc1123_pattern, name), f"Valid name failed: {name}" + + # Invalid names (should not match) + invalid_names = [ + "-abc", # Starts with hyphen + "abc-", # Ends with hyphen + "ABC", # Uppercase + "a_b", # Underscore + "a b", # Space + "a.b", # Dot (not allowed in our simplified version) + "", # Empty + ] + + for name in invalid_names: + assert not re.match(rfc1123_pattern, name), f"Invalid name passed: {name}" + + +def test_dashboard_file_processing_simulation(): + """Simulate processing a dashboard file to ensure end-to-end behavior""" + with tempfile.TemporaryDirectory() as tmpdir: + dashboard_dir = Path(tmpdir) + + # Create a test dashboard file + dashboard_file = dashboard_dir / "alerts_dashboard.json" + dashboard_content = { + "title": "Alerts Dashboard", + "uid": "wrong_uid", + "id": 999, + "panels": [], + } + + with open(dashboard_file, "w") as f: + json.dump(dashboard_content, f) + + # Simulate what the code does + dashboard_name = dashboard_file.stem + k8s_safe_name = sanitize_k8s_name(dashboard_name) + + # Read and parse + with open(dashboard_file) as f: + dashboard_json = json.load(f) + + # Enforce UID and null id + dashboard_json["uid"] = dashboard_name + dashboard_json["id"] = None + + # Verify results + assert dashboard_name == "alerts_dashboard" + assert k8s_safe_name == "alerts-dashboard" + assert dashboard_json["uid"] == "alerts_dashboard" + assert dashboard_json["id"] is None + assert dashboard_json["title"] == "Alerts Dashboard" + + +def test_configmap_naming_pattern(): + """Test the ConfigMap naming pattern used in the code""" + cluster_name = "main01-staging" + dashboard_name = "alerts_dashboard" + k8s_safe_name = sanitize_k8s_name(dashboard_name) + + # Pulumi resource name (can contain underscores, used internally) + pulumi_resource_name = f"{cluster_name}-grafana-{k8s_safe_name}-dashboard" + + # Kubernetes metadata name (must be RFC 1123 compliant) + k8s_metadata_name = f"grafana-{k8s_safe_name}-dashboard" + + # Verify patterns + assert pulumi_resource_name == "main01-staging-grafana-alerts-dashboard-dashboard" + assert k8s_metadata_name == "grafana-alerts-dashboard-dashboard" + + # Verify metadata name is RFC 1123 compliant + import re + + assert re.match(r"^[a-z0-9]([-a-z0-9]*[a-z0-9])?$", k8s_metadata_name) diff --git a/python-pulumi/tests/test_lib.py b/python-pulumi/tests/test_lib.py index 4b1f8ed..c1649b7 100644 --- a/python-pulumi/tests/test_lib.py +++ b/python-pulumi/tests/test_lib.py @@ -1,6 +1,6 @@ import pytest -from ptd.pulumi_resources.lib import format_lb_tags +from ptd.pulumi_resources.lib import format_lb_tags, sanitize_k8s_name def test_format_lb_tags_normal() -> None: @@ -127,3 +127,125 @@ def test_format_lb_tags_slash_in_key() -> None: result = format_lb_tags({"posit.team/true-name": "myapp", "posit.team/environment": "production"}) assert "posit.team/true-name=myapp" in result assert "posit.team/environment=production" in result + + +# ===== sanitize_k8s_name tests ===== + + +def test_sanitize_k8s_name_basic_underscore_replacement() -> None: + """Basic test: underscores replaced with hyphens""" + assert sanitize_k8s_name("alerts_dashboard") == "alerts-dashboard" + + +def test_sanitize_k8s_name_multiple_underscores() -> None: + """Multiple underscores are all replaced""" + assert sanitize_k8s_name("my_test_dashboard") == "my-test-dashboard" + + +def test_sanitize_k8s_name_uppercase_lowercased() -> None: + """Uppercase letters are lowercased""" + assert sanitize_k8s_name("MyDashboard") == "mydashboard" + + +def test_sanitize_k8s_name_mixed_case_and_underscores() -> None: + """Mix of uppercase and underscores""" + assert sanitize_k8s_name("My_Dashboard") == "my-dashboard" + + +def test_sanitize_k8s_name_leading_underscore_stripped() -> None: + """Leading underscores are stripped""" + assert sanitize_k8s_name("_dashboard") == "dashboard" + + +def test_sanitize_k8s_name_trailing_underscore_stripped() -> None: + """Trailing underscores are stripped""" + assert sanitize_k8s_name("dashboard_") == "dashboard" + + +def test_sanitize_k8s_name_leading_and_trailing_underscores() -> None: + """Both leading and trailing underscores are stripped""" + assert sanitize_k8s_name("_dashboard_") == "dashboard" + + +def test_sanitize_k8s_name_special_chars_replaced() -> None: + """Special characters replaced with hyphens""" + assert sanitize_k8s_name("my@dashboard!") == "my-dashboard" + + +def test_sanitize_k8s_name_spaces_replaced() -> None: + """Spaces replaced with hyphens""" + assert sanitize_k8s_name("my dashboard") == "my-dashboard" + + +def test_sanitize_k8s_name_dots_replaced() -> None: + """Dots are replaced with hyphens (simpler for dashboard names)""" + assert sanitize_k8s_name("my.dashboard") == "my-dashboard" + + +def test_sanitize_k8s_name_hyphens_preserved() -> None: + """Hyphens are preserved""" + assert sanitize_k8s_name("my-dashboard") == "my-dashboard" + + +def test_sanitize_k8s_name_alphanumeric_preserved() -> None: + """Alphanumeric characters are preserved (lowercased)""" + assert sanitize_k8s_name("Dashboard123") == "dashboard123" + + +def test_sanitize_k8s_name_consecutive_special_chars() -> None: + """Consecutive special characters are collapsed into a single hyphen""" + assert sanitize_k8s_name("my___dashboard") == "my-dashboard" + + +def test_sanitize_k8s_name_only_special_chars_fails() -> None: + """Name with only special characters cannot be sanitized""" + with pytest.raises(ValueError, match="cannot be sanitized to RFC 1123 format"): + sanitize_k8s_name("___") + + +def test_sanitize_k8s_name_empty_string_fails() -> None: + """Empty string raises ValueError""" + with pytest.raises(ValueError, match="Name cannot be empty"): + sanitize_k8s_name("") + + +def test_sanitize_k8s_name_only_leading_underscore_fails() -> None: + """Name that becomes empty after stripping fails""" + with pytest.raises(ValueError, match="cannot be sanitized to RFC 1123 format"): + sanitize_k8s_name("_") + + +def test_sanitize_k8s_name_complex_case() -> None: + """Complex real-world example""" + assert sanitize_k8s_name("_My_Dashboard_2024!") == "my-dashboard-2024" + + +def test_sanitize_k8s_name_already_valid() -> None: + """Already valid names pass through (lowercased)""" + assert sanitize_k8s_name("valid-name") == "valid-name" + + +def test_sanitize_k8s_name_single_char() -> None: + """Single character names are valid""" + assert sanitize_k8s_name("a") == "a" + assert sanitize_k8s_name("1") == "1" + + +def test_sanitize_k8s_name_two_chars() -> None: + """Two character names are valid""" + assert sanitize_k8s_name("ab") == "ab" + + +def test_sanitize_k8s_name_hyphen_in_middle() -> None: + """Hyphen in middle is valid (part of RFC 1123 pattern)""" + assert sanitize_k8s_name("a-b") == "a-b" + + +def test_sanitize_k8s_name_leading_hyphen_after_special_char() -> None: + """Special char at start becomes hyphen, then gets stripped""" + assert sanitize_k8s_name("!dashboard") == "dashboard" + + +def test_sanitize_k8s_name_trailing_hyphen_after_special_char() -> None: + """Special char at end becomes hyphen, then gets stripped""" + assert sanitize_k8s_name("dashboard!") == "dashboard"