diff --git a/snmp-observ-lib/alerts.libsonnet b/snmp-observ-lib/alerts.libsonnet index f41352db4..468feb326 100644 --- a/snmp-observ-lib/alerts.libsonnet +++ b/snmp-observ-lib/alerts.libsonnet @@ -156,18 +156,42 @@ local xtd = import 'github.com/jsonnet-libs/xtd/main.libsonnet'; and (%s) != 2 ||| % [ - this.signals.interface.ifOperStatus.withFilteringSelectorMixin(this.config.alertInterfaceDownSelector).asRuleExpression(), + this.signals.interface.ifOperStatus.withFilteringSelectorMixin(this.config.alertInterfaceDownSelectorCritical).asRuleExpression(), this.signals.interface.ifAdminStatus.asRuleExpression(), ], labels: { - severity: this.config.alertInterfaceDownSeverity, + severity: 'critical', + }, + annotations: { + summary: 'Critical network interface is down on SNMP device.', + description: ||| + A critical network interface {{$labels.ifName}} ({{$labels.ifAlias}}) on {{$labels.%s}} is down. + Note that only interfaces with ifAdminStatus = `up` and matching `%s` are being checked and considered critical. + ||| % [instanceLabel, this.config.alertInterfaceDownSelectorCritical], + }, + 'for': '5m', + keep_firing_for: '5m', + }, + { + alert: 'SNMPInterfaceDown', + expr: ||| + (%s) == 2 + # only alert if interface is adminatratively up: + and (%s) != 2 + ||| + % [ + this.signals.interface.ifOperStatus.withFilteringSelectorMixin(this.config.alertInterfaceDownSelectorWarning).asRuleExpression(), + this.signals.interface.ifAdminStatus.asRuleExpression(), + ], + labels: { + severity: 'warning', }, annotations: { summary: 'Network interface is down on SNMP device.', description: ||| Network interface {{$labels.ifName}} ({{$labels.ifAlias}}) on {{$labels.%s}} is down. Only interfaces with ifAdminStatus = `up` and matching `%s` are being checked. 
- ||| % [instanceLabel, this.config.alertInterfaceDownSelector], + ||| % [instanceLabel, this.config.alertInterfaceDownSelectorWarning], }, 'for': '5m', keep_firing_for: '5m', @@ -239,7 +263,6 @@ local xtd = import 'github.com/jsonnet-libs/xtd/main.libsonnet'; Network interface {{ $labels.ifName }} ({{$labels.ifAlias}}) is flapping on {{ $labels.%s }}. It has changed its status more than 5 times in the last 5 minutes. ||| % [instanceLabel], }, - 'for': '0', keep_firing_for: '5m', }, ], diff --git a/snmp-observ-lib/config.libsonnet b/snmp-observ-lib/config.libsonnet index 05e03299e..ef30893a8 100644 --- a/snmp-observ-lib/config.libsonnet +++ b/snmp-observ-lib/config.libsonnet @@ -15,9 +15,12 @@ // pick vendors you have. See README for full list. metricsSource: ['generic', 'cisco', 'mikrotik', 'juniper'], - //only fire alerts 'interface is down' for the following selector: - alertInterfaceDownSelector: 'ifAlias=~".*(?i:(uplink|internet|WAN)|ISP).*"', - alertInterfaceDownSeverity: 'warning', + // only fire alerts 'interface is down' for the following selector: + // deprecated, use alertInterfaceDownSelectorWarning or alertInterfaceDownSelectorCritical instead + alertInterfaceDownSelector: self.alertInterfaceDownSelectorWarning, + alertInterfaceDownSelectorWarning: 'ifAlias=~".*(?i:(uplink|internet|WAN|ISP)).*"', + // change according to your org naming conventions + alertInterfaceDownSelectorCritical: 'ifAlias=~".*(?i:(critical)).*"', // Enable to workaround issue with counters on Cisco NX-OS overloading issue. // This will clamp max interface traffic possible to 1000Gbps. 
diff --git a/snmp-observ-lib/dashboards_out/snmp-fleet.json b/snmp-observ-lib/dashboards_out/snmp-fleet.json index baf2eb15b..0e8197530 100644 --- a/snmp-observ-lib/dashboards_out/snmp-fleet.json +++ b/snmp-observ-lib/dashboards_out/snmp-fleet.json @@ -662,6 +662,10 @@ "description": "Network interface traffic out.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -679,6 +683,10 @@ } ], "noValue": "No traffic", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "bps" }, "overrides": [ @@ -757,6 +765,10 @@ "description": "Network interface errors in.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -774,6 +786,10 @@ } ], "noValue": "No errors", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "pps" }, "overrides": [ @@ -844,6 +860,10 @@ "description": "Network interface discards in.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -861,6 +881,10 @@ } ], "noValue": "No dropped packets", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "pps" }, "overrides": [ @@ -942,6 +966,10 @@ "description": "Network interface unicast packets in.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -959,6 +987,10 @@ } ], "noValue": "No unicast packets", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "pps" }, "overrides": [ @@ -1029,6 +1061,10 @@ "description": "Network interface broadcast packets in.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": 
"palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -1046,6 +1082,10 @@ } ], "noValue": "No broadcast packets", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "pps" }, "overrides": [ @@ -1116,6 +1156,10 @@ "description": "Network interface multicast packets in.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -1133,6 +1177,10 @@ } ], "noValue": "No multicast packets", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "pps" }, "overrides": [ diff --git a/snmp-observ-lib/dashboards_out/snmp-overview.json b/snmp-observ-lib/dashboards_out/snmp-overview.json index ce8bc6a88..d6496e713 100644 --- a/snmp-observ-lib/dashboards_out/snmp-overview.json +++ b/snmp-observ-lib/dashboards_out/snmp-overview.json @@ -85,6 +85,15 @@ "color": { "fixedColor": "text", "mode": "fixed" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#5794f2", + "value": null + } + ] } }, "overrides": [ @@ -109,7 +118,6 @@ "y": 1 }, "options": { - "colorMode": "fixed", "graphMode": "none", "reduceOptions": { "calcs": [ @@ -146,6 +154,15 @@ "color": { "fixedColor": "text", "mode": "fixed" + }, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#5794f2", + "value": null + } + ] } }, "overrides": [ @@ -170,7 +187,6 @@ "y": 1 }, "options": { - "colorMode": "fixed", "graphMode": "none", "reduceOptions": { "calcs": [ @@ -205,7 +221,7 @@ "fieldConfig": { "defaults": { "color": { - "fixedColor": "text", + "fixedColor": "#5794f2", "mode": "thresholds" }, "decimals": 1, @@ -275,6 +291,7 @@ "fieldConfig": { "defaults": { "color": { + "fixedColor": "#5794f2", "mode": "continuous-BlYlRd" }, "custom": { @@ -287,6 +304,10 @@ "decimals": 1, "max": 100, "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "percent" }, "overrides": [ @@ -346,6 
+367,7 @@ "fieldConfig": { "defaults": { "color": { + "fixedColor": "#5794f2", "mode": "continuous-BlYlRd" }, "custom": { @@ -358,6 +380,10 @@ "decimals": 1, "max": 100, "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "percent" }, "overrides": [ @@ -966,6 +992,10 @@ "description": "Network interface traffic out.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -977,6 +1007,10 @@ }, "decimals": 1, "noValue": "No traffic", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "bps" }, "overrides": [ @@ -1055,6 +1089,10 @@ "description": "Network interface errors in.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -1066,6 +1104,10 @@ }, "decimals": 1, "noValue": "No errors", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "pps" }, "overrides": [ @@ -1136,6 +1178,10 @@ "description": "Network interface discards in.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -1147,6 +1193,10 @@ }, "decimals": 1, "noValue": "No dropped packets", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "pps" }, "overrides": [ @@ -1228,6 +1278,10 @@ "description": "Network interface unicast packets in.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -1239,6 +1293,10 @@ }, "decimals": 1, "noValue": "No unicast packets", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "pps" }, "overrides": [ @@ -1309,6 +1367,10 @@ "description": "Network interface broadcast packets in.\n", 
"fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -1320,6 +1382,10 @@ }, "decimals": 1, "noValue": "No broadcast packets", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "pps" }, "overrides": [ @@ -1390,6 +1456,10 @@ "description": "Network interface multicast packets in.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -1401,6 +1471,10 @@ }, "decimals": 1, "noValue": "No multicast packets", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "pps" }, "overrides": [ @@ -1605,6 +1679,10 @@ "description": "The current value of receive buffer-to-buffer credits for this port.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -1616,6 +1694,10 @@ }, "decimals": 1, "noValue": "No packets", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "pps" }, "overrides": [ @@ -1686,6 +1768,10 @@ "description": "-TxWaitCount: The number of times the FC-port waited due to lack of transmit credits.\n\n-FramesDiscard: The number of frames discarded by the FC-port.\n\n-TxWtAvgBBCreditTransitionToZero: increments if the credit available is zero for 100 ms.\n\n-BBCreditTransistionFromZero: increments if the transmit b2b credit is zero.\n\n-BBCreditTransistionToZero: increments if the receive b2b credit is zero.\n\n-InvalidTxWords: The number of invalid transmission words detected by the FC-Port.\n\n-InvalidCrcs: The number of invalid CRCs detected by the FC-Port.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "axisCenteredZero": false, "axisLabel": "out(-) | in(+)", @@ -1697,6 
+1783,10 @@ }, "decimals": 1, "noValue": "No errors", + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "pps" }, "overrides": [ @@ -1836,6 +1926,7 @@ "fieldConfig": { "defaults": { "color": { + "fixedColor": "#5794f2", "mode": "continuous-BlYlRd" }, "custom": { @@ -1848,6 +1939,10 @@ "showPoints": "never" }, "decimals": 1, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, "unit": "celsius" }, "overrides": [ @@ -1906,12 +2001,20 @@ "description": "DC voltage sensor.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "fillOpacity": 0, "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] } }, "overrides": [ @@ -1970,12 +2073,20 @@ "description": "Power used in Watts.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "fillOpacity": 0, "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] } }, "overrides": [ @@ -2034,12 +2145,20 @@ "description": "Receive/Transmit power.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "fillOpacity": 0, "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] } }, "overrides": [ @@ -2098,12 +2217,20 @@ "description": "Fan speed.\n", "fieldConfig": { "defaults": { + "color": { + "fixedColor": "#5794f2", + "mode": "palette-classic" + }, "custom": { "fillOpacity": 0, "gradientMode": "opacity", "lineInterpolation": "smooth", "lineWidth": 2, "showPoints": "never" + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] } }, "overrides": [ diff --git 
a/snmp-observ-lib/prometheus_rules_out/prometheus_alerts.yaml b/snmp-observ-lib/prometheus_rules_out/prometheus_alerts.yaml index 81fe07e44..cb1f95603 100644 --- a/snmp-observ-lib/prometheus_rules_out/prometheus_alerts.yaml +++ b/snmp-observ-lib/prometheus_rules_out/prometheus_alerts.yaml @@ -83,10 +83,22 @@ groups: severity: info - alert: SNMPInterfaceDown annotations: - description: "Network interface {{$labels.ifName}} ({{$labels.ifAlias}}) on {{$labels.instance}} is down. \nOnly interfaces with ifAdminStatus = `up` and matching `ifAlias=~\".*(?i:(uplink|internet|WAN)|ISP).*\"` are being checked.\n" + description: "A critical network interface {{$labels.ifName}} ({{$labels.ifAlias}}) on {{$labels.instance}} is down. \nNote that only interfaces with ifAdminStatus = `up` and matching `ifAlias=~\".*(?i:(critical)).*\"` are being checked and considered critical.\n" + summary: Critical network interface is down on SNMP device. + expr: | + (ifOperStatus{ifAlias=~".*(?i:(critical)).*"}) == 2 + # only alert if interface is adminatratively up: + and (ifAdminStatus{}) != 2 + for: 5m + keep_firing_for: 5m + labels: + severity: critical + - alert: SNMPInterfaceDown + annotations: + description: "Network interface {{$labels.ifName}} ({{$labels.ifAlias}}) on {{$labels.instance}} is down. \nOnly interfaces with ifAdminStatus = `up` and matching `ifAlias=~\".*(?i:(uplink|internet|WAN|ISP)).*\"` are being checked.\n" summary: Network interface is down on SNMP device. expr: | - (ifOperStatus{ifAlias=~".*(?i:(uplink|internet|WAN)|ISP).*"}) == 2 + (ifOperStatus{ifAlias=~".*(?i:(uplink|internet|WAN|ISP)).*"}) == 2 # only alert if interface is adminatratively up: and (ifAdminStatus{}) != 2 for: 5m @@ -134,7 +146,6 @@ groups: summary: Network interface is flapping. expr: | changes(ifOperStatus{}[5m]) > 5 - for: "0" keep_firing_for: 5m labels: severity: warning