Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
bc0bc83
new pod count dashboard per workload
Harish416 Dec 2, 2025
5ee3fc0
chore(deps): bump github.com/cloudflare/pint in /scripts (#1149)
dependabot[bot] Dec 10, 2025
51c56e4
chore(deps): bump softprops/action-gh-release from 2.4.2 to 2.5.0 (#1…
dependabot[bot] Dec 10, 2025
463ae36
chore(deps): bump actions/stale from 10.1.0 to 10.1.1 (#1152)
dependabot[bot] Dec 10, 2025
f21f439
chore(deps): bump actions/checkout from 6.0.0 to 6.0.1 (#1153)
dependabot[bot] Dec 10, 2025
eb08887
chore(deps): bump github.com/prometheus/prometheus in /scripts (#1154)
dependabot[bot] Dec 10, 2025
f88adf1
fix: cluster variable in resources/cluster (#1147)
sleepyfoodie Dec 10, 2025
27ea985
Enable option to allow all values for workload dashboard (#1148)
stefanandres Dec 12, 2025
6b13b6c
fix: multi-cluster cluster index and renames not modified when cluste…
antonincms Dec 12, 2025
778d11f
chore: add job label to kubePodNotReady alert (#1158)
sleepyfoodie Dec 12, 2025
8c515f2
Archive this fork
skl Dec 12, 2025
4655e3d
Revert "Archive this fork"
skl Dec 12, 2025
fcd996c
chore(deps): bump github.com/expr-lang/expr in /scripts (#1161)
dependabot[bot] Dec 17, 2025
2362368
chore(deps): bump actions/download-artifact from 6.0.0 to 7.0.0 (#1160)
dependabot[bot] Dec 17, 2025
8e9b81f
chore(deps): bump actions/upload-artifact from 5.0.0 to 6.0.0 (#1159)
dependabot[bot] Dec 17, 2025
30293bf
new pod count dashboard per workload
Harish416 Dec 2, 2025
94bf255
Update to the review comments
Harish416 Dec 22, 2025
5519cbe
Merge branch 'additon-of-new-pod-count-dashboard' of https://github.c…
Harish416 Dec 22, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/check-with-upstream.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
runs-on: ubuntu-latest
name: Check if KSM selectors are present on applicable metrics.
steps:
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
persist-credentials: false
- run: make --always-make check-selectors-ksm
8 changes: 4 additions & 4 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
runs-on: ubuntu-latest
name: Build Go Tools
steps:
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
persist-credentials: false
- uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6.1.0
Expand All @@ -22,7 +22,7 @@ jobs:
mkdir -p tmp/bin
cd scripts && go list -e -mod=mod -tags tools -f '{{ range .Imports }}{{ printf "%s\n" .}}{{end}}' ./ | xargs -tI % go build -mod=mod -o ../tmp/bin %
- name: Upload built tools
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: go-tools
path: tmp/bin/
Expand Down Expand Up @@ -57,11 +57,11 @@ jobs:
run: make test

steps:
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
with:
persist-credentials: false
- name: Download built tools
uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
name: go-tools
path: tmp/bin/
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1

- name: Set up Go
uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6.1.0
Expand All @@ -29,7 +29,7 @@ jobs:
zip -r kubernetes-mixin-${{ github.ref_name }}.zip dashboards_out prometheus_alerts.yaml prometheus_rules.yaml

- name: Upload artifact
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
with:
name: kubernetes-mixin-${{ github.ref_name }}.zip
path: ./
Expand All @@ -40,12 +40,12 @@ jobs:
needs: build
steps:
- name: Download artifact
uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
with:
name: kubernetes-mixin-${{ github.ref_name }}.zip

- name: Create release on kubernetes-mixin
uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2.4.2
uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2.5.0
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/stale.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@5f858e3efba33a5ca4407a664cc011ad407f2008 # v10.1.0
- uses: actions/stale@997185467fa4f803885201cee163a9f38240193d # v10.1.1
with:
days-before-stale: 30
days-before-close: 7
Expand Down
4 changes: 2 additions & 2 deletions alerts/apps_alerts.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ local utils = import '../lib/utils.libsonnet';
// label exists for 2 values. This avoids "many-to-many matching
// not allowed" errors when joining with kube_pod_status_phase.
expr: |||
sum by (namespace, pod, %(clusterLabel)s) (
max by(namespace, pod, %(clusterLabel)s) (
sum by (namespace, pod, job, %(clusterLabel)s) (
max by(namespace, pod, job, %(clusterLabel)s) (
kube_pod_status_phase{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, phase=~"Pending|Unknown"}
) * on(namespace, pod, %(clusterLabel)s) group_left(owner_kind) topk by(namespace, pod, %(clusterLabel)s) (
1, max by(namespace, pod, owner_kind, %(clusterLabel)s) (kube_pod_owner{owner_kind!="Job"})
Expand Down
8 changes: 4 additions & 4 deletions dashboards/resources/multi-cluster.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -146,15 +146,15 @@ local var = g.dashboard.variable;
'Time 3': 2,
'Time 4': 3,
'Time 5': 4,
cluster: 5,
[$._config.clusterLabel]: 5,
'Value #A': 6,
'Value #B': 7,
'Value #C': 8,
'Value #D': 9,
'Value #E': 10,
},
renameByName: {
cluster: 'Cluster',
[$._config.clusterLabel]: 'Cluster',
'Value #A': 'CPU Usage',
'Value #B': 'CPU Requests',
'Value #C': 'CPU Requests %',
Expand Down Expand Up @@ -245,15 +245,15 @@ local var = g.dashboard.variable;
'Time 3': 2,
'Time 4': 3,
'Time 5': 4,
cluster: 5,
[$._config.clusterLabel]: 5,
'Value #A': 6,
'Value #B': 7,
'Value #C': 8,
'Value #D': 9,
'Value #E': 10,
},
renameByName: {
cluster: 'Cluster',
[$._config.clusterLabel]: 'Cluster',
'Value #A': 'Memory Usage',
'Value #B': 'Memory Requests',
'Value #C': 'Memory Requests %',
Expand Down
2 changes: 1 addition & 1 deletion dashboards/resources/variables/cluster.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@ local common = import './common.libsonnet';
local datasource = common.datasource(config);
{
datasource: datasource,
cluster: common.cluster(config, datasource, 'up{%(cadvisorSelector)s}'),
cluster: common.cluster(config, datasource),
},
}
10 changes: 3 additions & 7 deletions dashboards/resources/variables/common.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,16 @@ local var = g.dashboard.variable;
},
},

cluster(config, datasourceVar, selectorTemplate)::
cluster(config, datasourceVar)::
var.query.new('cluster')
+ var.query.withDatasourceFromVariable(datasourceVar)
+ var.query.queryTypes.withLabelValues(
config.clusterLabel,
selectorTemplate % config,
'up{%(kubeStateMetricsSelector)s}' % config,
)
+ var.query.generalOptions.withLabel('cluster')
+ var.query.refresh.onTime()
+ (
if config.showMultiCluster
then var.query.generalOptions.showOnDashboard.withLabelAndValue()
else var.query.generalOptions.showOnDashboard.withNothing()
)
+ var.query.generalOptions.showOnDashboard.withLabelAndValue()
+ var.query.withSort(type='alphabetical'),

namespace(config, datasourceVar)::
Expand Down
2 changes: 1 addition & 1 deletion dashboards/resources/variables/namespace.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ local common = import './common.libsonnet';
{
namespace(config)::
local datasource = common.datasource(config);
local clusterVar = common.cluster(config, datasource, 'up{%(kubeStateMetricsSelector)s}');
local clusterVar = common.cluster(config, datasource);
{
datasource: datasource,
cluster: clusterVar,
Expand Down
2 changes: 1 addition & 1 deletion dashboards/resources/variables/pod.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ local common = import './common.libsonnet';
{
pod(config)::
local datasource = common.datasource(config);
local clusterVar = common.cluster(config, datasource, 'up{%(kubeStateMetricsSelector)s}');
local clusterVar = common.cluster(config, datasource);
{
datasource: datasource,
cluster: clusterVar,
Expand Down
46 changes: 43 additions & 3 deletions dashboards/resources/workload.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ local var = g.dashboard.variable;

workload:
var.query.new('workload')
+ var.query.selectionOptions.withIncludeAll()
+ var.query.withDatasourceFromVariable(self.datasource)
+ var.query.queryTypes.withLabelValues(
'workload',
Expand All @@ -105,15 +106,15 @@ local var = g.dashboard.variable;
sum(
max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate5m{%(clusterLabel)s="$cluster", namespace="$namespace"})
* on(%(clusterLabel)s, namespace, pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type=~"$type"}
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload=~"$workload", workload_type=~"$type"}
) by (pod)
||| % $._config;

local cpuRequestsQuery = |||
sum(
max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(kube_pod_container_resource_requests{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", namespace="$namespace", resource="cpu"})
* on(%(clusterLabel)s, namespace, pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type=~"$type"}
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload=~"$workload", workload_type=~"$type"}
) by (pod)
||| % $._config;

Expand All @@ -123,7 +124,7 @@ local var = g.dashboard.variable;
sum(
max by (%(clusterLabel)s, %(namespaceLabel)s, pod, container)(container_memory_working_set_bytes{%(clusterLabel)s="$cluster", namespace="$namespace", container!="", image!=""})
* on(%(clusterLabel)s, namespace, pod)
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload="$workload", workload_type=~"$type"}
group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{%(clusterLabel)s="$cluster", namespace="$namespace", workload=~"$workload", workload_type=~"$type"}
) by (pod)
||| % $._config;
local memRequestsQuery = std.strReplace(cpuRequestsQuery, 'cpu', 'memory');
Expand Down Expand Up @@ -163,6 +164,45 @@ local var = g.dashboard.variable;
];

local panels = [
// "Pod Count per Workload" panel: plots desired vs. available/ready replica
// counts reported by kube-state-metrics for the selected workload(s), with
// one pair of targets per workload kind (Deployment, StatefulSet, DaemonSet).
//
// Two deliberate choices in every query below:
//   * The workload matcher is a regex match (=~), not an exact match. The
//     `workload` dashboard variable allows an "All" selection, in which case
//     "$workload" expands to a multi-value pattern that an exact matcher
//     would never match. This mirrors the workload=~"$workload" matchers
//     used by the CPU/memory queries of this dashboard.
//   * %(kubeStateMetricsSelector)s is included so the series are scoped to
//     the configured kube-state-metrics job, as for the other
//     kube-state-metrics query in this file
//     (kube_pod_container_resource_requests).
tsPanel.new('Pod Count per Workload')
+ tsPanel.gridPos.withW(24)
+ tsPanel.queryOptions.withTargets([
  // Deployment: replicas requested in the spec.
  prometheus.new(
    '${datasource}',
    'sum by (%(clusterLabel)s, %(namespaceLabel)s, deployment) (kube_deployment_spec_replicas{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace", deployment=~"$workload"})' % $._config
  ) + prometheus.withLegendFormat('{{deployment}} desired')
  + prometheus.withRefId('D'),

  // Deployment: replicas currently reported as available.
  prometheus.new(
    '${datasource}',
    'sum by (%(clusterLabel)s, %(namespaceLabel)s, deployment) (kube_deployment_status_replicas_available{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace", deployment=~"$workload"})' % $._config
  ) + prometheus.withLegendFormat('{{deployment}} available')
  + prometheus.withRefId('E'),

  // StatefulSet: desired replicas.
  prometheus.new(
    '${datasource}',
    'sum by (%(clusterLabel)s, %(namespaceLabel)s, statefulset) (kube_statefulset_replicas{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace", statefulset=~"$workload"})' % $._config
  ) + prometheus.withLegendFormat('{{statefulset}} desired')
  + prometheus.withRefId('F'),

  // StatefulSet: the "available" legend is backed by the *ready* replica
  // metric (kube_statefulset_status_replicas_ready).
  prometheus.new(
    '${datasource}',
    'sum by (%(clusterLabel)s, %(namespaceLabel)s, statefulset) (kube_statefulset_status_replicas_ready{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace", statefulset=~"$workload"})' % $._config
  ) + prometheus.withLegendFormat('{{statefulset}} available')
  + prometheus.withRefId('G'),

  // DaemonSet: desired number of scheduled pods.
  prometheus.new(
    '${datasource}',
    'sum by (%(clusterLabel)s, %(namespaceLabel)s, daemonset) (kube_daemonset_status_desired_number_scheduled{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace", daemonset=~"$workload"})' % $._config
  ) + prometheus.withLegendFormat('{{daemonset}} desired')
  + prometheus.withRefId('H'),

  // DaemonSet: pods currently reported as available.
  prometheus.new(
    '${datasource}',
    'sum by (%(clusterLabel)s, %(namespaceLabel)s, daemonset) (kube_daemonset_status_number_available{%(kubeStateMetricsSelector)s, %(clusterLabel)s="$cluster", %(namespaceLabel)s="$namespace", daemonset=~"$workload"})' % $._config
  ) + prometheus.withLegendFormat('{{daemonset}} available')
  + prometheus.withRefId('I'),
]),
tsPanel.new('CPU Usage')
+ tsPanel.gridPos.withW(24)
+ tsPanel.queryOptions.withTargets([
Expand Down
Loading