From ed1fe3783517df0abdc2490e62f42851313833e8 Mon Sep 17 00:00:00 2001
From: Catherine Fang
Date: Tue, 9 Jun 2026 15:56:48 +0000
Subject: [PATCH] Remove duplicated ksm metrics when self-deployed ksm clusterpodmonitor has duplicated entries as the gke managed one

---
 pkg/operator/collection.go      | 120 ++++++++++
 pkg/operator/collection_test.go | 387 ++++++++++++++++++++++++++++++++
 2 files changed, 507 insertions(+)

diff --git a/pkg/operator/collection.go b/pkg/operator/collection.go
index d6f0ee063f..e82b48efa0 100644
--- a/pkg/operator/collection.go
+++ b/pkg/operator/collection.go
@@ -409,6 +409,9 @@ func (r *collectionReconciler) makeCollectorConfig(ctx context.Context, spec *mo
 		return nil, nil, fmt.Errorf("failed to list ClusterPodMonitorings: %w", err)
 	}
 
+	// Keep self-deployed kube-state-metrics monitors from re-collecting what the GKE-managed one scrapes.
+	clusterPodMons.Items = deduplicateKSM(clusterPodMons.Items)
+
 	// Mark status updates in batch with single timestamp.
 	for _, cmon := range clusterPodMons.Items {
 		cond := &monitoringv1.MonitoringCondition{
@@ -505,3 +508,120 @@ func makeRemoteWriteConfig(exports []monitoringv1.ExportSpec) ([]*promconfig.Rem
 	}
 	return exportConfigs, nil
 }
+
+// deduplicateKSM resolves duplicate metric collection between the GKE-managed
+// kube-state-metrics ClusterPodMonitoring and customer self-deployed ones.
+//
+// Each endpoint of a self-deployed resource is compared by scrape path against the
+// endpoints of the managed resource:
+//   - no matching managed endpoint: the customer endpoint is kept unmodified;
+//   - the managed endpoint has no keep rule on __name__: it is treated as collecting
+//     everything and the customer endpoint is removed;
+//   - the managed endpoint has exactly one such keep rule: a drop rule with the same
+//     regex is appended to the customer endpoint;
+//   - the managed endpoint has several such keep rules: the customer endpoint is kept
+//     unmodified. Chained keep rules retain only metrics matching all of them, so a
+//     drop rule per keep rule would also discard metrics the managed endpoint does
+//     not collect.
+//
+// The input slice is never modified. It is returned as is when nothing has to change;
+// otherwise a copy is returned in which the self-deployed resources are replaced by
+// adjusted deep copies.
+func deduplicateKSM(mons []monitoringv1.ClusterPodMonitoring) []monitoringv1.ClusterPodMonitoring {
+	var (
+		gkeKSM      *monitoringv1.ClusterPodMonitoring
+		custIndices []int
+	)
+	for i := range mons {
+		switch {
+		case isSystemKSMResource(&mons[i]):
+			// If several resources carry the managed label, the last one wins.
+			gkeKSM = &mons[i]
+		case isSelfDeployedKSMResource(&mons[i]):
+			custIndices = append(custIndices, i)
+		}
+	}
+	if gkeKSM == nil || len(gkeKSM.Spec.Endpoints) == 0 || len(custIndices) == 0 {
+		return mons
+	}
+
+	result := make([]monitoringv1.ClusterPodMonitoring, len(mons))
+	copy(result, mons)
+
+	for _, idx := range custIndices {
+		custCopy := mons[idx].DeepCopy()
+		// Stays nil when every endpoint is removed.
+		var newEndpoints []monitoringv1.ScrapeEndpoint
+		for _, custEp := range custCopy.Spec.Endpoints {
+			gkeEp := matchingKSMEndpoint(gkeKSM.Spec.Endpoints, custEp)
+			if gkeEp == nil {
+				newEndpoints = append(newEndpoints, custEp)
+				continue
+			}
+
+			keepRules := ksmNameKeepRules(gkeEp.MetricRelabeling)
+			if len(keepRules) == 0 {
+				// The managed endpoint collects everything: remove this endpoint.
+				continue
+			}
+			if len(keepRules) == 1 {
+				// The managed endpoint has an allowlist: drop those metrics here.
+				custEp.MetricRelabeling = append(custEp.MetricRelabeling, monitoringv1.RelabelingRule{
+					Action:       "drop",
+					SourceLabels: []string{"__name__"},
+					Regex:        keepRules[0].Regex,
+				})
+			}
+			newEndpoints = append(newEndpoints, custEp)
+		}
+		custCopy.Spec.Endpoints = newEndpoints
+		result[idx] = *custCopy
+	}
+
+	return result
+}
+
+// matchingKSMEndpoint returns the first endpoint in eps that scrapes the same path as
+// target, or nil if there is none.
+func matchingKSMEndpoint(eps []monitoringv1.ScrapeEndpoint, target monitoringv1.ScrapeEndpoint) *monitoringv1.ScrapeEndpoint {
+	for i := range eps {
+		if pathsMatch(target, eps[i]) {
+			return &eps[i]
+		}
+	}
+	return nil
+}
+
+// ksmNameKeepRules returns the rules that keep metrics by matching the __name__ label alone.
+func ksmNameKeepRules(rules []monitoringv1.RelabelingRule) []monitoringv1.RelabelingRule {
+	var keep []monitoringv1.RelabelingRule
+	for _, r := range rules {
+		if r.Action == "keep" && len(r.SourceLabels) == 1 && r.SourceLabels[0] == "__name__" {
+			keep = append(keep, r)
+		}
+	}
+	return keep
+}
+
+// pathsMatch reports whether both endpoints scrape the same HTTP path. An empty path
+// is treated as "/metrics".
+func pathsMatch(ep1, ep2 monitoringv1.ScrapeEndpoint) bool {
+	pathOf := func(ep monitoringv1.ScrapeEndpoint) string {
+		if ep.Path == "" {
+			return "/metrics"
+		}
+		return ep.Path
+	}
+	return pathOf(ep1) == pathOf(ep2)
+}
+
+// isSystemKSMResource reports whether mon is labeled as the GKE-managed kube-state-metrics resource.
+func isSystemKSMResource(mon *monitoringv1.ClusterPodMonitoring) bool {
+	// Indexing a nil map yields the zero value, so no nil check is needed.
+	return mon.Labels["app.kubernetes.io/name"] == "gke-managed-kube-state-metrics"
+}
+
+// isSelfDeployedKSMResource reports whether mon is labeled as a self-deployed kube-state-metrics resource.
+func isSelfDeployedKSMResource(mon *monitoringv1.ClusterPodMonitoring) bool {
+	return mon.Labels["app.kubernetes.io/name"] == "self-deployed-kube-state-metrics"
+}
diff --git a/pkg/operator/collection_test.go b/pkg/operator/collection_test.go
index a3c039eb7f..5a9310d4bf 100644
--- a/pkg/operator/collection_test.go
+++ b/pkg/operator/collection_test.go
@@ -548,3 +548,390 @@ func TestSetConfigMapData(t *testing.T) {
 		}
 	}
 }
+
+func TestDeduplicateKSM(t *testing.T) {
+	systemLabels := map[string]string{
+		"app.kubernetes.io/part-of": "google-cloud-managed-prometheus",
+		"app.kubernetes.io/name":    "gke-managed-kube-state-metrics",
+	}
+	customerLabels := map[string]string{
+		"app.kubernetes.io/name": "self-deployed-kube-state-metrics",
+	}
+
+	testCases := []struct {
+		desc     string
+		input    []monitoringv1.ClusterPodMonitoring
+		expected []monitoringv1.ClusterPodMonitoring
+	}{
+		{
+			desc:     "empty input",
+			input:    nil,
+			expected: nil,
+		},
+		{
+			desc: "only GKE KSM",
+			input: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+			},
+			expected: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+			},
+		},
+		{
+			desc: "GKE collects everything (Customer is disabled)",
+			input: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "self-deployed-kube-state-metrics", Labels: customerLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+			},
+			expected: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "self-deployed-kube-state-metrics", Labels: customerLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector:  metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: nil, // Disabled because GKE collects everything
+					},
+				},
+			},
+		},
+		{
+			desc: "GKE has allowlist (Customer gets drop rules)",
+			input: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{
+								Port: intstr.FromInt(8080),
+								Path: "/metrics",
+								MetricRelabeling: []monitoringv1.RelabelingRule{
+									{Action: "keep", SourceLabels: []string{"__name__"}, Regex: "^(kube_pod_info)$"},
+								},
+							},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "self-deployed-kube-state-metrics", Labels: customerLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+			},
+			expected: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{
+								Port: intstr.FromInt(8080),
+								Path: "/metrics",
+								MetricRelabeling: []monitoringv1.RelabelingRule{
+									{Action: "keep", SourceLabels: []string{"__name__"}, Regex: "^(kube_pod_info)$"},
+								},
+							},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "self-deployed-kube-state-metrics", Labels: customerLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{
+								Port: intstr.FromInt(8080),
+								Path: "/metrics",
+								MetricRelabeling: []monitoringv1.RelabelingRule{
+									{Action: "drop", SourceLabels: []string{"__name__"}, Regex: "^(kube_pod_info)$"}, // Drop rule added
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+		{
+			desc: "untouched customer CR (mismatched labels)",
+			input: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "other-ksm", Labels: map[string]string{"app.kubernetes.io/name": "other-ksm"}}, // label mismatch
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+			},
+			expected: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "other-ksm", Labels: map[string]string{"app.kubernetes.io/name": "other-ksm"}},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+			},
+		},
+		{
+			desc: "multiple customer KSMs match",
+			input: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{
+								Port: intstr.FromInt(8080),
+								Path: "/metrics",
+								MetricRelabeling: []monitoringv1.RelabelingRule{
+									{Action: "keep", SourceLabels: []string{"__name__"}, Regex: "^(kube_pod_info)$"},
+								},
+							},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "self-deployed-kube-state-metrics-1", Labels: customerLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "self-deployed-kube-state-metrics-2", Labels: customerLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+			},
+			expected: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{
+								Port: intstr.FromInt(8080),
+								Path: "/metrics",
+								MetricRelabeling: []monitoringv1.RelabelingRule{
+									{Action: "keep", SourceLabels: []string{"__name__"}, Regex: "^(kube_pod_info)$"},
+								},
+							},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "self-deployed-kube-state-metrics-1", Labels: customerLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{
+								Port: intstr.FromInt(8080),
+								Path: "/metrics",
+								MetricRelabeling: []monitoringv1.RelabelingRule{
+									{Action: "drop", SourceLabels: []string{"__name__"}, Regex: "^(kube_pod_info)$"},
+								},
+							},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "self-deployed-kube-state-metrics-2", Labels: customerLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{
+								Port: intstr.FromInt(8080),
+								Path: "/metrics",
+								MetricRelabeling: []monitoringv1.RelabelingRule{
+									{Action: "drop", SourceLabels: []string{"__name__"}, Regex: "^(kube_pod_info)$"},
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+		{
+			desc: "GKE collects everything, Customer has multiple endpoints (only KSM endpoint is disabled)",
+			input: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "self-deployed-kube-state-metrics", Labels: customerLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+							{Port: intstr.FromInt(8081), Path: "/status"},
+						},
+					},
+				},
+			},
+			expected: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "self-deployed-kube-state-metrics", Labels: customerLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8081), Path: "/status"},
+						},
+					},
+				},
+			},
+		},
+		{
+			desc: "GKE has multiple keep rules (Customer is left unmodified)",
+			input: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{
+								Port: intstr.FromInt(8080),
+								Path: "/metrics",
+								MetricRelabeling: []monitoringv1.RelabelingRule{
+									{Action: "keep", SourceLabels: []string{"__name__"}, Regex: "^(kube_pod_.*)$"},
+									{Action: "keep", SourceLabels: []string{"__name__"}, Regex: "^(.*_info)$"},
+								},
+							},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "self-deployed-kube-state-metrics", Labels: customerLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+			},
+			expected: []monitoringv1.ClusterPodMonitoring{
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "kube-state-metrics", Labels: systemLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{
+								Port: intstr.FromInt(8080),
+								Path: "/metrics",
+								MetricRelabeling: []monitoringv1.RelabelingRule{
+									{Action: "keep", SourceLabels: []string{"__name__"}, Regex: "^(kube_pod_.*)$"},
+									{Action: "keep", SourceLabels: []string{"__name__"}, Regex: "^(.*_info)$"},
+								},
+							},
+						},
+					},
+				},
+				{
+					ObjectMeta: metav1.ObjectMeta{Name: "self-deployed-kube-state-metrics", Labels: customerLabels},
+					Spec: monitoringv1.ClusterPodMonitoringSpec{
+						Selector: metav1.LabelSelector{MatchLabels: map[string]string{"app.kubernetes.io/name": "gke-managed-kube-state-metrics"}},
+						Endpoints: []monitoringv1.ScrapeEndpoint{
+							{Port: intstr.FromInt(8080), Path: "/metrics"},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.desc, func(t *testing.T) {
+			got := deduplicateKSM(tc.input)
+			if diff := cmp.Diff(tc.expected, got); diff != "" {
+				t.Errorf("deduplicateKSM() mismatch (-want +got):\n%s", diff)
+			}
+		})
+	}
+}