From 6698c0de65a141b857f74cd05e086d4bc5a7ac29 Mon Sep 17 00:00:00 2001 From: Simon Gerber Date: Thu, 7 May 2026 14:09:36 +0200 Subject: [PATCH] Introduce new metric `syn_lieutenant_cluster_dynamic_facts` This metric enables us to easily consume Lieutenant cluster dynamic facts through Prometheus queries (e.g. for central dashboards). We currently don't filter out any dynamic facts, and accept that the metric may have sizeable label values. Generally speaking, Prometheus should be able to handle bigger label values gracefully, but if the larger label values (currently primarily `status.facts.kubernetesVersion`) turn out to be an issue, we can always drop them with a scrape relabel config (in the `ServiceMonitor` for each Lieutenant instance). --- metrics/cluster_info_collector.go | 31 +++++++++++++++++++++----- metrics/cluster_info_collector_test.go | 12 +++++++++- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/metrics/cluster_info_collector.go b/metrics/cluster_info_collector.go index 289b2647..2c429718 100644 --- a/metrics/cluster_info_collector.go +++ b/metrics/cluster_info_collector.go @@ -31,7 +31,14 @@ var clusterFactsDesc = prometheus.NewDesc( nil, ) -// commodore build info has dynamic labels +var clusterDynFactsDesc = prometheus.NewDesc( + "syn_lieutenant_cluster_dynamic_facts", + "Lieutenant cluster dynamic facts.", + []string{"cluster", "tenant", "display_name"}, + nil, +) + +// cluster facts has dynamic labels func newClusterFactsDesc(lbls ...string) *prometheus.Desc { return prometheus.NewDesc( "syn_lieutenant_cluster_facts", @@ -41,6 +48,16 @@ func newClusterFactsDesc(lbls ...string) *prometheus.Desc { ) } +// cluster dynamic facts has dynamic labels +func newClusterDynFactsDesc(lbls ...string) *prometheus.Desc { + return prometheus.NewDesc( + "syn_lieutenant_cluster_dynamic_facts", + "Lieutenant cluster dynamic facts. 
Keys are normalized to be valid Prometheus labels.", + lbls, + nil, + ) +} + // ClusterInfoCollector is a Prometheus collector that collects cluster info metrics. type ClusterInfoCollector struct { Client client.Client @@ -73,9 +90,13 @@ func (m *ClusterInfoCollector) Collect(ch chan<- prometheus.Metric) { cl.Name, cl.Spec.TenantRef.Name, cl.Spec.DisplayName, ) - if err := clusterFacts(cl, ch); err != nil { + if err := clusterFacts(newClusterFactsDesc, cl, cl.Spec.Facts, ch); err != nil { log.Log.Info("failed to collect cluster facts", "error", err) } + + if err := clusterFacts(newClusterDynFactsDesc, cl, cl.Status.Facts, ch); err != nil { + log.Log.Info("failed to collect cluster dynamic facts", "error", err) + } } } @@ -85,8 +106,8 @@ func (m *ClusterInfoCollector) Collect(ch chan<- prometheus.Metric) { // If a key is empty it is replaced with "_empty". // If a key is in the protected list after normalizing it is prefixed with "orig_". // If a key is a duplicate after normalizing it is suffixed with "_<n>" where n is the number of duplicates. 
-func clusterFacts(cl synv1alpha1.Cluster, ch chan<- prometheus.Metric) error { - rks, vs := pairs(cl.Spec.Facts) +func clusterFacts(descfn func(lbls ...string) *prometheus.Desc, cl synv1alpha1.Cluster, facts synv1alpha1.Facts, ch chan<- prometheus.Metric) error { + rks, vs := pairs(facts) ks := make([]string, len(rks)) for i, k := range rks { ks[i] = normalizeLabelKey(k, []string{"cluster", "tenant"}, "fact_") @@ -100,7 +121,7 @@ func clusterFacts(cl synv1alpha1.Cluster, ch chan<- prometheus.Metric) error { } m, err := prometheus.NewConstMetric( - newClusterFactsDesc(append([]string{"cluster", "tenant"}, ks...)...), + descfn(append([]string{"cluster", "tenant"}, ks...)...), prometheus.GaugeValue, 1, append([]string{cl.Name, cl.Spec.TenantRef.Name}, vs...)..., diff --git a/metrics/cluster_info_collector_test.go b/metrics/cluster_info_collector_test.go index ea85e296..61f425e0 100644 --- a/metrics/cluster_info_collector_test.go +++ b/metrics/cluster_info_collector_test.go @@ -19,6 +19,7 @@ func Test_ClusterInfoCollector(t *testing.T) { expectedMetricNames := []string{ "syn_lieutenant_cluster_info", "syn_lieutenant_cluster_facts", + "syn_lieutenant_cluster_dynamic_facts", } c := prepareClient(t, @@ -49,6 +50,11 @@ func Test_ClusterInfoCollector(t *testing.T) { "tenant": "value", }, }, + Status: synv1alpha1.ClusterStatus{ + Facts: map[string]string{ + "test": "value", + }, + }, }, ) @@ -58,7 +64,11 @@ func Test_ClusterInfoCollector(t *testing.T) { Namespace: namespace, } - metrics := `# HELP syn_lieutenant_cluster_facts Lieutenant cluster facts. Keys are normalized to be valid Prometheus labels. + metrics := `# HELP syn_lieutenant_cluster_dynamic_facts Lieutenant cluster dynamic facts. Keys are normalized to be valid Prometheus labels. 
+# TYPE syn_lieutenant_cluster_dynamic_facts gauge +syn_lieutenant_cluster_dynamic_facts{cluster="c-empty",tenant=""} 1 +syn_lieutenant_cluster_dynamic_facts{cluster="c2",tenant="t2",test="value"} 1 +# HELP syn_lieutenant_cluster_facts Lieutenant cluster facts. Keys are normalized to be valid Prometheus labels. # TYPE syn_lieutenant_cluster_facts gauge syn_lieutenant_cluster_facts{cluster="c-empty",tenant=""} 1 syn_lieutenant_cluster_facts{cluster="c2",fact__key="value",fact__key_duplicate_after_normalize="value",fact__key_duplicate_after_normalize_1="value",fact__key_duplicate_after_normalize_2="value",key="value",key_with847_____invalid_chars="value",orig_cluster="value",orig_tenant="value",tenant="t2"} 1