Skip to content

Commit 8412fe0

Browse files
committed
MON-4033: Add OpenShiftStateMetricsConfig
This commit adds configuration options for the openshift-state-metrics agent in config/v1alpha1.
The new struct supports:
- nodeSelector: node scheduling constraints
- resources: compute resource requests and limits
- tolerations: pod tolerations
- topologySpreadConstraints: pod distribution across topology domains
Signed-off-by: Daniel Mellado <dmellado@fedoraproject.org>
1 parent 8713f3f commit 8412fe0

8 files changed

Lines changed: 1525 additions & 6 deletions

File tree

config/v1alpha1/tests/clustermonitorings.config.openshift.io/ClusterMonitoringConfig.yaml

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,3 +351,241 @@ tests:
351351
- name: "example.com/quux"
352352
request: "1"
353353
expectedError: 'spec.metricsServerConfig.resources: Too many: 11: must have at most 10 items'
354+
- name: Should be able to create OpenShiftStateMetricsConfig with valid resources
355+
initial: |
356+
apiVersion: config.openshift.io/v1alpha1
357+
kind: ClusterMonitoring
358+
spec:
359+
openShiftStateMetricsConfig:
360+
resources:
361+
- name: "cpu"
362+
request: "50m"
363+
limit: "200m"
364+
- name: "memory"
365+
request: "50Mi"
366+
limit: "200Mi"
367+
expected: |
368+
apiVersion: config.openshift.io/v1alpha1
369+
kind: ClusterMonitoring
370+
spec:
371+
openShiftStateMetricsConfig:
372+
resources:
373+
- name: "cpu"
374+
request: "50m"
375+
limit: "200m"
376+
- name: "memory"
377+
request: "50Mi"
378+
limit: "200Mi"
379+
- name: Should be able to create OpenShiftStateMetricsConfig with valid topologySpreadConstraints
380+
initial: |
381+
apiVersion: config.openshift.io/v1alpha1
382+
kind: ClusterMonitoring
383+
spec:
384+
openShiftStateMetricsConfig:
385+
topologySpreadConstraints:
386+
- maxSkew: 1
387+
topologyKey: topology.kubernetes.io/zone
388+
whenUnsatisfiable: DoNotSchedule
389+
labelSelector:
390+
matchLabels:
391+
app: openshift-state-metrics
392+
- maxSkew: 2
393+
topologyKey: kubernetes.io/hostname
394+
whenUnsatisfiable: ScheduleAnyway
395+
labelSelector:
396+
matchLabels:
397+
app: openshift-state-metrics
398+
expected: |
399+
apiVersion: config.openshift.io/v1alpha1
400+
kind: ClusterMonitoring
401+
spec:
402+
openShiftStateMetricsConfig:
403+
topologySpreadConstraints:
404+
- maxSkew: 1
405+
topologyKey: topology.kubernetes.io/zone
406+
whenUnsatisfiable: DoNotSchedule
407+
labelSelector:
408+
matchLabels:
409+
app: openshift-state-metrics
410+
- maxSkew: 2
411+
topologyKey: kubernetes.io/hostname
412+
whenUnsatisfiable: ScheduleAnyway
413+
labelSelector:
414+
matchLabels:
415+
app: openshift-state-metrics
416+
- name: Should be able to create OpenShiftStateMetricsConfig with all fields
417+
initial: |
418+
apiVersion: config.openshift.io/v1alpha1
419+
kind: ClusterMonitoring
420+
spec:
421+
openShiftStateMetricsConfig:
422+
nodeSelector:
423+
kubernetes.io/os: linux
424+
resources:
425+
- name: "cpu"
426+
request: "50m"
427+
limit: "200m"
428+
tolerations:
429+
- key: "node-role.kubernetes.io/infra"
430+
operator: "Exists"
431+
effect: "NoSchedule"
432+
topologySpreadConstraints:
433+
- maxSkew: 1
434+
topologyKey: topology.kubernetes.io/zone
435+
whenUnsatisfiable: DoNotSchedule
436+
labelSelector:
437+
matchLabels:
438+
app: openshift-state-metrics
439+
expected: |
440+
apiVersion: config.openshift.io/v1alpha1
441+
kind: ClusterMonitoring
442+
spec:
443+
openShiftStateMetricsConfig:
444+
nodeSelector:
445+
kubernetes.io/os: linux
446+
resources:
447+
- name: "cpu"
448+
request: "50m"
449+
limit: "200m"
450+
tolerations:
451+
- key: "node-role.kubernetes.io/infra"
452+
operator: "Exists"
453+
effect: "NoSchedule"
454+
topologySpreadConstraints:
455+
- maxSkew: 1
456+
topologyKey: topology.kubernetes.io/zone
457+
whenUnsatisfiable: DoNotSchedule
458+
labelSelector:
459+
matchLabels:
460+
app: openshift-state-metrics
461+
- name: Should reject OpenShiftStateMetricsConfig with empty object
462+
initial: |
463+
apiVersion: config.openshift.io/v1alpha1
464+
kind: ClusterMonitoring
465+
spec:
466+
openShiftStateMetricsConfig: {}
467+
expectedError: 'spec.openShiftStateMetricsConfig: Invalid value: 0: spec.openShiftStateMetricsConfig in body should have at least 1 properties'
468+
- name: Should reject OpenShiftStateMetricsConfig with too many resources
469+
initial: |
470+
apiVersion: config.openshift.io/v1alpha1
471+
kind: ClusterMonitoring
472+
spec:
473+
openShiftStateMetricsConfig:
474+
resources:
475+
- name: "cpu"
476+
request: "100m"
477+
- name: "memory"
478+
request: "64Mi"
479+
- name: "hugepages-2Mi"
480+
request: "32Mi"
481+
- name: "hugepages-1Gi"
482+
request: "1Gi"
483+
- name: "ephemeral-storage"
484+
request: "1Gi"
485+
- name: "nvidia.com/gpu"
486+
request: "1"
487+
- name: "example.com/foo"
488+
request: "1"
489+
- name: "example.com/bar"
490+
request: "1"
491+
- name: "example.com/baz"
492+
request: "1"
493+
- name: "example.com/qux"
494+
request: "1"
495+
- name: "example.com/quux"
496+
request: "1"
497+
expectedError: 'spec.openShiftStateMetricsConfig.resources: Too many: 11: must have at most 10 items'
498+
- name: Should reject OpenShiftStateMetricsConfig with limit less than request
499+
initial: |
500+
apiVersion: config.openshift.io/v1alpha1
501+
kind: ClusterMonitoring
502+
spec:
503+
openShiftStateMetricsConfig:
504+
resources:
505+
- name: "cpu"
506+
request: "500m"
507+
limit: "200m"
508+
expectedError: 'spec.openShiftStateMetricsConfig.resources[0]: Invalid value: "object": limit must be greater than or equal to request'
509+
- name: Should reject OpenShiftStateMetricsConfig with too many topologySpreadConstraints
510+
initial: |
511+
apiVersion: config.openshift.io/v1alpha1
512+
kind: ClusterMonitoring
513+
spec:
514+
openShiftStateMetricsConfig:
515+
topologySpreadConstraints:
516+
- maxSkew: 1
517+
topologyKey: "zone1"
518+
whenUnsatisfiable: DoNotSchedule
519+
- maxSkew: 1
520+
topologyKey: "zone2"
521+
whenUnsatisfiable: DoNotSchedule
522+
- maxSkew: 1
523+
topologyKey: "zone3"
524+
whenUnsatisfiable: DoNotSchedule
525+
- maxSkew: 1
526+
topologyKey: "zone4"
527+
whenUnsatisfiable: DoNotSchedule
528+
- maxSkew: 1
529+
topologyKey: "zone5"
530+
whenUnsatisfiable: DoNotSchedule
531+
- maxSkew: 1
532+
topologyKey: "zone6"
533+
whenUnsatisfiable: DoNotSchedule
534+
- maxSkew: 1
535+
topologyKey: "zone7"
536+
whenUnsatisfiable: DoNotSchedule
537+
- maxSkew: 1
538+
topologyKey: "zone8"
539+
whenUnsatisfiable: DoNotSchedule
540+
- maxSkew: 1
541+
topologyKey: "zone9"
542+
whenUnsatisfiable: DoNotSchedule
543+
- maxSkew: 1
544+
topologyKey: "zone10"
545+
whenUnsatisfiable: DoNotSchedule
546+
- maxSkew: 1
547+
topologyKey: "zone11"
548+
whenUnsatisfiable: DoNotSchedule
549+
expectedError: 'spec.openShiftStateMetricsConfig.topologySpreadConstraints: Too many: 11: must have at most 10 items'
550+
- name: Should reject OpenShiftStateMetricsConfig with empty resources array
551+
initial: |
552+
apiVersion: config.openshift.io/v1alpha1
553+
kind: ClusterMonitoring
554+
spec:
555+
openShiftStateMetricsConfig:
556+
resources: []
557+
expectedError: 'spec.openShiftStateMetricsConfig.resources: Invalid value: 0: spec.openShiftStateMetricsConfig.resources in body should have at least 1 items'
558+
- name: Should reject OpenShiftStateMetricsConfig with empty topologySpreadConstraints array
559+
initial: |
560+
apiVersion: config.openshift.io/v1alpha1
561+
kind: ClusterMonitoring
562+
spec:
563+
openShiftStateMetricsConfig:
564+
topologySpreadConstraints: []
565+
expectedError: 'spec.openShiftStateMetricsConfig.topologySpreadConstraints: Invalid value: 0: spec.openShiftStateMetricsConfig.topologySpreadConstraints in body should have at least 1 items'
566+
- name: Should reject OpenShiftStateMetricsConfig with duplicate resource names
567+
initial: |
568+
apiVersion: config.openshift.io/v1alpha1
569+
kind: ClusterMonitoring
570+
spec:
571+
openShiftStateMetricsConfig:
572+
resources:
573+
- name: "cpu"
574+
request: "100m"
575+
- name: "cpu"
576+
request: "200m"
577+
expectedError: 'spec.openShiftStateMetricsConfig.resources[1]: Duplicate value: map[string]interface {}{"name":"cpu"}'
578+
- name: Should reject OpenShiftStateMetricsConfig with duplicate topologySpreadConstraints
579+
initial: |
580+
apiVersion: config.openshift.io/v1alpha1
581+
kind: ClusterMonitoring
582+
spec:
583+
openShiftStateMetricsConfig:
584+
topologySpreadConstraints:
585+
- maxSkew: 1
586+
topologyKey: topology.kubernetes.io/zone
587+
whenUnsatisfiable: DoNotSchedule
588+
- maxSkew: 2
589+
topologyKey: topology.kubernetes.io/zone
590+
whenUnsatisfiable: DoNotSchedule
591+
expectedError: "Duplicate value"

config/v1alpha1/types_cluster_monitoring.go

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,12 @@ type ClusterMonitoringSpec struct {
9999
// When omitted, this means no opinion and the platform is left to choose a reasonable default, which is subject to change over time.
100100
// +optional
101101
PrometheusOperatorConfig PrometheusOperatorConfig `json:"prometheusOperatorConfig,omitempty,omitzero"`
102+
// openShiftStateMetricsConfig is an optional field that can be used to configure the openshift-state-metrics
103+
// agent that runs in the openshift-monitoring namespace. The openshift-state-metrics agent generates metrics
104+
// about the state of OpenShift-specific Kubernetes objects, such as routes, builds, and deployment configs.
105+
// When omitted, this means no opinion and the platform is left to choose a reasonable default, which is subject to change over time.
106+
// +optional
107+
OpenShiftStateMetricsConfig OpenShiftStateMetricsConfig `json:"openShiftStateMetricsConfig,omitempty,omitzero"`
102108
}
103109

104110
// UserDefinedMonitoring config for user-defined projects.
@@ -507,6 +513,79 @@ type PrometheusOperatorConfig struct {
507513
TopologySpreadConstraints []v1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
508514
}
509515

516+
// OpenShiftStateMetricsConfig provides configuration options for the openshift-state-metrics agent
517+
// that runs in the `openshift-monitoring` namespace. The openshift-state-metrics agent generates
518+
// metrics about the state of OpenShift-specific Kubernetes objects, such as routes, builds, and deployment configs.
519+
// +kubebuilder:validation:MinProperties=1
520+
type OpenShiftStateMetricsConfig struct {
521+
// nodeSelector defines the nodes on which the Pods are scheduled.
522+
// nodeSelector is optional.
523+
//
524+
// When omitted, this means the user has no opinion and the platform is left
525+
// to choose reasonable defaults. These defaults are subject to change over time.
526+
// The current default value is `kubernetes.io/os: linux`.
527+
// When specified, nodeSelector must contain at least 1 entry and must not contain more than 10 entries.
528+
// +optional
529+
// +kubebuilder:validation:MinProperties=1
530+
// +kubebuilder:validation:MaxProperties=10
531+
NodeSelector map[string]string `json:"nodeSelector,omitempty"`
532+
// resources defines the compute resource requests and limits for the openshift-state-metrics container.
533+
// This includes CPU, memory and HugePages constraints to help control scheduling and resource usage.
534+
// When not specified, defaults are used by the platform. Requests cannot exceed limits.
535+
// This field is optional.
536+
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
537+
// This is a simplified API that maps to Kubernetes ResourceRequirements.
538+
// The current default values are:
539+
// resources:
540+
// - name: cpu
541+
// request: 1m
542+
// limit: null
543+
// - name: memory
544+
// request: 32Mi
545+
// limit: null
546+
// Maximum length for this list is 10.
547+
// Minimum length for this list is 1.
548+
// Each resource name must be unique within this list.
549+
// +optional
550+
// +listType=map
551+
// +listMapKey=name
552+
// +kubebuilder:validation:MaxItems=10
553+
// +kubebuilder:validation:MinItems=1
554+
Resources []ContainerResource `json:"resources,omitempty"`
555+
// tolerations defines tolerations for the pods.
556+
// tolerations is optional.
557+
//
558+
// When omitted, this means the user has no opinion and the platform is left
559+
// to choose reasonable defaults. These defaults are subject to change over time.
560+
// Defaults are empty/unset.
561+
// Maximum length for this list is 10.
562+
// Minimum length for this list is 1.
563+
// +kubebuilder:validation:MaxItems=10
564+
// +kubebuilder:validation:MinItems=1
565+
// +listType=atomic
566+
// +optional
567+
Tolerations []v1.Toleration `json:"tolerations,omitempty"`
568+
// topologySpreadConstraints defines rules for how openshift-state-metrics Pods should be distributed
569+
// across topology domains such as zones, nodes, or other user-defined labels.
570+
// topologySpreadConstraints is optional.
571+
// This helps improve high availability and resource efficiency by avoiding placing
572+
// too many replicas in the same failure domain.
573+
//
574+
// When omitted, this means no opinion and the platform is left to choose a default, which is subject to change over time.
575+
// This field maps directly to the `topologySpreadConstraints` field in the Pod spec.
576+
// The default is an empty list.
577+
// Maximum length for this list is 10.
578+
// Minimum length for this list is 1.
579+
// Entries must have unique topologyKey and whenUnsatisfiable pairs.
580+
// +kubebuilder:validation:MaxItems=10
581+
// +kubebuilder:validation:MinItems=1
582+
// +listType=map
583+
// +listMapKey=topologyKey
584+
// +listMapKey=whenUnsatisfiable
585+
// +optional
586+
TopologySpreadConstraints []v1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"`
587+
}
588+
510589
// AuditProfile defines the audit log level for the Metrics Server.
511590
// +kubebuilder:validation:Enum=None;Metadata;Request;RequestResponse
512591
type AuditProfile string

0 commit comments

Comments
 (0)