Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions bundle/manifests/observability-operator.clusterserviceversion.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,13 @@ spec:
- get
- list
- watch
- apiGroups:
- config.openshift.io
resources:
- clusteroperators
verbs:
- get
- list
- apiGroups:
- config.openshift.io
resources:
Expand Down Expand Up @@ -438,6 +445,13 @@ spec:
- get
- list
- watch
- apiGroups:
- kubevirt.io
resources:
- kubevirts
verbs:
- get
- list
- apiGroups:
- loki.grafana.com
resources:
Expand All @@ -454,6 +468,13 @@ spec:
verbs:
- get
- list
- apiGroups:
- machineconfiguration.openshift.io
resources:
- machineconfigpools
verbs:
- get
- list
- apiGroups:
- monitoring.coreos.com
resourceNames:
Expand Down
21 changes: 21 additions & 0 deletions deploy/operator/observability-operator-cluster-role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,13 @@ rules:
- get
- list
- watch
- apiGroups:
- config.openshift.io
resources:
- clusteroperators
verbs:
- get
- list
- apiGroups:
- config.openshift.io
resources:
Expand Down Expand Up @@ -121,6 +128,13 @@ rules:
- get
- list
- watch
- apiGroups:
- kubevirt.io
resources:
- kubevirts
verbs:
- get
- list
- apiGroups:
- loki.grafana.com
resources:
Expand All @@ -137,6 +151,13 @@ rules:
verbs:
- get
- list
- apiGroups:
- machineconfiguration.openshift.io
resources:
- machineconfigpools
verbs:
- get
- list
- apiGroups:
- monitoring.coreos.com
resourceNames:
Expand Down
68 changes: 60 additions & 8 deletions pkg/controllers/uiplugin/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,15 +121,30 @@ func pluginComponentReconcilers(plugin *uiv1alpha1.UIPlugin, pluginInfo UIPlugin
monitoringConfig.Incidents != nil &&
monitoringConfig.Incidents.Enabled &&
pluginInfo.HealthAnalyzerImage != ""

healthAnalyzerEnabled := monitoringConfig != nil &&
monitoringConfig.ClusterHealthAnalyzer != nil &&
monitoringConfig.ClusterHealthAnalyzer.Enabled &&
pluginInfo.HealthAnalyzerImage != ""

deployHealthAnalyzer := incidentsEnabled || healthAnalyzerEnabled

components = append(components,
reconciler.NewOptionalUpdater(componentsHealthClusterRole("components-health-view"), plugin, deployHealthAnalyzer),
reconciler.NewOptionalUpdater(newClusterRoleBinding(namespace, serviceAccountName, "components-health-view", plugin.Name+"-"+"components-health-view"), plugin, deployHealthAnalyzer),
reconciler.NewOptionalUpdater(newComponentHealthConfig(namespace), plugin, deployHealthAnalyzer),
)

components = append(components,
reconciler.NewOptionalUpdater(newClusterRoleBinding(namespace, serviceAccountName, monitorClusterroleName, plugin.Name+"-"+monitorClusterroleName), plugin, incidentsEnabled),
reconciler.NewOptionalUpdater(newClusterRoleBinding(namespace, serviceAccountName, "system:auth-delegator", serviceAccountName+"-system-auth-delegator"), plugin, incidentsEnabled),
reconciler.NewOptionalUpdater(newAlertManagerViewRoleBinding(serviceAccountName, namespace), plugin, incidentsEnabled),
reconciler.NewOptionalUpdater(newHealthAnalyzerPrometheusRole(namespace), plugin, incidentsEnabled),
reconciler.NewOptionalUpdater(newHealthAnalyzerPrometheusRoleBinding(namespace), plugin, incidentsEnabled),
reconciler.NewOptionalUpdater(newHealthAnalyzerService(namespace), plugin, incidentsEnabled),
reconciler.NewOptionalUpdater(newHealthAnalyzerDeployment(namespace, serviceAccountName, pluginInfo), plugin, incidentsEnabled),
reconciler.NewOptionalUpdater(newHealthAnalyzerServiceMonitor(namespace), plugin, incidentsEnabled),
reconciler.NewOptionalUpdater(newClusterRoleBinding(namespace, serviceAccountName, "cluster-monitoring-view", plugin.Name+"cluster-monitoring-view"), plugin, deployHealthAnalyzer),
reconciler.NewOptionalUpdater(newClusterRoleBinding(namespace, serviceAccountName, "system:auth-delegator", serviceAccountName+"-system-auth-delegator"), plugin, deployHealthAnalyzer),
reconciler.NewOptionalUpdater(newAlertManagerViewRoleBinding(serviceAccountName, namespace), plugin, deployHealthAnalyzer),
reconciler.NewOptionalUpdater(newHealthAnalyzerPrometheusRole(namespace), plugin, deployHealthAnalyzer),
reconciler.NewOptionalUpdater(newHealthAnalyzerPrometheusRoleBinding(namespace), plugin, deployHealthAnalyzer),
reconciler.NewOptionalUpdater(newHealthAnalyzerService(namespace), plugin, deployHealthAnalyzer),
reconciler.NewOptionalUpdater(newHealthAnalyzerDeployment(namespace, serviceAccountName, pluginInfo.HealthAnalyzerImage),
plugin, deployHealthAnalyzer),
reconciler.NewOptionalUpdater(newHealthAnalyzerServiceMonitor(namespace), plugin, deployHealthAnalyzer),
)

persesServiceAccountName := "perses" + serviceAccountSuffix
Expand Down Expand Up @@ -436,6 +451,43 @@ func newService(info UIPluginInfo, namespace string) *corev1.Service {
}
}

// componentsHealthClusterRole builds a ClusterRole called name that grants
// get and list access to the resources read during the component health
// evaluation: core nodes, clusteroperators, machineconfigpools and kubevirts.
func componentsHealthClusterRole(name string) *rbacv1.ClusterRole {
	// All rules are read-only and differ only in API group and resource.
	readOnly := func(group, resource string) rbacv1.PolicyRule {
		return rbacv1.PolicyRule{
			APIGroups: []string{group},
			Resources: []string{resource},
			Verbs:     []string{"get", "list"},
		}
	}

	role := &rbacv1.ClusterRole{}
	role.TypeMeta = metav1.TypeMeta{
		Kind:       "ClusterRole",
		APIVersion: rbacv1.SchemeGroupVersion.String(),
	}
	role.ObjectMeta = metav1.ObjectMeta{Name: name}
	role.Rules = []rbacv1.PolicyRule{
		readOnly("", "nodes"), // "" is the core API group
		readOnly("config.openshift.io", "clusteroperators"),
		readOnly("machineconfiguration.openshift.io", "machineconfigpools"),
		readOnly("kubevirt.io", "kubevirts"),
	}

	return role
}

func newKorrel8rDeployment(name string, namespace string, info UIPluginInfo) *appsv1.Deployment {
volumes := []corev1.Volume{
{
Expand Down
47 changes: 47 additions & 0 deletions pkg/controllers/uiplugin/config/health-analyzer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Default definition of the component tree used to evaluate component health
# by the cluster-health-analyzer.
# Each named component either groups further components under `children`, or
# selects Kubernetes objects (`objects`) and/or alerts (`alerts`) by label.
components:
- name: control-plane
children:
# Nodes carrying the control-plane role label, plus machine config pools
# carrying the master pool label.
# NOTE(review): an empty value list presumably matches on label presence
# alone - confirm against the cluster-health-analyzer selector semantics.
- name: nodes
objects:
- resource: nodes
selectors:
- matchLabels:
node-role.kubernetes.io/control-plane: []
- resource: machineconfigpools
group: machineconfiguration.openshift.io
selectors:
- matchLabels:
pools.operator.machineconfiguration.openshift.io/master: []
- name: capacity
children:
# CPU capacity alerts, selected by alert name.
- name: cpu
alerts:
selectors:
- matchLabels:
alertname: ["KubeCPUOvercommit","HighOverallControlPlaneCPU", "ExtremelyHighIndividualControlPlaneCPU"]
# Memory capacity alerts, selected by alert name.
- name: memory
alerts:
selectors:
- matchLabels:
alertname: ["HighOverallControlPlaneMemory", "ExtremelyHighIndividualControlPlaneMemory", "SystemMemoryExceedsReservation"]
- name: operators
children:
# Alerts whose namespace label is one of the etcd namespaces.
- name: etcd
alerts:
selectors:
- matchLabels:
namespace: ["openshift-etcd","openshift-etcd-operator"]
- name: addons
children:
# KubeVirt: alerts selected by the kubernetes_operator_part_of label or by
# the openshift-cnv namespace, plus the kubevirts objects themselves.
- name: kubevirt
alerts:
selectors:
- matchLabels:
kubernetes_operator_part_of: ["kubevirt"]
- matchLabels:
namespace: ["openshift-cnv"]
objects:
- group: kubevirt.io
resource: kubevirts
3 changes: 3 additions & 0 deletions pkg/controllers/uiplugin/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,9 @@ const (
//+kubebuilder:rbac:groups=authentication.k8s.io,resources=tokenreviews,verbs=create
//+kubebuilder:rbac:groups=authorization.k8s.io,resources=subjectaccessreviews,verbs=create
//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors,verbs=get;create;update;patch;delete
//+kubebuilder:rbac:groups=config.openshift.io,resources=clusteroperators,verbs=get;list
//+kubebuilder:rbac:groups=machineconfiguration.openshift.io,resources=machineconfigpools,verbs=get;list
//+kubebuilder:rbac:groups=kubevirt.io,resources=kubevirts,verbs=get;list

const finalizerName = "uiplugin.observability.openshift.io/finalizer"

Expand Down
55 changes: 51 additions & 4 deletions pkg/controllers/uiplugin/health_analyzer.go
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
package uiplugin

import (
_ "embed"

monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/utils/ptr"
)

const (
name = "health-analyzer"
volumeMountName = name + "-tls"
name = "health-analyzer"
volumeMountName = name + "-tls"
componentConfigVolumeName = "components-health-config"
componentConfigMapName = "components-config"
)

//go:embed config/health-analyzer.yaml
var componentHealthConfig string

func newHealthAnalyzerPrometheusRole(namespace string) *rbacv1.Role {
role := &rbacv1.Role{
TypeMeta: metav1.TypeMeta{
Expand Down Expand Up @@ -94,7 +102,10 @@ func newHealthAnalyzerService(namespace string) *corev1.Service {
return service
}

func newHealthAnalyzerDeployment(namespace string, serviceAccountName string, pluginInfo UIPluginInfo) *appsv1.Deployment {
func newHealthAnalyzerDeployment(namespace string,
serviceAccountName string,
image string) *appsv1.Deployment {

deploy := &appsv1.Deployment{
TypeMeta: metav1.TypeMeta{
APIVersion: appsv1.SchemeGroupVersion.String(),
Expand Down Expand Up @@ -122,7 +133,7 @@ func newHealthAnalyzerDeployment(namespace string, serviceAccountName string, pl
Containers: []corev1.Container{
{
Name: name,
Image: pluginInfo.HealthAnalyzerImage,
Image: image,
ImagePullPolicy: corev1.PullAlways,
Args: []string{
"serve",
Expand Down Expand Up @@ -162,6 +173,11 @@ func newHealthAnalyzerDeployment(namespace string, serviceAccountName string, pl
Name: volumeMountName,
ReadOnly: true,
},
{
Name: componentConfigVolumeName,
MountPath: "/etc/config",
ReadOnly: true,
},
},
},
},
Expand All @@ -174,6 +190,16 @@ func newHealthAnalyzerDeployment(namespace string, serviceAccountName string, pl
},
},
},
{
Name: componentConfigVolumeName,
VolumeSource: corev1.VolumeSource{
ConfigMap: &corev1.ConfigMapVolumeSource{
LocalObjectReference: corev1.LocalObjectReference{
Name: componentConfigMapName,
},
},
},
},
},
},
},
Expand Down Expand Up @@ -218,3 +244,24 @@ func newHealthAnalyzerServiceMonitor(namespace string) *monv1.ServiceMonitor {

return serviceMonitor
}

// newComponentHealthConfig returns the ConfigMap, created in namespace, that
// defines the components whose health is evaluated. The embedded component
// definition is stored under the "components.yaml" key.
func newComponentHealthConfig(namespace string) *v1.ConfigMap {
	meta := metav1.ObjectMeta{
		Name:      componentConfigMapName,
		Namespace: namespace,
		Labels:    componentLabels("monitoring"),
	}

	return &v1.ConfigMap{
		TypeMeta: metav1.TypeMeta{
			Kind:       "ConfigMap",
			APIVersion: v1.SchemeGroupVersion.String(),
		},
		ObjectMeta: meta,
		Data:       map[string]string{"components.yaml": componentHealthConfig},
	}
}
20 changes: 19 additions & 1 deletion pkg/controllers/uiplugin/monitoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,19 @@ func validatePersesConfig(config *uiv1alpha1.MonitoringConfig) bool {
return config.Perses != nil && config.Perses.Enabled
}

// validateHealthanalyzerConfig reports whether the cluster health analyzer is
// enabled in config and clusterVersion meets the minimum supported version
// (4.19.0). A version that cannot be parsed as semver does not meet the minimum.
func validateHealthanalyzerConfig(config *uiv1alpha1.MonitoringConfig, clusterVersion string) bool {
	analyzer := config.ClusterHealthAnalyzer
	if analyzer == nil || !analyzer.Enabled {
		return false
	}

	// The semver package only accepts versions that carry a leading "v".
	version := clusterVersion
	if !strings.HasPrefix(version, "v") {
		version = "v" + version
	}

	// Both sides carry a "-0" pre-release suffix; presumably this lets
	// pre-release builds of the minimum version pass as well.
	candidate := fmt.Sprintf("%s-0", semver.Canonical(version))

	return semver.Compare(candidate, "v4.19.0-0") >= 0
}

func validateIncidentsConfig(config *uiv1alpha1.MonitoringConfig, clusterVersion string) bool {
enabled := config.Incidents != nil && config.Incidents.Enabled

Expand Down Expand Up @@ -191,8 +204,9 @@ func createMonitoringPluginInfo(plugin *uiv1alpha1.UIPlugin, namespace, name, im
isValidAcmConfig := validateACMConfig(config)
isValidPersesConfig := validatePersesConfig(config)
isValidIncidentsConfig := validateIncidentsConfig(config, clusterVersion)
isValidHealthAnalyzerConfig := validateHealthanalyzerConfig(config, clusterVersion)

atLeastOneValidConfig := isValidAcmConfig || isValidPersesConfig || isValidIncidentsConfig
atLeastOneValidConfig := isValidAcmConfig || isValidPersesConfig || isValidIncidentsConfig || isValidHealthAnalyzerConfig

pluginInfo := getBasePluginInfo(namespace, name, image)
if !atLeastOneValidConfig {
Expand All @@ -215,6 +229,10 @@ func createMonitoringPluginInfo(plugin *uiv1alpha1.UIPlugin, namespace, name, im
pluginInfo.HealthAnalyzerImage = healthAnalyzerImage
features = append(features, "incidents")
}
if isValidHealthAnalyzerConfig {
pluginInfo.HealthAnalyzerImage = healthAnalyzerImage
features = append(features, "cluster-health-analyzer")
}
addFeatureFlags(pluginInfo, features)

return pluginInfo, nil
Expand Down