diff --git a/CHANGELOG.md b/CHANGELOG.md index 24952cc68..40fe81375 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,133 @@ # Note: This CHANGELOG is only for the changes in insights operator. - Please see OpenShift release notes for official changes\n + Please see OpenShift release notes for official changes\n +## 4.21 + +### Data Enhancement +- [#1141](https://github.com/openshift/insights-operator/pull/1141) set extractor as default container + +### Feature +- [#1187](https://github.com/openshift/insights-operator/pull/1187) add permissions to list nodefeatures +- [#1173](https://github.com/openshift/insights-operator/pull/1173) nodefeature gathering +- [#1157](https://github.com/openshift/insights-operator/pull/1157) Decouple CRD storage specs + +### Bugfix +- [#1192](https://github.com/openshift/insights-operator/pull/1192) add missing nil checks for DataGather CR +- [#1178](https://github.com/openshift/insights-operator/pull/1178) update DataGather status after job failure +- [#1168](https://github.com/openshift/insights-operator/pull/1168) [bugfix] The archive's records may include files whose names are out of bounds +- [#1167](https://github.com/openshift/insights-operator/pull/1167) Add filtering to add other possible pod status to QEMU gatherer +- [#1164](https://github.com/openshift/insights-operator/pull/1164) QEMU logs are not gathered if there are pending status virt-launcher pods +- [#1161](https://github.com/openshift/insights-operator/pull/1161) retry mechanism for network error +- [#1154](https://github.com/openshift/insights-operator/pull/1154) The Gather Job did not check the current configuration before running +- [#1151](https://github.com/openshift/insights-operator/pull/1151) operator is not set as degraded when failed +- [#1131](https://github.com/openshift/insights-operator/pull/1131) update DataGather condition when gathering job fails +- [#1137](https://github.com/openshift/insights-operator/pull/1137) Use correct feature gate for InsightsDataGather +- [#1134](https://github.com/openshift/insights-operator/pull/1134) Update error message for missing SCA certificates +- [#1123](https://github.com/openshift/insights-operator/pull/1123) add missing permissions for replicasets and events + +### Others +- [#1149](https://github.com/openshift/insights-operator/pull/1149) Refactor network anonymization +- [#1180](https://github.com/openshift/insights-operator/pull/1180) add AGENTS.md +- [#1169](https://github.com/openshift/insights-operator/pull/1169) Refactor wasDataProcessed function +- [#1176](https://github.com/openshift/insights-operator/pull/1176) update go version to 1.24.0 +- [#1166](https://github.com/openshift/insights-operator/pull/1166) remove trailing commas from ConfigMap example +- [#1162](https://github.com/openshift/insights-operator/pull/1162) add CLAUDE.md +- [#1156](https://github.com/openshift/insights-operator/pull/1156) remove CRD manifests +- [#1133](https://github.com/openshift/insights-operator/pull/1133) add katarina to OWNERS +- [#1127](https://github.com/openshift/insights-operator/pull/1127) remove me from the OWNERS list + +### Misc +- [#1136](https://github.com/openshift/insights-operator/pull/1136) Updating ose-insights-operator-container image to be consistent with ART for 4.21 +- [#1140](https://github.com/openshift/insights-operator/pull/1140) Run missing make update +- [#1128](https://github.com/openshift/insights-operator/pull/1128) Rerun missed `make update` after read-only filesystem security context added" + +## 4.20 + +### Data Enhancement +- [#1110](https://github.com/openshift/insights-operator/pull/1110) virt launcher logs gatherer + +### Feature +- [#1093](https://github.com/openshift/insights-operator/pull/1093) update code to use v1alpha2 api version +- [#1090](https://github.com/openshift/insights-operator/pull/1090) add Progressing condition +- [#1084](https://github.com/openshift/insights-operator/pull/1084) update DataGather condition reasons +- [#1082](https://github.com/openshift/insights-operator/pull/1082) ensure the etc-entitlement-pki secret exists +- [#1083](https://github.com/openshift/insights-operator/pull/1083) move HTTP status code from condition.Reason to condition.Message +- [#1078](https://github.com/openshift/insights-operator/pull/1078) add disabledWithApi condition + +### Bugfix +- [#1122](https://github.com/openshift/insights-operator/pull/1122) Rerun missed `make update` after read-only filesystem security context added +- [#1111](https://github.com/openshift/insights-operator/pull/1111) incorrect anonymization of domains +- [#1106](https://github.com/openshift/insights-operator/pull/1106) add permissions to gather clusterrole +- [#1109](https://github.com/openshift/insights-operator/pull/1109) Allow on-demand gathering during initial periodic run +- [#1099](https://github.com/openshift/insights-operator/pull/1099) update the manifest crds to be applied during DevPreview and Custom featuresets +- [#1102](https://github.com/openshift/insights-operator/pull/1102) bump golang-ci version +- [#1094](https://github.com/openshift/insights-operator/pull/1094) copy proxy envs from IO pod to gathering pods +- [#1085](https://github.com/openshift/insights-operator/pull/1085) Enhance insights-runtime-extractor errors +- [#1086](https://github.com/openshift/insights-operator/pull/1086) Report an error when the insights-runtime-extractor is … + +### Others +- [#1097](https://github.com/openshift/insights-operator/pull/1097) update on-demand gathering docs +- [#1096](https://github.com/openshift/insights-operator/pull/1096) update new SCA endpoint in docs +- [#1092](https://github.com/openshift/insights-operator/pull/1092) update github.com/openshift/api and openshift/client-go to latest version + +### Misc +- [#1104](https://github.com/openshift/insights-operator/pull/1104) Add missing readonlyRootFilesystem +- [#1101](https://github.com/openshift/insights-operator/pull/1101) Add readonlyRootFilesystem +- [#1095](https://github.com/openshift/insights-operator/pull/1095) Updating ose-insights-operator-container image to be consistent with ART for 4.20 + +## 4.19 + +### Data Enhancement +- [#1022](https://github.com/openshift/insights-operator/pull/1022) LokiStack gatherer +- [#1020](https://github.com/openshift/insights-operator/pull/1020) Add conditional gatherer validation test cases + +### Feature +- [#1081](https://github.com/openshift/insights-operator/pull/1081) Move the Insights runtime extractor feature to GA +- [#1079](https://github.com/openshift/insights-operator/pull/1079) remove the upgradeable condition +- [#1070](https://github.com/openshift/insights-operator/pull/1070) Introduce a new config option for storing archives to persistent volume +- [#1066](https://github.com/openshift/insights-operator/pull/1066) Enable Insight Operator entitlements for multi arch clusters + +### Bugfix +- [#1077](https://github.com/openshift/insights-operator/pull/1077) allow running enabled functions from disabled gatherer +- [#1073](https://github.com/openshift/insights-operator/pull/1073) Fix bad IPv6 address +- [#1072](https://github.com/openshift/insights-operator/pull/1072) update the duration validation pattern according to the OpenShift API +- [#1067](https://github.com/openshift/insights-operator/pull/1067) Add a liveness probe to the extractor container +- [#1055](https://github.com/openshift/insights-operator/pull/1055) Ignore previous status when disabling alerts +- [#1050](https://github.com/openshift/insights-operator/pull/1050) use joinHostPort to fix IPv6 +- [#1046](https://github.com/openshift/insights-operator/pull/1046) avoid possible Go panic when searching existing conditions +- [#1032](https://github.com/openshift/insights-operator/pull/1032) insightsoperator.operator.openshift.io resource is cre… + +### Others +- [#1080](https://github.com/openshift/insights-operator/pull/1080) add BaiyangZhou to OWNERS file +- [#1076](https://github.com/openshift/insights-operator/pull/1076) refactor gather commands +- [#1075](https://github.com/openshift/insights-operator/pull/1075) add gatherer name validation to gendocs +- [#1071](https://github.com/openshift/insights-operator/pull/1071) Upgrade Golang to 1.23 +- [#1053](https://github.com/openshift/insights-operator/pull/1053) bump up OpenShift versions +- [#1052](https://github.com/openshift/insights-operator/pull/1052) minor test update +- [#1045](https://github.com/openshift/insights-operator/pull/1045) golanci-lint version update & corresponding fixes +- [#1043](https://github.com/openshift/insights-operator/pull/1043) Avoid to send the same error repeated N times +- [#1039](https://github.com/openshift/insights-operator/pull/1039) sort the JSON schema validation errors +- [#1033](https://github.com/openshift/insights-operator/pull/1033) set required-scc for dataGahtering jobs & pods + +### Misc +- [#1065](https://github.com/openshift/insights-operator/pull/1065) Add Ondrej user to OWNERS file +- [#1044](https://github.com/openshift/insights-operator/pull/1044) Updating ose-insights-operator-container image to be consistent with ART for 4.19 + +## 4.18 + +### Data Enhancement +- [#949](https://github.com/openshift/insights-operator/pull/949) Gather Workload Runtime Info From Containers +- [#1025](https://github.com/openshift/insights-operator/pull/1025) Remove hardcoded log gatherers + +### Bugfix +- [#1031](https://github.com/openshift/insights-operator/pull/1031) insights runtime extractor daemonset +- [#1028](https://github.com/openshift/insights-operator/pull/1028) Fix links to the OpenStack related CRs in the gathered-data doc + +### Others +- [#1027](https://github.com/openshift/insights-operator/pull/1027) container logs gathering - use the default config in case of validati… + +### Misc +- [#1029](https://github.com/openshift/insights-operator/pull/1029) Revert "Gather Workload Runtime Info From Containers" + ## 4.17 ### Data Enhancement diff --git a/docs/gathered-data.md b/docs/gathered-data.md index 69009731d..033033995 100644 --- a/docs/gathered-data.md +++ b/docs/gathered-data.md @@ -1555,6 +1555,36 @@ None None +## OpenTelemetryCollectors + +collects up to 5 `opentelemetrycollectors.opentelemetry.io` custom resources +installed in the cluster. + +Only the "service" subsection of each resource's spec.config is retained; receivers, +exporters, and other pipeline configuration are omitted to avoid collecting sensitive data. + +### API Reference +- https://github.com/open-telemetry/opentelemetry-operator/blob/main/apis/v1beta1/opentelemetrycollector_types.go + +### Sample data +- [docs/insights-archive-sample/config/opentelemetry/example-namespace/otel.json](./insights-archive-sample/config/opentelemetry/example-namespace/otel.json) + +### Location in archive +- `config/opentelemetry/{namespace}/{name}.json` + +### Config ID +`clusterconfig/opentelemetry_collectors` + +### Released version +- 4.22 + +### Backported versions +TBD + +### Changes +None + + ## OpenshiftLogging Collects `clusterlogging.logging.openshift.io` resources. diff --git a/docs/insights-archive-sample/config/opentelemetry/example-namespace/otel.json b/docs/insights-archive-sample/config/opentelemetry/example-namespace/otel.json new file mode 100644 index 000000000..1b36f7ae0 --- /dev/null +++ b/docs/insights-archive-sample/config/opentelemetry/example-namespace/otel.json @@ -0,0 +1,98 @@ +{ + "apiVersion": "opentelemetry.io/v1alpha1", + "kind": "OpenTelemetryCollector", + "metadata": { + "creationTimestamp": "2026-03-04T19:11:20Z", + "generation": 1, + "name": "otel", + "namespace": "openshift-opentelemetry-operator", + "resourceVersion": "45208", + "uid": "3a3d8225-d567-45a1-b938-1f34b69d43a4" + }, + "spec": { + "config": { + "service": { + "extensions": [ + "health_check", + "pprof", + "zpages" + ], + "pipelines": { + "logs": { + "exporters": [ + "otlp_grpc" + ], + "receivers": [ + "otlp" + ] + }, + "metrics": { + "exporters": [ + "otlp_grpc" + ], + "receivers": [ + "otlp" + ] + }, + "traces": { + "exporters": [ + "otlp_grpc" + ], + "receivers": [ + "otlp" + ] + } + }, + "telemetry": { + "metrics": { + "readers": [ + { + "pull": { + "exporter": { + "prometheus": { + "host": "0.0.0.0", + "port": 8888 + } + } + } + } + ] + } + } + } + }, + "deploymentUpdateStrategy": {}, + "ingress": { + "route": {} + }, + "managementState": "managed", + "mode": "deployment", + "observability": { + "metrics": {} + }, + "replicas": 1, + "resources": {}, + "targetAllocator": { + "allocationStrategy": "consistent-hashing", + "filterStrategy": "relabel-config", + "observability": { + "metrics": {} + }, + "prometheusCR": { + "scrapeInterval": "30s" + }, + "resources": {} + }, + "updateStrategy": {}, + "upgradeStrategy": "automatic" + }, + "status": { + "image": "registry.redhat.io/rhosdt/opentelemetry-collector-rhel9@sha256:dbdcd0c58e63a312fd7677a5940d07f2269bb5ab24e72b9dc97f102267109a78", + "scale": { + "replicas": 1, + "selector": "app.kubernetes.io/component=opentelemetry-collector,app.kubernetes.io/instance=openshift-opentelemetry-operator.otel,app.kubernetes.io/managed-by=opentelemetry-operator,app.kubernetes.io/name=otel-collector,app.kubernetes.io/part-of=opentelemetry,app.kubernetes.io/version=latest", + "statusReplicas": "1/1" + }, + "version": "0.144.0" + } +} \ No newline at end of file diff --git a/manifests/03-clusterrole.yaml b/manifests/03-clusterrole.yaml index cb88c1c97..248fcbb45 100644 --- a/manifests/03-clusterrole.yaml +++ b/manifests/03-clusterrole.yaml @@ -383,6 +383,13 @@ rules: - get - list - watch + - apiGroups: + - opentelemetry.io + resources: + - opentelemetrycollectors + verbs: + - get + - list --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/pkg/gatherers/clusterconfig/clusterconfig_gatherer.go b/pkg/gatherers/clusterconfig/clusterconfig_gatherer.go index 8cb59ed81..42f4ac234 100644 --- a/pkg/gatherers/clusterconfig/clusterconfig_gatherer.go +++ b/pkg/gatherers/clusterconfig/clusterconfig_gatherer.go @@ -73,6 +73,7 @@ var gatheringFunctions = map[string]gathererFuncPtr{ "openstack_dataplanedeployments": (*Gatherer).GatherOpenstackDataplaneDeployments, "openstack_dataplanenodesets": (*Gatherer).GatherOpenstackDataplaneNodeSets, "openstack_version": (*Gatherer).GatherOpenstackVersions, + "opentelemetry_collectors": (*Gatherer).GatherOpenTelemetryCollectors, "operators": (*Gatherer).GatherClusterOperators, "operators_pods_and_events": (*Gatherer).GatherClusterOperatorPodsAndEvents, "overlapping_namespace_uids": (*Gatherer).GatherNamespacesWithOverlappingUIDs, diff --git a/pkg/gatherers/clusterconfig/const.go b/pkg/gatherers/clusterconfig/const.go index 592312135..865b9cc31 100644 --- a/pkg/gatherers/clusterconfig/const.go +++ b/pkg/gatherers/clusterconfig/const.go @@ -64,6 +64,9 @@ var ( jaegerResource = schema.GroupVersionResource{ Group: "jaegertracing.io", Version: "v1", Resource: "jaegers", } + openTelemetryCollectorResource = schema.GroupVersionResource{ + Group: "opentelemetry.io", Version: "v1beta1", Resource: "opentelemetrycollectors", + } costManagementMetricsConfigResource = schema.GroupVersionResource{ Group: "costmanagement-metrics-cfg.openshift.io", Version: "v1beta1", Resource: "costmanagementmetricsconfigs", } diff --git a/pkg/gatherers/clusterconfig/gather_opentelemetry_collectors.go b/pkg/gatherers/clusterconfig/gather_opentelemetry_collectors.go new file mode 100644 index 000000000..e63716749 --- /dev/null +++ b/pkg/gatherers/clusterconfig/gather_opentelemetry_collectors.go @@ -0,0 +1,110 @@ +package clusterconfig + +import ( + "context" + "fmt" + + "github.com/openshift/insights-operator/pkg/record" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/client-go/dynamic" + "k8s.io/klog/v2" +) + +// GatherOpenTelemetryCollectors collects up to 5 `opentelemetrycollectors.opentelemetry.io` custom resources +// installed in the cluster. +// +// Only the "service" subsection of each resource's spec.config is retained; receivers, +// exporters, and other pipeline configuration are omitted to avoid collecting sensitive data. +// +// ### API Reference +// - https://github.com/open-telemetry/opentelemetry-operator/blob/main/apis/v1beta1/opentelemetrycollector_types.go +// +// ### Sample data +// - docs/insights-archive-sample/config/opentelemetry/example-namespace/otel.json +// +// ### Location in archive +// - `config/opentelemetry/{namespace}/{name}.json` +// +// ### Config ID +// `clusterconfig/opentelemetry_collectors` +// +// ### Released version +// - 4.22 +// +// ### Backported versions +// TBD +// +// ### Changes +// None +func (g *Gatherer) GatherOpenTelemetryCollectors(ctx context.Context) ([]record.Record, []error) { + gatherDynamicClient, err := dynamic.NewForConfig(g.gatherKubeConfig) + if err != nil { + return nil, []error{err} + } + + return gatherOpenTelemetryCollectors(ctx, gatherDynamicClient) +} + +// cleanCollectorSpecConfig function parses the spec.config field +// and removes any possible private data getting only the "service" configuration +func cleanCollectorSpecConfig(item *unstructured.Unstructured) error { + specConfig, found, err := unstructured.NestedMap(item.Object, "spec", "config") + if err != nil { + return err + } else if !found { + // skipping due to the lack of target data structure for this item + return nil + } + + // instead of dynamically remove every key, set only the desired one + cleanConfig := make(map[string]interface{}) + if _, exists := specConfig["service"]; exists { + cleanConfig["service"] = specConfig["service"] + } + + if err := unstructured.SetNestedField(item.Object, cleanConfig, "spec", "config"); err != nil { + return err + } + + return nil +} + +func gatherOpenTelemetryCollectors(ctx context.Context, dynamicClient dynamic.Interface) ([]record.Record, []error) { + collectorsList, err := dynamicClient.Resource(openTelemetryCollectorResource).List(ctx, metav1.ListOptions{}) + if err != nil { + if errors.IsNotFound(err) { + // fast exit with no error if no CRs were found + return nil, nil + } + klog.V(2).Infof("unable to list %s resource due to: %s", openTelemetryCollectorResource, err) + return nil, []error{err} + } + + const limit = 5 + var records = make([]record.Record, 0, limit) + var errs []error + for i := range collectorsList.Items { + item := &collectorsList.Items[i] + + if err := cleanCollectorSpecConfig(item); err != nil { + errs = append(errs, err) + continue + } + + records = append(records, record.Record{ + Name: fmt.Sprintf("config/opentelemetry/%s/%s", + item.GetNamespace(), + item.GetName()), + Item: record.ResourceMarshaller{Resource: item}, + }) + + if len(records) >= limit { + klog.Infof("the limit (%d) of gathered OpenTelemetryCollectors resources was reached", limit) + break + } + } + + return records, errs +} diff --git a/pkg/gatherers/clusterconfig/gather_opentelemetry_collectors_test.go b/pkg/gatherers/clusterconfig/gather_opentelemetry_collectors_test.go new file mode 100644 index 000000000..87c0865f9 --- /dev/null +++ b/pkg/gatherers/clusterconfig/gather_opentelemetry_collectors_test.go @@ -0,0 +1,92 @@ +package clusterconfig + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" +) + +func Test_cleanCollectorSpecConfig(t *testing.T) { + t.Run("valid spec.config with service field - only service is kept", func(t *testing.T) { + // given + item := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "spec": map[string]interface{}{ + "config": map[string]interface{}{ + "service": map[string]interface{}{ + "telemetry": "test1", + "receivers": "test2", + }}}}} + + // when + test := cleanCollectorSpecConfig(item) + + // assert + assert.NoError(t, test) + + config, found, err := unstructured.NestedMap(item.Object, "spec", "config") + assert.NoError(t, err) + assert.True(t, found) + assert.Contains(t, config, "service") + assert.NotNil(t, config["service"]) + + service, _ := config["service"].(map[string]interface{}) + assert.Contains(t, service, "telemetry") // field from stub + assert.NotContains(t, config, "receivers") // the rest of the fields should be dropped + }) + + t.Run("missing spec.config - no error, item unchanged", func(t *testing.T) { + // given + item := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "spec": map[string]interface{}{}, + }, + } + + // when + test := cleanCollectorSpecConfig(item) + + // assert + assert.NoError(t, test) + }) + + t.Run("unexpected spec.config value - returns a controlled error", func(t *testing.T) { + // given + item := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "spec": map[string]interface{}{"config": "test1"}, + }, + } + + // when + test := cleanCollectorSpecConfig(item) + + // assert + assert.Error(t, test) + assert.ErrorContains(t, test, "accessor error") + }) + + t.Run("valid spec.config with NO service field - returns a cleaned field", func(t *testing.T) { + // given + item := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "spec": map[string]interface{}{ + "config": map[string]interface{}{ + "receivers": map[string]interface{}{}, + "exporters": map[string]interface{}{}, + }}}} + + // when + test := cleanCollectorSpecConfig(item) + + // assert + assert.NoError(t, test) + config, found, err := unstructured.NestedMap(item.Object, "spec", "config") + assert.NoError(t, err) + assert.True(t, found) + // receivers/exporters are always dropped + assert.NotContains(t, config, "receivers") + assert.NotContains(t, config, "exporters") + }) +}