From 1f0a99f1dcd4bbd78b36eda048a45969bde72be5 Mon Sep 17 00:00:00 2001 From: Daniel Franz Date: Tue, 3 Mar 2026 23:50:59 +0900 Subject: [PATCH] Namespace and PVC probes Adds readiness probing via CEL for namespaces and PVCs, to prevent subsequent phases from installing until their readiness checks have passed. Also adds e2e coverage via direct CER creation. Increased e2e timeout to 15m for experimental target. Signed-off-by: Daniel Franz --- Makefile | 4 +- .../clusterextensionrevision_controller.go | 36 +++- test/e2e/README.md | 1 + test/e2e/features/revision.feature | 204 ++++++++++++++++++ test/e2e/steps/hooks.go | 35 +-- test/e2e/steps/steps.go | 12 +- test/e2e/steps/testdata/rbac-template.yaml | 2 + 7 files changed, 273 insertions(+), 21 deletions(-) create mode 100644 test/e2e/features/revision.feature diff --git a/Makefile b/Makefile index 15d9b1319..646facd6b 100644 --- a/Makefile +++ b/Makefile @@ -243,9 +243,10 @@ verify-crd-compatibility: $(CRD_DIFF) manifests .PHONY: test test: manifests generate fmt lint test-unit test-e2e test-regression #HELP Run all tests. +E2E_TIMEOUT ?= 10m .PHONY: e2e e2e: #EXHELP Run the e2e tests. - go test -count=1 -v ./test/e2e/features_test.go + go test -count=1 -v ./test/e2e/features_test.go -timeout=$(E2E_TIMEOUT) E2E_REGISTRY_NAME := docker-registry E2E_REGISTRY_NAMESPACE := operator-controller-e2e @@ -317,6 +318,7 @@ test-experimental-e2e: GO_BUILD_EXTRA_FLAGS := -cover test-experimental-e2e: COVERAGE_NAME := experimental-e2e test-experimental-e2e: export MANIFEST := $(EXPERIMENTAL_RELEASE_MANIFEST) test-experimental-e2e: PROMETHEUS_VALUES := helm/prom_experimental.yaml +test-experimental-e2e: E2E_TIMEOUT := 15m test-experimental-e2e: run-internal image-registry prometheus e2e e2e-coverage kind-clean #HELP Run experimental e2e test suite on local kind cluster .PHONY: prometheus diff --git a/internal/operator-controller/controllers/clusterextensionrevision_controller.go b/internal/operator-controller/controllers/clusterextensionrevision_controller.go index 221d583fa..90ebf4ce8 100644 --- a/internal/operator-controller/controllers/clusterextensionrevision_controller.go +++ b/internal/operator-controller/controllers/clusterextensionrevision_controller.go @@ -321,6 +321,10 @@ type Sourcerer interface { } func (c *ClusterExtensionRevisionReconciler) SetupWithManager(mgr ctrl.Manager) error { + // Initialize probes once at setup time + if err := initializeProbes(); err != nil { + return err + } skipProgressDeadlineExceededPredicate := predicate.Funcs{ UpdateFunc: func(e event.UpdateEvent) bool { rev, ok := e.ObjectNew.(*ocv1.ClusterExtensionRevision) @@ -465,7 +469,7 @@ func (c *ClusterExtensionRevisionReconciler) toBoxcutterRevision(ctx context.Con opts := []boxcutter.RevisionReconcileOption{ boxcutter.WithPreviousOwners(previousObjs), boxcutter.WithProbe(boxcutter.ProgressProbeType, probing.And{ - deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe, + &namespaceActiveProbe, deploymentProbe, statefulSetProbe, crdProbe, issuerProbe, certProbe, &pvcBoundProbe, }), } @@ -511,6 +515,28 @@ func EffectiveCollisionProtection(cp ...ocv1.CollisionProtection) ocv1.Collision return ecp } +// initializeProbes is used to initialize CEL probes at startup time, so we don't recreate them on every reconcile +func initializeProbes() error { + nsCEL, err := probing.NewCELProbe(namespaceActiveCEL, `namespace phase must be "Active"`) + if err != nil { + return fmt.Errorf("constructing namespace CEL probe: %w", err) + } + pvcCEL, err := probing.NewCELProbe(pvcBoundCEL, `persistentvolumeclaim phase must be "Bound"`) + if err != nil { + return fmt.Errorf("constructing PVC CEL probe: %w", err) + } + namespaceActiveProbe = probing.GroupKindSelector{ + GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "Namespace"}, + Prober: nsCEL, + } + pvcBoundProbe = probing.GroupKindSelector{ + GroupKind: schema.GroupKind{Group: corev1.GroupName, Kind: "PersistentVolumeClaim"}, + Prober: pvcCEL, + } + + return nil +} + var ( deploymentProbe = &probing.GroupKindSelector{ GroupKind: schema.GroupKind{Group: appsv1.GroupName, Kind: "Deployment"}, @@ -542,6 +568,14 @@ var ( }, } + // namespaceActiveCEL is a CEL rule which asserts that the namespace is in "Active" phase + namespaceActiveCEL = `self.status.phase == "Active"` + namespaceActiveProbe probing.GroupKindSelector + + // pvcBoundCEL is a CEL rule which asserts that the PVC is in "Bound" phase + pvcBoundCEL = `self.status.phase == "Bound"` + pvcBoundProbe probing.GroupKindSelector + // deplStaefulSetProbe probes Deployment, StatefulSet objects. deplStatefulSetProbe = &probing.ObservedGenerationProbe{ Prober: probing.And{ diff --git a/test/e2e/README.md b/test/e2e/README.md index 5f9957c5a..f41b612fd 100644 --- a/test/e2e/README.md +++ b/test/e2e/README.md @@ -199,6 +199,7 @@ Leverage existing steps for common operations: Use these variables in YAML templates: - `${NAME}`: Scenario-specific ClusterExtension name (e.g., `ce-123`) +- `${CER_NAME}`: Scenario-specific ClusterExtensionRevision name (e.g., `cer-123`; for applying CERs directly) - `${TEST_NAMESPACE}`: Scenario-specific namespace (e.g., `ns-123`) - `${CATALOG_IMG}`: Catalog image reference (defaults to in-cluster registry, overridable via `CATALOG_IMG` env var) diff --git a/test/e2e/features/revision.feature b/test/e2e/features/revision.feature new file mode 100644 index 000000000..7ac15136d --- /dev/null +++ b/test/e2e/features/revision.feature @@ -0,0 +1,204 @@ +Feature: Install ClusterExtensionRevision + + As an OLM user I would like to install a cluster extension revision. + + Background: + Given OLM is available + And ServiceAccount "olm-sa" with needed permissions is available in ${TEST_NAMESPACE} + + @BoxcutterRuntime + Scenario: Install simple revision + When ClusterExtensionRevision is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtensionRevision + metadata: + annotations: + olm.operatorframework.io/service-account-name: olm-sa + olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE} + name: ${CER_NAME} + spec: + lifecycleState: Active + collisionProtection: Prevent + phases: + - name: policies + objects: + - object: + apiVersion: networking.k8s.io/v1 + kind: NetworkPolicy + metadata: + name: test-operator-network-policy + namespace: ${TEST_NAMESPACE} + spec: + podSelector: {} + policyTypes: + - Ingress + - name: deploy + objects: + - object: + apiVersion: v1 + data: + httpd.sh: | + #!/bin/sh + echo "Version 1.2.0" + echo true > /var/www/started + echo true > /var/www/ready + echo true > /var/www/live + exec httpd -f -h /var/www -p 80 + kind: ConfigMap + metadata: + name: httpd-script + namespace: ${TEST_NAMESPACE} + - object: + apiVersion: v1 + data: + name: test-configmap + version: v1.2.0 + kind: ConfigMap + metadata: + annotations: + shouldNotTemplate: | + The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles. + name: test-configmap + namespace: ${TEST_NAMESPACE} + revision: 1 + """ + + And ClusterExtensionRevision "${CER_NAME}" reports Progressing as True with Reason Succeeded + And ClusterExtensionRevision "${CER_NAME}" reports Available as True with Reason ProbesSucceeded + And resource "networkpolicy/test-operator-network-policy" is installed + And resource "configmap/test-configmap" is installed + + @BoxcutterRuntime + Scenario: Probe failure for PersistentVolumeClaim halts phase progression + When ClusterExtensionRevision is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtensionRevision + metadata: + annotations: + olm.operatorframework.io/service-account-name: olm-sa + olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE} + name: ${CER_NAME} + spec: + lifecycleState: Active + collisionProtection: Prevent + phases: + - name: pvc + objects: + - object: + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: test-pvc + namespace: ${TEST_NAMESPACE} + spec: + accessModes: + - ReadWriteOnce + storageClassName: "" + volumeName: test-pv + resources: + requests: + storage: 1Mi + - name: configmap + objects: + - object: + apiVersion: v1 + kind: ConfigMap + metadata: + annotations: + shouldNotTemplate: | + The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles. + name: test-configmap + namespace: ${TEST_NAMESPACE} + data: + name: test-configmap + version: v1.2.0 + revision: 1 + """ + + And resource "persistentvolumeclaim/test-pvc" is installed + And ClusterExtensionRevision "${CER_NAME}" reports Available as False with Reason ProbeFailure + And resource "configmap/test-configmap" is not installed + + @BoxcutterRuntime + Scenario: Phases progress when PersistentVolumeClaim becomes "Bound" + When ClusterExtensionRevision is applied + """ + apiVersion: olm.operatorframework.io/v1 + kind: ClusterExtensionRevision + metadata: + annotations: + olm.operatorframework.io/service-account-name: olm-sa + olm.operatorframework.io/service-account-namespace: ${TEST_NAMESPACE} + name: ${CER_NAME} + spec: + lifecycleState: Active + collisionProtection: Prevent + phases: + - name: pvc + objects: + - object: + apiVersion: v1 + kind: PersistentVolumeClaim + metadata: + name: test-pvc + namespace: ${TEST_NAMESPACE} + spec: + accessModes: + - ReadWriteOnce + storageClassName: "" + volumeName: test-pv + resources: + requests: + storage: 1Mi + - object: + apiVersion: v1 + kind: PersistentVolume + metadata: + name: test-pv + spec: + accessModes: + - ReadWriteOnce + capacity: + storage: 1Mi + claimRef: + apiVersion: v1 + kind: PersistentVolumeClaim + name: test-pvc + namespace: ${TEST_NAMESPACE} + persistentVolumeReclaimPolicy: Delete + storageClassName: "" + volumeMode: Filesystem + local: + path: /tmp/persistent-volume + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: NotIn + values: + - a-node-name-that-should-not-exist + - name: configmap + objects: + - object: + apiVersion: v1 + kind: ConfigMap + metadata: + annotations: + shouldNotTemplate: | + The namespace is {{ $labels.namespace }}. The templated $labels.namespace is NOT expected to be processed by OLM's rendering engine for registry+v1 bundles. + name: test-configmap + namespace: ${TEST_NAMESPACE} + data: + name: test-configmap + version: v1.2.0 + revision: 1 + """ + + And ClusterExtensionRevision "${CER_NAME}" reports Progressing as True with Reason Succeeded + And ClusterExtensionRevision "${CER_NAME}" reports Available as True with Reason ProbesSucceeded + And resource "persistentvolume/test-pv" is installed + And resource "persistentvolumeclaim/test-pvc" is installed + And resource "configmap/test-configmap" is installed diff --git a/test/e2e/steps/hooks.go b/test/e2e/steps/hooks.go index 929138046..a4f518e4f 100644 --- a/test/e2e/steps/hooks.go +++ b/test/e2e/steps/hooks.go @@ -27,12 +27,13 @@ type resource struct { } type scenarioContext struct { - id string - namespace string - clusterExtensionName string - removedResources []unstructured.Unstructured - backGroundCmds []*exec.Cmd - metricsResponse map[string]string + id string + namespace string + clusterExtensionName string + clusterExtensionRevisionName string + removedResources []unstructured.Unstructured + backGroundCmds []*exec.Cmd + metricsResponse map[string]string extensionObjects []client.Object } @@ -142,9 +143,10 @@ func CheckFeatureTags(ctx context.Context, sc *godog.Scenario) (context.Context, func CreateScenarioContext(ctx context.Context, sc *godog.Scenario) (context.Context, error) { scCtx := &scenarioContext{ - id: sc.Id, - namespace: fmt.Sprintf("ns-%s", sc.Id), - clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id), + id: sc.Id, + namespace: fmt.Sprintf("ns-%s", sc.Id), + clusterExtensionName: fmt.Sprintf("ce-%s", sc.Id), + clusterExtensionRevisionName: fmt.Sprintf("cer-%s", sc.Id), } return context.WithValue(ctx, scenarioContextKey, scCtx), nil } @@ -176,13 +178,16 @@ func ScenarioCleanup(ctx context.Context, _ *godog.Scenario, err error) (context if sc.clusterExtensionName != "" { forDeletion = append(forDeletion, resource{name: sc.clusterExtensionName, kind: "clusterextension"}) } + if sc.clusterExtensionRevisionName != "" { + forDeletion = append(forDeletion, resource{name: sc.clusterExtensionRevisionName, kind: "clusterextensionrevision"}) + } forDeletion = append(forDeletion, resource{name: sc.namespace, kind: "namespace"}) - go func() { - for _, r := range forDeletion { - if _, err := k8sClient("delete", r.kind, r.name, "--ignore-not-found=true"); err != nil { - logger.Info("Error deleting resource", "name", r.name, "namespace", sc.namespace, "stderr", stderrOutput(err)) + for _, r := range forDeletion { + go func(res resource) { + if _, err := k8sClient("delete", res.kind, res.name, "--ignore-not-found=true"); err != nil { + logger.Info("Error deleting resource", "name", res.name, "namespace", sc.namespace, "stderr", stderrOutput(err)) } - } - }() + }(r) + } return ctx, nil } diff --git a/test/e2e/steps/steps.go b/test/e2e/steps/steps.go index abea3fcb2..0dcd0788e 100644 --- a/test/e2e/steps/steps.go +++ b/test/e2e/steps/steps.go @@ -72,6 +72,7 @@ func RegisterSteps(sc *godog.ScenarioContext) { sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) as ([[:alnum:]]+)$`, ClusterExtensionReportsConditionWithoutReason) sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" reports ([[:alnum:]]+) as ([[:alnum:]]+) with Reason ([[:alnum:]]+)$`, ClusterExtensionRevisionReportsConditionWithoutMsg) sc.Step(`^(?i)ClusterExtension reports ([[:alnum:]]+) transition between (\d+) and (\d+) minutes since its creation$`, ClusterExtensionReportsConditionTransitionTime) + sc.Step(`^(?i)ClusterExtensionRevision is applied(?:\s+.*)?$`, ResourceIsApplied) sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" is archived$`, ClusterExtensionRevisionIsArchived) sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" contains annotation "([^"]+)" with value$`, ClusterExtensionRevisionHasAnnotationWithValue) sc.Step(`^(?i)ClusterExtensionRevision "([^"]+)" has label "([^"]+)" with value "([^"]+)"$`, ClusterExtensionRevisionHasLabelWithValue) @@ -80,7 +81,7 @@ func RegisterSteps(sc *godog.ScenarioContext) { sc.Step(`^(?i)resource "([^"]+)" is installed$`, ResourceAvailable) sc.Step(`^(?i)resource "([^"]+)" is available$`, ResourceAvailable) sc.Step(`^(?i)resource "([^"]+)" is removed$`, ResourceRemoved) - sc.Step(`^(?i)resource "([^"]+)" is eventually not found$`, ResourceEventuallyNotFound) + sc.Step(`^(?i)resource "([^"]+)" is (?:eventually not found|not installed)$`, ResourceEventuallyNotFound) sc.Step(`^(?i)resource "([^"]+)" exists$`, ResourceAvailable) sc.Step(`^(?i)resource is applied$`, ResourceIsApplied) sc.Step(`^(?i)resource "deployment/test-operator" reports as (not ready|ready)$`, MarkTestOperatorNotReady) @@ -186,6 +187,7 @@ func substituteScenarioVars(content string, sc *scenarioContext) string { vars := map[string]string{ "TEST_NAMESPACE": sc.namespace, "NAME": sc.clusterExtensionName, + "CER_NAME": sc.clusterExtensionRevisionName, "CATALOG_IMG": "docker-registry.operator-controller-e2e.svc.cluster.local:5000/e2e/test-catalog:v1", } if v, found := os.LookupEnv("CATALOG_IMG"); found { @@ -235,8 +237,8 @@ func ClusterExtensionVersionUpdate(ctx context.Context, version string) error { return err } -// ResourceIsApplied applies the provided YAML resource to the cluster and in case of ClusterExtension it captures its name in the test context -// so that it can be referred to in later steps with ${NAME} +// ResourceIsApplied applies the provided YAML resource to the cluster and in case of ClusterExtension or ClusterExtensionRevision it captures +// its name in the test context so that it can be referred to in later steps with ${NAME} or ${CER_NAME}, respectively func ResourceIsApplied(ctx context.Context, yamlTemplate *godog.DocString) error { sc := scenarioCtx(ctx) yamlContent := substituteScenarioVars(yamlTemplate.Content, sc) @@ -246,10 +248,12 @@ func ResourceIsApplied(ctx context.Context, yamlTemplate *godog.DocString) error } out, err := k8scliWithInput(yamlContent, "apply", "-f", "-") if err != nil { - return fmt.Errorf("failed to apply resource %v %w", out, err) + return fmt.Errorf("failed to apply resource %v; err: %w; stderr: %s", out, err, stderrOutput(err)) } if res.GetKind() == "ClusterExtension" { sc.clusterExtensionName = res.GetName() + } else if res.GetKind() == "ClusterExtensionRevision" { + sc.clusterExtensionRevisionName = res.GetName() } return nil } diff --git a/test/e2e/steps/testdata/rbac-template.yaml b/test/e2e/steps/testdata/rbac-template.yaml index 90138b9c6..2342f7b82 100644 --- a/test/e2e/steps/testdata/rbac-template.yaml +++ b/test/e2e/steps/testdata/rbac-template.yaml @@ -28,6 +28,8 @@ rules: - serviceaccounts - events - namespaces + - persistentvolumes + - persistentvolumeclaims verbs: [update, create, list, watch, get, delete, patch] - apiGroups: ["apps"] resources: