rhobs · DavidRajnoha · Apr 21, 2026 · Apr 24, 2026 · Apr 24, 2026
diff --git a/test/e2e/framework/assertions.go b/test/e2e/framework/assertions.go
@@ -423,11 +423,26 @@ func (f *Framework) GetPodMetrics(pod *v1.Pod, opts ...func(*HTTPOptions)) ([]by
 // the callback function for additional checks.
 func (f *Framework) AssertPromQLResult(t *testing.T, expr string, callback func(model.Value) error) error {
 	t.Helper()
+	return f.AssertPromQLResultWithOptions(t, expr, callback)
+}
+
+// AssertPromQLResultWithOptions is like AssertPromQLResult but accepts
+// WithTimeout and WithPollInterval options to override the default polling
+// parameters.
+func (f *Framework) AssertPromQLResultWithOptions(t *testing.T, expr string, callback func(model.Value) error, fns ...OptionFn) error {
+	t.Helper()
+	option := AssertOption{
+		PollInterval: 20 * time.Second,
+		WaitTimeout:  3 * DefaultTestTimeout,
+	}
+	for _, fn := range fns {
+		fn(&option)
+	}
 	var (
 		pollErr error
 		v       model.Value
 	)
-	if err := wait.PollUntilContextTimeout(context.Background(), 20*time.Second, 3*DefaultTestTimeout, true, func(context.Context) (bool, error) {
+	if err := wait.PollUntilContextTimeout(context.Background(), option.PollInterval, option.WaitTimeout, true, func(context.Context) (bool, error) {
 		v, pollErr = f.getPromQLResult(context.Background(), expr)
 		if pollErr != nil {
 			t.Logf("error from getPromQLResult(): %s", pollErr)

diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go
@@ -14,6 +14,7 @@ import (
 
 	configv1 "github.com/openshift/api/config/v1"
 	"github.com/pkg/errors"
+	"golang.org/x/mod/semver"
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	v1 "k8s.io/api/core/v1"
@@ -25,6 +26,7 @@ import (
 	"k8s.io/client-go/rest"
 	"k8s.io/client-go/tools/portforward"
 	"k8s.io/client-go/transport/spdy"
+	"k8s.io/utils/ptr"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
@@ -37,12 +39,14 @@ type Framework struct {
 	RootCA             *x509.CertPool
 	MetricsClientCert  *tls.Certificate
 	OperatorNamespace  string
+	ClusterVersion     *configv1.ClusterVersion
 }
 
 // Setup finalizes the initilization of the Framework object by setting
 // parameters which are specific to OpenShift.
 func (f *Framework) Setup() error {
 	clusterVersion := &configv1.ClusterVersion{}
+
 	if err := f.K8sClient.Get(context.Background(), client.ObjectKey{Name: "version"}, clusterVersion); err != nil {
 		if meta.IsNoMatchError(err) {
 			return nil
@@ -51,6 +55,7 @@ func (f *Framework) Setup() error {
 		return fmt.Errorf("failed to get clusterversion %w", err)
 	}
 
+	f.ClusterVersion = clusterVersion
 	f.IsOpenshiftCluster = true
 
 	// Load the service CA operator's certificate authority.
@@ -257,3 +262,140 @@ func (f *Framework) CleanUp(t *testing.T, cleanupFunc func()) {
 		}
 	})
 }
+
+// TODO: remove ForceFailure — temporary helper to exercise DumpOnFailure logging.
+func (f *Framework) ForceFailure(t *testing.T) {
+	t.Helper()
+	t.Error("forced failure to test debug dump output")
+}
+
+// DebugFunc is a diagnostic function invoked when a test fails.
+// Implementations should use t.Logf to emit relevant state.
+type DebugFunc func(t *testing.T)
+
+// DumpOnFailure registers a t.Cleanup that runs the given debug functions
+// when the test fails. It can be called multiple times to add more debug
+// functions as resources become available during the test.
+//
+// Cleanups run in LIFO order, so call DumpOnFailure before registering
+// resource deletions via CleanUp to ensure debug info is captured while
+// the resources still exist.
+func (f *Framework) DumpOnFailure(t *testing.T, fns ...DebugFunc) {
+	t.Helper()
+	t.Cleanup(func() {
+		if !t.Failed() {
+			return
+		}
+		for _, fn := range fns {
+			fn(t)
+		}
+	})
+}
+
+// DebugNamespace returns a DebugFunc that dumps deployments, pods, and events
+// for the given namespaces.
+func (f *Framework) DebugNamespace(namespaces ...string) DebugFunc {
+	return func(t *testing.T) {
+		t.Helper()
+		for _, ns := range namespaces {
+			t.Logf("--- Dumping debug info for namespace %s ---", ns)
+			f.DumpNamespaceDebug(t, ns)
+		}
+	}
+}
+
+// SkipIfClusterVersionBelow skips the test if the cluster version is below
+// minVersion. The minVersion string should be a semver-compatible version
+// (e.g. "4.19" or "v4.19").
+func (f *Framework) SkipIfClusterVersionBelow(t *testing.T, minVersion string) {
+	t.Helper()
+	if f.ClusterVersion == nil {
+		t.Fatal("cluster version not available (non-OpenShift cluster?)")
+		return
+	}
+
+	actual := f.ClusterVersion.Status.Desired.Version
+	if actual == "" {
+		t.Fatal("cluster version is empty")
+		return
+	}
+	t.Logf("Detected cluster version: %s", actual)
+
+	if !strings.HasPrefix(actual, "v") {
+		actual = "v" + actual
+	}
+	if !strings.HasPrefix(minVersion, "v") {
+		minVersion = "v" + minVersion
+	}
+
+	canonicalActual := fmt.Sprintf("%s-0", semver.Canonical(actual))
+	canonicalMin := fmt.Sprintf("%s-0", semver.Canonical(minVersion))
+
+	if semver.Canonical(actual) == "" || semver.Canonical(minVersion) == "" {
+		t.Fatalf("Unable to parse version (actual=%q, min=%q)", actual, minVersion)
+		return
+	}
+
+	if semver.Compare(canonicalActual, canonicalMin) < 0 {
+		t.Skipf("Skipping: cluster version %s is below minimum required %s", f.ClusterVersion.Status.Desired.Version, minVersion)
+	}
+}
+
+// DumpNamespaceDebug logs deployments (with conditions), pods (with container
+// statuses), and events for the given namespace. Useful as a t.Cleanup or
+// on-failure diagnostic helper.
+func (f *Framework) DumpNamespaceDebug(t *testing.T, namespace string) {
+	t.Helper()
+	ctx := context.WithoutCancel(t.Context())
+
+	t.Log("=== BEGIN DEBUG DUMP ===")
+	defer t.Log("=== END DEBUG DUMP ===")
+
+	var deployments appsv1.DeploymentList
+	if err := f.K8sClient.List(ctx, &deployments, client.InNamespace(namespace)); err != nil {
+		t.Logf("Failed to list deployments in %s: %v", namespace, err)
+	} else {
+		t.Logf("Deployments in namespace %s: %d", namespace, len(deployments.Items))
+		for _, d := range deployments.Items {
+			t.Logf("  Deployment: name=%s replicas=%d readyReplicas=%d availableReplicas=%d",
+				d.Name, ptr.Deref(d.Spec.Replicas, 0), d.Status.ReadyReplicas, d.Status.AvailableReplicas)
+			for _, c := range d.Status.Conditions {
+				t.Logf("    condition: type=%s status=%s reason=%s message=%s",
+					c.Type, c.Status, c.Reason, c.Message)
+			}
+		}
+	}
+
+	var pods corev1.PodList
+	if err := f.K8sClient.List(ctx, &pods, client.InNamespace(namespace)); err != nil {
+		t.Logf("Failed to list pods in %s: %v", namespace, err)
+	} else {
+		t.Logf("Pods in namespace %s: %d", namespace, len(pods.Items))
+		for _, p := range pods.Items {
+			t.Logf("  Pod: name=%s phase=%s", p.Name, p.Status.Phase)
+			for _, cs := range p.Status.ContainerStatuses {
+				switch {
+				case cs.State.Running != nil:
+					t.Logf("    container=%s ready=%v restarts=%d state=Running", cs.Name, cs.Ready, cs.RestartCount)
+				case cs.State.Waiting != nil:
+					t.Logf("    container=%s ready=%v restarts=%d state=Waiting reason=%s message=%s",
+						cs.Name, cs.Ready, cs.RestartCount, cs.State.Waiting.Reason, cs.State.Waiting.Message)
+				case cs.State.Terminated != nil:
+					t.Logf("    container=%s ready=%v restarts=%d state=Terminated reason=%s exitCode=%d",
+						cs.Name, cs.Ready, cs.RestartCount, cs.State.Terminated.Reason, cs.State.Terminated.ExitCode)
+				}
+			}
+		}
+	}
+
+	var events corev1.EventList
+	if err := f.K8sClient.List(ctx, &events, client.InNamespace(namespace)); err != nil {
+		t.Logf("Failed to list events in %s: %v", namespace, err)
+	} else {
+		t.Logf("Events in namespace %s: %d", namespace, len(events.Items))
+		for _, e := range events.Items {
+			t.Logf("  Event: involvedObject=%s/%s reason=%s message=%s type=%s count=%d",
+				e.InvolvedObject.Kind, e.InvolvedObject.Name, e.Reason, e.Message, e.Type, e.Count)
+		}
+	}
+}
diff --git a/test/e2e/monitoring_stack_controller_test.go b/test/e2e/monitoring_stack_controller_test.go
@@ -47,6 +47,8 @@ func assertCRDExists(t *testing.T, crds ...string) {
 }
 
 func TestMonitoringStackController(t *testing.T) {
+	f.DumpOnFailure(t, f.DebugNamespace(e2eTestNamespace))
+	f.ForceFailure(t) // TODO: remove — temporary, exercises debug dump
 	err := stack.AddToScheme(scheme.Scheme)
 	assert.NilError(t, err, "adding stack to scheme failed")
 	assertCRDExists(t,

diff --git a/test/e2e/observability_installer_test.go b/test/e2e/observability_installer_test.go
@@ -45,6 +45,8 @@ func TestObservabilityInstallerController(t *testing.T) {
 }
 
 func testObservabilityInstallerTracing(t *testing.T) {
+	f.DumpOnFailure(t, f.DebugNamespace(f.OperatorNamespace, "tracing-observability"))
+	f.ForceFailure(t) // TODO: remove — temporary, exercises debug dump
 	ctx := context.Background()
 
 	// The ObservabilityInstaller installs operators via subscriptions,

diff --git a/test/e2e/operator_metrics_test.go b/test/e2e/operator_metrics_test.go
@@ -11,6 +11,8 @@ import (
 )
 
 func TestOperatorMetrics(t *testing.T) {
+	f.DumpOnFailure(t, f.DebugNamespace(f.OperatorNamespace))
+	f.ForceFailure(t) // TODO: remove — temporary, exercises debug dump
 	t.Run("operator exposes metrics", func(t *testing.T) {
 		pod := f.GetOperatorPod(t)
 

diff --git a/test/e2e/po_admission_webhook_test.go b/test/e2e/po_admission_webhook_test.go
@@ -12,6 +12,8 @@ import (
 )
 
 func TestPrometheusRuleWebhook(t *testing.T) {
+	f.DumpOnFailure(t, f.DebugNamespace(e2eTestNamespace))
+	f.ForceFailure(t) // TODO: remove — temporary, exercises debug dump
 	assertCRDExists(t,
 		"prometheusrules.monitoring.rhobs",
 	)

diff --git a/test/e2e/prometheus_operator_test.go b/test/e2e/prometheus_operator_test.go
@@ -29,6 +29,8 @@ type testCase struct {
 }
 
 func TestPrometheusOperatorForNonOwnedResources(t *testing.T) {
+	f.DumpOnFailure(t, f.DebugNamespace(e2eTestNamespace))
+	f.ForceFailure(t) // TODO: remove — temporary, exercises debug dump
 	resources := []client.Object{
 		newPrometheus(nil),
 		newAlertmanager(nil),
@@ -74,6 +76,8 @@ func TestPrometheusOperatorForNonOwnedResources(t *testing.T) {
 }
 
 func TestPrometheusOperatorForOwnedResources(t *testing.T) {
+	f.DumpOnFailure(t, f.DebugNamespace(e2eTestNamespace))
+	f.ForceFailure(t) // TODO: remove — temporary, exercises debug dump
 	resources := []client.Object{
 		newPrometheus(ownedResourceLabels),
 		newAlertmanager(ownedResourceLabels),

diff --git a/test/e2e/thanos_querier_controller_test.go b/test/e2e/thanos_querier_controller_test.go
@@ -23,6 +23,8 @@ import (
 )
 
 func TestThanosQuerierController(t *testing.T) {
+	f.DumpOnFailure(t, f.DebugNamespace(e2eTestNamespace))
+	f.ForceFailure(t) // TODO: remove — temporary, exercises debug dump
 	assertCRDExists(t, "thanosqueriers.monitoring.rhobs")
 
 	ts := []testCase{