Skip to content

Commit a5ba252

Browse files
fix(runtime status) Fix config validation errors to show Failed status
Config validation errors are permanent errors requiring user action, not transient errors that should be retried. This commit ensures config errors are treated as TerminalErrors. Changes: 1. Wrap config validation errors as reconcile.TerminalError in provider.go - Config errors now set Progressing: False, Reason: Blocked - Signals manual intervention required (not automatic retry) 2. Keep existing determineFailureReason() logic in common_controller.go - No revisions → Installed: False, Reason: Failed ✓ - Revision with Retrying → Installed: False, Reason: Failed ✓ - Revision with RollingOut → Installed: False, Reason: Absent ✓ Result: Config errors now correctly show: - Progressing: False, Reason: Blocked (terminal error) - Installed: False, Reason: Failed (installation failed) This fixes the downstream OpenShift test failure where config errors were showing Reason: Retrying instead of Blocked, and Installed was using Absent instead of Failed. IMPORTANT FIX: Only check the latest rolling revision for errors, not all revisions. Problem: - Revisions are sorted ascending by Spec.Revision (oldest to newest) - Previous logic checked ALL revisions for Retrying reason - Old revision errors superseded by newer healthy revisions incorrectly caused Failed Fixed Logic: - Check only the LATEST revision (last element in array) - Old errors that have been superseded by healthy revisions no longer cause Failed - Matches BOXCUTTER behavior in ApplyBundleWithBoxcutter which mirrors only the latest revision This ensures the Installed condition reflects the current state (latest revision), not historical states (old revisions).
1 parent 7a60e71 commit a5ba252

4 files changed

Lines changed: 205 additions & 11 deletions

File tree

internal/operator-controller/applier/provider.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"helm.sh/helm/v3/pkg/chart"
1010
"k8s.io/apimachinery/pkg/util/sets"
1111
"sigs.k8s.io/controller-runtime/pkg/client"
12+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
1213

1314
"github.com/operator-framework/api/pkg/operators/v1alpha1"
1415

@@ -77,7 +78,7 @@ func (r *RegistryV1ManifestProvider) Get(bundleFS fs.FS, ext *ocv1.ClusterExtens
7778
bundleConfigBytes := extensionConfigBytes(ext)
7879
bundleConfig, err := config.UnmarshalConfig(bundleConfigBytes, schema, ext.Spec.Namespace)
7980
if err != nil {
80-
return nil, fmt.Errorf("invalid ClusterExtension configuration: %w", err)
81+
return nil, reconcile.TerminalError(fmt.Errorf("invalid ClusterExtension configuration: %w", err))
8182
}
8283

8384
if watchNS := bundleConfig.GetWatchNamespace(); watchNS != nil {

internal/operator-controller/controllers/clusterextension_reconcile_steps.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,11 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc {
158158
// handleResolutionError handles the case when bundle resolution fails.
159159
//
160160
// Decision logic (evaluated in order):
161-
// 1. No installed bundle Retry (cannot proceed without any bundle)
162-
// 2. Version change requested Retry (cannot upgrade without catalog)
163-
// 3. Cannot check catalog existence Retry (API error, cannot safely decide)
164-
// 4. Catalogs exist Retry (transient error, catalog may be updating)
165-
// 5. Catalogs deleted Fallback to installed bundle (maintain current state)
161+
// 1. No installed bundle: Retry (cannot proceed without any bundle)
162+
// 2. Version change requested: Retry (cannot upgrade without catalog)
163+
// 3. Cannot check catalog existence: Retry (API error, cannot safely decide)
164+
// 4. Catalogs exist: Retry (transient error, catalog may be updating)
165+
// 5. Catalogs deleted: Fallback to installed bundle (maintain current state)
166166
//
167167
// When falling back (case 5), we set the resolved bundle to the installed bundle and return
168168
// no error, allowing the Apply step to run and maintain resources using the existing installation.

internal/operator-controller/controllers/common_controller.go

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,8 @@ func setInstalledStatusFromRevisionStates(ext *ocv1.ClusterExtension, revisionSt
5656
// Nothing is installed
5757
if revisionStates.Installed == nil {
5858
setInstallStatus(ext, nil)
59-
if len(revisionStates.RollingOut) == 0 {
60-
setInstalledStatusConditionFalse(ext, ocv1.ReasonFailed, "No bundle installed")
61-
} else {
62-
setInstalledStatusConditionFalse(ext, ocv1.ReasonAbsent, "No bundle installed")
63-
}
59+
reason := determineFailureReason(revisionStates.RollingOut)
60+
setInstalledStatusConditionFalse(ext, reason, "No bundle installed")
6461
return
6562
}
6663
// Something is installed
@@ -71,6 +68,45 @@ func setInstalledStatusFromRevisionStates(ext *ocv1.ClusterExtension, revisionSt
7168
setInstalledStatusConditionSuccess(ext, fmt.Sprintf("Installed bundle %s successfully", revisionStates.Installed.Image))
7269
}
7370

71+
// determineFailureReason determines the appropriate reason for the Installed condition
72+
// when no bundle is installed (Installed: False).
73+
//
74+
// Returns Failed when:
75+
// - No rolling revisions exist (nothing to install)
76+
// - The latest rolling revision has Reason: Retrying (indicates an error occurred)
77+
//
78+
// Returns Absent when:
79+
// - Rolling revisions exist with the latest having Reason: RollingOut (healthy phased rollout in progress)
80+
//
81+
// Rationale:
82+
// - Failed: Semantically indicates an error prevented installation
83+
// - Absent: Semantically indicates "not there yet" (neutral state, e.g., during healthy rollout)
84+
// - Retrying reason indicates an error (config validation, apply failure, etc.)
85+
// - RollingOut reason indicates healthy progress (not an error)
86+
// - Only the LATEST revision matters - old errors superseded by newer healthy revisions should not cause Failed
87+
//
88+
// Note: rollingRevisions are sorted in ascending order by Spec.Revision (oldest to newest),
89+
// so the latest revision is the LAST element in the array.
90+
func determineFailureReason(rollingRevisions []*RevisionMetadata) string {
91+
if len(rollingRevisions) == 0 {
92+
return ocv1.ReasonFailed
93+
}
94+
95+
// Check if the LATEST rolling revision indicates an error (Retrying reason)
96+
// Latest revision is the last element in the array (sorted ascending by Spec.Revision)
97+
latestRevision := rollingRevisions[len(rollingRevisions)-1]
98+
progressingCond := apimeta.FindStatusCondition(latestRevision.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing)
99+
if progressingCond != nil && progressingCond.Reason == string(ocv1.ClusterExtensionRevisionReasonRetrying) {
100+
// Retrying indicates an error occurred (config, apply, validation, etc.)
101+
// Use Failed for semantic correctness: installation failed due to error
102+
return ocv1.ReasonFailed
103+
}
104+
105+
// No error detected in latest revision - it's progressing healthily (RollingOut) or no conditions set
106+
// Use Absent for neutral "not installed yet" state
107+
return ocv1.ReasonAbsent
108+
}
109+
74110
// setInstalledStatusConditionSuccess sets the installed status condition to success.
75111
func setInstalledStatusConditionSuccess(ext *ocv1.ClusterExtension, message string) {
76112
SetStatusCondition(&ext.Status.Conditions, metav1.Condition{

internal/operator-controller/controllers/common_controller_test.go

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,3 +240,160 @@ func TestSetStatusConditionWrapper(t *testing.T) {
240240
})
241241
}
242242
}
243+
244+
func TestSetInstalledStatusFromRevisionStates_ConfigValidationError(t *testing.T) {
245+
tests := []struct {
246+
name string
247+
revisionStates *RevisionStates
248+
expectedInstalledCond metav1.Condition
249+
}{
250+
{
251+
name: "no revisions at all - uses Failed",
252+
revisionStates: &RevisionStates{
253+
Installed: nil,
254+
RollingOut: nil,
255+
},
256+
expectedInstalledCond: metav1.Condition{
257+
Type: ocv1.TypeInstalled,
258+
Status: metav1.ConditionFalse,
259+
Reason: ocv1.ReasonFailed,
260+
},
261+
},
262+
{
263+
name: "rolling revision with error (Retrying) - uses Failed",
264+
revisionStates: &RevisionStates{
265+
Installed: nil,
266+
RollingOut: []*RevisionMetadata{
267+
{
268+
RevisionName: "rev-1",
269+
Conditions: []metav1.Condition{
270+
{
271+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
272+
Status: metav1.ConditionTrue,
273+
Reason: ocv1.ClusterExtensionRevisionReasonRetrying,
274+
Message: "some error occurred",
275+
},
276+
},
277+
},
278+
},
279+
},
280+
expectedInstalledCond: metav1.Condition{
281+
Type: ocv1.TypeInstalled,
282+
Status: metav1.ConditionFalse,
283+
Reason: ocv1.ReasonFailed,
284+
},
285+
},
286+
{
287+
name: "multiple rolling revisions with one Retrying - uses Failed",
288+
revisionStates: &RevisionStates{
289+
Installed: nil,
290+
RollingOut: []*RevisionMetadata{
291+
{
292+
RevisionName: "rev-1",
293+
Conditions: []metav1.Condition{
294+
{
295+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
296+
Status: metav1.ConditionTrue,
297+
Reason: ocv1.ReasonRollingOut,
298+
Message: "Revision is rolling out",
299+
},
300+
},
301+
},
302+
{
303+
RevisionName: "rev-2",
304+
Conditions: []metav1.Condition{
305+
{
306+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
307+
Status: metav1.ConditionTrue,
308+
Reason: ocv1.ClusterExtensionRevisionReasonRetrying,
309+
Message: "validation error occurred",
310+
},
311+
},
312+
},
313+
},
314+
},
315+
expectedInstalledCond: metav1.Condition{
316+
Type: ocv1.TypeInstalled,
317+
Status: metav1.ConditionFalse,
318+
Reason: ocv1.ReasonFailed,
319+
},
320+
},
321+
{
322+
name: "rolling revision with RollingOut reason - uses Absent",
323+
revisionStates: &RevisionStates{
324+
Installed: nil,
325+
RollingOut: []*RevisionMetadata{
326+
{
327+
RevisionName: "rev-1",
328+
Conditions: []metav1.Condition{
329+
{
330+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
331+
Status: metav1.ConditionTrue,
332+
Reason: ocv1.ReasonRollingOut,
333+
Message: "Revision is rolling out",
334+
},
335+
},
336+
},
337+
},
338+
},
339+
expectedInstalledCond: metav1.Condition{
340+
Type: ocv1.TypeInstalled,
341+
Status: metav1.ConditionFalse,
342+
Reason: ocv1.ReasonAbsent,
343+
},
344+
},
345+
{
346+
name: "old revision with Retrying superseded by latest healthy - uses Absent",
347+
revisionStates: &RevisionStates{
348+
Installed: nil,
349+
RollingOut: []*RevisionMetadata{
350+
{
351+
RevisionName: "rev-1",
352+
Conditions: []metav1.Condition{
353+
{
354+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
355+
Status: metav1.ConditionTrue,
356+
Reason: ocv1.ClusterExtensionRevisionReasonRetrying,
357+
Message: "old error that was superseded",
358+
},
359+
},
360+
},
361+
{
362+
RevisionName: "rev-2",
363+
Conditions: []metav1.Condition{
364+
{
365+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
366+
Status: metav1.ConditionTrue,
367+
Reason: ocv1.ReasonRollingOut,
368+
Message: "Latest revision is rolling out healthy",
369+
},
370+
},
371+
},
372+
},
373+
},
374+
expectedInstalledCond: metav1.Condition{
375+
Type: ocv1.TypeInstalled,
376+
Status: metav1.ConditionFalse,
377+
Reason: ocv1.ReasonAbsent,
378+
},
379+
},
380+
}
381+
382+
for _, tt := range tests {
383+
t.Run(tt.name, func(t *testing.T) {
384+
ext := &ocv1.ClusterExtension{
385+
ObjectMeta: metav1.ObjectMeta{
386+
Name: "test-ext",
387+
Generation: 1,
388+
},
389+
}
390+
391+
setInstalledStatusFromRevisionStates(ext, tt.revisionStates)
392+
393+
cond := meta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled)
394+
require.NotNil(t, cond)
395+
require.Equal(t, tt.expectedInstalledCond.Status, cond.Status)
396+
require.Equal(t, tt.expectedInstalledCond.Reason, cond.Reason)
397+
})
398+
}
399+
}

0 commit comments

Comments
 (0)