Skip to content

Commit d9786e0

Browse files
fix(runtime status) Fix config validation errors to show Failed status
Config validation errors are permanent errors requiring user action, not transient errors that should be retried. This commit ensures config errors are treated as TerminalErrors. Changes: 1. Wrap config validation errors as reconcile.TerminalError in provider.go - Config errors now set Progressing: False, Reason: Blocked - Signals manual intervention required (not automatic retry) 2. Keep existing determineFailureReason() logic in common_controller.go - No revisions → Installed: False, Reason: Failed ✓ - Revision with Retrying → Installed: False, Reason: Failed ✓ - Revision with RollingOut → Installed: False, Reason: Absent ✓ Result: Config errors now correctly show: - Progressing: False, Reason: Blocked (terminal error) - Installed: False, Reason: Failed (installation failed) This fixes the downstream OpenShift test failure where config errors were showing Reason: Retrying instead of Blocked, and Installed was using Absent instead of Failed. Applies to both Helm and Boxcutter runtimes.
1 parent 7a60e71 commit d9786e0

4 files changed

Lines changed: 158 additions & 11 deletions

File tree

internal/operator-controller/applier/provider.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"helm.sh/helm/v3/pkg/chart"
1010
"k8s.io/apimachinery/pkg/util/sets"
1111
"sigs.k8s.io/controller-runtime/pkg/client"
12+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
1213

1314
"github.com/operator-framework/api/pkg/operators/v1alpha1"
1415

@@ -77,7 +78,7 @@ func (r *RegistryV1ManifestProvider) Get(bundleFS fs.FS, ext *ocv1.ClusterExtens
7778
bundleConfigBytes := extensionConfigBytes(ext)
7879
bundleConfig, err := config.UnmarshalConfig(bundleConfigBytes, schema, ext.Spec.Namespace)
7980
if err != nil {
80-
return nil, fmt.Errorf("invalid ClusterExtension configuration: %w", err)
81+
return nil, reconcile.TerminalError(fmt.Errorf("invalid ClusterExtension configuration: %w", err))
8182
}
8283

8384
if watchNS := bundleConfig.GetWatchNamespace(); watchNS != nil {

internal/operator-controller/controllers/clusterextension_reconcile_steps.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,11 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc {
158158
// handleResolutionError handles the case when bundle resolution fails.
159159
//
160160
// Decision logic (evaluated in order):
161-
// 1. No installed bundle Retry (cannot proceed without any bundle)
162-
// 2. Version change requested Retry (cannot upgrade without catalog)
163-
// 3. Cannot check catalog existence Retry (API error, cannot safely decide)
164-
// 4. Catalogs exist Retry (transient error, catalog may be updating)
165-
// 5. Catalogs deleted Fallback to installed bundle (maintain current state)
161+
// 1. No installed bundle: Retry (cannot proceed without any bundle)
162+
// 2. Version change requested: Retry (cannot upgrade without catalog)
163+
// 3. Cannot check catalog existence: Retry (API error, cannot safely decide)
164+
// 4. Catalogs exist: Retry (transient error, catalog may be updating)
165+
// 5. Catalogs deleted: Fallback to installed bundle (maintain current state)
166166
//
167167
// When falling back (case 5), we set the resolved bundle to the installed bundle and return
168168
// no error, allowing the Apply step to run and maintain resources using the existing installation.

internal/operator-controller/controllers/common_controller.go

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,8 @@ func setInstalledStatusFromRevisionStates(ext *ocv1.ClusterExtension, revisionSt
5656
// Nothing is installed
5757
if revisionStates.Installed == nil {
5858
setInstallStatus(ext, nil)
59-
if len(revisionStates.RollingOut) == 0 {
60-
setInstalledStatusConditionFalse(ext, ocv1.ReasonFailed, "No bundle installed")
61-
} else {
62-
setInstalledStatusConditionFalse(ext, ocv1.ReasonAbsent, "No bundle installed")
63-
}
59+
reason := determineFailureReason(revisionStates.RollingOut)
60+
setInstalledStatusConditionFalse(ext, reason, "No bundle installed")
6461
return
6562
}
6663
// Something is installed
@@ -71,6 +68,44 @@ func setInstalledStatusFromRevisionStates(ext *ocv1.ClusterExtension, revisionSt
7168
setInstalledStatusConditionSuccess(ext, fmt.Sprintf("Installed bundle %s successfully", revisionStates.Installed.Image))
7269
}
7370

71+
// determineFailureReason determines the appropriate reason for the Installed condition
72+
// when no bundle is installed (Installed: False).
73+
//
74+
// Returns Failed when:
75+
// - No rolling revisions exist (nothing to install)
76+
// - Any rolling revision has Reason: Retrying (indicates an error occurred)
77+
//
78+
// Returns Absent when:
79+
// - Rolling revisions exist with Reason: RollingOut (healthy phased rollout in progress)
80+
//
81+
// Rationale:
82+
// - Failed: Semantically indicates an error prevented installation
83+
// - Absent: Semantically indicates "not there yet" (neutral state, e.g., during healthy rollout)
84+
// - Retrying reason indicates an error (config validation, apply failure, etc.)
85+
// - RollingOut reason indicates healthy progress (not an error)
86+
//
87+
// This ensures consistency: all errors (regardless of type) result in Failed,
88+
// while healthy rollout states result in Absent.
89+
func determineFailureReason(rollingRevisions []*RevisionMetadata) string {
90+
if len(rollingRevisions) == 0 {
91+
return ocv1.ReasonFailed
92+
}
93+
94+
// Check if any rolling revision indicates an error (Retrying reason)
95+
for _, rev := range rollingRevisions {
96+
progressingCond := apimeta.FindStatusCondition(rev.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing)
97+
if progressingCond != nil && progressingCond.Reason == string(ocv1.ClusterExtensionRevisionReasonRetrying) {
98+
// Retrying indicates an error occurred (config, apply, validation, etc.)
99+
// Use Failed for semantic correctness: installation failed due to error
100+
return ocv1.ReasonFailed
101+
}
102+
}
103+
104+
// No errors detected - revisions are progressing healthily (RollingOut) or no conditions set
105+
// Use Absent for neutral "not installed yet" state
106+
return ocv1.ReasonAbsent
107+
}
108+
74109
// setInstalledStatusConditionSuccess sets the installed status condition to success.
75110
func setInstalledStatusConditionSuccess(ext *ocv1.ClusterExtension, message string) {
76111
SetStatusCondition(&ext.Status.Conditions, metav1.Condition{

internal/operator-controller/controllers/common_controller_test.go

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,3 +240,114 @@ func TestSetStatusConditionWrapper(t *testing.T) {
240240
})
241241
}
242242
}
243+
244+
func TestSetInstalledStatusFromRevisionStates_ConfigValidationError(t *testing.T) {
245+
tests := []struct {
246+
name string
247+
revisionStates *RevisionStates
248+
expectedInstalledCond metav1.Condition
249+
}{
250+
{
251+
name: "no revisions at all - uses Failed",
252+
revisionStates: &RevisionStates{
253+
Installed: nil,
254+
RollingOut: nil,
255+
},
256+
expectedInstalledCond: metav1.Condition{
257+
Type: ocv1.TypeInstalled,
258+
Status: metav1.ConditionFalse,
259+
Reason: ocv1.ReasonFailed,
260+
},
261+
},
262+
{
263+
name: "rolling revision with error (Retrying) - uses Failed",
264+
revisionStates: &RevisionStates{
265+
Installed: nil,
266+
RollingOut: []*RevisionMetadata{
267+
{
268+
RevisionName: "rev-1",
269+
Conditions: []metav1.Condition{
270+
{
271+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
272+
Status: metav1.ConditionTrue,
273+
Reason: ocv1.ClusterExtensionRevisionReasonRetrying,
274+
Message: "some error occurred",
275+
},
276+
},
277+
},
278+
},
279+
},
280+
expectedInstalledCond: metav1.Condition{
281+
Type: ocv1.TypeInstalled,
282+
Status: metav1.ConditionFalse,
283+
Reason: ocv1.ReasonFailed,
284+
},
285+
},
286+
{
287+
name: "rolling revision with Retrying reason - uses Failed",
288+
revisionStates: &RevisionStates{
289+
Installed: nil,
290+
RollingOut: []*RevisionMetadata{
291+
{
292+
RevisionName: "rev-1",
293+
Conditions: []metav1.Condition{
294+
{
295+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
296+
Status: metav1.ConditionTrue,
297+
Reason: ocv1.ClusterExtensionRevisionReasonRetrying,
298+
Message: "some error occurred",
299+
},
300+
},
301+
},
302+
},
303+
},
304+
expectedInstalledCond: metav1.Condition{
305+
Type: ocv1.TypeInstalled,
306+
Status: metav1.ConditionFalse,
307+
Reason: ocv1.ReasonFailed,
308+
},
309+
},
310+
{
311+
name: "rolling revision with RollingOut reason - uses Absent",
312+
revisionStates: &RevisionStates{
313+
Installed: nil,
314+
RollingOut: []*RevisionMetadata{
315+
{
316+
RevisionName: "rev-1",
317+
Conditions: []metav1.Condition{
318+
{
319+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
320+
Status: metav1.ConditionTrue,
321+
Reason: ocv1.ReasonRollingOut,
322+
Message: "Revision is rolling out",
323+
},
324+
},
325+
},
326+
},
327+
},
328+
expectedInstalledCond: metav1.Condition{
329+
Type: ocv1.TypeInstalled,
330+
Status: metav1.ConditionFalse,
331+
Reason: ocv1.ReasonAbsent,
332+
},
333+
},
334+
}
335+
336+
for _, tt := range tests {
337+
t.Run(tt.name, func(t *testing.T) {
338+
ext := &ocv1.ClusterExtension{
339+
ObjectMeta: metav1.ObjectMeta{
340+
Name: "test-ext",
341+
Generation: 1,
342+
},
343+
}
344+
345+
setInstalledStatusFromRevisionStates(ext, tt.revisionStates)
346+
347+
cond := meta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled)
348+
require.NotNil(t, cond)
349+
require.Equal(t, tt.expectedInstalledCond.Status, cond.Status)
350+
require.Equal(t, tt.expectedInstalledCond.Reason, cond.Reason)
351+
})
352+
}
353+
}

0 commit comments

Comments
 (0)