Skip to content

Commit 0b603c4

Browse files
Fix config validation errors to show Failed status
Config validation errors are permanent and need user action to fix. This commit makes them show the correct status conditions. What changed: 1. Config errors now return TerminalError (not regular error) - Shows: Progressing=False, Reason=Blocked (won't auto-retry) - Shows: Installed=False, Reason=Failed (not Reason=Absent) 2. Check only the latest revision for errors (not all revisions) - Old revision errors that were fixed don't cause Failed anymore - Matches how BOXCUTTER mirrors status from latest revision only Why this matters: - Users see "Blocked" not "Retrying" when they need to fix their config - Status shows current state (latest revision) not old history Assisted-by: Claude
1 parent 7a60e71 commit 0b603c4

4 files changed

Lines changed: 205 additions & 11 deletions

File tree

internal/operator-controller/applier/provider.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"helm.sh/helm/v3/pkg/chart"
1010
"k8s.io/apimachinery/pkg/util/sets"
1111
"sigs.k8s.io/controller-runtime/pkg/client"
12+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
1213

1314
"github.com/operator-framework/api/pkg/operators/v1alpha1"
1415

@@ -77,7 +78,7 @@ func (r *RegistryV1ManifestProvider) Get(bundleFS fs.FS, ext *ocv1.ClusterExtens
7778
bundleConfigBytes := extensionConfigBytes(ext)
7879
bundleConfig, err := config.UnmarshalConfig(bundleConfigBytes, schema, ext.Spec.Namespace)
7980
if err != nil {
80-
return nil, fmt.Errorf("invalid ClusterExtension configuration: %w", err)
81+
return nil, reconcile.TerminalError(fmt.Errorf("invalid ClusterExtension configuration: %w", err))
8182
}
8283

8384
if watchNS := bundleConfig.GetWatchNamespace(); watchNS != nil {

internal/operator-controller/controllers/clusterextension_reconcile_steps.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -158,11 +158,11 @@ func ResolveBundle(r resolve.Resolver, c client.Client) ReconcileStepFunc {
158158
// handleResolutionError handles the case when bundle resolution fails.
159159
//
160160
// Decision logic (evaluated in order):
161-
// 1. No installed bundle Retry (cannot proceed without any bundle)
162-
// 2. Version change requested Retry (cannot upgrade without catalog)
163-
// 3. Cannot check catalog existence Retry (API error, cannot safely decide)
164-
// 4. Catalogs exist Retry (transient error, catalog may be updating)
165-
// 5. Catalogs deleted Fallback to installed bundle (maintain current state)
161+
// 1. No installed bundle: Retry (cannot proceed without any bundle)
162+
// 2. Version change requested: Retry (cannot upgrade without catalog)
163+
// 3. Cannot check catalog existence: Retry (API error, cannot safely decide)
164+
// 4. Catalogs exist: Retry (transient error, catalog may be updating)
165+
// 5. Catalogs deleted: Fallback to installed bundle (maintain current state)
166166
//
167167
// When falling back (case 5), we set the resolved bundle to the installed bundle and return
168168
// no error, allowing the Apply step to run and maintain resources using the existing installation.

internal/operator-controller/controllers/common_controller.go

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,8 @@ func setInstalledStatusFromRevisionStates(ext *ocv1.ClusterExtension, revisionSt
5656
// Nothing is installed
5757
if revisionStates.Installed == nil {
5858
setInstallStatus(ext, nil)
59-
if len(revisionStates.RollingOut) == 0 {
60-
setInstalledStatusConditionFalse(ext, ocv1.ReasonFailed, "No bundle installed")
61-
} else {
62-
setInstalledStatusConditionFalse(ext, ocv1.ReasonAbsent, "No bundle installed")
63-
}
59+
reason := determineFailureReason(revisionStates.RollingOut)
60+
setInstalledStatusConditionFalse(ext, reason, "No bundle installed")
6461
return
6562
}
6663
// Something is installed
@@ -71,6 +68,45 @@ func setInstalledStatusFromRevisionStates(ext *ocv1.ClusterExtension, revisionSt
7168
setInstalledStatusConditionSuccess(ext, fmt.Sprintf("Installed bundle %s successfully", revisionStates.Installed.Image))
7269
}
7370

71+
// determineFailureReason determines the appropriate reason for the Installed condition
72+
// when no bundle is installed (Installed: False).
73+
//
74+
// Returns Failed when:
75+
// - No rolling revisions exist (nothing to install)
76+
// - The latest rolling revision has Reason: Retrying (indicates an error occurred)
77+
//
78+
// Returns Absent when:
79+
// - Rolling revisions exist with the latest having Reason: RollingOut (healthy phased rollout in progress)
80+
//
81+
// Rationale:
82+
// - Failed: Semantically indicates an error prevented installation
83+
// - Absent: Semantically indicates "not there yet" (neutral state, e.g., during healthy rollout)
84+
// - Retrying reason indicates an error (config validation, apply failure, etc.)
85+
// - RollingOut reason indicates healthy progress (not an error)
86+
// - Only the LATEST revision matters - old errors superseded by newer healthy revisions should not cause Failed
87+
//
88+
// Note: rollingRevisions are sorted in ascending order by Spec.Revision (oldest to newest),
89+
// so the latest revision is the LAST element in the array.
90+
func determineFailureReason(rollingRevisions []*RevisionMetadata) string {
91+
if len(rollingRevisions) == 0 {
92+
return ocv1.ReasonFailed
93+
}
94+
95+
// Check if the LATEST rolling revision indicates an error (Retrying reason)
96+
// Latest revision is the last element in the array (sorted ascending by Spec.Revision)
97+
latestRevision := rollingRevisions[len(rollingRevisions)-1]
98+
progressingCond := apimeta.FindStatusCondition(latestRevision.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing)
99+
if progressingCond != nil && progressingCond.Reason == string(ocv1.ClusterExtensionRevisionReasonRetrying) {
100+
// Retrying indicates an error occurred (config, apply, validation, etc.)
101+
// Use Failed for semantic correctness: installation failed due to error
102+
return ocv1.ReasonFailed
103+
}
104+
105+
// No error detected in latest revision - it's progressing healthily (RollingOut) or no conditions set
106+
// Use Absent for neutral "not installed yet" state
107+
return ocv1.ReasonAbsent
108+
}
109+
74110
// setInstalledStatusConditionSuccess sets the installed status condition to success.
75111
func setInstalledStatusConditionSuccess(ext *ocv1.ClusterExtension, message string) {
76112
SetStatusCondition(&ext.Status.Conditions, metav1.Condition{

internal/operator-controller/controllers/common_controller_test.go

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,3 +240,160 @@ func TestSetStatusConditionWrapper(t *testing.T) {
240240
})
241241
}
242242
}
243+
244+
func TestSetInstalledStatusFromRevisionStates_ConfigValidationError(t *testing.T) {
245+
tests := []struct {
246+
name string
247+
revisionStates *RevisionStates
248+
expectedInstalledCond metav1.Condition
249+
}{
250+
{
251+
name: "no revisions at all - uses Failed",
252+
revisionStates: &RevisionStates{
253+
Installed: nil,
254+
RollingOut: nil,
255+
},
256+
expectedInstalledCond: metav1.Condition{
257+
Type: ocv1.TypeInstalled,
258+
Status: metav1.ConditionFalse,
259+
Reason: ocv1.ReasonFailed,
260+
},
261+
},
262+
{
263+
name: "rolling revision with error (Retrying) - uses Failed",
264+
revisionStates: &RevisionStates{
265+
Installed: nil,
266+
RollingOut: []*RevisionMetadata{
267+
{
268+
RevisionName: "rev-1",
269+
Conditions: []metav1.Condition{
270+
{
271+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
272+
Status: metav1.ConditionTrue,
273+
Reason: ocv1.ClusterExtensionRevisionReasonRetrying,
274+
Message: "some error occurred",
275+
},
276+
},
277+
},
278+
},
279+
},
280+
expectedInstalledCond: metav1.Condition{
281+
Type: ocv1.TypeInstalled,
282+
Status: metav1.ConditionFalse,
283+
Reason: ocv1.ReasonFailed,
284+
},
285+
},
286+
{
287+
name: "multiple rolling revisions with one Retrying - uses Failed",
288+
revisionStates: &RevisionStates{
289+
Installed: nil,
290+
RollingOut: []*RevisionMetadata{
291+
{
292+
RevisionName: "rev-1",
293+
Conditions: []metav1.Condition{
294+
{
295+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
296+
Status: metav1.ConditionTrue,
297+
Reason: ocv1.ReasonRollingOut,
298+
Message: "Revision is rolling out",
299+
},
300+
},
301+
},
302+
{
303+
RevisionName: "rev-2",
304+
Conditions: []metav1.Condition{
305+
{
306+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
307+
Status: metav1.ConditionTrue,
308+
Reason: ocv1.ClusterExtensionRevisionReasonRetrying,
309+
Message: "validation error occurred",
310+
},
311+
},
312+
},
313+
},
314+
},
315+
expectedInstalledCond: metav1.Condition{
316+
Type: ocv1.TypeInstalled,
317+
Status: metav1.ConditionFalse,
318+
Reason: ocv1.ReasonFailed,
319+
},
320+
},
321+
{
322+
name: "rolling revision with RollingOut reason - uses Absent",
323+
revisionStates: &RevisionStates{
324+
Installed: nil,
325+
RollingOut: []*RevisionMetadata{
326+
{
327+
RevisionName: "rev-1",
328+
Conditions: []metav1.Condition{
329+
{
330+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
331+
Status: metav1.ConditionTrue,
332+
Reason: ocv1.ReasonRollingOut,
333+
Message: "Revision is rolling out",
334+
},
335+
},
336+
},
337+
},
338+
},
339+
expectedInstalledCond: metav1.Condition{
340+
Type: ocv1.TypeInstalled,
341+
Status: metav1.ConditionFalse,
342+
Reason: ocv1.ReasonAbsent,
343+
},
344+
},
345+
{
346+
name: "old revision with Retrying superseded by latest healthy - uses Absent",
347+
revisionStates: &RevisionStates{
348+
Installed: nil,
349+
RollingOut: []*RevisionMetadata{
350+
{
351+
RevisionName: "rev-1",
352+
Conditions: []metav1.Condition{
353+
{
354+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
355+
Status: metav1.ConditionTrue,
356+
Reason: ocv1.ClusterExtensionRevisionReasonRetrying,
357+
Message: "old error that was superseded",
358+
},
359+
},
360+
},
361+
{
362+
RevisionName: "rev-2",
363+
Conditions: []metav1.Condition{
364+
{
365+
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
366+
Status: metav1.ConditionTrue,
367+
Reason: ocv1.ReasonRollingOut,
368+
Message: "Latest revision is rolling out healthy",
369+
},
370+
},
371+
},
372+
},
373+
},
374+
expectedInstalledCond: metav1.Condition{
375+
Type: ocv1.TypeInstalled,
376+
Status: metav1.ConditionFalse,
377+
Reason: ocv1.ReasonAbsent,
378+
},
379+
},
380+
}
381+
382+
for _, tt := range tests {
383+
t.Run(tt.name, func(t *testing.T) {
384+
ext := &ocv1.ClusterExtension{
385+
ObjectMeta: metav1.ObjectMeta{
386+
Name: "test-ext",
387+
Generation: 1,
388+
},
389+
}
390+
391+
setInstalledStatusFromRevisionStates(ext, tt.revisionStates)
392+
393+
cond := meta.FindStatusCondition(ext.Status.Conditions, ocv1.TypeInstalled)
394+
require.NotNil(t, cond)
395+
require.Equal(t, tt.expectedInstalledCond.Status, cond.Status)
396+
require.Equal(t, tt.expectedInstalledCond.Reason, cond.Reason)
397+
})
398+
}
399+
}

0 commit comments

Comments
 (0)