Skip to content

Commit ee4bab5

Browse files
camilamacedo86claudeCopilot
authored
šŸ› fix: Configuration validation errors now show "Failed" status instead of "Absent" (#2465)
* Make config errors permanent instead of retrying When configuration is invalid, mark it as a permanent error so the system doesn't keep trying to install it over and over. This saves resources and makes it clear the user needs to fix their config. Assisted-by: Claude <noreply@anthropic.com> * Show "Failed" status when config is wrong Before: Invalid config showed status as "Absent" which was confusing After: Shows "Failed" so users know something is wrong Only checks the latest attempt, not old ones that don't matter anymore. Assisted-by: Claude <noreply@anthropic.com> * Fix status not updating when BOXCUTTER config fails BOXCUTTER runtime was returning early on config errors without setting the status. Now it updates the status first so users can see what went wrong. Assisted-by: Claude <noreply@anthropic.com> * Add tests for config error handling Tests check: - Config errors are permanent (not retried) - Status shows "Failed" when config is wrong - Namespace names are validated - AllNamespaces operators reject watchNamespace config Includes both unit tests and end-to-end tests. Assisted-by: Claude <noreply@anthropic.com> * Remove 'terminal error:' prefix from status messages When TerminalError is used, unwrap it before showing the error message in status conditions. This prevents the 'terminal error:' prefix from appearing in user-facing messages. Changes: - Add UnwrapTerminal() helper function to cleanly unwrap terminal errors - Use the helper in setStatusProgressing and wrapErrorWithResolutionInfo - Don't set Installed condition when Progressing is Blocked (terminal error) since the Progressing condition already provides all necessary information Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * Add custom TerminalError with granular Reason support Previously, all terminal errors used the generic "Blocked" reason in the Progressing condition. This wasn't semantically correct - "Blocked" was intended for external blockers like resource collisions, not for user configuration errors. This commit introduces a custom TerminalError type that can carry a specific Reason field, allowing more precise categorization of terminal errors in status conditions. Changes: - Add ReasonInvalidConfiguration constant for configuration errors - Create NewTerminalError(reason, err) to wrap errors with a reason - Add ExtractTerminalReason(err) to retrieve the reason from errors - Update setStatusProgressing to use extracted reason when available - Fall back to ReasonBlocked for backward compatibility with plain reconcile.TerminalError() usage - Update config validation to use ReasonInvalidConfiguration - Update tests and e2e expectations accordingly Result: - Config errors now show: Progressing: False, Reason: InvalidConfiguration - Generic terminal errors show: Progressing: False, Reason: Blocked - More helpful and semantically correct status reporting Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com> * Update internal/operator-controller/controllers/boxcutter_reconcile_steps.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 5c4bf7b commit ee4bab5

File tree

10 files changed

+362
-17
lines changed

10 files changed

+362
-17
lines changed

ā€Žapi/v1/common_types.goā€Ž

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,10 @@ const (
2424
ReasonAbsent = "Absent"
2525

2626
// Progressing reasons
27-
ReasonRollingOut = "RollingOut"
28-
ReasonRetrying = "Retrying"
29-
ReasonBlocked = "Blocked"
27+
ReasonRollingOut = "RollingOut"
28+
ReasonRetrying = "Retrying"
29+
ReasonBlocked = "Blocked"
30+
ReasonInvalidConfiguration = "InvalidConfiguration"
3031

3132
// Deprecation reasons
3233
ReasonDeprecated = "Deprecated"

ā€Žinternal/operator-controller/applier/provider.goā€Ž

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"github.com/operator-framework/operator-controller/internal/operator-controller/config"
1717
"github.com/operator-framework/operator-controller/internal/operator-controller/rukpak/bundle/source"
1818
"github.com/operator-framework/operator-controller/internal/operator-controller/rukpak/render"
19+
errorutil "github.com/operator-framework/operator-controller/internal/shared/util/error"
1920
)
2021

2122
// ManifestProvider returns the manifests that should be applied by OLM given a bundle and its associated ClusterExtension
@@ -77,7 +78,7 @@ func (r *RegistryV1ManifestProvider) Get(bundleFS fs.FS, ext *ocv1.ClusterExtens
7778
bundleConfigBytes := extensionConfigBytes(ext)
7879
bundleConfig, err := config.UnmarshalConfig(bundleConfigBytes, schema, ext.Spec.Namespace)
7980
if err != nil {
80-
return nil, fmt.Errorf("invalid ClusterExtension configuration: %w", err)
81+
return nil, errorutil.NewTerminalError(ocv1.ReasonInvalidConfiguration, fmt.Errorf("invalid ClusterExtension configuration: %w", err))
8182
}
8283

8384
if watchNS := bundleConfig.GetWatchNamespace(); watchNS != nil {

ā€Žinternal/operator-controller/applier/provider_test.goā€Ž

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
1212
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1313
"sigs.k8s.io/controller-runtime/pkg/client"
14+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
1415

1516
"github.com/operator-framework/api/pkg/operators/v1alpha1"
1617

@@ -100,6 +101,37 @@ func Test_RegistryV1ManifestProvider_Integration(t *testing.T) {
100101
require.Contains(t, err.Error(), "invalid ClusterExtension configuration")
101102
})
102103

104+
t.Run("returns terminal error for invalid config", func(t *testing.T) {
105+
provider := applier.RegistryV1ManifestProvider{
106+
BundleRenderer: render.BundleRenderer{
107+
ResourceGenerators: []render.ResourceGenerator{
108+
func(rv1 *bundle.RegistryV1, opts render.Options) ([]client.Object, error) {
109+
return nil, nil
110+
},
111+
},
112+
},
113+
IsSingleOwnNamespaceEnabled: true,
114+
}
115+
116+
// Bundle with SingleNamespace install mode requiring watchNamespace config
117+
bundleFS := bundlefs.Builder().WithPackageName("test").
118+
WithCSV(clusterserviceversion.Builder().WithInstallModeSupportFor(v1alpha1.InstallModeTypeSingleNamespace).Build()).Build()
119+
120+
// ClusterExtension without required config
121+
ext := &ocv1.ClusterExtension{
122+
Spec: ocv1.ClusterExtensionSpec{
123+
Namespace: "install-namespace",
124+
// No config provided - should fail validation
125+
},
126+
}
127+
128+
_, err := provider.Get(bundleFS, ext)
129+
require.Error(t, err)
130+
require.Contains(t, err.Error(), "invalid ClusterExtension configuration")
131+
// Assert that config validation errors are terminal (not retriable)
132+
require.ErrorIs(t, err, reconcile.TerminalError(nil), "config validation errors should be terminal")
133+
})
134+
103135
t.Run("returns rendered manifests", func(t *testing.T) {
104136
provider := applier.RegistryV1ManifestProvider{
105137
BundleRenderer: registryv1.Renderer,

ā€Žinternal/operator-controller/conditionsets/conditionsets.goā€Ž

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ var ConditionReasons = []string{
4040
ocv1.ReasonDeprecationStatusUnknown,
4141
ocv1.ReasonFailed,
4242
ocv1.ReasonBlocked,
43+
ocv1.ReasonInvalidConfiguration,
4344
ocv1.ReasonRetrying,
4445
ocv1.ReasonAbsent,
4546
ocv1.ReasonRollingOut,

ā€Žinternal/operator-controller/controllers/boxcutter_reconcile_steps.goā€Ž

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package controllers
1919
import (
2020
"cmp"
2121
"context"
22+
"errors"
2223
"fmt"
2324
"io/fs"
2425
"slices"
@@ -27,6 +28,7 @@ import (
2728
ctrl "sigs.k8s.io/controller-runtime"
2829
"sigs.k8s.io/controller-runtime/pkg/client"
2930
"sigs.k8s.io/controller-runtime/pkg/log"
31+
"sigs.k8s.io/controller-runtime/pkg/reconcile"
3032

3133
ocv1 "github.com/operator-framework/operator-controller/api/v1"
3234
"github.com/operator-framework/operator-controller/internal/operator-controller/labels"
@@ -114,6 +116,12 @@ func ApplyBundleWithBoxcutter(apply func(ctx context.Context, contentFS fs.FS, e
114116
// If there was an error applying the resolved bundle,
115117
// report the error via the Progressing condition.
116118
setStatusProgressing(ext, wrapErrorWithResolutionInfo(state.resolvedRevisionMetadata.BundleMetadata, err))
119+
// Only set Installed condition for retryable errors.
120+
// For terminal errors (Progressing: False with a terminal reason such as Blocked or InvalidConfiguration),
121+
// the Progressing condition already provides all necessary information about the failure.
122+
if !errors.Is(err, reconcile.TerminalError(nil)) {
123+
setInstalledStatusFromRevisionStates(ext, state.revisionStates)
124+
}
117125
return nil, err
118126
}
119127

ā€Žinternal/operator-controller/controllers/clusterextension_controller.goā€Ž

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ import (
5050
ocv1 "github.com/operator-framework/operator-controller/api/v1"
5151
"github.com/operator-framework/operator-controller/internal/operator-controller/conditionsets"
5252
"github.com/operator-framework/operator-controller/internal/operator-controller/labels"
53+
errorutil "github.com/operator-framework/operator-controller/internal/shared/util/error"
5354
k8sutil "github.com/operator-framework/operator-controller/internal/shared/util/k8s"
5455
)
5556

@@ -455,6 +456,19 @@ func (r *ClusterExtensionReconciler) SetupWithManager(mgr ctrl.Manager, opts ...
455456
}
456457

457458
func wrapErrorWithResolutionInfo(resolved ocv1.BundleMetadata, err error) error {
459+
// Preserve TerminalError type and reason through wrapping
460+
if errors.Is(err, reconcile.TerminalError(nil)) {
461+
// Extract the reason if one was provided
462+
reason, hasReason := errorutil.ExtractTerminalReason(err)
463+
// Unwrap to get the inner error, add context
464+
innerErr := errorutil.UnwrapTerminal(err)
465+
wrappedErr := fmt.Errorf("error for resolved bundle %q with version %q: %w", resolved.Name, resolved.Version, innerErr)
466+
// Re-wrap preserving the reason if it existed
467+
if hasReason {
468+
return errorutil.NewTerminalError(reason, wrappedErr)
469+
}
470+
return reconcile.TerminalError(wrappedErr)
471+
}
458472
return fmt.Errorf("error for resolved bundle %q with version %q: %w", resolved.Name, resolved.Version, err)
459473
}
460474

ā€Žinternal/operator-controller/controllers/common_controller.goā€Ž

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"sigs.k8s.io/controller-runtime/pkg/reconcile"
2626

2727
ocv1 "github.com/operator-framework/operator-controller/api/v1"
28+
errorutil "github.com/operator-framework/operator-controller/internal/shared/util/error"
2829
)
2930

3031
const (
@@ -56,11 +57,8 @@ func setInstalledStatusFromRevisionStates(ext *ocv1.ClusterExtension, revisionSt
5657
// Nothing is installed
5758
if revisionStates.Installed == nil {
5859
setInstallStatus(ext, nil)
59-
if len(revisionStates.RollingOut) == 0 {
60-
setInstalledStatusConditionFalse(ext, ocv1.ReasonFailed, "No bundle installed")
61-
} else {
62-
setInstalledStatusConditionFalse(ext, ocv1.ReasonAbsent, "No bundle installed")
63-
}
60+
reason := determineFailureReason(revisionStates.RollingOut)
61+
setInstalledStatusConditionFalse(ext, reason, "No bundle installed")
6462
return
6563
}
6664
// Something is installed
@@ -71,6 +69,45 @@ func setInstalledStatusFromRevisionStates(ext *ocv1.ClusterExtension, revisionSt
7169
setInstalledStatusConditionSuccess(ext, fmt.Sprintf("Installed bundle %s successfully", revisionStates.Installed.Image))
7270
}
7371

72+
// determineFailureReason determines the appropriate reason for the Installed condition
73+
// when no bundle is installed (Installed: False).
74+
//
75+
// Returns Failed when:
76+
// - No rolling revisions exist (nothing to install)
77+
// - The latest rolling revision has Reason: Retrying (indicates an error occurred)
78+
//
79+
// Returns Absent when:
80+
// - Rolling revisions exist with the latest having Reason: RollingOut (healthy phased rollout in progress)
81+
//
82+
// Rationale:
83+
// - Failed: Semantically indicates an error prevented installation
84+
// - Absent: Semantically indicates "not there yet" (neutral state, e.g., during healthy rollout)
85+
// - Retrying reason indicates an error (config validation, apply failure, etc.)
86+
// - RollingOut reason indicates healthy progress (not an error)
87+
// - Only the LATEST revision matters - old errors superseded by newer healthy revisions should not cause Failed
88+
//
89+
// Note: rollingRevisions are sorted in ascending order by Spec.Revision (oldest to newest),
90+
// so the latest revision is the LAST element in the array.
91+
func determineFailureReason(rollingRevisions []*RevisionMetadata) string {
92+
if len(rollingRevisions) == 0 {
93+
return ocv1.ReasonFailed
94+
}
95+
96+
// Check if the LATEST rolling revision indicates an error (Retrying reason)
97+
// Latest revision is the last element in the array (sorted ascending by Spec.Revision)
98+
latestRevision := rollingRevisions[len(rollingRevisions)-1]
99+
progressingCond := apimeta.FindStatusCondition(latestRevision.Conditions, ocv1.ClusterExtensionRevisionTypeProgressing)
100+
if progressingCond != nil && progressingCond.Reason == string(ocv1.ClusterExtensionRevisionReasonRetrying) {
101+
// Retrying indicates an error occurred (config, apply, validation, etc.)
102+
// Use Failed for semantic correctness: installation failed due to error
103+
return ocv1.ReasonFailed
104+
}
105+
106+
// No error detected in latest revision - it's progressing healthily (RollingOut) or no conditions set
107+
// Use Absent for neutral "not installed yet" state
108+
return ocv1.ReasonAbsent
109+
}
110+
74111
// setInstalledStatusConditionSuccess sets the installed status condition to success.
75112
func setInstalledStatusConditionSuccess(ext *ocv1.ClusterExtension, message string) {
76113
SetStatusCondition(&ext.Status.Conditions, metav1.Condition{
@@ -119,12 +156,20 @@ func setStatusProgressing(ext *ocv1.ClusterExtension, err error) {
119156

120157
if err != nil {
121158
progressingCond.Reason = ocv1.ReasonRetrying
122-
progressingCond.Message = err.Error()
159+
// Unwrap TerminalError to avoid "terminal error:" prefix in message
160+
progressingCond.Message = errorutil.UnwrapTerminal(err).Error()
123161
}
124162

125163
if errors.Is(err, reconcile.TerminalError(nil)) {
126164
progressingCond.Status = metav1.ConditionFalse
127-
progressingCond.Reason = ocv1.ReasonBlocked
165+
// Try to extract a specific reason from the terminal error.
166+
// If the error was created with NewTerminalError(reason, err), use that reason.
167+
// Otherwise, fall back to the generic "Blocked" reason.
168+
if reason, ok := errorutil.ExtractTerminalReason(err); ok {
169+
progressingCond.Reason = reason
170+
} else {
171+
progressingCond.Reason = ocv1.ReasonBlocked
172+
}
128173
}
129174

130175
SetStatusCondition(&ext.Status.Conditions, progressingCond)

0 commit comments

Comments
Ā (0)