From 882c6a48ba6125848229a4f7b782add78e70a3b4 Mon Sep 17 00:00:00 2001 From: Katarina Borgulova Date: Wed, 15 Apr 2026 11:42:35 +0200 Subject: [PATCH 1/7] refactor: Some retry logic into 1 package --- .../conditional/conditional_gatherer.go | 25 +++-- .../insightsuploader/insightsuploader.go | 30 +++--- pkg/ocm/clustertransfer/cluster_transfer.go | 24 +---- pkg/ocm/sca/sca.go | 26 +---- pkg/retry/retry.go | 94 +++++++++++++++++++ 5 files changed, 129 insertions(+), 70 deletions(-) create mode 100644 pkg/retry/retry.go diff --git a/pkg/gatherers/conditional/conditional_gatherer.go b/pkg/gatherers/conditional/conditional_gatherer.go index 3ada944fc0..bd8f96c891 100644 --- a/pkg/gatherers/conditional/conditional_gatherer.go +++ b/pkg/gatherers/conditional/conditional_gatherer.go @@ -31,6 +31,7 @@ import ( "github.com/openshift/insights-operator/pkg/gatherers/common" "github.com/openshift/insights-operator/pkg/insights/insightsclient" "github.com/openshift/insights-operator/pkg/record" + "github.com/openshift/insights-operator/pkg/retry" "github.com/openshift/insights-operator/pkg/utils" ) @@ -271,30 +272,28 @@ func (g *Gatherer) getRemoteConfiguration(ctx context.Context) ([]byte, error) { Cap: 3 * time.Minute, } endpointWithVersion := fmt.Sprintf(endpoint, ocpVersion) - var remoteConfigData []byte - err = wait.ExponentialBackoffWithContext(ctx, backOff, func(ctx context.Context) (done bool, err error) { + + remoteConfigData, err := retry.RetryWithExpBackOff(backOff, retry.RetryOnNon200HTTP, func() ([]byte, error) { resp, err := g.insightsCli.GetWithPathParam(ctx, endpoint, ocpVersion, false) if err != nil { - return false, err + return nil, err } + defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - klog.Infof("Received HTTP status code %d, trying again in %s", resp.StatusCode, backOff.Step()) - if backOff.Steps > 1 { - return false, nil - } - return true, insightsclient.HttpError{ - Err: fmt.Errorf("received HTTP %s from %s. Using the default built-in configuration", resp.Status, endpointWithVersion), + return nil, insightsclient.HttpError{ + Err: fmt.Errorf("received HTTP %s from %s", resp.Status, endpointWithVersion), StatusCode: resp.StatusCode, } } - remoteConfigData, err = io.ReadAll(resp.Body) - defer resp.Body.Close() + + data, err := io.ReadAll(resp.Body) if err != nil { - return false, nil + return nil, err } - return true, nil + return data, nil }) + if err != nil { return nil, err } diff --git a/pkg/insights/insightsuploader/insightsuploader.go b/pkg/insights/insightsuploader/insightsuploader.go index 97f23f8609..9ecceed8de 100644 --- a/pkg/insights/insightsuploader/insightsuploader.go +++ b/pkg/insights/insightsuploader/insightsuploader.go @@ -16,6 +16,7 @@ import ( "github.com/openshift/insights-operator/pkg/config/configobserver" "github.com/openshift/insights-operator/pkg/controllerstatus" "github.com/openshift/insights-operator/pkg/insights/insightsclient" + "github.com/openshift/insights-operator/pkg/retry" ) type Authorizer interface { @@ -31,6 +32,12 @@ type StatusReporter interface { SetLastReportedTime(time.Time) } +// uploadResult wraps the response from SendAndGetID for use with retry logic +type uploadResult struct { + requestID string + statusCode int +} + type Controller struct { controllerstatus.StatusController @@ -206,25 +213,18 @@ func (c *Controller) Upload(ctx context.Context, s *insightsclient.Source) (stri start := time.Now() s.ID = start.Format(time.RFC3339) s.Type = "application/vnd.redhat.openshift.periodic" - var requestID string - var statusCode int - err := wait.ExponentialBackoff(c.backoff, func() (done bool, err error) { - requestID, statusCode, err = c.client.SendAndGetID(ctx, c.configurator.Config().DataReporting.UploadEndpoint, *s) - if err != nil { - // do no return the error if it's not the last attempt - if c.backoff.Steps > 1 { - klog.Infof("Unable to upload report after %s: %v", time.Since(start).Truncate(time.Second/100), err) - klog.Errorf("%v. Trying again in %s", err, c.backoff.Step()) - return false, nil - } - } - return true, err + + result, err := retry.RetryWithExpBackOff(c.backoff, retry.RetryOnAll, func() (uploadResult, error) { + requestID, statusCode, err := c.client.SendAndGetID(ctx, c.configurator.Config().DataReporting.UploadEndpoint, *s) + return uploadResult{requestID: requestID, statusCode: statusCode}, err }) + if err != nil { - return "", statusCode, err + klog.Infof("Unable to upload report after %s: %v", time.Since(start).Truncate(time.Second/100), err) + return "", result.statusCode, err } klog.Infof("Uploaded report successfully in %s", time.Since(start)) - return requestID, statusCode, nil + return result.requestID, result.statusCode, nil } func reportToLogs(source io.Reader) error { diff --git a/pkg/ocm/clustertransfer/cluster_transfer.go b/pkg/ocm/clustertransfer/cluster_transfer.go index 25cd7be41c..85fdb367e1 100644 --- a/pkg/ocm/clustertransfer/cluster_transfer.go +++ b/pkg/ocm/clustertransfer/cluster_transfer.go @@ -13,6 +13,7 @@ import ( "github.com/openshift/insights-operator/pkg/controllerstatus" "github.com/openshift/insights-operator/pkg/insights/insightsclient" "github.com/openshift/insights-operator/pkg/ocm" + "github.com/openshift/insights-operator/pkg/retry" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/wait" @@ -221,27 +222,10 @@ func (c *Controller) requestClusterTransferWithExponentialBackoff(endpoint strin Cap: c.configurator.Config().ClusterTransfer.Interval, } - var data []byte - err := wait.ExponentialBackoff(bo, func() (bool, error) { - var err error - data, err = c.client.RecvClusterTransfer(endpoint) - if err == nil { - return true, nil - } - // don't try again in case it's not an HTTP error - it could mean we're in disconnected env - if !insightsclient.IsHttpError(err) { - return true, err - } - httpErr := err.(insightsclient.HttpError) - if httpErr.StatusCode >= http.StatusInternalServerError { - // check the number of steps to prevent "timeout waiting for condition" error - we want to propagate the HTTP error below - if bo.Steps > 1 { - klog.Errorf("Got HTTP %v. Trying again in %s", httpErr.StatusCode, bo.Step()) - return false, nil - } - } - return true, httpErr + data, err := retry.RetryWithExpBackOff(bo, retry.RetryOn500HTTP, func() ([]byte, error) { + return c.client.RecvClusterTransfer(endpoint) }) + if err != nil { return nil, err } diff --git a/pkg/ocm/sca/sca.go b/pkg/ocm/sca/sca.go index d0a7a57655..84d1aa4e66 100644 --- a/pkg/ocm/sca/sca.go +++ b/pkg/ocm/sca/sca.go @@ -19,6 +19,7 @@ import ( "github.com/openshift/insights-operator/pkg/controllerstatus" "github.com/openshift/insights-operator/pkg/insights/insightsclient" "github.com/openshift/insights-operator/pkg/ocm" + "github.com/openshift/insights-operator/pkg/retry" ) const ( @@ -299,29 +300,10 @@ func (c *Controller) requestSCAWithExpBackoff( Cap: c.configurator.Config().SCA.Interval, } - var err error - var data []byte - err = wait.ExponentialBackoff(bo, func() (bool, error) { - data, err = c.client.RecvSCACerts(ctx, endpoint, nodeArchitectures) - if err != nil { - // don't try again in case it's not an HTTP error - it could mean we're in disconnected env - if !insightsclient.IsHttpError(err) { - return true, err - } - httpErr := err.(insightsclient.HttpError) - // try again only in case of 500 or higher - if httpErr.StatusCode >= http.StatusInternalServerError { - // check the number of steps to prevent "timeout waiting for condition" error - we want to propagate the HTTP error - if bo.Steps > 1 { - klog.Errorf("%v. Trying again in %s", httpErr, bo.Step()) - return false, nil - } - } - return true, httpErr - } - - return true, nil + data, err := retry.RetryWithExpBackOff(bo, retry.RetryOn500HTTP, func() ([]byte, error) { + return c.client.RecvSCACerts(ctx, endpoint, nodeArchitectures) }) + if err != nil { return nil, err } diff --git a/pkg/retry/retry.go b/pkg/retry/retry.go new file mode 100644 index 0000000000..12637c5e83 --- /dev/null +++ b/pkg/retry/retry.go @@ -0,0 +1,94 @@ +// Package retry provides shared retry logic with exponential backoff for HTTP operations. +// +// Usage example (once implemented): +// +// response, err := retry.RetryWithExpBackOff( +// wait.Backoff{ +// Duration: interval/32, +// Factor: 2, +// Steps: ocm.FailureCountThreshold, +// Cap: interval, +// }, +// retry.RetryOn500HTTP, +// func() (*Response, error) { +// return client.RecvSCACerts(ctx, endpoint, nodeArchs) +// }, +// ) +package retry + +import ( + "net/http" + + "github.com/openshift/insights-operator/pkg/insights/insightsclient" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/klog/v2" +) + +type RetryStrategy int64 + +const ( + // RetryOn500HTTP retries only on HTTP 500+ errors, skips retry for non-HTTP errors (disconnected env) + // Used by: sca.go, clustertransfer.go + RetryOn500HTTP RetryStrategy = iota + + // RetryOnNon200HTTP retries on any non-200 HTTP status code + // Used by: conditional_gatherer.go + RetryOnNon200HTTP + + // RetryOnAll retries on all errors + // Used by: insightsuploader.go + RetryOnAll +) + +// shouldRetry determines if an error should be retried based on the strategy. +// Returns true if retry should be attempted (when steps remain). +func shouldRetry(err error, strategy RetryStrategy) bool { + switch strategy { + case RetryOn500HTTP: + // Only retry HTTP 500+ errors, skip non-HTTP errors (disconnected env) + if !insightsclient.IsHttpError(err) { + return false + } + httpErr := err.(insightsclient.HttpError) + return httpErr.StatusCode >= http.StatusInternalServerError + + case RetryOnNon200HTTP: + // Retry on any non-200 HTTP status, or non-HTTP errors + if !insightsclient.IsHttpError(err) { + return true // retry non-HTTP errors + } + httpErr := err.(insightsclient.HttpError) + return httpErr.StatusCode != http.StatusOK + + case RetryOnAll: + // Retry on all errors + return true + + default: + // Unknown strategy, don't retry + klog.Infof("Unknown strategy %d", strategy) + return false + } +} + +func RetryWithExpBackOff[T any](bo wait.Backoff, strategy RetryStrategy, operation func() (T, error)) (T, error) { + var err error + var data T + + err = wait.ExponentialBackoff(bo, func() (bool, error) { + data, err = operation() + if err != nil { + // Use strategy to determine if we should retry + if shouldRetry(err, strategy) && bo.Steps > 1 { + klog.Errorf("%v. Trying again in %s", err, bo.Step()) + return false, nil + } + // Don't retry - either strategy says no, or no steps remaining + return true, err + } + + return true, nil + }) + + return data, err +} From 5013b933c2ced9b6003c3eac85d81cc7669481aa Mon Sep 17 00:00:00 2001 From: Katarina Borgulova Date: Wed, 15 Apr 2026 11:43:12 +0200 Subject: [PATCH 2/7] test: Retry logic --- pkg/retry/retry_test.go | 140 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 pkg/retry/retry_test.go diff --git a/pkg/retry/retry_test.go b/pkg/retry/retry_test.go new file mode 100644 index 0000000000..01cdd4d558 --- /dev/null +++ b/pkg/retry/retry_test.go @@ -0,0 +1,140 @@ +package retry + +import ( + "fmt" + "net/http" + "testing" + + "github.com/openshift/insights-operator/pkg/insights/insightsclient" +) + +func TestShouldRetry_RetryOn500HTTP_Valid(t *testing.T) { + // HTTP 500 error should trigger retry + err := insightsclient.HttpError{ + StatusCode: http.StatusInternalServerError, + Err: fmt.Errorf("internal server error"), + } + + result := shouldRetry(err, RetryOn500HTTP) + if !result { + t.Errorf("Expected shouldRetry to return true for HTTP 500 error, got false") + } +} + +func TestShouldRetry_RetryOn500HTTP_Invalid(t *testing.T) { + // Test cases where RetryOn500HTTP should NOT retry: + // 1. Non-HTTP error (network error, context error, etc.) + // 2. HTTP 4xx error (client errors should not retry) + + // Test 1: Non-HTTP error + err := fmt.Errorf("network connection error") + result := shouldRetry(err, RetryOn500HTTP) + if result { + t.Errorf("Expected shouldRetry to return false for non-HTTP error, got true") + } + + // Test 2: HTTP 4xx error + err = insightsclient.HttpError{ + StatusCode: http.StatusNotFound, + Err: fmt.Errorf("not found"), + } + result = shouldRetry(err, RetryOn500HTTP) + if result { + t.Errorf("Expected shouldRetry to return false for HTTP 404 error, got true") + } +} + +func TestShouldRetry_RetryOnNon200HTTP_Valid(t *testing.T) { + // Test cases where RetryOnNon200HTTP should retry: + // 1. HTTP 404 error + // 2. HTTP 500 error + // 3. Non-HTTP error + + // Test 1: HTTP 404 + httpErr404 := insightsclient.HttpError{ + StatusCode: http.StatusNotFound, + Err: fmt.Errorf("not found"), + } + result := shouldRetry(httpErr404, RetryOnNon200HTTP) + if !result { + t.Errorf("Expected shouldRetry to return true for HTTP 404 error, got false") + } + + // Test 2: HTTP 500 error + httpErr500 := insightsclient.HttpError{ + StatusCode: http.StatusInternalServerError, + Err: fmt.Errorf("internal server error"), + } + result = shouldRetry(httpErr500, RetryOnNon200HTTP) + if !result { + t.Errorf("Expected shouldRetry to return true for HTTP 500 error, got false") + } + + // Test 3: Non-HTTP error should retry + nonHttpErr := fmt.Errorf("network error") + result = shouldRetry(nonHttpErr, RetryOnNon200HTTP) + if !result { + t.Errorf("Expected shouldRetry to return true for non-HTTP error, got false") + } +} + +func TestShouldRetry_RetryOnNon200HTTP_Invalid(t *testing.T) { + // HTTP 200 should NOT retry + err := insightsclient.HttpError{ + StatusCode: http.StatusOK, + Err: nil, + } + result := shouldRetry(err, RetryOnNon200HTTP) + if result { + t.Errorf("Expected shouldRetry to return false for HTTP 200 response, got true") + } +} + +func TestShouldRetry_RetryOnAll_Valid(t *testing.T) { + // All errors should trigger retry: + // 1. HTTP error + // 2. Non-HTTP error + + // Test 1: HTTP error + httpErr := insightsclient.HttpError{ + StatusCode: http.StatusBadRequest, + Err: fmt.Errorf("bad request"), + } + result := shouldRetry(httpErr, RetryOnAll) + if !result { + t.Errorf("Expected shouldRetry to return true for HTTP 400 error, got false") + } + + // Test 2: Non-HTTP error + nonHttpErr := fmt.Errorf("some random error") + result = shouldRetry(nonHttpErr, RetryOnAll) + if !result { + t.Errorf("Expected shouldRetry to return true for non-HTTP error, got false") + } +} + +func TestShouldRetry_RetryOnAll_Invalid(t *testing.T) { + // For RetryOnAll, there's no "invalid" case where it shouldn't retry + // But we can test that it returns true even for nil-ish cases or success responses + + // Even for HTTP 200 with error wrapper, it should retry + err := insightsclient.HttpError{ + StatusCode: http.StatusOK, + Err: fmt.Errorf("unexpected error despite 200"), + } + result := shouldRetry(err, RetryOnAll) + if !result { + t.Errorf("Expected shouldRetry to return true for any error with RetryOnAll, got false") + } +} + +func TestShouldRetry_DefaultStrategy(t *testing.T) { + // Invalid/unknown strategy should NOT retry + invalidStrategy := RetryStrategy(999) + err := fmt.Errorf("some error") + + result := shouldRetry(err, invalidStrategy) + if result { + t.Errorf("Expected shouldRetry to return false for unknown strategy, got true") + } +} From 54ada83c5741503cb823e2706d6ac8a94931e4bc Mon Sep 17 00:00:00 2001 From: Katarina Borgulova Date: Wed, 15 Apr 2026 16:27:45 +0200 Subject: [PATCH 3/7] fix: lint issues --- pkg/retry/retry_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/retry/retry_test.go b/pkg/retry/retry_test.go index 01cdd4d558..90ea6eb633 100644 --- a/pkg/retry/retry_test.go +++ b/pkg/retry/retry_test.go @@ -71,8 +71,8 @@ func TestShouldRetry_RetryOnNon200HTTP_Valid(t *testing.T) { } // Test 3: Non-HTTP error should retry - nonHttpErr := fmt.Errorf("network error") - result = shouldRetry(nonHttpErr, RetryOnNon200HTTP) + nonHTTPErr := fmt.Errorf("network error") + result = shouldRetry(nonHTTPErr, RetryOnNon200HTTP) if !result { t.Errorf("Expected shouldRetry to return true for non-HTTP error, got false") } @@ -106,8 +106,8 @@ func TestShouldRetry_RetryOnAll_Valid(t *testing.T) { } // Test 2: Non-HTTP error - nonHttpErr := fmt.Errorf("some random error") - result = shouldRetry(nonHttpErr, RetryOnAll) + nonHTTPErr := fmt.Errorf("some random error") + result = shouldRetry(nonHTTPErr, RetryOnAll) if !result { t.Errorf("Expected shouldRetry to return true for non-HTTP error, got false") } From e4022c13c2f7e5ed4abcc885484e0968e08f74e9 Mon Sep 17 00:00:00 2001 From: Katarina Borgulova Date: Tue, 21 Apr 2026 12:19:03 +0200 Subject: [PATCH 4/7] fix: review fixes --- pkg/ocm/clustertransfer/cluster_transfer.go | 2 +- pkg/ocm/sca/sca.go | 2 +- pkg/retry/retry.go | 37 +++++++++++++-------- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/pkg/ocm/clustertransfer/cluster_transfer.go b/pkg/ocm/clustertransfer/cluster_transfer.go index 85fdb367e1..76d52833a0 100644 --- a/pkg/ocm/clustertransfer/cluster_transfer.go +++ b/pkg/ocm/clustertransfer/cluster_transfer.go @@ -222,7 +222,7 @@ func (c *Controller) requestClusterTransferWithExponentialBackoff(endpoint strin Cap: c.configurator.Config().ClusterTransfer.Interval, } - data, err := retry.RetryWithExpBackOff(bo, retry.RetryOn500HTTP, func() ([]byte, error) { + data, err := retry.RetryWithExpBackOff(bo, retry.RetryOn50xHTTP, func() ([]byte, error) { return c.client.RecvClusterTransfer(endpoint) }) diff --git a/pkg/ocm/sca/sca.go b/pkg/ocm/sca/sca.go index 84d1aa4e66..62f4699bd9 100644 --- a/pkg/ocm/sca/sca.go +++ b/pkg/ocm/sca/sca.go @@ -300,7 +300,7 @@ func (c *Controller) requestSCAWithExpBackoff( Cap: c.configurator.Config().SCA.Interval, } - data, err := retry.RetryWithExpBackOff(bo, retry.RetryOn500HTTP, func() ([]byte, error) { + data, err := retry.RetryWithExpBackOff(bo, retry.RetryOn50xHTTP, func() ([]byte, error) { return c.client.RecvSCACerts(ctx, endpoint, nodeArchitectures) }) diff --git a/pkg/retry/retry.go b/pkg/retry/retry.go index 12637c5e83..83ae5a7ae1 100644 --- a/pkg/retry/retry.go +++ b/pkg/retry/retry.go @@ -1,6 +1,6 @@ // Package retry provides shared retry logic with exponential backoff for HTTP operations. // -// Usage example (once implemented): +// Usage example: // // response, err := retry.RetryWithExpBackOff( // wait.Backoff{ @@ -9,7 +9,7 @@ // Steps: ocm.FailureCountThreshold, // Cap: interval, // }, -// retry.RetryOn500HTTP, +// retry.RetryOn50xHTTP, // func() (*Response, error) { // return client.RecvSCACerts(ctx, endpoint, nodeArchs) // }, @@ -27,9 +27,9 @@ import ( type RetryStrategy int64 const ( - // RetryOn500HTTP retries only on HTTP 500+ errors, skips retry for non-HTTP errors (disconnected env) + // RetryOn50xHTTP retries only on HTTP 500+ errors, skips retry for non-HTTP errors (disconnected env) // Used by: sca.go, clustertransfer.go - RetryOn500HTTP RetryStrategy = iota + RetryOn50xHTTP RetryStrategy = iota // RetryOnNon200HTTP retries on any non-200 HTTP status code // Used by: conditional_gatherer.go @@ -44,7 +44,7 @@ const ( // Returns true if retry should be attempted (when steps remain). func shouldRetry(err error, strategy RetryStrategy) bool { switch strategy { - case RetryOn500HTTP: + case RetryOn50xHTTP: // Only retry HTTP 500+ errors, skip non-HTTP errors (disconnected env) if !insightsclient.IsHttpError(err) { return false @@ -66,29 +66,38 @@ func shouldRetry(err error, strategy RetryStrategy) bool { default: // Unknown strategy, don't retry - klog.Infof("Unknown strategy %d", strategy) + klog.Infof("Unknown strategy %d for retry mechanism", strategy) return false } } func RetryWithExpBackOff[T any](bo wait.Backoff, strategy RetryStrategy, operation func() (T, error)) (T, error) { - var err error + var lastErr error var data T - err = wait.ExponentialBackoff(bo, func() (bool, error) { - data, err = operation() - if err != nil { + attempt := 0 + maxAttempts := bo.Steps + + err := wait.ExponentialBackoff(bo, func() (bool, error) { + attempt++ + data, lastErr = operation() + if lastErr != nil { // Use strategy to determine if we should retry - if shouldRetry(err, strategy) && bo.Steps > 1 { - klog.Errorf("%v. Trying again in %s", err, bo.Step()) + if shouldRetry(lastErr, strategy) { + klog.Errorf("%v. Retrying (attempt %d/%d)", lastErr, attempt, maxAttempts) return false, nil } - // Don't retry - either strategy says no, or no steps remaining - return true, err + // Don't retry based on strategy + return true, lastErr } return true, nil }) + // If we exhausted retries, return the last operation error instead of the timeout error + if wait.Interrupted(err) && lastErr != nil { + return data, lastErr + } + return data, err } From 13a7a8f8a2bee9a9c1096b7fcf5b9a18d6f16b8a Mon Sep 17 00:00:00 2001 From: Katarina Borgulova Date: Tue, 21 Apr 2026 12:19:55 +0200 Subject: [PATCH 5/7] test: fix tests --- pkg/retry/retry_test.go | 418 +++++++++++++++++++++++++++++----------- 1 file changed, 306 insertions(+), 112 deletions(-) diff --git a/pkg/retry/retry_test.go b/pkg/retry/retry_test.go index 90ea6eb633..b508ddf445 100644 --- a/pkg/retry/retry_test.go +++ b/pkg/retry/retry_test.go @@ -1,140 +1,334 @@ package retry import ( + "bytes" "fmt" "net/http" + "strings" "testing" "github.com/openshift/insights-operator/pkg/insights/insightsclient" + "k8s.io/apimachinery/pkg/util/wait" ) -func TestShouldRetry_RetryOn500HTTP_Valid(t *testing.T) { - // HTTP 500 error should trigger retry - err := insightsclient.HttpError{ - StatusCode: http.StatusInternalServerError, - Err: fmt.Errorf("internal server error"), +func Test_ShouldRetry(t *testing.T) { + type testCase struct { + name string + err error + strategy RetryStrategy + shouldRetry bool } - result := shouldRetry(err, RetryOn500HTTP) - if !result { - t.Errorf("Expected shouldRetry to return true for HTTP 500 error, got false") - } -} - -func TestShouldRetry_RetryOn500HTTP_Invalid(t *testing.T) { - // Test cases where RetryOn500HTTP should NOT retry: - // 1. Non-HTTP error (network error, context error, etc.) - // 2. HTTP 4xx error (client errors should not retry) - - // Test 1: Non-HTTP error - err := fmt.Errorf("network connection error") - result := shouldRetry(err, RetryOn500HTTP) - if result { - t.Errorf("Expected shouldRetry to return false for non-HTTP error, got true") - } - - // Test 2: HTTP 4xx error - err = insightsclient.HttpError{ - StatusCode: http.StatusNotFound, - Err: fmt.Errorf("not found"), - } - result = shouldRetry(err, RetryOn500HTTP) - if result { - t.Errorf("Expected shouldRetry to return false for HTTP 404 error, got true") - } -} + testCases := []testCase{ + // RetryOn500HTTP strategy tests + { + name: "RetryOn500HTTP should retry on HTTP 500", + err: insightsclient.HttpError{ + StatusCode: http.StatusInternalServerError, + Err: fmt.Errorf("internal server error"), + }, + strategy: RetryOn50xHTTP, + shouldRetry: true, + }, + { + name: "RetryOn500HTTP should NOT retry on non-HTTP error", + err: fmt.Errorf("network connection error"), + strategy: RetryOn50xHTTP, + shouldRetry: false, + }, + { + name: "RetryOn500HTTP should NOT retry on HTTP 404", + err: insightsclient.HttpError{ + StatusCode: http.StatusNotFound, + Err: fmt.Errorf("not found"), + }, + strategy: RetryOn50xHTTP, + shouldRetry: false, + }, -func TestShouldRetry_RetryOnNon200HTTP_Valid(t *testing.T) { - // Test cases where RetryOnNon200HTTP should retry: - // 1. HTTP 404 error - // 2. HTTP 500 error - // 3. Non-HTTP error + // RetryOnNon200HTTP strategy tests + { + name: "RetryOnNon200HTTP should retry on HTTP 404", + err: insightsclient.HttpError{ + StatusCode: http.StatusNotFound, + Err: fmt.Errorf("not found"), + }, + strategy: RetryOnNon200HTTP, + shouldRetry: true, + }, + { + name: "RetryOnNon200HTTP should retry on HTTP 500", + err: insightsclient.HttpError{ + StatusCode: http.StatusInternalServerError, + Err: fmt.Errorf("internal server error"), + }, + strategy: RetryOnNon200HTTP, + shouldRetry: true, + }, + { + name: "RetryOnNon200HTTP should retry on non-HTTP error", + err: fmt.Errorf("network error"), + strategy: RetryOnNon200HTTP, + shouldRetry: true, + }, + { + name: "RetryOnNon200HTTP should NOT retry on HTTP 200", + err: insightsclient.HttpError{ + StatusCode: http.StatusOK, + Err: nil, + }, + strategy: RetryOnNon200HTTP, + shouldRetry: false, + }, - // Test 1: HTTP 404 - httpErr404 := insightsclient.HttpError{ - StatusCode: http.StatusNotFound, - Err: fmt.Errorf("not found"), - } - result := shouldRetry(httpErr404, RetryOnNon200HTTP) - if !result { - t.Errorf("Expected shouldRetry to return true for HTTP 404 error, got false") - } + // RetryOnAll strategy tests + { + name: "RetryOnAll should retry on HTTP 400", + err: insightsclient.HttpError{ + StatusCode: http.StatusBadRequest, + Err: fmt.Errorf("bad request"), + }, + strategy: RetryOnAll, + shouldRetry: true, + }, + { + name: "RetryOnAll should retry on HTTP 500", + err: insightsclient.HttpError{ + StatusCode: http.StatusInternalServerError, + Err: fmt.Errorf("internal server error"), + }, + strategy: RetryOnAll, + shouldRetry: true, + }, + { + name: "RetryOnAll should retry on non-HTTP error", + err: fmt.Errorf("some random error"), + strategy: RetryOnAll, + shouldRetry: true, + }, + { + name: "RetryOnAll should retry even on HTTP 200 with error", + err: insightsclient.HttpError{ + StatusCode: http.StatusOK, + Err: fmt.Errorf("unexpected error despite 200"), + }, + strategy: RetryOnAll, + shouldRetry: true, + }, - // Test 2: HTTP 500 error - httpErr500 := insightsclient.HttpError{ - StatusCode: http.StatusInternalServerError, - Err: fmt.Errorf("internal server error"), - } - result = shouldRetry(httpErr500, RetryOnNon200HTTP) - if !result { - t.Errorf("Expected shouldRetry to return true for HTTP 500 error, got false") + // Unknown strategy tests + { + name: "Unknown strategy should NOT retry", + err: fmt.Errorf("some error"), + strategy: RetryStrategy(999), + shouldRetry: false, + }, } - // Test 3: Non-HTTP error should retry - nonHTTPErr := fmt.Errorf("network error") - result = shouldRetry(nonHTTPErr, RetryOnNon200HTTP) - if !result { - t.Errorf("Expected shouldRetry to return true for non-HTTP error, got false") + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + result := shouldRetry(tt.err, tt.strategy) + if result != tt.shouldRetry { + t.Errorf("shouldRetry() = %v, want %v", result, tt.shouldRetry) + } + }) } } -func TestShouldRetry_RetryOnNon200HTTP_Invalid(t *testing.T) { - // HTTP 200 should NOT retry - err := insightsclient.HttpError{ - StatusCode: http.StatusOK, - Err: nil, +func Test_RetryWithExpBackOff(t *testing.T) { + type testCase struct { + name string + backoff wait.Backoff + strategy RetryStrategy + operation func() ([]byte, error) + expectedData []byte + expectedError string } - result := shouldRetry(err, RetryOnNon200HTTP) - if result { - t.Errorf("Expected shouldRetry to return false for HTTP 200 response, got true") - } -} - -func TestShouldRetry_RetryOnAll_Valid(t *testing.T) { - // All errors should trigger retry: - // 1. HTTP error - // 2. Non-HTTP error - // Test 1: HTTP error - httpErr := insightsclient.HttpError{ - StatusCode: http.StatusBadRequest, - Err: fmt.Errorf("bad request"), - } - result := shouldRetry(httpErr, RetryOnAll) - if !result { - t.Errorf("Expected shouldRetry to return true for HTTP 400 error, got false") + testCases := []testCase{ + { + name: "successful operation on first try", + backoff: wait.Backoff{ + Duration: 1, + Steps: 3, + Factor: 2, + }, + strategy: RetryOnAll, + operation: func() ([]byte, error) { + return []byte("success"), nil + }, + expectedData: []byte("success"), + expectedError: "", + }, + { + name: "successful after 2 retries", + backoff: wait.Backoff{ + Duration: 1, + Steps: 3, + Factor: 2, + }, + strategy: RetryOnAll, + operation: func() func() ([]byte, error) { + attempts := 0 + return func() ([]byte, error) { + attempts++ + if attempts < 3 { + return nil, fmt.Errorf("attempt %d failed", attempts) + } + return []byte("success after retries"), nil + } + }(), + expectedData: []byte("success after retries"), + expectedError: "", + }, + { + name: "exhausted retries returns last error", + backoff: wait.Backoff{ + Duration: 1, + Steps: 3, + Factor: 2, + }, + strategy: RetryOnAll, + operation: func() ([]byte, error) { + return nil, fmt.Errorf("persistent failure") + }, + expectedData: nil, + expectedError: "persistent failure", + }, + { + name: "single-step backoff returns original error", + backoff: wait.Backoff{ + Duration: 1, + Steps: 1, + Factor: 2, + }, + strategy: RetryOnAll, + operation: func() ([]byte, error) { + return nil, fmt.Errorf("immediate failure") + }, + expectedData: nil, + expectedError: "immediate failure", + }, + { + name: "RetryOn50xHTTP does not retry on HTTP 404", + backoff: wait.Backoff{ + Duration: 1, + Steps: 3, + Factor: 2, + }, + strategy: RetryOn50xHTTP, + operation: func() ([]byte, error) { + return nil, insightsclient.HttpError{ + StatusCode: http.StatusNotFound, + Err: fmt.Errorf("not found"), + } + }, + expectedData: nil, + expectedError: "not found", + }, + { + name: "RetryOn50xHTTP retries until exhausted on HTTP 500", + backoff: wait.Backoff{ + Duration: 1, + Steps: 3, + Factor: 2, + }, + strategy: RetryOn50xHTTP, + operation: func() ([]byte, error) { + return nil, insightsclient.HttpError{ + StatusCode: http.StatusInternalServerError, + Err: fmt.Errorf("server error"), + } + }, + expectedData: nil, + expectedError: "server error", + }, + { + name: "RetryOnNon200HTTP succeeds after HTTP 500 then 200", + backoff: wait.Backoff{ + Duration: 1, + Steps: 3, + Factor: 2, + }, + strategy: RetryOnNon200HTTP, + operation: func() func() ([]byte, error) { + attempts := 0 + return func() ([]byte, error) { + attempts++ + if attempts < 2 { + return nil, insightsclient.HttpError{ + StatusCode: http.StatusInternalServerError, + Err: fmt.Errorf("temporary server error"), + } + } + return []byte("recovered"), nil + } + }(), + expectedData: []byte("recovered"), + expectedError: "", + }, + { + name: "RetryOnNon200HTTP does not retry on HTTP 200", + backoff: wait.Backoff{ + Duration: 1, + Steps: 3, + Factor: 2, + }, + strategy: RetryOnNon200HTTP, + operation: func() ([]byte, error) { + return nil, insightsclient.HttpError{ + StatusCode: http.StatusOK, + Err: fmt.Errorf("error despite 200"), + } + }, + expectedData: nil, + expectedError: "error despite 200", + }, + { + name: "counts steps correctly - fails after exact retry count", + backoff: wait.Backoff{ + Duration: 1, + Steps: 2, + Factor: 2, + }, + strategy: RetryOnAll, + operation: func() func() ([]byte, error) { + attempts := 0 + return func() ([]byte, error) { + attempts++ + return nil, fmt.Errorf("fail attempt %d", attempts) + } + }(), + expectedData: nil, + expectedError: "fail attempt 2", + }, } - // Test 2: Non-HTTP error - nonHTTPErr := fmt.Errorf("some random error") - result = shouldRetry(nonHTTPErr, RetryOnAll) - if !result { - t.Errorf("Expected shouldRetry to return true for non-HTTP error, got false") - } -} - -func TestShouldRetry_RetryOnAll_Invalid(t *testing.T) { - // For RetryOnAll, there's no "invalid" case where it shouldn't retry - // But we can test that it returns true even for nil-ish cases or success responses - - // Even for HTTP 200 with error wrapper, it should retry - err := insightsclient.HttpError{ - StatusCode: http.StatusOK, - Err: fmt.Errorf("unexpected error despite 200"), - } - result := shouldRetry(err, RetryOnAll) - if !result { - t.Errorf("Expected shouldRetry to return true for any error with RetryOnAll, got false") - } -} + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + data, err := RetryWithExpBackOff(tt.backoff, tt.strategy, tt.operation) -func TestShouldRetry_DefaultStrategy(t *testing.T) { - // Invalid/unknown strategy should NOT retry - invalidStrategy := RetryStrategy(999) - err := fmt.Errorf("some error") + // Check error + if tt.expectedError != "" { + if err == nil { + t.Errorf("expected error %q, got nil", tt.expectedError) + } else if !strings.Contains(err.Error(), tt.expectedError) { + t.Errorf("expected error containing %q, got %q", tt.expectedError, err.Error()) + } + } else { + if err != nil { + t.Errorf("expected no error, got %v", err) + } + } - result := shouldRetry(err, invalidStrategy) - if result { - t.Errorf("Expected shouldRetry to return false for unknown strategy, got true") + // Check data + if tt.expectedData != nil { + if data == nil { + t.Errorf("expected data %q, got nil", string(tt.expectedData)) + } else if !bytes.Equal(data, tt.expectedData) { + t.Errorf("expected data %q, got %q", string(tt.expectedData), string(data)) + } + } + }) } } From a0fd48fb1d6cab2c0d2df3a0dd5daa659daca572 Mon Sep 17 00:00:00 2001 From: Katarina Borgulova Date: Tue, 21 Apr 2026 17:31:36 +0200 Subject: [PATCH 6/7] refactor: Use Result struct with retry --- .../conditional/conditional_gatherer.go | 12 +- .../insightsuploader/insightsuploader.go | 14 +-- pkg/ocm/clustertransfer/cluster_transfer.go | 7 +- pkg/ocm/sca/sca.go | 7 +- pkg/retry/retry.go | 24 ++-- pkg/retry/retry_test.go | 106 +++++++++--------- 6 files changed, 87 insertions(+), 83 deletions(-) diff --git a/pkg/gatherers/conditional/conditional_gatherer.go b/pkg/gatherers/conditional/conditional_gatherer.go index bd8f96c891..7b09f3dacb 100644 --- a/pkg/gatherers/conditional/conditional_gatherer.go +++ b/pkg/gatherers/conditional/conditional_gatherer.go @@ -273,15 +273,15 @@ func (g *Gatherer) getRemoteConfiguration(ctx context.Context) ([]byte, error) { } endpointWithVersion := fmt.Sprintf(endpoint, ocpVersion) - remoteConfigData, err := retry.RetryWithExpBackOff(backOff, retry.RetryOnNon200HTTP, func() ([]byte, error) { + result, err := retry.RetryWithExpBackOff(backOff, retry.RetryOnNon200HTTP, func() (retry.Result, error) { resp, err := g.insightsCli.GetWithPathParam(ctx, endpoint, ocpVersion, false) if err != nil { - return nil, err + return retry.Result{}, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return nil, insightsclient.HttpError{ + return retry.Result{}, insightsclient.HttpError{ Err: fmt.Errorf("received HTTP %s from %s", resp.Status, endpointWithVersion), StatusCode: resp.StatusCode, } @@ -289,16 +289,16 @@ func (g *Gatherer) getRemoteConfiguration(ctx context.Context) ([]byte, error) { data, err := io.ReadAll(resp.Body) if err != nil { - return nil, err + return retry.Result{}, err } - return data, nil + return retry.Result{Data: data}, nil }) if err != nil { return nil, err } - return remoteConfigData, nil + return result.Data, nil } func (g *Gatherer) getRemoteConfigEndpoint() (string, error) { diff --git a/pkg/insights/insightsuploader/insightsuploader.go b/pkg/insights/insightsuploader/insightsuploader.go index 9ecceed8de..2e254e61bb 100644 --- a/pkg/insights/insightsuploader/insightsuploader.go +++ b/pkg/insights/insightsuploader/insightsuploader.go @@ -32,12 +32,6 @@ type StatusReporter interface { SetLastReportedTime(time.Time) } -// uploadResult wraps the response from SendAndGetID for use with retry logic -type uploadResult struct { - requestID string - statusCode int -} - type Controller struct { controllerstatus.StatusController @@ -214,17 +208,17 @@ func (c *Controller) Upload(ctx context.Context, s *insightsclient.Source) (stri s.ID = start.Format(time.RFC3339) s.Type = "application/vnd.redhat.openshift.periodic" - result, err := retry.RetryWithExpBackOff(c.backoff, retry.RetryOnAll, func() (uploadResult, error) { + result, err := retry.RetryWithExpBackOff(c.backoff, retry.RetryOnAll, func() (retry.Result, error) { requestID, statusCode, err := c.client.SendAndGetID(ctx, c.configurator.Config().DataReporting.UploadEndpoint, *s) - return uploadResult{requestID: requestID, statusCode: statusCode}, err + return retry.Result{RequestID: requestID, StatusCode: statusCode}, err }) if err != nil { klog.Infof("Unable to upload report after %s: %v", time.Since(start).Truncate(time.Second/100), err) - return "", result.statusCode, err + return "", result.StatusCode, err } klog.Infof("Uploaded report successfully in %s", time.Since(start)) - return result.requestID, result.statusCode, nil + return result.RequestID, result.StatusCode, nil } func reportToLogs(source io.Reader) error { diff --git a/pkg/ocm/clustertransfer/cluster_transfer.go b/pkg/ocm/clustertransfer/cluster_transfer.go index 76d52833a0..cbb438ed77 100644 --- a/pkg/ocm/clustertransfer/cluster_transfer.go +++ b/pkg/ocm/clustertransfer/cluster_transfer.go @@ -222,14 +222,15 @@ func (c *Controller) requestClusterTransferWithExponentialBackoff(endpoint strin Cap: c.configurator.Config().ClusterTransfer.Interval, } - data, err := retry.RetryWithExpBackOff(bo, retry.RetryOn50xHTTP, func() ([]byte, error) { - return c.client.RecvClusterTransfer(endpoint) + result, err := retry.RetryWithExpBackOff(bo, retry.RetryOn50xHTTP, func() (retry.Result, error) { + data, err := c.client.RecvClusterTransfer(endpoint) + return retry.Result{Data: data}, err }) if err != nil { return nil, err } - return data, nil + return result.Data, nil } func (c *Controller) updateStatus(healthy bool, msg, reason string, httpErr *insightsclient.HttpError) { diff --git a/pkg/ocm/sca/sca.go b/pkg/ocm/sca/sca.go index 62f4699bd9..952b134859 100644 --- a/pkg/ocm/sca/sca.go +++ b/pkg/ocm/sca/sca.go @@ -300,8 +300,9 @@ func (c *Controller) requestSCAWithExpBackoff( Cap: c.configurator.Config().SCA.Interval, } - data, err := retry.RetryWithExpBackOff(bo, retry.RetryOn50xHTTP, func() ([]byte, error) { - return c.client.RecvSCACerts(ctx, endpoint, nodeArchitectures) + result, err := retry.RetryWithExpBackOff(bo, retry.RetryOn50xHTTP, func() (retry.Result, error) { + data, err := c.client.RecvSCACerts(ctx, endpoint, nodeArchitectures) + return retry.Result{Data: data}, err }) if err != nil { @@ -309,7 +310,7 @@ func (c *Controller) requestSCAWithExpBackoff( } var response Response - err = json.Unmarshal(data, &response) + err = json.Unmarshal(result.Data, &response) if err != nil { klog.Errorf("Unable to decode response: %v", err) return nil, err diff --git a/pkg/retry/retry.go b/pkg/retry/retry.go index 83ae5a7ae1..51fb821970 100644 --- a/pkg/retry/retry.go +++ b/pkg/retry/retry.go @@ -2,7 +2,7 @@ // // Usage example: // -// response, err := retry.RetryWithExpBackOff( +// result, err := retry.RetryWithExpBackOff( // wait.Backoff{ // Duration: interval/32, // Factor: 2, @@ -10,8 +10,9 @@ // Cap: interval, // }, // retry.RetryOn50xHTTP, -// func() (*Response, error) { -// return client.RecvSCACerts(ctx, endpoint, nodeArchs) +// func() (retry.Result, error) { +// data, err := client.RecvSCACerts(ctx, endpoint, nodeArchs) +// return retry.Result{Data: data}, err // }, // ) package retry @@ -40,6 +41,13 @@ const ( RetryOnAll ) +// Result holds the response data from retry operations +type Result struct { + Data []byte // Response data + StatusCode int // HTTP status code + RequestID string // Request ID +} + // shouldRetry determines if an error should be retried based on the strategy. // Returns true if retry should be attempted (when steps remain). func shouldRetry(err error, strategy RetryStrategy) bool { @@ -71,16 +79,16 @@ func shouldRetry(err error, strategy RetryStrategy) bool { } } -func RetryWithExpBackOff[T any](bo wait.Backoff, strategy RetryStrategy, operation func() (T, error)) (T, error) { +func RetryWithExpBackOff(bo wait.Backoff, strategy RetryStrategy, operation func() (Result, error)) (Result, error) { var lastErr error - var data T + var result Result attempt := 0 maxAttempts := bo.Steps err := wait.ExponentialBackoff(bo, func() (bool, error) { attempt++ - data, lastErr = operation() + result, lastErr = operation() if lastErr != nil { // Use strategy to determine if we should retry if shouldRetry(lastErr, strategy) { @@ -96,8 +104,8 @@ func RetryWithExpBackOff[T any](bo wait.Backoff, strategy RetryStrategy, operati // If we exhausted retries, return the last operation error instead of the timeout error if wait.Interrupted(err) && lastErr != nil { - return data, lastErr + return result, lastErr } - return data, err + return result, err } diff --git a/pkg/retry/retry_test.go b/pkg/retry/retry_test.go index b508ddf445..8dd5636b8e 100644 --- a/pkg/retry/retry_test.go +++ b/pkg/retry/retry_test.go @@ -137,12 +137,12 @@ func Test_ShouldRetry(t *testing.T) { func Test_RetryWithExpBackOff(t *testing.T) { type testCase struct { - name string - backoff wait.Backoff - strategy RetryStrategy - operation func() ([]byte, error) - expectedData []byte - expectedError string + name string + backoff wait.Backoff + strategy RetryStrategy + operation func() (Result, error) + expectedResult Result + expectedError string } testCases := []testCase{ @@ -154,11 +154,11 @@ func Test_RetryWithExpBackOff(t *testing.T) { Factor: 2, }, strategy: RetryOnAll, - operation: func() ([]byte, error) { - return []byte("success"), nil + operation: func() (Result, error) { + return Result{Data: []byte("success")}, nil }, - expectedData: []byte("success"), - expectedError: "", + expectedResult: Result{Data: []byte("success")}, + expectedError: "", }, { name: "successful after 2 retries", @@ -168,18 +168,18 @@ func Test_RetryWithExpBackOff(t *testing.T) { Factor: 2, }, strategy: RetryOnAll, - operation: func() func() ([]byte, error) { + operation: func() func() (Result, error) { attempts := 0 - return func() ([]byte, error) { + return func() (Result, error) { attempts++ if attempts < 3 { - return nil, fmt.Errorf("attempt %d failed", attempts) + return Result{}, fmt.Errorf("attempt %d failed", attempts) } - return []byte("success after retries"), nil + return Result{Data: []byte("success after retries")}, nil } }(), - expectedData: []byte("success after retries"), - expectedError: "", + expectedResult: Result{Data: []byte("success after retries")}, + expectedError: "", }, { name: "exhausted retries returns last error", @@ -189,11 +189,11 @@ func Test_RetryWithExpBackOff(t *testing.T) { Factor: 2, }, strategy: RetryOnAll, - operation: func() ([]byte, error) { - return nil, fmt.Errorf("persistent failure") + operation: func() (Result, error) { + return Result{}, fmt.Errorf("persistent failure") }, - expectedData: nil, - expectedError: "persistent failure", + expectedResult: Result{}, + expectedError: "persistent failure", }, { name: "single-step backoff returns original error", @@ -203,11 +203,11 @@ func Test_RetryWithExpBackOff(t *testing.T) { Factor: 2, }, strategy: RetryOnAll, - operation: func() ([]byte, error) { - return nil, fmt.Errorf("immediate failure") + operation: func() (Result, error) { + return Result{}, fmt.Errorf("immediate failure") }, - expectedData: nil, - expectedError: "immediate failure", + expectedResult: Result{}, + expectedError: "immediate failure", }, { name: "RetryOn50xHTTP does not retry on HTTP 404", @@ -217,14 +217,14 @@ func Test_RetryWithExpBackOff(t *testing.T) { Factor: 2, }, strategy: RetryOn50xHTTP, - operation: func() ([]byte, error) { - return nil, insightsclient.HttpError{ + operation: func() (Result, error) { + return Result{}, insightsclient.HttpError{ StatusCode: http.StatusNotFound, Err: fmt.Errorf("not found"), } }, - expectedData: nil, - expectedError: "not found", + expectedResult: Result{}, + expectedError: "not found", }, { name: "RetryOn50xHTTP retries until exhausted on HTTP 500", @@ -234,14 +234,14 @@ func Test_RetryWithExpBackOff(t *testing.T) { Factor: 2, }, strategy: RetryOn50xHTTP, - operation: func() ([]byte, error) { - return nil, insightsclient.HttpError{ + operation: func() (Result, error) { + return Result{}, insightsclient.HttpError{ StatusCode: http.StatusInternalServerError, Err: fmt.Errorf("server error"), } }, - expectedData: nil, - expectedError: "server error", + expectedResult: Result{}, + expectedError: "server error", }, { name: "RetryOnNon200HTTP succeeds after HTTP 500 then 200", @@ -251,21 +251,21 @@ func Test_RetryWithExpBackOff(t *testing.T) { Factor: 2, }, strategy: RetryOnNon200HTTP, - operation: func() func() ([]byte, error) { + operation: func() func() (Result, error) { attempts := 0 - return func() ([]byte, error) { + return func() (Result, error) { attempts++ if attempts < 2 { - return nil, insightsclient.HttpError{ + return Result{}, insightsclient.HttpError{ StatusCode: http.StatusInternalServerError, Err: fmt.Errorf("temporary server error"), } } - return []byte("recovered"), nil + return Result{Data: []byte("recovered")}, nil } }(), - expectedData: []byte("recovered"), - expectedError: "", + expectedResult: Result{Data: []byte("recovered")}, + expectedError: "", }, { name: "RetryOnNon200HTTP does not retry on HTTP 200", @@ -275,14 +275,14 @@ func Test_RetryWithExpBackOff(t *testing.T) { Factor: 2, }, strategy: RetryOnNon200HTTP, - operation: func() ([]byte, error) { - return nil, insightsclient.HttpError{ + operation: func() (Result, error) { + return Result{}, insightsclient.HttpError{ StatusCode: http.StatusOK, Err: fmt.Errorf("error despite 200"), } }, - expectedData: nil, - expectedError: "error despite 200", + expectedResult: Result{}, + expectedError: "error despite 200", }, { name: "counts steps correctly - fails after exact retry count", @@ -292,21 +292,21 @@ func Test_RetryWithExpBackOff(t *testing.T) { Factor: 2, }, strategy: RetryOnAll, - operation: func() func() ([]byte, error) { + operation: func() func() (Result, error) { attempts := 0 - return func() ([]byte, error) { + return func() (Result, error) { attempts++ - return nil, fmt.Errorf("fail attempt %d", attempts) + return Result{}, fmt.Errorf("fail attempt %d", attempts) } }(), - expectedData: nil, - expectedError: "fail attempt 2", + expectedResult: Result{}, + expectedError: "fail attempt 2", }, } for _, tt := range testCases { t.Run(tt.name, func(t *testing.T) { - data, err := RetryWithExpBackOff(tt.backoff, tt.strategy, tt.operation) + result, err := RetryWithExpBackOff(tt.backoff, tt.strategy, tt.operation) // Check error if tt.expectedError != "" { @@ -322,11 +322,11 @@ func Test_RetryWithExpBackOff(t *testing.T) { } // Check data - if tt.expectedData != nil { - if data == nil { - t.Errorf("expected data %q, got nil", string(tt.expectedData)) - } else if !bytes.Equal(data, tt.expectedData) { - t.Errorf("expected data %q, got %q", string(tt.expectedData), string(data)) + if tt.expectedResult.Data != nil { + if result.Data == nil { + t.Errorf("expected data %q, got nil", string(tt.expectedResult.Data)) + } else if !bytes.Equal(result.Data, tt.expectedResult.Data) { + t.Errorf("expected data %q, got %q", string(tt.expectedResult.Data), string(result.Data)) } } }) From 5eebc1bc9488c9dbf4e7f0a20bbb222bb651ae9d Mon Sep 17 00:00:00 2001 From: Katarina Borgulova Date: Mon, 27 Apr 2026 12:06:08 +0200 Subject: [PATCH 7/7] fix: HTTP pointer error & context --- .../conditional/conditional_gatherer.go | 2 +- .../insightsuploader/insightsuploader.go | 2 +- pkg/ocm/clustertransfer/cluster_transfer.go | 6 +- pkg/ocm/sca/sca.go | 2 +- pkg/retry/retry.go | 53 +++++++++++++---- pkg/retry/retry_test.go | 59 ++++++++++++++++++- 6 files changed, 103 insertions(+), 21 deletions(-) diff --git a/pkg/gatherers/conditional/conditional_gatherer.go b/pkg/gatherers/conditional/conditional_gatherer.go index 7b09f3dacb..b6e786cd5e 100644 --- a/pkg/gatherers/conditional/conditional_gatherer.go +++ b/pkg/gatherers/conditional/conditional_gatherer.go @@ -273,7 +273,7 @@ func (g *Gatherer) getRemoteConfiguration(ctx context.Context) ([]byte, error) { } endpointWithVersion := fmt.Sprintf(endpoint, ocpVersion) - result, err := retry.RetryWithExpBackOff(backOff, retry.RetryOnNon200HTTP, func() (retry.Result, error) { + result, err := retry.RetryWithExpBackOff(ctx, backOff, retry.RetryOnNon200HTTP, func() (retry.Result, error) { resp, err := g.insightsCli.GetWithPathParam(ctx, endpoint, ocpVersion, false) if err != nil { return retry.Result{}, err diff --git a/pkg/insights/insightsuploader/insightsuploader.go b/pkg/insights/insightsuploader/insightsuploader.go index 2e254e61bb..225dda9ac2 100644 --- a/pkg/insights/insightsuploader/insightsuploader.go +++ b/pkg/insights/insightsuploader/insightsuploader.go @@ -208,7 +208,7 @@ func (c *Controller) Upload(ctx context.Context, s *insightsclient.Source) (stri s.ID = start.Format(time.RFC3339) s.Type = "application/vnd.redhat.openshift.periodic" - result, err := retry.RetryWithExpBackOff(c.backoff, retry.RetryOnAll, func() (retry.Result, error) { + result, err := retry.RetryWithExpBackOff(ctx, c.backoff, retry.RetryOnAll, func() (retry.Result, error) { requestID, statusCode, err := c.client.SendAndGetID(ctx, c.configurator.Config().DataReporting.UploadEndpoint, *s) return retry.Result{RequestID: requestID, StatusCode: statusCode}, err }) diff --git a/pkg/ocm/clustertransfer/cluster_transfer.go b/pkg/ocm/clustertransfer/cluster_transfer.go index cbb438ed77..57c50e4310 100644 --- a/pkg/ocm/clustertransfer/cluster_transfer.go +++ b/pkg/ocm/clustertransfer/cluster_transfer.go @@ -82,7 +82,7 @@ func (c *Controller) Run(ctx context.Context) { // in the response then check if a secret update is required, and if so, perform the update. func (c *Controller) requestDataAndUpdateSecret(ctx context.Context, endpoint string) { klog.Infof("checking the availability of cluster transfer. Next check is in %s", c.configurator.Config().ClusterTransfer.Interval) - data, err := c.requestClusterTransferWithExponentialBackoff(endpoint) + data, err := c.requestClusterTransferWithExponentialBackoff(ctx, endpoint) if err != nil { msg := fmt.Sprintf("failed to pull cluster transfer: %v", err) httpErr, ok := err.(insightsclient.HttpError) @@ -213,7 +213,7 @@ func (c *Controller) updatePullSecret(ctx context.Context, newData []byte) error // from the OCM API with exponential backoff. // It returns HttpError (see insightsclient.go) in case of any HTTP error response from the OCM API. // The exponential backoff is applied only for HTTP errors >= 500. -func (c *Controller) requestClusterTransferWithExponentialBackoff(endpoint string) ([]byte, error) { +func (c *Controller) requestClusterTransferWithExponentialBackoff(ctx context.Context, endpoint string) ([]byte, error) { bo := wait.Backoff{ Duration: c.configurator.Config().ClusterTransfer.Interval / 24, // 30 min as the first waiting Factor: 2, @@ -222,7 +222,7 @@ func (c *Controller) requestClusterTransferWithExponentialBackoff(endpoint strin Cap: c.configurator.Config().ClusterTransfer.Interval, } - result, err := retry.RetryWithExpBackOff(bo, retry.RetryOn50xHTTP, func() (retry.Result, error) { + result, err := retry.RetryWithExpBackOff(ctx, bo, retry.RetryOn50xHTTP, func() (retry.Result, error) { data, err := c.client.RecvClusterTransfer(endpoint) return retry.Result{Data: data}, err }) diff --git a/pkg/ocm/sca/sca.go b/pkg/ocm/sca/sca.go index 952b134859..cbc2da4ea7 100644 --- a/pkg/ocm/sca/sca.go +++ b/pkg/ocm/sca/sca.go @@ -300,7 +300,7 @@ func (c *Controller) requestSCAWithExpBackoff( Cap: c.configurator.Config().SCA.Interval, } - result, err := retry.RetryWithExpBackOff(bo, retry.RetryOn50xHTTP, func() (retry.Result, error) { + result, err := retry.RetryWithExpBackOff(ctx, bo, retry.RetryOn50xHTTP, func() (retry.Result, error) { data, err := c.client.RecvSCACerts(ctx, endpoint, nodeArchitectures) return retry.Result{Data: data}, err }) diff --git a/pkg/retry/retry.go b/pkg/retry/retry.go index 51fb821970..4548179d09 100644 --- a/pkg/retry/retry.go +++ b/pkg/retry/retry.go @@ -3,6 +3,7 @@ // Usage example: // // result, err := retry.RetryWithExpBackOff( +// ctx, // wait.Backoff{ // Duration: interval/32, // Factor: 2, @@ -18,6 +19,8 @@ package retry import ( + "context" + "errors" "net/http" "github.com/openshift/insights-operator/pkg/insights/insightsclient" @@ -43,30 +46,54 @@ const ( // Result holds the response data from retry operations type Result struct { - Data []byte // Response data - StatusCode int // HTTP status code - RequestID string // Request ID + Data []byte + StatusCode int + RequestID string } // shouldRetry determines if an error should be retried based on the strategy. // Returns true if retry should be attempted (when steps remain). -func shouldRetry(err error, strategy RetryStrategy) bool { +// Returns false immediately if the context is canceled or deadline exceeded. +func shouldRetry(ctx context.Context, err error, strategy RetryStrategy) bool { + // Don't retry if context is canceled or deadline exceeded + if ctx.Err() != nil { + return false + } + + // Don't retry context cancellation or deadline errors + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return false + } + + // Extract status code from HttpError (handles both pointer and non-pointer) + var statusCode int + var isHTTPError bool + + switch e := err.(type) { + case *insightsclient.HttpError: + // Pointer - real-world case from newHTTPErrorFromResponse + statusCode = e.StatusCode + isHTTPError = true + case insightsclient.HttpError: + // Non-pointer - test case + statusCode = e.StatusCode + isHTTPError = true + } + switch strategy { case RetryOn50xHTTP: // Only retry HTTP 500+ errors, skip non-HTTP errors (disconnected env) - if !insightsclient.IsHttpError(err) { + if !isHTTPError { return false } - httpErr := err.(insightsclient.HttpError) - return httpErr.StatusCode >= http.StatusInternalServerError + return statusCode >= http.StatusInternalServerError case RetryOnNon200HTTP: // Retry on any non-200 HTTP status, or non-HTTP errors - if !insightsclient.IsHttpError(err) { + if !isHTTPError { return true // retry non-HTTP errors } - httpErr := err.(insightsclient.HttpError) - return httpErr.StatusCode != http.StatusOK + return statusCode != http.StatusOK case RetryOnAll: // Retry on all errors @@ -79,19 +106,19 @@ func shouldRetry(err error, strategy RetryStrategy) bool { } } -func RetryWithExpBackOff(bo wait.Backoff, strategy RetryStrategy, operation func() (Result, error)) (Result, error) { +func RetryWithExpBackOff(ctx context.Context, bo wait.Backoff, strategy RetryStrategy, operation func() (Result, error)) (Result, error) { var lastErr error var result Result attempt := 0 maxAttempts := bo.Steps - err := wait.ExponentialBackoff(bo, func() (bool, error) { + err := wait.ExponentialBackoffWithContext(ctx, bo, func(context.Context) (bool, error) { attempt++ result, lastErr = operation() if lastErr != nil { // Use strategy to determine if we should retry - if shouldRetry(lastErr, strategy) { + if shouldRetry(ctx, lastErr, strategy) { klog.Errorf("%v. Retrying (attempt %d/%d)", lastErr, attempt, maxAttempts) return false, nil } diff --git a/pkg/retry/retry_test.go b/pkg/retry/retry_test.go index 8dd5636b8e..af11dc4085 100644 --- a/pkg/retry/retry_test.go +++ b/pkg/retry/retry_test.go @@ -2,6 +2,7 @@ package retry import ( "bytes" + "context" "fmt" "net/http" "strings" @@ -123,11 +124,64 @@ func Test_ShouldRetry(t *testing.T) { strategy: RetryStrategy(999), shouldRetry: false, }, + + // Context cancellation tests + { + name: "should NOT retry on context.Canceled error", + err: context.Canceled, + strategy: RetryOnAll, + shouldRetry: false, + }, + { + name: "should NOT retry on context.DeadlineExceeded error", + err: context.DeadlineExceeded, + strategy: RetryOnAll, + shouldRetry: false, + }, + + // Pointer HttpError tests (real-world scenario) + { + name: "RetryOn50xHTTP should retry on *HttpError (pointer) with HTTP 500", + err: &insightsclient.HttpError{ + StatusCode: http.StatusInternalServerError, + Err: fmt.Errorf("internal server error"), + }, + strategy: RetryOn50xHTTP, + shouldRetry: true, + }, + { + name: "RetryOn50xHTTP should NOT retry on *HttpError (pointer) with HTTP 404", + err: &insightsclient.HttpError{ + StatusCode: http.StatusNotFound, + Err: fmt.Errorf("not found"), + }, + strategy: RetryOn50xHTTP, + shouldRetry: false, + }, + { + name: "RetryOnNon200HTTP should retry on *HttpError (pointer) with HTTP 500", + err: &insightsclient.HttpError{ + StatusCode: http.StatusInternalServerError, + Err: fmt.Errorf("internal server error"), + }, + strategy: RetryOnNon200HTTP, + shouldRetry: true, + }, + { + name: "RetryOnNon200HTTP should NOT retry on *HttpError (pointer) with HTTP 200", + err: &insightsclient.HttpError{ + StatusCode: http.StatusOK, + Err: nil, + }, + strategy: RetryOnNon200HTTP, + shouldRetry: false, + }, } for _, tt := range testCases { t.Run(tt.name, func(t *testing.T) { - result := shouldRetry(tt.err, tt.strategy) + ctx := context.Background() + result := shouldRetry(ctx, tt.err, tt.strategy) if result != tt.shouldRetry { t.Errorf("shouldRetry() = %v, want %v", result, tt.shouldRetry) } @@ -306,7 +360,8 @@ func Test_RetryWithExpBackOff(t *testing.T) { for _, tt := range testCases { t.Run(tt.name, func(t *testing.T) { - result, err := RetryWithExpBackOff(tt.backoff, tt.strategy, tt.operation) + ctx := context.Background() + result, err := RetryWithExpBackOff(ctx, tt.backoff, tt.strategy, tt.operation) // Check error if tt.expectedError != "" {