Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ require (
github.com/stretchr/testify v1.11.1
go.etcd.io/etcd/client/pkg/v3 v3.6.9
go.uber.org/zap v1.27.1
golang.org/x/time v0.9.0
k8s.io/api v0.34.6
k8s.io/apimachinery v0.34.6
k8s.io/client-go v0.34.6
Expand Down Expand Up @@ -87,7 +88,6 @@ require (
golang.org/x/sys v0.41.0 // indirect
golang.org/x/term v0.39.0 // indirect
golang.org/x/text v0.33.0 // indirect
golang.org/x/time v0.9.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20260128011058-8636f8732409 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20260128011058-8636f8732409 // indirect
Expand Down
28 changes: 27 additions & 1 deletion internal/controller/metal3.io/baremetalhost_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@ import (
"github.com/metal3-io/baremetal-operator/pkg/provisioner"
"github.com/metal3-io/baremetal-operator/pkg/secretutils"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/time/rate"
corev1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/util/workqueue"
"sigs.k8s.io/cluster-api/util/conditions"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
Expand All @@ -47,6 +49,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

const (
Expand All @@ -58,6 +61,15 @@ const (
clarifySoftPoweroffFailure = "Continuing with hard poweroff after soft poweroff fails. More details: "
hardwareDataFinalizer = metal3api.BareMetalHostFinalizer + "/hardwareData"
NotReady = "Not ready"

// rateLimiterBaseDelay is the initial delay for the exponential backoff rate limiter.
rateLimiterBaseDelay = 5 * time.Millisecond
// rateLimiterMaxDelay caps the exponential backoff to avoid long reconciliation lockouts.
rateLimiterMaxDelay = 30 * time.Second
// rateLimiterBursts is the token bucket burst size for the rate limiter.
rateLimiterBursts = 100
// rateLimiterRequestsPerSecond is the steady-state rate (requests per second) for the token bucket rate limiter.
rateLimiterRequestsPerSecond = 10
)

// BareMetalHostReconciler reconciles a BareMetalHost object.
Expand Down Expand Up @@ -2486,13 +2498,27 @@ func (r *BareMetalHostReconciler) updateEventHandler(e event.UpdateEvent) bool {

// SetupWithManager registers the reconciler to be run by the manager.
func (r *BareMetalHostReconciler) SetupWithManager(mgr ctrl.Manager, preprovImgEnable bool, maxConcurrentReconcile int) error {
// Cap the exponential backoff at 30 seconds instead of the default 1000 seconds.
// Without this cap, transient "no endpoints available" errors on the BMO validating
// webhook (which occur during the brief window between BMO becoming ready and its
// Service endpoint being propagated) can push the rate limiter to its maximum delay.
// At the 1000s cap a single such burst locks out BareMetalHost reconciliation for
// up to ~16 minutes after the webhook becomes reachable.
rateLimiter := workqueue.NewTypedMaxOfRateLimiter(
workqueue.NewTypedItemExponentialFailureRateLimiter[reconcile.Request](rateLimiterBaseDelay, rateLimiterMaxDelay),
&workqueue.TypedBucketRateLimiter[reconcile.Request]{Limiter: rate.NewLimiter(rate.Limit(rateLimiterRequestsPerSecond), rateLimiterBursts)},
)

controller := ctrl.NewControllerManagedBy(mgr).
For(&metal3api.BareMetalHost{}).
WithEventFilter(
predicate.Funcs{
UpdateFunc: r.updateEventHandler,
}).
WithOptions(controller.Options{MaxConcurrentReconciles: maxConcurrentReconcile}).
WithOptions(controller.Options{
MaxConcurrentReconciles: maxConcurrentReconcile,
RateLimiter: rateLimiter,
}).
Owns(&corev1.Secret{}, builder.MatchEveryOwner)

if preprovImgEnable {
Expand Down
14 changes: 14 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,19 @@ func setupChecks(mgr ctrl.Manager) {
}
}

// setupWebhookReadinessCheck registers a readiness probe named "webhook" on
// the manager, backed by the webhook server's StartedChecker.
//
// The intent is to keep the pod from reporting "Ready" (and therefore from
// being added to the Service's endpoints) until the webhook server has
// started, so that the Kubernetes API server does not route BareMetalHost
// admission requests to a pod whose webhook TLS server is not yet up.
//
// If the check cannot be registered, the error is logged and the process
// exits with status 1.
func setupWebhookReadinessCheck(mgr ctrl.Manager) {
	webhookStarted := mgr.GetWebhookServer().StartedChecker()

	err := mgr.AddReadyzCheck("webhook", webhookStarted)
	if err == nil {
		return
	}

	// Registration failure is treated as fatal during startup.
	setupLog.Error(err, "unable to create ready check for webhook server")
	os.Exit(1)
}

func setupWebhooks(mgr ctrl.Manager) {
if err := (&webhooks.BareMetalHost{}).SetupWebhookWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create webhook", "webhook", "BareMetalHost")
Expand Down Expand Up @@ -405,6 +418,7 @@ func main() {
setupChecks(mgr)

if enableWebhook {
setupWebhookReadinessCheck(mgr)
setupWebhooks(mgr)
}

Expand Down