@@ -2,6 +2,7 @@ package operator
22
33import (
44 "context"
5+ "errors"
56 "fmt"
67 "os"
78 "time"
@@ -12,9 +13,10 @@ import (
1213 configinformersv1 "github.com/openshift/client-go/config/informers/externalversions/config/v1"
1314 configlistersv1 "github.com/openshift/client-go/config/listers/config/v1"
1415 machineclientset "github.com/openshift/client-go/machine/clientset/versioned"
16+ "github.com/openshift/library-go/pkg/operator/configobserver/featuregates"
17+ "github.com/openshift/library-go/pkg/operator/events"
1518 "github.com/openshift/library-go/pkg/operator/resource/resourceapply"
1619 admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
17- "k8s.io/apimachinery/pkg/api/errors"
1820 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1921 utilruntime "k8s.io/apimachinery/pkg/util/runtime"
2022 "k8s.io/apimachinery/pkg/util/wait"
@@ -39,6 +41,9 @@ const (
3941 // 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s
4042 maxRetries = 15
4143 maoOwnedAnnotation = "machine.openshift.io/owned"
44+
45+ releaseVersionEnvVariableName = "RELEASE_VERSION"
46+ releaseVersionUnknownValue = "unknown"
4247)
4348
4449// Operator defines machine api operator.
@@ -53,6 +58,7 @@ type Operator struct {
5358 machineClient machineclientset.Interface
5459 dynamicClient dynamic.Interface
5560 eventRecorder record.EventRecorder
61+ recorder events.Recorder
5662
5763 syncHandler func (ic string ) (reconcile.Result , error )
5864
@@ -71,8 +77,7 @@ type Operator struct {
7177 mutatingWebhookLister admissionlisterv1.MutatingWebhookConfigurationLister
7278 mutatingWebhookListerSynced cache.InformerSynced
7379
74- featureGateLister configlistersv1.FeatureGateLister
75- featureGateCacheSynced cache.InformerSynced
80+ featureGateAccessor featuregates.FeatureGateAccess
7681
7782 // queue only ever has one item, but it has nice error handling backoff/retry semantics
7883 queue workqueue.RateLimitingInterface
@@ -91,6 +96,7 @@ func New(
9196 deployInformer appsinformersv1.DeploymentInformer ,
9297 daemonsetInformer appsinformersv1.DaemonSetInformer ,
9398 featureGateInformer configinformersv1.FeatureGateInformer ,
99+ clusterVersionInformer configinformersv1.ClusterVersionInformer ,
94100 validatingWebhookInformer admissioninformersv1.ValidatingWebhookConfigurationInformer ,
95101 mutatingWebhookInformer admissioninformersv1.MutatingWebhookConfigurationInformer ,
96102 proxyInformer configinformersv1.ProxyInformer ,
@@ -99,13 +105,18 @@ func New(
99105 machineClient machineclientset.Interface ,
100106 dynamicClient dynamic.Interface ,
101107
102- recorder record.EventRecorder ,
108+ eventRecorder record.EventRecorder ,
109+ recorder events.Recorder ,
103110) (* Operator , error ) {
104111 // we must report the version from the release payload when we report available at that level
105112 // TODO we will report the version of the operands (so our machine api implementation version)
106113 operandVersions := []osconfigv1.OperandVersion {}
107- if releaseVersion := os .Getenv ("RELEASE_VERSION" ); len (releaseVersion ) > 0 {
114+ releaseVersion := os .Getenv (releaseVersionEnvVariableName )
115+ if len (releaseVersion ) > 0 {
108116 operandVersions = append (operandVersions , osconfigv1.OperandVersion {Name : "operator" , Version : releaseVersion })
117+ } else {
118+ klog .Infof ("%s environment variable is missing, defaulting to %q" , releaseVersionEnvVariableName , releaseVersionUnknownValue )
119+ releaseVersion = releaseVersionUnknownValue
109120 }
110121
111122 optr := & Operator {
@@ -116,7 +127,8 @@ func New(
116127 osClient : osClient ,
117128 machineClient : machineClient ,
118129 dynamicClient : dynamicClient ,
119- eventRecorder : recorder ,
130+ eventRecorder : eventRecorder ,
131+ recorder : recorder ,
120132 queue : workqueue .NewNamedRateLimitingQueue (workqueue .DefaultControllerRateLimiter (), "machineapioperator" ),
121133 operandVersions : operandVersions ,
122134 }
@@ -137,10 +149,36 @@ func New(
137149 if err != nil {
138150 return nil , fmt .Errorf ("error adding event handler to mutatingwebhook informer: %v" , err )
139151 }
140- _ , err = featureGateInformer .Informer ().AddEventHandler (optr .eventHandler ())
141- if err != nil {
142- return nil , fmt .Errorf ("error adding event handler to featuregates informer: %v" , err )
143- }
152+
153+ desiredVersion := releaseVersion
154+ missingVersion := "0.0.1-snapshot"
155+ featureGateAccessor := featuregates .NewFeatureGateAccess (
156+ desiredVersion , missingVersion ,
157+ clusterVersionInformer , featureGateInformer ,
158+ recorder ,
159+ )
160+ featureGateAccessor .SetChangeHandler (func (featureChange featuregates.FeatureChange ) {
161+ if featureChange .Previous == nil {
162+ // When the initial featuregate is set, the previous version is nil.
163+ // Nothing to do in this case, it's handled by the 1st sync, which only runs after the initial feature set was received.
164+ return
165+ }
166+
167+ klog .V (4 ).InfoS ("FeatureGates changed" , "enabled" , featureChange .New .Enabled , "disabled" , featureChange .New .Disabled )
168+ prevDisableMHC := featuregates .NewFeatureGate (featureChange .Previous .Enabled , featureChange .Previous .Disabled ).
169+ Enabled (osconfigv1 .FeatureGateMachineAPIOperatorDisableMachineHealthCheckController )
170+ newDisableMHC := featuregates .NewFeatureGate (featureChange .New .Enabled , featureChange .New .Disabled ).
171+ Enabled (osconfigv1 .FeatureGateMachineAPIOperatorDisableMachineHealthCheckController )
172+
173+ if prevDisableMHC != newDisableMHC {
174+ klog .V (2 ).InfoS ("Resync for modified feature gate" ,
175+ "FeatureGateMachineAPIOperatorDisableMachineHealthCheckController enabled" , newDisableMHC ,
176+ )
177+ workQueueKey := fmt .Sprintf ("%s/%s" , optr .namespace , optr .name )
178+ optr .queue .Add (workQueueKey )
179+ }
180+ })
181+ optr .featureGateAccessor = featureGateAccessor
144182
145183 optr .config = config
146184 optr .syncHandler = optr .sync
@@ -160,9 +198,6 @@ func New(
160198 optr .mutatingWebhookLister = mutatingWebhookInformer .Lister ()
161199 optr .mutatingWebhookListerSynced = mutatingWebhookInformer .Informer ().HasSynced
162200
163- optr .featureGateLister = featureGateInformer .Lister ()
164- optr .featureGateCacheSynced = featureGateInformer .Informer ().HasSynced
165-
166201 return optr , nil
167202}
168203
@@ -179,12 +214,24 @@ func (optr *Operator) Run(workers int, stopCh <-chan struct{}) {
179214 optr .validatingWebhookListerSynced ,
180215 optr .deployListerSynced ,
181216 optr .daemonsetListerSynced ,
182- optr .proxyListerSynced ,
183- optr .featureGateCacheSynced ) {
217+ optr .proxyListerSynced ) {
184218 klog .Error ("Failed to sync caches" )
185219 return
186220 }
187221 klog .Info ("Synced up caches" )
222+
223+ ctx , cancelFeatureGateAccessor := context .WithCancel (context .Background ())
224+ defer cancelFeatureGateAccessor ()
225+ go optr .featureGateAccessor .Run (ctx )
226+ klog .Info ("Started feature gate accessor" )
227+ select {
228+ case <- optr .featureGateAccessor .InitialFeatureGatesObserved ():
229+ klog .V (4 ).Info ("FeatureGates initialized" )
230+ case <- time .After (1 * time .Minute ):
231+ klog .Error (errors .New ("timed out waiting for FeatureGate detection" ), "unable to start operator" )
232+ return
233+ }
234+
188235 for i := 0 ; i < workers ; i ++ {
189236 go wait .Until (optr .worker , time .Second , stopCh )
190237 }
@@ -353,19 +400,6 @@ func (optr *Operator) sync(key string) (reconcile.Result, error) {
353400 return optr .syncAll (operatorConfig )
354401}
355402
356- func getFeatureGate (lister configlistersv1.FeatureGateLister ) (* osconfigv1.FeatureGate , error ) {
357- featureGate , err := lister .Get ("cluster" )
358- if errors .IsNotFound (err ) {
359- // No feature gate is set, therefore cannot be external.
360- // This is not an error as the feature gate is an optional resource.
361- return nil , nil
362- } else if err != nil {
363- return nil , fmt .Errorf ("could not fetch featuregate: %v" , err )
364- }
365-
366- return featureGate , nil
367- }
368-
369403func (optr * Operator ) maoConfigFromInfrastructure () (* OperatorConfig , error ) {
370404 infra , err := optr .osClient .ConfigV1 ().Infrastructures ().Get (context .Background (), "cluster" , metav1.GetOptions {})
371405 if err != nil {
@@ -382,12 +416,7 @@ func (optr *Operator) maoConfigFromInfrastructure() (*OperatorConfig, error) {
382416 return nil , err
383417 }
384418
385- featureGate , err := getFeatureGate (optr .featureGateLister )
386- if err != nil {
387- return nil , err
388- }
389-
390- providerControllerImage , err := getProviderControllerFromImages (provider , featureGate , * images )
419+ providerControllerImage , err := getProviderControllerFromImages (provider , * images )
391420 if err != nil {
392421 return nil , err
393422 }
@@ -412,14 +441,25 @@ func (optr *Operator) maoConfigFromInfrastructure() (*OperatorConfig, error) {
412441 return nil , err
413442 }
414443
444+ // in case the MHC controller is disabled, leave its image empty
445+ mhcImage := machineAPIOperatorImage
446+ featureGates , err := optr .featureGateAccessor .CurrentFeatureGates ()
447+ if err != nil {
448+ return nil , err
449+ }
450+ if featureGates .Enabled (osconfigv1 .FeatureGateMachineAPIOperatorDisableMachineHealthCheckController ) {
451+ klog .V (2 ).Info ("Disabling MHC controller" )
452+ mhcImage = ""
453+ }
454+
415455 return & OperatorConfig {
416456 TargetNamespace : optr .namespace ,
417457 Proxy : clusterWideProxy ,
418458 Controllers : Controllers {
419459 Provider : providerControllerImage ,
420460 MachineSet : machineAPIOperatorImage ,
421461 NodeLink : machineAPIOperatorImage ,
422- MachineHealthCheck : machineAPIOperatorImage ,
462+ MachineHealthCheck : mhcImage ,
423463 KubeRBACProxy : kubeRBACProxy ,
424464 TerminationHandler : terminationHandlerImage ,
425465 },
0 commit comments