@@ -75,7 +75,7 @@ func (ctrl *Controller) syncStatusOnly(pool *mcfgv1.MachineConfigPool) error {
7575}
7676
7777//nolint:gocyclo,gosec
78- func (ctrl * Controller ) calculateStatus (mcs []* mcfgv1.MachineConfigNode , cconfig * mcfgv1.ControllerConfig , pool * mcfgv1.MachineConfigPool , nodes []* corev1.Node , mosc * mcfgv1.MachineOSConfig , mosb * mcfgv1.MachineOSBuild ) mcfgv1.MachineConfigPoolStatus {
78+ func (ctrl * Controller ) calculateStatus (mcns []* mcfgv1.MachineConfigNode , cconfig * mcfgv1.ControllerConfig , pool * mcfgv1.MachineConfigPool , nodes []* corev1.Node , mosc * mcfgv1.MachineOSConfig , mosb * mcfgv1.MachineOSBuild ) mcfgv1.MachineConfigPoolStatus {
7979 certExpirys := []mcfgv1.CertExpiry {}
8080 if cconfig != nil {
8181 for _ , cert := range cconfig .Status .ControllerCertificates {
@@ -94,134 +94,28 @@ func (ctrl *Controller) calculateStatus(mcs []*mcfgv1.MachineConfigNode, cconfig
9494
9595 isLayeredPool := ctrl .isLayeredPool (mosc , mosb )
9696
97- var degradedMachines , readyMachines , updatedMachines , unavailableMachines , updatingMachines []* corev1.Node
9897 degradedReasons := []string {}
9998 pisIsEnabled := ctrl .fgHandler .Enabled (features .FeatureGatePinnedImages )
10099 pinnedImageSetsDegraded := false
101100
102- // if we represent updating properly here, we will also represent updating properly in the CO
103- // so this solves the cordoning RFE and the upgradeable RFE
104- // updating == updatePrepared, updateExecuted, updatedComplete, postAction, cordoning, draining
105- // updated == nodeResumed, updated
106- // ready == nodeResumed, updated
107- // unavailable == draining, cordoned
108- // degraded == if the condition.Reason == error
109- // this ensures that a MCP only enters Upgradeable==False if the node actually needs to upgrade to the new MC
110- for _ , state := range mcs {
111- var ourNode * corev1.Node
112- for _ , n := range nodes {
113- if state .Name == n .Name {
114- ourNode = n
115- break
116- }
117- }
118- if ourNode == nil {
119- klog .Errorf ("Could not find specified node %s" , state .Name )
120- }
121- if len (state .Status .Conditions ) == 0 {
122- // not ready yet
123- break
124- }
101+ // Set the PIS pool synchronizer to updated based on the MCN
102+ for _ , mcn := range mcns {
125103 if pisIsEnabled {
126- if isPinnedImageSetsUpdated (state ) {
104+ if isPinnedImageSetsUpdated (mcn ) {
127105 poolSynchronizer .SetUpdated (mcfgv1 .PinnedImageSets )
128106 }
129107 }
130- for _ , cond := range state .Status .Conditions {
131- // populate the degradedReasons from the MachineConfigNodeNodeDegraded condition
132- if mcfgv1 .StateProgress (cond .Type ) == mcfgv1 .MachineConfigNodeNodeDegraded && cond .Status == metav1 .ConditionTrue {
133- degradedMachines = append (degradedMachines , ourNode )
134- // Degraded nodes are also unavailable since they are not in a "Done" state
135- // and cannot be used for further updates (see IsUnavailableForUpdate)
136- unavailableMachines = append (unavailableMachines , ourNode )
137- degradedReasons = append (degradedReasons , fmt .Sprintf ("Node %s is reporting: %q" , ourNode .Name , cond .Message ))
138- break
139- }
140- /*
141- // TODO (ijanssen): This section of code should be implemented as part of OCPBUGS-57177 after OCPBUGS-32745 is addressed.
142- // In the current state of the code, the `MachineConfigNodePinnedImageSetsDegraded` condition is wrongly being set to True`
143- // even when no unintended functionality is occurring, such as many images taking more than 2 minutes to fetch. Thus, it is
144- // not wise to degrade an MCP on a PIS degrade while the PIS degrade is not acting as intended. OCPBUGS-57177 has
145- // been marked as blocked by OCPBUGS-32745 in Jira, but once the degrade condition is stablilized, this code block should
146- // cover the fix for OCPBUGS-57177.
147- // populate the degradedReasons from the MachineConfigNodePinnedImageSetsDegraded condition
148- if pisIsEnabled && mcfgv1.StateProgress(cond.Type) == mcfgv1.MachineConfigNodePinnedImageSetsDegraded && cond.Status == metav1.ConditionTrue {
149- degradedMachines = append(degradedMachines, ourNode)
150- degradedReasons = append(degradedReasons, fmt.Sprintf("Node %s references an invalid PinnedImageSet. See the node's MachineConfigNode resource for details.", ourNode.Name))
151- if mcfgv1.StateProgress(cond.Type) == mcfgv1.MachineConfigNodePinnedImageSetsDegraded {
152- pinnedImageSetsDegraded = true
153- }
154- break
155- }
156- */
157- /*
158- // TODO: (djoshy) Rework this block to use MCN conditions correctly. See: https://issues.redhat.com/browse/MCO-1228
159-
160- // Specifically, the main concerns for the following block are:
161- // (i) Why are only unknown conditions being evaluated? Shouldn't True/False conditions be used?
162- // (ii) Multiple conditions can be unknown at the same time, resulting in certain machines being double counted
163-
164- // Some background:
165- // The MCN conditions are used to feed MCP statuses if the machine counts add up correctly to the total node count.
166- // If this check fails, node conditions are used to determine machine counts. The MCN counts calculated in this block
167- // seem incorrect most of the time, so the controller almost always defaults to using the node condition based counts.
168- // On occasion, the MCN counts cause a false positive in aformentioned check, resulting in invalid values for the MCP
169- // statuses. Commenting out this block will force the controller to always use node condition based counts to feed the
170- // MCP status.
171-
172-
173- if cond.Status == metav1.ConditionUnknown {
174- // This switch case will cause a node to be double counted, maybe use a hash for node count
175- switch mcfgv1.StateProgress(cond.Type) {
176- case mcfgv1.MachineConfigNodeUpdatePrepared:
177- updatingMachines = append(updatedMachines, ourNode) //nolint:gocritic
178- case mcfgv1.MachineConfigNodeUpdateExecuted:
179- updatingMachines = append(updatingMachines, ourNode)
180- case mcfgv1.MachineConfigNodeUpdatePostActionComplete:
181- updatingMachines = append(updatingMachines, ourNode)
182- case mcfgv1.MachineConfigNodeUpdateComplete:
183- updatingMachines = append(updatingMachines, ourNode)
184- case mcfgv1.MachineConfigNodeResumed:
185- updatingMachines = append(updatedMachines, ourNode) //nolint:gocritic
186- readyMachines = append(readyMachines, ourNode)
187- // Note (ijanssen): `MachineConfigNodeUpdateCompatible` was removed with MCO-1543. This case will need to be replaced/removed when working on MCO-1228.
188- case mcfgv1.MachineConfigNodeUpdateCompatible:
189- updatingMachines = append(updatedMachines, ourNode) //nolint:gocritic
190- case mcfgv1.MachineConfigNodeUpdateDrained:
191- unavailableMachines = append(unavailableMachines, ourNode)
192- updatingMachines = append(updatingMachines, ourNode)
193- case mcfgv1.MachineConfigNodeUpdateCordoned:
194- unavailableMachines = append(unavailableMachines, ourNode)
195- updatingMachines = append(updatingMachines, ourNode)
196- case mcfgv1.MachineConfigNodeUpdated:
197- updatedMachines = append(updatedMachines, ourNode)
198- readyMachines = append(readyMachines, ourNode)
199- }
200- }
201- */
202- }
203108 }
204- degradedMachineCount := int32 (len (degradedMachines ))
109+
110+ // Get the updated, degraded, ready, and unavailable machine counts from the nodes' properties
111+ updatedMachines := getUpdatedMachines (pool , nodes , mosc , mosb , isLayeredPool )
205112 updatedMachineCount := int32 (len (updatedMachines ))
206- unavailableMachineCount := int32 (len (unavailableMachines ))
207- updatingMachineCount := int32 (len (updatingMachines ))
113+ readyMachines := getReadyMachines (pool , nodes , mosc , mosb , isLayeredPool )
208114 readyMachineCount := int32 (len (readyMachines ))
209-
210- // this is # 1 priority, get the upgrade states actually reporting
211- if degradedMachineCount + readyMachineCount + unavailableMachineCount + updatingMachineCount != int32 (len (nodes )) {
212-
213- updatedMachines = getUpdatedMachines (pool , nodes , mosc , mosb , isLayeredPool )
214- updatedMachineCount = int32 (len (updatedMachines ))
215-
216- readyMachines = getReadyMachines (pool , nodes , mosc , mosb , isLayeredPool )
217- readyMachineCount = int32 (len (readyMachines ))
218-
219- unavailableMachines = getUnavailableMachines (nodes , pool )
220- unavailableMachineCount = int32 (len (unavailableMachines ))
221-
222- degradedMachines = getDegradedMachines (nodes )
223- degradedMachineCount = int32 (len (degradedMachines ))
224- }
115+ unavailableMachines := getUnavailableMachines (nodes , pool )
116+ unavailableMachineCount := int32 (len (unavailableMachines ))
117+ degradedMachines := getDegradedMachines (nodes )
118+ degradedMachineCount := int32 (len (degradedMachines ))
225119
226120 for _ , n := range degradedMachines {
227121 reason , ok := n .Annotations [daemonconsts .MachineConfigDaemonReasonAnnotationKey ]
0 commit comments