From 82ab1424c0a73e96430e76becafd2d1993d55acf Mon Sep 17 00:00:00 2001
From: Isabella Janssen <ijanssen@redhat.com>
Date: Tue, 24 Feb 2026 12:47:02 -0500
Subject: [PATCH] controller: always determine MCP machine counts based on node
 properties

---
 pkg/controller/node/status.go | 130 ++++------------------------------
 1 file changed, 12 insertions(+), 118 deletions(-)

diff --git a/pkg/controller/node/status.go b/pkg/controller/node/status.go
index edda080c38..503b2ba0a1 100644
--- a/pkg/controller/node/status.go
+++ b/pkg/controller/node/status.go
@@ -75,7 +75,7 @@ func (ctrl *Controller) syncStatusOnly(pool *mcfgv1.MachineConfigPool) error {
 }
 
 //nolint:gocyclo,gosec
-func (ctrl *Controller) calculateStatus(mcs []*mcfgv1.MachineConfigNode, cconfig *mcfgv1.ControllerConfig, pool *mcfgv1.MachineConfigPool, nodes []*corev1.Node, mosc *mcfgv1.MachineOSConfig, mosb *mcfgv1.MachineOSBuild) mcfgv1.MachineConfigPoolStatus {
+func (ctrl *Controller) calculateStatus(mcns []*mcfgv1.MachineConfigNode, cconfig *mcfgv1.ControllerConfig, pool *mcfgv1.MachineConfigPool, nodes []*corev1.Node, mosc *mcfgv1.MachineOSConfig, mosb *mcfgv1.MachineOSBuild) mcfgv1.MachineConfigPoolStatus {
 	certExpirys := []mcfgv1.CertExpiry{}
 	if cconfig != nil {
 		for _, cert := range cconfig.Status.ControllerCertificates {
@@ -94,134 +94,28 @@ func (ctrl *Controller) calculateStatus(mcs []*mcfgv1.MachineConfigNode, cconfig
 
 	isLayeredPool := ctrl.isLayeredPool(mosc, mosb)
 
-	var degradedMachines, readyMachines, updatedMachines, unavailableMachines, updatingMachines []*corev1.Node
 	degradedReasons := []string{}
 	pisIsEnabled := ctrl.fgHandler.Enabled(features.FeatureGatePinnedImages)
 	pinnedImageSetsDegraded := false
 
-	// if we represent updating properly here, we will also represent updating properly in the CO
-	// so this solves the cordoning RFE and the upgradeable RFE
-	// updating == updatePrepared, updateExecuted, updatedComplete, postAction, cordoning, draining
-	// updated == nodeResumed, updated
-	// ready == nodeResumed, updated
-	// unavailable == draining, cordoned
-	// degraded == if the condition.Reason == error
-	// this ensures that a MCP only enters Upgradeable==False if the node actually needs to upgrade to the new MC
-	for _, state := range mcs {
-		var ourNode *corev1.Node
-		for _, n := range nodes {
-			if state.Name == n.Name {
-				ourNode = n
-				break
-			}
-		}
-		if ourNode == nil {
-			klog.Errorf("Could not find specified node %s", state.Name)
-		}
-		if len(state.Status.Conditions) == 0 {
-			// not ready yet
-			break
-		}
+	// Set the PIS pool synchronizer to updated based on the MCN
+	for _, mcn := range mcns {
 		if pisIsEnabled {
-			if isPinnedImageSetsUpdated(state) {
+			if isPinnedImageSetsUpdated(mcn) {
 				poolSynchronizer.SetUpdated(mcfgv1.PinnedImageSets)
 			}
 		}
-		for _, cond := range state.Status.Conditions {
-			// populate the degradedReasons from the MachineConfigNodeNodeDegraded condition
-			if mcfgv1.StateProgress(cond.Type) == mcfgv1.MachineConfigNodeNodeDegraded && cond.Status == metav1.ConditionTrue {
-				degradedMachines = append(degradedMachines, ourNode)
-				// Degraded nodes are also unavailable since they are not in a "Done" state
-				// and cannot be used for further updates (see IsUnavailableForUpdate)
-				unavailableMachines = append(unavailableMachines, ourNode)
-				degradedReasons = append(degradedReasons, fmt.Sprintf("Node %s is reporting: %q", ourNode.Name, cond.Message))
-				break
-			}
-			/*
-				// TODO (ijanssen): This section of code should be implemented as part of OCPBUGS-57177 after OCPBUGS-32745 is addressed.
-				// 	In the current state of the code, the `MachineConfigNodePinnedImageSetsDegraded` condition is wrongly being set to True`
-				// 	even when no unintended functionality is occurring, such as many images taking more than 2 minutes to fetch. Thus, it is
-				//  not wise to degrade an MCP on a PIS degrade while the PIS degrade is not acting as intended. OCPBUGS-57177 has
-				// 	been marked as blocked by OCPBUGS-32745 in Jira, but once the degrade condition is stablilized, this code block should
-				// 	cover the fix for OCPBUGS-57177.
-					// populate the degradedReasons from the MachineConfigNodePinnedImageSetsDegraded condition
-					if pisIsEnabled && mcfgv1.StateProgress(cond.Type) == mcfgv1.MachineConfigNodePinnedImageSetsDegraded && cond.Status == metav1.ConditionTrue {
-						degradedMachines = append(degradedMachines, ourNode)
-						degradedReasons = append(degradedReasons, fmt.Sprintf("Node %s references an invalid PinnedImageSet. See the node's MachineConfigNode resource for details.", ourNode.Name))
-						if mcfgv1.StateProgress(cond.Type) == mcfgv1.MachineConfigNodePinnedImageSetsDegraded {
-							pinnedImageSetsDegraded = true
-						}
-						break
-					}
-			*/
-			/*
-				// TODO: (djoshy) Rework this block to use MCN conditions correctly. See: https://issues.redhat.com/browse/MCO-1228
-
-				// Specifically, the main concerns for the following block are:
-				// (i) Why are only unknown conditions being evaluated? Shouldn't True/False conditions be used?
-				// (ii) Multiple conditions can be unknown at the same time, resulting in certain machines being double counted
-
-				// Some background:
-				// The MCN conditions are used to feed MCP statuses if the machine counts add up correctly to the total node count.
-				// If this check fails, node conditions are used to determine machine counts. The MCN counts calculated in this block
-				// seem incorrect most of the time, so the controller almost always defaults to using the node condition based counts.
-				// On occasion, the MCN counts cause a false positive in aformentioned check, resulting in invalid values for the MCP
-				// statuses. Commenting out this block will force the controller to always use node condition based counts to feed the
-				// MCP status.
-
-
-				if cond.Status == metav1.ConditionUnknown {
-					// This switch case will cause a node to be double counted, maybe use a hash for node count
-					switch mcfgv1.StateProgress(cond.Type) {
-					case mcfgv1.MachineConfigNodeUpdatePrepared:
-						updatingMachines = append(updatedMachines, ourNode) //nolint:gocritic
-					case mcfgv1.MachineConfigNodeUpdateExecuted:
-						updatingMachines = append(updatingMachines, ourNode)
-					case mcfgv1.MachineConfigNodeUpdatePostActionComplete:
-						updatingMachines = append(updatingMachines, ourNode)
-					case mcfgv1.MachineConfigNodeUpdateComplete:
-						updatingMachines = append(updatingMachines, ourNode)
-					case mcfgv1.MachineConfigNodeResumed:
-						updatingMachines = append(updatedMachines, ourNode) //nolint:gocritic
-						readyMachines = append(readyMachines, ourNode)
-					// Note (ijanssen): `MachineConfigNodeUpdateCompatible` was removed with MCO-1543. This case will need to be replaced/removed when working on MCO-1228.
-					case mcfgv1.MachineConfigNodeUpdateCompatible:
-						updatingMachines = append(updatedMachines, ourNode) //nolint:gocritic
-					case mcfgv1.MachineConfigNodeUpdateDrained:
-						unavailableMachines = append(unavailableMachines, ourNode)
-						updatingMachines = append(updatingMachines, ourNode)
-					case mcfgv1.MachineConfigNodeUpdateCordoned:
-						unavailableMachines = append(unavailableMachines, ourNode)
-						updatingMachines = append(updatingMachines, ourNode)
-					case mcfgv1.MachineConfigNodeUpdated:
-						updatedMachines = append(updatedMachines, ourNode)
-						readyMachines = append(readyMachines, ourNode)
-					}
-				}
-			*/
-		}
 	}
-	degradedMachineCount := int32(len(degradedMachines))
+
+	// Get the updated, degraded, ready, and unavailable machine counts from the nodes' properties
+	updatedMachines := getUpdatedMachines(pool, nodes, mosc, mosb, isLayeredPool)
 	updatedMachineCount := int32(len(updatedMachines))
-	unavailableMachineCount := int32(len(unavailableMachines))
-	updatingMachineCount := int32(len(updatingMachines))
+	readyMachines := getReadyMachines(pool, nodes, mosc, mosb, isLayeredPool)
 	readyMachineCount := int32(len(readyMachines))
-
-	// this is # 1 priority, get the upgrade states actually reporting
-	if degradedMachineCount+readyMachineCount+unavailableMachineCount+updatingMachineCount != int32(len(nodes)) {
-
-		updatedMachines = getUpdatedMachines(pool, nodes, mosc, mosb, isLayeredPool)
-		updatedMachineCount = int32(len(updatedMachines))
-
-		readyMachines = getReadyMachines(pool, nodes, mosc, mosb, isLayeredPool)
-		readyMachineCount = int32(len(readyMachines))
-
-		unavailableMachines = getUnavailableMachines(nodes, pool)
-		unavailableMachineCount = int32(len(unavailableMachines))
-
-		degradedMachines = getDegradedMachines(nodes)
-		degradedMachineCount = int32(len(degradedMachines))
-	}
+	unavailableMachines := getUnavailableMachines(nodes, pool)
+	unavailableMachineCount := int32(len(unavailableMachines))
+	degradedMachines := getDegradedMachines(nodes)
+	degradedMachineCount := int32(len(degradedMachines))
 
 	for _, n := range degradedMachines {
 		reason, ok := n.Annotations[daemonconsts.MachineConfigDaemonReasonAnnotationKey]