Skip to content

Commit aefe980

Browse files
committed
remove driver upgrade label from nodes
this commit removes driver upgrade label from nodes which don't have any driver pod running on them Signed-off-by: Rahul Sharma <rahulsharm@nvidia.com>
1 parent 688e38d commit aefe980

2 files changed

Lines changed: 538 additions & 0 deletions

File tree

controllers/upgrade_controller.go

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,14 @@ func (r *UpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
152152
return ctrl.Result{}, err
153153
}
154154

155+
// Clear stale upgrade labels from nodes that no longer have driver pods
156+
// Use the built state to avoid duplicate API calls and to skip nodes actively being upgraded
157+
// This is best-effort cleanup that runs every reconciliation
158+
if err := r.clearStaleUpgradeLabels(ctx, state, driverLabel, clusterPolicyCtrl.operatorNamespace); err != nil {
159+
// Log the error but continue with the upgrade process, as this is a best-effort cleanup and should not block upgrades
160+
r.Log.V(consts.LogLevelWarning).Info("Failed to clear stale upgrade labels", "error", err)
161+
}
162+
155163
reqLogger.Info("Propagate state to state manager")
156164
reqLogger.V(consts.LogLevelDebug).Info("Current cluster upgrade state", "state", state)
157165

@@ -198,6 +206,101 @@ func (r *UpgradeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
198206
return ctrl.Result{Requeue: true, RequeueAfter: plannedRequeueInterval}, nil
199207
}
200208

209+
// clearStaleUpgradeLabels removes upgrade labels from nodes where driver pods are no longer scheduled.
210+
// This handles the case where a nodeSelector change causes pods to be terminated from certain nodes,
211+
// but the upgrade labels remain. It skips nodes that are actively being managed by the upgrade process.
212+
func (r *UpgradeReconciler) clearStaleUpgradeLabels(ctx context.Context, state *upgrade.ClusterUpgradeState, driverLabel map[string]string, namespace string) error {
213+
upgradeStateLabel := upgrade.GetUpgradeStateLabelKey()
214+
215+
// Build a set of nodes being actively managed by the upgrade process
216+
// Managed nodes are those in ClusterUpgradeState.NodeStates, which are populated by BuildState()
217+
// and represent nodes that currently have driver pods associated with them
218+
managedNodes := make(map[string]bool)
219+
for _, nodeStates := range state.NodeStates {
220+
for _, nodeState := range nodeStates {
221+
if nodeState.Node != nil {
222+
managedNodes[nodeState.Node.Name] = true
223+
}
224+
}
225+
}
226+
227+
// List only nodes that have the upgrade label
228+
nodeList := &corev1.NodeList{}
229+
if err := r.List(ctx, nodeList, client.HasLabels{upgradeStateLabel}); err != nil {
230+
return fmt.Errorf("failed to list nodes with upgrade labels: %w", err)
231+
}
232+
233+
if len(nodeList.Items) == 0 {
234+
return nil
235+
}
236+
237+
// Filter out nodes being actively managed by upgrade process (those with driver pods in state.NodeStates)
238+
// This ensures we only clean up labels from nodes that truly have stale labels
239+
var nodesToCheck []corev1.Node
240+
for _, node := range nodeList.Items {
241+
if !managedNodes[node.Name] {
242+
nodesToCheck = append(nodesToCheck, node)
243+
}
244+
}
245+
246+
if len(nodesToCheck) == 0 {
247+
return nil
248+
}
249+
250+
// List driver DaemonSets to check which nodes should have pods
251+
// This protects against removing labels during pod recreation (e.g., during rolling updates)
252+
dsList := &appsv1.DaemonSetList{}
253+
if err := r.List(ctx, dsList, client.InNamespace(namespace), client.MatchingLabels(driverLabel)); err != nil {
254+
return fmt.Errorf("failed to list driver DaemonSets: %w", err)
255+
}
256+
257+
// Build a set of nodes that DaemonSets would schedule to
258+
// This protects nodes during pod recreation (e.g., rolling updates where pod is temporarily missing)
259+
nodesSelected := make(map[string]bool)
260+
for _, ds := range dsList.Items {
261+
// DaemonSet nodeSelector is a map[string]string in spec.template.spec.nodeSelector
262+
nodeSelector := ds.Spec.Template.Spec.NodeSelector
263+
selector := labels.SelectorFromSet(nodeSelector)
264+
265+
for _, node := range nodesToCheck {
266+
// Check if this DaemonSet would schedule to this node
267+
if selector.Matches(labels.Set(node.Labels)) {
268+
nodesSelected[node.Name] = true
269+
}
270+
}
271+
}
272+
273+
// Clear upgrade label from nodes that aren't targeted by any DaemonSet
274+
// nodesToCheck excludes nodes actively managed by the upgrade state machine (managedNodes filter)
275+
// These managed nodes are in ClusterUpgradeState.NodeStates and have driver pods associated with them
276+
// The nodesSelected check protects against removing labels during pod recreation
277+
// (e.g., rolling updates where old pod is deleted but new pod not yet scheduled)
278+
//
279+
// Note: There is a small race condition window where a DaemonSet's nodeSelector is updated
280+
// after we list DaemonSets but before we patch the node. This is acceptable because:
281+
// 1. This is best-effort cleanup that runs every 2 minutes
282+
// 2. The primary use case (nodeSelector narrowing to exclude nodes) is correctly handled
283+
// 3. False negatives (not removing when we should) are safer than false positives
284+
for i := range nodesToCheck {
285+
node := &nodesToCheck[i]
286+
287+
// Only remove label from node if no DaemonSet intends to schedule a pod there
288+
if _, exists := nodesSelected[node.Name]; !exists {
289+
r.Log.Info("Clearing stale upgrade label from node", "node", node.Name)
290+
291+
nodeCopy := node.DeepCopy()
292+
delete(nodeCopy.Labels, upgradeStateLabel)
293+
if err := r.Patch(ctx, nodeCopy, client.MergeFrom(node)); err != nil {
294+
r.Log.Error(err, "Failed to clear upgrade label from node", "node", node.Name)
295+
// Continue with other nodes even if one fails
296+
continue
297+
}
298+
}
299+
}
300+
301+
return nil
302+
}
303+
201304
// removeNodeUpgradeStateLabels loops over nodes in the cluster and removes "nvidia.com/gpu-driver-upgrade-state"
202305
// It is used for cleanup when autoUpgrade feature gets disabled
203306
func (r *UpgradeReconciler) removeNodeUpgradeStateLabels(ctx context.Context) error {

0 commit comments

Comments
 (0)