diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go index bdd317af7..39bf7f7f0 100644 --- a/pkg/util/provider/machinecontroller/machine.go +++ b/pkg/util/provider/machinecontroller/machine.go @@ -814,6 +814,9 @@ func (c *controller) triggerDeletionFlow(ctx context.Context, deleteMachineReque case strings.Contains(machine.Status.LastOperation.Description, machineutils.InitiateDrain): return c.drainNode(ctx, deleteMachineRequest) + case strings.Contains(machine.Status.LastOperation.Description, machineutils.SetDeletionTaint): + return c.taintNode(ctx, deleteMachineRequest) + case strings.Contains(machine.Status.LastOperation.Description, machineutils.DelVolumesAttachments): return c.deleteNodeVolAttachments(ctx, deleteMachineRequest) diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go index e8534c87a..8722935c5 100644 --- a/pkg/util/provider/machinecontroller/machine_test.go +++ b/pkg/util/provider/machinecontroller/machine_test.go @@ -1949,7 +1949,7 @@ var _ = Describe("machine", func() { }, }, expect: expect{ - err: fmt.Errorf("Drain successful. %s", machineutils.InitiateVMDeletion), + err: fmt.Errorf("Drain successful. %s", machineutils.SetDeletionTaint), retry: machineutils.ShortRetry, nodeTerminationConditionIsSet: true, machine: newMachine( @@ -1969,7 +1969,7 @@ var _ = Describe("machine", func() { LastUpdateTime: metav1.Now(), }, LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf("Drain successful. %s", machineutils.InitiateVMDeletion), + Description: fmt.Sprintf("Drain successful. %s", machineutils.SetDeletionTaint), State: v1alpha1.MachineStateProcessing, Type: v1alpha1.MachineOperationDelete, LastUpdateTime: metav1.Now(), @@ -2465,7 +2465,7 @@ var _ = Describe("machine", func() { }, }, expect: expect{ - err: fmt.Errorf("Drain successful. %s", machineutils.InitiateVMDeletion), + err: fmt.Errorf("Drain successful. 
%s", machineutils.SetDeletionTaint), retry: machineutils.ShortRetry, machine: newMachine( &v1alpha1.MachineTemplateSpec{ @@ -2484,7 +2484,7 @@ var _ = Describe("machine", func() { LastUpdateTime: metav1.Now(), }, LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf("Drain successful. %s", machineutils.InitiateVMDeletion), + Description: fmt.Sprintf("Drain successful. %s", machineutils.SetDeletionTaint), State: v1alpha1.MachineStateProcessing, Type: v1alpha1.MachineOperationDelete, LastUpdateTime: metav1.Now(), @@ -2567,7 +2567,7 @@ var _ = Describe("machine", func() { }, }, expect: expect{ - err: fmt.Errorf("Drain successful. %s", machineutils.InitiateVMDeletion), + err: fmt.Errorf("Drain successful. %s", machineutils.SetDeletionTaint), retry: machineutils.ShortRetry, machine: newMachine( &v1alpha1.MachineTemplateSpec{ @@ -2586,7 +2586,7 @@ var _ = Describe("machine", func() { LastUpdateTime: metav1.Now(), }, LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf("Drain successful. %s", machineutils.InitiateVMDeletion), + Description: fmt.Sprintf("Drain successful. %s", machineutils.SetDeletionTaint), State: v1alpha1.MachineStateProcessing, Type: v1alpha1.MachineOperationDelete, LastUpdateTime: metav1.Now(), @@ -3064,7 +3064,7 @@ var _ = Describe("machine", func() { LastUpdateTime: metav1.Now(), }, LastOperation: v1alpha1.LastOperation{ - Description: fmt.Sprintf("Drain successful. %s", machineutils.InitiateVMDeletion), + Description: fmt.Sprintf("Node tainted. 
%s", machineutils.InitiateVMDeletion), State: v1alpha1.MachineStateProcessing, Type: v1alpha1.MachineOperationDelete, LastUpdateTime: metav1.Now(), @@ -3128,6 +3128,318 @@ var _ = Describe("machine", func() { ), }, }), + Entry("Set ToBedeletedByClusterAutoscaler Taint", &data{ + setup: setup{ + secrets: []*corev1.Secret{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + }, + }, + machineClasses: []*v1alpha1.MachineClass{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + SecretRef: newSecretReference(objMeta, 0), + }, + }, + machines: newMachines( + 1, + &v1alpha1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: v1alpha1.MachineSpec{ + Class: v1alpha1.ClassSpec{ + Kind: "MachineClass", + Name: "machine-0", + }, + ProviderID: "fakeID", + }, + }, + &v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineTerminating, + LastUpdateTime: metav1.Now(), + }, + LastOperation: v1alpha1.LastOperation{ + Description: fmt.Sprintf("Drain successful. 
%s", machineutils.SetDeletionTaint), + State: v1alpha1.MachineStateProcessing, + Type: v1alpha1.MachineOperationDelete, + LastUpdateTime: metav1.Now(), + }, + }, + nil, + map[string]string{ + machineutils.MachinePriority: "3", + }, + map[string]string{ + v1alpha1.NodeLabelKey: "fakeID-0", + }, + true, + metav1.Now(), + ), + nodes: []*corev1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakeID-0", + }, + }, + }, + }, + action: action{ + machine: "machine-0", + fakeDriver: &driver.FakeDriver{ + VMExists: true, + ProviderID: "fakeID-0", + NodeName: "fakeNode-0", + Err: nil, + }, + }, + expect: expect{ + err: nil, + retry: machineutils.ShortRetry, + nodeDeleted: false, + machine: newMachine( + &v1alpha1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: v1alpha1.MachineSpec{ + Class: v1alpha1.ClassSpec{ + Kind: "MachineClass", + Name: "machine-0", + }, + ProviderID: "fakeID", + }, + }, + &v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineTerminating, + LastUpdateTime: metav1.Now(), + }, + LastOperation: v1alpha1.LastOperation{ + Description: fmt.Sprintf("Drain successful. 
%s", machineutils.SetDeletionTaint), + State: v1alpha1.MachineStateProcessing, + Type: v1alpha1.MachineOperationDelete, + LastUpdateTime: metav1.Now(), + }, + }, + nil, + map[string]string{ + machineutils.MachinePriority: "3", + }, + map[string]string{ + v1alpha1.NodeLabelKey: "fakeID-0", + }, + true, + metav1.Now(), + ), + }, + }), + Entry("Continue if ToBedeletedByClusterAutoscaler Taint is set", &data{ + setup: setup{ + secrets: []*corev1.Secret{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + }, + }, + machineClasses: []*v1alpha1.MachineClass{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + SecretRef: newSecretReference(objMeta, 0), + }, + }, + machines: newMachines( + 1, + &v1alpha1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: v1alpha1.MachineSpec{ + Class: v1alpha1.ClassSpec{ + Kind: "MachineClass", + Name: "machine-0", + }, + ProviderID: "fakeID", + }, + }, + &v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineTerminating, + LastUpdateTime: metav1.Now(), + }, + LastOperation: v1alpha1.LastOperation{ + Description: fmt.Sprintf("Drain successful. 
%s", machineutils.SetDeletionTaint), + State: v1alpha1.MachineStateProcessing, + Type: v1alpha1.MachineOperationDelete, + LastUpdateTime: metav1.Now(), + }, + }, + nil, + map[string]string{ + machineutils.MachinePriority: "3", + }, + map[string]string{ + v1alpha1.NodeLabelKey: "fakeID-0", + }, + true, + metav1.Now(), + ), + nodes: []*corev1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "fakeID-0", + }, + Spec: corev1.NodeSpec{ + Taints: []corev1.Taint{ + { + Key: machineutils.TaintToBeDeleted, + Value: "gardener-machine-controller-manager", + Effect: corev1.TaintEffectPreferNoSchedule, + }, + }, + }, + }, + }, + }, + action: action{ + machine: "machine-0", + fakeDriver: &driver.FakeDriver{ + VMExists: true, + ProviderID: "fakeID-0", + NodeName: "fakeNode-0", + Err: nil, + }, + }, + expect: expect{ + err: nil, + retry: machineutils.ShortRetry, + nodeDeleted: false, + machine: newMachine( + &v1alpha1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: v1alpha1.MachineSpec{ + Class: v1alpha1.ClassSpec{ + Kind: "MachineClass", + Name: "machine-0", + }, + ProviderID: "fakeID", + }, + }, + &v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineTerminating, + LastUpdateTime: metav1.Now(), + }, + LastOperation: v1alpha1.LastOperation{ + Description: fmt.Sprintf("Node tainted. 
%s", machineutils.InitiateVMDeletion), + State: v1alpha1.MachineStateProcessing, + Type: v1alpha1.MachineOperationDelete, + LastUpdateTime: metav1.Now(), + }, + }, + nil, + map[string]string{ + machineutils.MachinePriority: "3", + }, + map[string]string{ + v1alpha1.NodeLabelKey: "fakeID-0", + }, + true, + metav1.Now(), + ), + }, + }), + Entry("Skip ToBedeletedByClusterAutoscaler Taint if node dose not exist", &data{ + setup: setup{ + secrets: []*corev1.Secret{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + }, + }, + machineClasses: []*v1alpha1.MachineClass{ + { + ObjectMeta: *newObjectMeta(objMeta, 0), + SecretRef: newSecretReference(objMeta, 0), + }, + }, + machines: newMachines( + 1, + &v1alpha1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: v1alpha1.MachineSpec{ + Class: v1alpha1.ClassSpec{ + Kind: "MachineClass", + Name: "machine-0", + }, + ProviderID: "fakeID", + }, + }, + &v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineTerminating, + LastUpdateTime: metav1.Now(), + }, + LastOperation: v1alpha1.LastOperation{ + Description: fmt.Sprintf("Drain successful. 
%s", machineutils.SetDeletionTaint), + State: v1alpha1.MachineStateProcessing, + Type: v1alpha1.MachineOperationDelete, + LastUpdateTime: metav1.Now(), + }, + }, + nil, + map[string]string{ + machineutils.MachinePriority: "3", + }, + map[string]string{ + v1alpha1.NodeLabelKey: "fakeID-0", + }, + true, + metav1.Now(), + ), + nodes: []*corev1.Node{}, + }, + action: action{ + machine: "machine-0", + fakeDriver: &driver.FakeDriver{ + VMExists: true, + ProviderID: "fakeID-0", + NodeName: "fakeNode-0", + Err: nil, + }, + }, + expect: expect{ + err: nil, + retry: machineutils.ShortRetry, + nodeDeleted: false, + machine: newMachine( + &v1alpha1.MachineTemplateSpec{ + ObjectMeta: *newObjectMeta(objMeta, 0), + Spec: v1alpha1.MachineSpec{ + Class: v1alpha1.ClassSpec{ + Kind: "MachineClass", + Name: "machine-0", + }, + ProviderID: "fakeID", + }, + }, + &v1alpha1.MachineStatus{ + CurrentStatus: v1alpha1.CurrentStatus{ + Phase: v1alpha1.MachineTerminating, + LastUpdateTime: metav1.Now(), + }, + LastOperation: v1alpha1.LastOperation{ + Description: fmt.Sprintf("Node dose not exist. %s", machineutils.InitiateVMDeletion), + State: v1alpha1.MachineStateProcessing, + Type: v1alpha1.MachineOperationDelete, + LastUpdateTime: metav1.Now(), + }, + }, + nil, + map[string]string{ + machineutils.MachinePriority: "3", + }, + map[string]string{ + v1alpha1.NodeLabelKey: "fakeID-0", + }, + true, + metav1.Now(), + ), + }, + }), Entry("Delete node object successfully", &data{ setup: setup{ secrets: []*corev1.Secret{ diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go index 4a77317e8..22961d0a2 100644 --- a/pkg/util/provider/machinecontroller/machine_util.go +++ b/pkg/util/provider/machinecontroller/machine_util.go @@ -1698,7 +1698,7 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver if forceDeletePods { description = fmt.Sprintf("Force Drain successful. 
%s", machineutils.DelVolumesAttachments)
 			} else { // regular drain already waits for vol detach and attach for another node.
-				description = fmt.Sprintf("Drain successful. %s", machineutils.InitiateVMDeletion)
+				description = fmt.Sprintf("Drain successful. %s", machineutils.SetDeletionTaint)
 			}
 			err = fmt.Errorf("%s", description)
 			state = v1alpha1.MachineStateProcessing
@@ -1742,6 +1742,75 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver
 	return machineutils.ShortRetry, err
 }
 
+// taintNode adds the ToBeDeletedByClusterAutoscaler taint (machineutils.TaintToBeDeleted)
+// to the node backing the machine, signalling that the node is about to be removed.
+//
+// If the node already carries the taint, or no longer exists, the machine's LastOperation
+// is moved on to the VM deletion step. Otherwise the taint is written to the node and the
+// machine status is left untouched; the LastOperation still names the taint step, so the
+// next reconciliation is expected to land here again, find the taint and then move on.
+func (c *controller) taintNode(ctx context.Context, deleteMachineRequest *driver.DeleteMachineRequest) (machineutils.RetryPeriod, error) {
+	var (
+		machine          = deleteMachineRequest.Machine
+		toBeDeletedTaint = v1.Taint{
+			Key:    machineutils.TaintToBeDeleted,
+			Value:  "gardener-machine-controller-manager",
+			Effect: v1.TaintEffectPreferNoSchedule,
+		}
+		// description is only set once this step is done (taint present or node gone).
+		description string
+	)
+
+	node, err := c.nodeLister.Get(getNodeName(machine))
+	if err != nil {
+		if !apierrors.IsNotFound(err) {
+			klog.Errorf("Error occurred while trying to fetch node object - err: %s", err)
+			return machineutils.ShortRetry, err
+		}
+		// There is no node left to taint, so this step is skipped.
+		description = fmt.Sprintf("Node dose not exist. %s", machineutils.InitiateVMDeletion)
+	} else {
+		for i := range node.Spec.Taints {
+			if node.Spec.Taints[i].MatchTaint(&toBeDeletedTaint) {
+				description = fmt.Sprintf("Node tainted. %s", machineutils.InitiateVMDeletion)
+				break
+			}
+		}
+	}
+
+	if description != "" {
+		return c.machineStatusUpdate(
+			ctx,
+			machine,
+			v1alpha1.LastOperation{
+				Description:    description,
+				State:          v1alpha1.MachineStateProcessing,
+				Type:           v1alpha1.MachineOperationDelete,
+				LastUpdateTime: metav1.Now(),
+			},
+			// Keep CurrentStatus (and with it LastUpdateTime) as it was before. It is used
+			// when computing whether the drain timeout has passed and force deletion is due.
+			// Ref - https://github.com/gardener/machine-controller-manager/blob/rel-v0.34.0/pkg/util/provider/machinecontroller/machine_util.go#L872
+			machine.Status.CurrentStatus,
+			machine.Status.LastKnownState,
+		)
+	}
+
+	// Objects handed out by the lister are shared with the informer cache and must not
+	// be modified in place, so the taint is appended to a private copy.
+	updatedNode := node.DeepCopy()
+	updatedNode.Spec.Taints = append(updatedNode.Spec.Taints, toBeDeletedTaint)
+
+	if _, err := c.targetCoreClient.CoreV1().Nodes().Update(ctx, updatedNode, metav1.UpdateOptions{}); err != nil {
+		if apierrors.IsConflict(err) {
+			return machineutils.ConflictRetry, err
+		}
+		return machineutils.ShortRetry, err
+	}
+
+	return machineutils.ShortRetry, nil
+}
+
 // deleteNodeVolAttachments deletes VolumeAttachment(s) for a node before moving to VM deletion stage.
 func (c *controller) deleteNodeVolAttachments(ctx context.Context, deleteMachineRequest *driver.DeleteMachineRequest) (machineutils.RetryPeriod, error) {
 	var (
@@ -1758,11 +1827,11 @@ func (c *controller) deleteNodeVolAttachments(ctx context.Context, deleteMachine
 			return retryPeriod, err
 		}
 		// node not found move to vm deletion
-		description = fmt.Sprintf("Skipping deleteNodeVolAttachments due to - %s. Moving to VM Deletion. %s", err.Error(), machineutils.InitiateVMDeletion)
+		description = fmt.Sprintf("Skipping deleteNodeVolAttachments due to - %s. Moving to VM Deletion. %s", err.Error(), machineutils.SetDeletionTaint)
 		state = v1alpha1.MachineStateProcessing
 		retryPeriod = 0
 	} else if len(node.Status.VolumesAttached) == 0 {
-		description = fmt.Sprintf("Node Volumes for node: %s are already detached. Moving to VM Deletion. %s", nodeName, machineutils.InitiateVMDeletion)
+		description = fmt.Sprintf("Node Volumes for node: %s are already detached. Moving to VM Deletion. %s", nodeName, machineutils.SetDeletionTaint)
 		state = v1alpha1.MachineStateProcessing
 		retryPeriod = 0
 	} else {
@@ -1781,7 +1850,7 @@ func (c *controller) deleteNodeVolAttachments(ctx context.Context, deleteMachine
 			}
 			return retryPeriod, nil
 		}
-		description = fmt.Sprintf("No Live VolumeAttachments for node: %s. Moving to VM Deletion. 
%s", nodeName, machineutils.InitiateVMDeletion) + description = fmt.Sprintf("No Live VolumeAttachments for node: %s. Moving to VM Deletion. %s", nodeName, machineutils.SetDeletionTaint) state = v1alpha1.MachineStateProcessing } now := metav1.Now() diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go index 5389aa96a..79f8917fc 100644 --- a/pkg/util/provider/machineutils/utils.go +++ b/pkg/util/provider/machineutils/utils.go @@ -23,6 +23,9 @@ const ( // InitiateDrain specifies next step as initiate node drain InitiateDrain = "Initiate node drain" + // SetDeletionTaint specifies next step as set deletion taint + SetDeletionTaint = "Set deletion taint" + // NodeReadyForUpdate specifies next step as node ready for update. NodeReadyForUpdate = "Node drain successful. Node is ready for update" @@ -74,6 +77,10 @@ const ( // indicating that a node is not yet ready to have user workload scheduled TaintNodeCriticalComponentsNotReady = "node.gardener.cloud/critical-components-not-ready" + // TaintToBeDeleted is the taint of the cluster autoscaler which is used in cloud-provider and + // kube-proxy to check if a node is getting deleted soon. + TaintToBeDeleted = "ToBeDeletedByClusterAutoscaler" + // MachineLabelKey defines the labels which contains the name of the machine of a node MachineLabelKey = "node.gardener.cloud/machine-name"