From d6c1d728f1d40fd96549def00bc463bbb6167d43 Mon Sep 17 00:00:00 2001 From: Ponmuthu K Date: Thu, 13 Nov 2025 09:54:16 +0530 Subject: [PATCH 1/3] Extend Upgrade Controller for Windows Node Compatibility --- manifests/system-upgrade-controller.yaml | 3 + pkg/upgrade/job/job.go | 113 +++++++++++++++++------ 2 files changed, 87 insertions(+), 29 deletions(-) diff --git a/manifests/system-upgrade-controller.yaml b/manifests/system-upgrade-controller.yaml index 8749102e..90c31990 100644 --- a/manifests/system-upgrade-controller.yaml +++ b/manifests/system-upgrade-controller.yaml @@ -27,6 +27,9 @@ data: SYSTEM_UPGRADE_JOB_PRIVILEGED: "true" SYSTEM_UPGRADE_JOB_TTL_SECONDS_AFTER_FINISH: "900" SYSTEM_UPGRADE_PLAN_POLLING_INTERVAL: "15m" + # Set to the Windows kubectl image ONLY if you are upgrading Windows nodes. + # Windows customers must provide this value. Leave empty ("") if not required. + SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE_WINDOWS: "" --- apiVersion: apps/v1 kind: Deployment diff --git a/pkg/upgrade/job/job.go b/pkg/upgrade/job/job.go index 86b9256f..ace205b6 100644 --- a/pkg/upgrade/job/job.go +++ b/pkg/upgrade/job/job.go @@ -21,6 +21,7 @@ import ( "k8s.io/apimachinery/pkg/selection" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/pointer" + "k8s.io/utils/ptr" ) const ( @@ -310,18 +311,42 @@ func New(plan *upgradeapiv1.Plan, node *corev1.Node, controllerName string) *bat }) } + // Determine if the target node is Windows + isWindows := node.Labels["kubernetes.io/os"] == "windows" + + // Initialize with the default kubectl image; can be changed to Windows or other image if needed + selectedKubectlImage := KubectlImage + if isWindows { + // Retrieve the Windows kubectl image from the SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE_WINDOWS environment variable. + // This environment variable must be set by customers/operators when performing upgrades on Windows nodes. + selectedKubectlImage = os.Getenv("SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE_WINDOWS") + if selectedKubectlImage == "" { + logrus.Fatal("SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE_WINDOWS is a required environment variable when targeting Windows") + } + } + // first, we prepare if plan.Spec.Prepare != nil { - podTemplate.Spec.InitContainers = append(podTemplate.Spec.InitContainers, - upgradectr.New("prepare", *plan.Spec.Prepare, - upgradectr.WithLatestTag(plan.Status.LatestVersion), - upgradectr.WithSecrets(plan.Spec.Secrets), - upgradectr.WithPlanEnvironment(plan.Name, plan.Status), - upgradectr.WithImagePullPolicy(ImagePullPolicy), - upgradectr.WithVolumes(plan.Spec.Prepare.Volumes), - upgradectr.WithSecurityContext(plan.Spec.Prepare.SecurityContext), - ), + prepareContainer := upgradectr.New("prepare", *plan.Spec.Prepare, + upgradectr.WithLatestTag(plan.Status.LatestVersion), + upgradectr.WithSecrets(plan.Spec.Secrets), + upgradectr.WithPlanEnvironment(plan.Name, plan.Status), + upgradectr.WithImagePullPolicy(ImagePullPolicy), + upgradectr.WithVolumes(plan.Spec.Prepare.Volumes), + upgradectr.WithSecurityContext(plan.Spec.Prepare.SecurityContext), ) + if isWindows { + if prepareContainer.SecurityContext == nil { + prepareContainer.SecurityContext = &corev1.SecurityContext{} + } + if prepareContainer.SecurityContext.WindowsOptions == nil { + prepareContainer.SecurityContext.WindowsOptions = &corev1.WindowsSecurityContextOptions{} + } + prepareContainer.SecurityContext.WindowsOptions.HostProcess = ptr.To(true) + prepareContainer.SecurityContext.WindowsOptions.RunAsUserName = ptr.To("NT AUTHORITY\\SYSTEM") + } + + podTemplate.Spec.InitContainers = append(podTemplate.Spec.InitContainers, prepareContainer) } // then we cordon/drain @@ -375,35 +400,65 @@ func New(plan *upgradeapiv1.Plan, node *corev1.Node, controllerName string) *bat args = append(args, "--skip-wait-for-delete-timeout", strconv.FormatInt(int64(drain.SkipWaitForDeleteTimeout), 10)) } - podTemplate.Spec.InitContainers = append(podTemplate.Spec.InitContainers, - upgradectr.New("drain", upgradeapiv1.ContainerSpec{ - Image: KubectlImage, - Args: args, - }, - upgradectr.WithSecrets(plan.Spec.Secrets), - upgradectr.WithPlanEnvironment(plan.Name, plan.Status), - upgradectr.WithImagePullPolicy(ImagePullPolicy), - upgradectr.WithVolumes(plan.Spec.Upgrade.Volumes), - ), + drainContainer := upgradectr.New("drain", upgradeapiv1.ContainerSpec{ + Image: selectedKubectlImage, + Args: args, + }, + upgradectr.WithSecrets(plan.Spec.Secrets), + upgradectr.WithPlanEnvironment(plan.Name, plan.Status), + upgradectr.WithImagePullPolicy(ImagePullPolicy), + upgradectr.WithVolumes(plan.Spec.Upgrade.Volumes), ) + + if isWindows { + if drainContainer.SecurityContext == nil { + drainContainer.SecurityContext = &corev1.SecurityContext{} + } + if drainContainer.SecurityContext.WindowsOptions == nil { + drainContainer.SecurityContext.WindowsOptions = &corev1.WindowsSecurityContextOptions{} + } + drainContainer.SecurityContext.WindowsOptions.HostProcess = ptr.To(true) + drainContainer.SecurityContext.WindowsOptions.RunAsUserName = ptr.To("NT AUTHORITY\\SYSTEM") + } + + podTemplate.Spec.InitContainers = append(podTemplate.Spec.InitContainers, drainContainer) + } else if cordon { - podTemplate.Spec.InitContainers = append(podTemplate.Spec.InitContainers, - upgradectr.New("cordon", upgradeapiv1.ContainerSpec{ - Image: KubectlImage, - Args: []string{"cordon", node.Name}, - }, - upgradectr.WithSecrets(plan.Spec.Secrets), - upgradectr.WithPlanEnvironment(plan.Name, plan.Status), - upgradectr.WithImagePullPolicy(ImagePullPolicy), - upgradectr.WithVolumes(plan.Spec.Upgrade.Volumes), - ), + cordonContainer := upgradectr.New("cordon", upgradeapiv1.ContainerSpec{ + Image: selectedKubectlImage, + Args: []string{"cordon", node.Name}, + }, + upgradectr.WithSecrets(plan.Spec.Secrets), + upgradectr.WithPlanEnvironment(plan.Name, plan.Status), + upgradectr.WithImagePullPolicy(ImagePullPolicy), + upgradectr.WithVolumes(plan.Spec.Upgrade.Volumes), ) + if isWindows { + if cordonContainer.SecurityContext == nil { + cordonContainer.SecurityContext = &corev1.SecurityContext{} + } + if cordonContainer.SecurityContext.WindowsOptions == nil { + cordonContainer.SecurityContext.WindowsOptions = &corev1.WindowsSecurityContextOptions{} + } + cordonContainer.SecurityContext.WindowsOptions.HostProcess = ptr.To(true) + cordonContainer.SecurityContext.WindowsOptions.RunAsUserName = ptr.To("NT AUTHORITY\\SYSTEM") + } + + podTemplate.Spec.InitContainers = append(podTemplate.Spec.InitContainers, cordonContainer) } // Check if SecurityContext from the Plan is non-nil var securityContext *corev1.SecurityContext if plan.Spec.Upgrade.SecurityContext != nil { securityContext = plan.Spec.Upgrade.SecurityContext + } else if isWindows { + // Set Windows-specific security context (HostProcess, run as SYSTEM) + securityContext = &corev1.SecurityContext{ + WindowsOptions: &corev1.WindowsSecurityContextOptions{ + RunAsUserName: ptr.To("NT AUTHORITY\\SYSTEM"), + HostProcess: ptr.To(true), + }, + } } else { securityContext = &corev1.SecurityContext{ Privileged: &Privileged, From c356bf6f91291fdaa0664dacf55b68301b4c1853 Mon Sep 17 00:00:00 2001 From: Ponmuthu K Date: Wed, 19 Nov 2025 10:07:00 +0530 Subject: [PATCH 2/3] Add validation in the caller and refactored the job --- manifests/system-upgrade-controller.yaml | 3 +- pkg/upgrade/handle_upgrade.go | 10 ++++- pkg/upgrade/job/job.go | 56 +++++++++++------------- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/manifests/system-upgrade-controller.yaml b/manifests/system-upgrade-controller.yaml index 90c31990..abf41341 100644 --- a/manifests/system-upgrade-controller.yaml +++ b/manifests/system-upgrade-controller.yaml @@ -27,8 +27,7 @@ data: SYSTEM_UPGRADE_JOB_PRIVILEGED: "true" SYSTEM_UPGRADE_JOB_TTL_SECONDS_AFTER_FINISH: "900" SYSTEM_UPGRADE_PLAN_POLLING_INTERVAL: "15m" - # Set to the Windows kubectl image ONLY if you are upgrading Windows nodes. - # Windows customers must provide this value. Leave empty ("") if not required. + # Only set if you have Windows nodes to upgrade; ignored otherwise. SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE_WINDOWS: "" --- apiVersion: apps/v1 diff --git a/pkg/upgrade/handle_upgrade.go b/pkg/upgrade/handle_upgrade.go index 3aa56edb..71ae9edb 100644 --- a/pkg/upgrade/handle_upgrade.go +++ b/pkg/upgrade/handle_upgrade.go @@ -2,6 +2,7 @@ package upgrade import ( "context" + "fmt" "slices" "strings" "time" @@ -81,7 +82,7 @@ func (ctl *Controller) handlePlans(ctx context.Context) error { // on the resolved status indicates that the interval has not been reached. if resolved.IsTrue(obj) { if lastUpdated, err := time.Parse(time.RFC3339, resolved.GetLastUpdated(obj)); err == nil { - if interval := time.Now().Sub(lastUpdated); interval < upgradeplan.PollingInterval { + if interval := time.Since(lastUpdated); interval < upgradeplan.PollingInterval { plans.EnqueueAfter(obj.Namespace, obj.Name, upgradeplan.PollingInterval-interval) return status, nil } @@ -139,6 +140,13 @@ func (ctl *Controller) handlePlans(ctx context.Context) error { concurrentNodeNames := make([]string, len(concurrentNodes)) for i := range concurrentNodes { node := concurrentNodes[i] + // Validate Windows nodes have kubectl image configured before creating job + if node.Labels["kubernetes.io/os"] == "windows" && upgradejob.KubectlImageWindows == "" { + err := fmt.Errorf("SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE_WINDOWS is required when targeting Windows nodes") + recorder.Eventf(obj, corev1.EventTypeWarning, "ValidationFailed", "Failed to create job for node %s: %v", node.Name, err) + complete.SetError(obj, "ValidationFailed", err) + return objects, status, err + } objects = append(objects, upgradejob.New(obj, node, ctl.Name)) concurrentNodeNames[i] = upgradenode.Hostname(node) } diff --git a/pkg/upgrade/job/job.go b/pkg/upgrade/job/job.go index ace205b6..c038762b 100644 --- a/pkg/upgrade/job/job.go +++ b/pkg/upgrade/job/job.go @@ -21,7 +21,6 @@ import ( "k8s.io/apimachinery/pkg/selection" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/pointer" - "k8s.io/utils/ptr" ) const ( @@ -86,6 +85,10 @@ var ( return defaultValue }(defaultKubectlImage) + KubectlImageWindows = func() string { + return os.Getenv("SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE_WINDOWS") + }() + Privileged = func(defaultValue bool) bool { if str, ok := os.LookupEnv("SYSTEM_UPGRADE_JOB_PRIVILEGED"); ok { if b, err := strconv.ParseBool(str); err != nil { @@ -317,12 +320,8 @@ func New(plan *upgradeapiv1.Plan, node *corev1.Node, controllerName string) *bat // Initialize with the default kubectl image; can be changed to Windows or other image if needed selectedKubectlImage := KubectlImage if isWindows { - // Retrieve the Windows kubectl image from the SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE_WINDOWS environment variable. - // This environment variable must be set by customers/operators when performing upgrades on Windows nodes. - selectedKubectlImage = os.Getenv("SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE_WINDOWS") - if selectedKubectlImage == "" { - logrus.Fatal("SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE_WINDOWS is a required environment variable when targeting Windows") - } + // Retrieve the Windows kubectl image from the global variable + selectedKubectlImage = KubectlImageWindows } // first, we prepare @@ -336,14 +335,12 @@ func New(plan *upgradeapiv1.Plan, node *corev1.Node, controllerName string) *bat upgradectr.WithSecurityContext(plan.Spec.Prepare.SecurityContext), ) if isWindows { - if prepareContainer.SecurityContext == nil { - prepareContainer.SecurityContext = &corev1.SecurityContext{} - } - if prepareContainer.SecurityContext.WindowsOptions == nil { - prepareContainer.SecurityContext.WindowsOptions = &corev1.WindowsSecurityContextOptions{} + prepareContainer.SecurityContext = &corev1.SecurityContext{ + WindowsOptions: &corev1.WindowsSecurityContextOptions{ + HostProcess: pointer.Bool(true), + RunAsUserName: pointer.String("NT AUTHORITY\\SYSTEM"), + }, } - prepareContainer.SecurityContext.WindowsOptions.HostProcess = ptr.To(true) - prepareContainer.SecurityContext.WindowsOptions.RunAsUserName = ptr.To("NT AUTHORITY\\SYSTEM") } podTemplate.Spec.InitContainers = append(podTemplate.Spec.InitContainers, prepareContainer) @@ -411,16 +408,13 @@ func New(plan *upgradeapiv1.Plan, node *corev1.Node, controllerName string) *bat ) if isWindows { - if drainContainer.SecurityContext == nil { - drainContainer.SecurityContext = &corev1.SecurityContext{} - } - if drainContainer.SecurityContext.WindowsOptions == nil { - drainContainer.SecurityContext.WindowsOptions = &corev1.WindowsSecurityContextOptions{} + drainContainer.SecurityContext = &corev1.SecurityContext{ + WindowsOptions: &corev1.WindowsSecurityContextOptions{ + HostProcess: pointer.Bool(true), + RunAsUserName: pointer.String("NT AUTHORITY\\SYSTEM"), + }, } - drainContainer.SecurityContext.WindowsOptions.HostProcess = ptr.To(true) - drainContainer.SecurityContext.WindowsOptions.RunAsUserName = ptr.To("NT AUTHORITY\\SYSTEM") } - podTemplate.Spec.InitContainers = append(podTemplate.Spec.InitContainers, drainContainer) } else if cordon { @@ -434,14 +428,14 @@ func New(plan *upgradeapiv1.Plan, node *corev1.Node, controllerName string) *bat upgradectr.WithVolumes(plan.Spec.Upgrade.Volumes), ) if isWindows { - if cordonContainer.SecurityContext == nil { - cordonContainer.SecurityContext = &corev1.SecurityContext{} - } - if cordonContainer.SecurityContext.WindowsOptions == nil { - cordonContainer.SecurityContext.WindowsOptions = &corev1.WindowsSecurityContextOptions{} + cordonContainer.SecurityContext = &corev1.SecurityContext{ + WindowsOptions: &corev1.WindowsSecurityContextOptions{ + HostProcess: pointer.Bool(true), + RunAsUserName: pointer.String("NT AUTHORITY\\SYSTEM"), + }, } - cordonContainer.SecurityContext.WindowsOptions.HostProcess = ptr.To(true) - cordonContainer.SecurityContext.WindowsOptions.RunAsUserName = ptr.To("NT AUTHORITY\\SYSTEM") + cordonContainer.SecurityContext.WindowsOptions.HostProcess = pointer.Bool(true) + cordonContainer.SecurityContext.WindowsOptions.RunAsUserName = pointer.String("NT AUTHORITY\\SYSTEM") } podTemplate.Spec.InitContainers = append(podTemplate.Spec.InitContainers, cordonContainer) @@ -455,8 +449,8 @@ func New(plan *upgradeapiv1.Plan, node *corev1.Node, controllerName string) *bat // Set Windows-specific security context (HostProcess, run as SYSTEM) securityContext = &corev1.SecurityContext{ WindowsOptions: &corev1.WindowsSecurityContextOptions{ - RunAsUserName: ptr.To("NT AUTHORITY\\SYSTEM"), - HostProcess: ptr.To(true), + RunAsUserName: pointer.String("NT AUTHORITY\\SYSTEM"), + HostProcess: pointer.Bool(true), }, } } else { From d4f97be958b4aaf12493672c1de9283c1703247f Mon Sep 17 00:00:00 2001 From: Ponmuthu K Date: Mon, 24 Nov 2025 10:13:39 +0530 Subject: [PATCH 3/3] Moved SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE_WINDOWS variable to top --- manifests/system-upgrade-controller.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/manifests/system-upgrade-controller.yaml b/manifests/system-upgrade-controller.yaml index abf41341..df8b1097 100644 --- a/manifests/system-upgrade-controller.yaml +++ b/manifests/system-upgrade-controller.yaml @@ -24,11 +24,11 @@ data: SYSTEM_UPGRADE_JOB_BACKOFF_LIMIT: "99" SYSTEM_UPGRADE_JOB_IMAGE_PULL_POLICY: "Always" SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE: "rancher/kubectl:v1.30.3" + # Only set if you have Windows nodes to upgrade; ignored otherwise. + SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE_WINDOWS: "" SYSTEM_UPGRADE_JOB_PRIVILEGED: "true" SYSTEM_UPGRADE_JOB_TTL_SECONDS_AFTER_FINISH: "900" SYSTEM_UPGRADE_PLAN_POLLING_INTERVAL: "15m" - # Only set if you have Windows nodes to upgrade; ignored otherwise. - SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE_WINDOWS: "" --- apiVersion: apps/v1 kind: Deployment