From 905996986d9d68c2fc57f760af9b053bd8214389 Mon Sep 17 00:00:00 2001 From: Ian Flores Siaca Date: Mon, 9 Mar 2026 11:28:23 -0700 Subject: [PATCH 1/3] Add force_node_group_upgrade option for EKS clusters Per-cluster opt-in flag (default false) that passes force_update_version to the EKS NodeGroup resource, allowing version updates even when PDBs block pod eviction. --- docs/CONFIGURATION.md | 1 + examples/workload/ptd.yaml | 3 +++ python-pulumi/src/ptd/aws_workload.py | 1 + python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py | 3 +++ python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py | 1 + 5 files changed, 9 insertions(+) diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index f3f7cc3..5015710 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -113,6 +113,7 @@ spec: mp_instance_type: r6a.2xlarge root_disk_size: 200 routing_weight: "100" # For blue/green: 0-255 + # force_node_group_upgrade: false # Force version updates even when PDBs block pod eviction components: traefik_forward_auth_version: "0.0.14" diff --git a/examples/workload/ptd.yaml b/examples/workload/ptd.yaml index 4a867c0..b85b0b0 100644 --- a/examples/workload/ptd.yaml +++ b/examples/workload/ptd.yaml @@ -69,6 +69,9 @@ spec: # Traffic routing weight (for blue/green deployments) routing_weight: "100" + # Force node group version updates even when PDBs block pod eviction (default: false) + # force_node_group_upgrade: true + # Component versions components: traefik_forward_auth_version: "0.0.14" diff --git a/python-pulumi/src/ptd/aws_workload.py b/python-pulumi/src/ptd/aws_workload.py index 81724aa..b1dc6ed 100644 --- a/python-pulumi/src/ptd/aws_workload.py +++ b/python-pulumi/src/ptd/aws_workload.py @@ -258,6 +258,7 @@ class AWSWorkloadClusterConfig(ptd.WorkloadClusterConfig): enable_efs_csi_driver: bool = False efs_config: ptd.EFSConfig | None = None karpenter_config: KarpenterConfig | None = None + force_node_group_upgrade: bool = False @dataclasses.dataclass(frozen=True) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py b/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py index 7d97f0c..a752079 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py @@ -527,6 +527,7 @@ def with_node_group( ami_type: str = "AL2_x86_64", taints: list[aws.eks.NodeGroupTaintArgs] | None = None, depends_on: list[pulumi.Resource] | None = None, + force_update_version: bool = False, *, use_name: bool = False, ): @@ -552,6 +553,7 @@ def with_node_group( :param max_unavailable: Optional. The maximum number of unavailable nodes during an update. Default 1 :param taints: Optional. The Kubernetes taints to be applied to the nodes in the node group :param depends_on: Optional. Resources that must be created before the node group (e.g., CNI) + :param force_update_version: Optional. Force version update even when PDBs block pod eviction. Default False :param opts: Optional. Resource options. :return: The AWSEKSCluster component resource """ @@ -601,6 +603,7 @@ def instance_type_check(t: str) -> None: ), update_config=aws.eks.NodeGroupUpdateConfigArgs(max_unavailable=max_unavailable), taints=taints, + force_update_version=force_update_version, opts=pulumi.ResourceOptions(parent=self.eks, depends_on=depends_on), ) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py b/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py index 8796f73..09a2ee5 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_workload_eks.py @@ -248,6 +248,7 @@ def _create_node_group( version=cluster_cfg.cluster_version, taints=eks_taints, depends_on=depends_on, + force_update_version=cluster_cfg.force_node_group_upgrade, ) def _define_tigera_operator( From d13b6615f13b6d8bdfdf898a39c06b017d874800 Mon Sep 17 00:00:00 2001 From: Ian Flores Siaca Date: Mon, 9 Mar 2026 11:28:23 -0700 Subject: [PATCH 2/3] Address review findings (job 807) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All 15 tests pass, including the 2 new ones. Changes: - Fixed documentation inconsistency: changed example `force_node_group_upgrade` value from `true` to `false` to match `CONFIGURATION.md` and clarify the default behavior - Added tests for `AWSWorkloadClusterConfig.force_node_group_upgrade` field (default value and explicit enable) - Note: the **Medium** finding about the flag only applying to the main node group is not an issue — `_create_node_group` reads the flag from `cluster_cfg` and is called for both the default and additional node groups --- examples/workload/ptd.yaml | 2 +- python-pulumi/tests/test_workload_cluster_config.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/examples/workload/ptd.yaml b/examples/workload/ptd.yaml index b85b0b0..9e0259b 100644 --- a/examples/workload/ptd.yaml +++ b/examples/workload/ptd.yaml @@ -70,7 +70,7 @@ spec: routing_weight: "100" # Force node group version updates even when PDBs block pod eviction (default: false) - # force_node_group_upgrade: true + # force_node_group_upgrade: false # Component versions components: diff --git a/python-pulumi/tests/test_workload_cluster_config.py b/python-pulumi/tests/test_workload_cluster_config.py index 2e3d7ba..8220fcc 100644 --- a/python-pulumi/tests/test_workload_cluster_config.py +++ b/python-pulumi/tests/test_workload_cluster_config.py @@ -3,6 +3,7 @@ import pytest import ptd +import ptd.aws_workload def test_workload_cluster_config_default_initialization(): @@ -308,3 +309,15 @@ def test_workload_cluster_config_custom_k8s_resources_in_workload(): assert workload_config.clusters["20250328"].custom_k8s_resources == ["storage", "common"] assert workload_config.clusters["20250415"].custom_k8s_resources == ["monitoring"] + + +def test_aws_workload_cluster_config_force_node_group_upgrade_default(): + """Test that force_node_group_upgrade defaults to False.""" + config = ptd.aws_workload.AWSWorkloadClusterConfig() + assert config.force_node_group_upgrade is False + + +def test_aws_workload_cluster_config_force_node_group_upgrade_enabled(): + """Test that force_node_group_upgrade can be set to True.""" + config = ptd.aws_workload.AWSWorkloadClusterConfig(force_node_group_upgrade=True) + assert config.force_node_group_upgrade is True From c5f01765a0ace6bb1fd891b6a9f28d629556211b Mon Sep 17 00:00:00 2001 From: Ian Flores Siaca Date: Mon, 9 Mar 2026 11:30:52 -0700 Subject: [PATCH 3/3] Move force_update_version to keyword-only argument Fixes ruff FBT001/FBT002 lint errors. --- python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py b/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py index a752079..e2e5447 100644 --- a/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py +++ b/python-pulumi/src/ptd/pulumi_resources/aws_eks_cluster.py @@ -527,8 +527,8 @@ def with_node_group( ami_type: str = "AL2_x86_64", taints: list[aws.eks.NodeGroupTaintArgs] | None = None, depends_on: list[pulumi.Resource] | None = None, - force_update_version: bool = False, *, + force_update_version: bool = False, use_name: bool = False, ): # TODO: what typing should we have for subnets? Consistency?