Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions hack/capi/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,21 @@ apiVersion: clusterctl.cluster.x-k8s.io/v1alpha3
kind: Metadata
# Release series declare which controller contract a given minor line ships.
# kany8s v0.1.x ships with Cluster API v1beta2 because cmd/main.go imports
# sigs.k8s.io/cluster-api/api/core/v1beta2 (CAPI v1.12+). v0.2.x and v0.3.x
# stay on v1beta2: v0.2.x added the v1beta2 served-alias on existing CRDs
# (APTH-1563) without changing the contract, and v0.3.x simplifies the
# sigs.k8s.io/cluster-api/api/core/v1beta2 (CAPI v1.12+). v0.2.x, v0.3.x, and
# v0.4.x stay on v1beta2: v0.2.x added the v1beta2 served-alias on existing
# CRDs (APTH-1563) without changing the contract; v0.3.x simplifies the
# eks-karpenter-bootstrapper plugin so the kany8s-eks-byo ClusterClass RGD
# owns the FargateProfile lifecycle (APTH-1568) — neither touches contract.
# owns the FargateProfile lifecycle (APTH-1568); v0.4.x adds a new
# ClusterClass topology variable (karpenter-node-role-additional-inline-policies)
# so node-role inline policies become declarative end-to-end (APTH-1704),
# eliminating the ACK + kro drift delete race against externally attached
# inline policies — none of these touch the controller contract.
# When cutting a new minor, prepend another entry; clusterctl requires
# descending order by (major, minor).
releaseSeries:
- major: 0
minor: 4
contract: v1beta2
- major: 0
minor: 3
contract: v1beta2
Expand Down
3 changes: 2 additions & 1 deletion internal/controller/plugin/eks/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@
karpenterNodePoolTemplateConfigMapKeyAnnotation = "eks.kany8s.io/karpenter-nodepool-template-key"
oidcThumbprintAutoAnnotation = "eks.kany8s.io/oidc-thumbprint-auto"

topologyControlPlaneSubnetIDsVariableName = "vpc-control-plane-subnet-ids"

Check failure on line 28 in internal/controller/plugin/eks/constants.go

View workflow job for this annotation

GitHub Actions / Run on Ubuntu

File is not properly formatted (gofmt)

Check failure on line 28 in internal/controller/plugin/eks/constants.go

View workflow job for this annotation

GitHub Actions / Run on Ubuntu

File is not properly formatted (gofmt)
topologyNodeSubnetIDsVariableName = "vpc-node-subnet-ids"
topologyControlPlaneSecurityGroupIDsVariableName = "vpc-security-group-ids"
topologyNodeSecurityGroupIDsVariableName = "vpc-node-security-group-ids"
topologyNodeRoleAdditionalPolicyARNsVariableName = "karpenter-node-role-additional-policy-arns"
topologyNodeRoleAdditionalPolicyARNsVariableName = "karpenter-node-role-additional-policy-arns"
topologyNodeRoleAdditionalInlinePoliciesVariableName = "karpenter-node-role-additional-inline-policies"

defaultKarpenterChartVersion = "1.0.8"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,19 @@
nodeManagedPolicies = append(nodeManagedPolicies, additionalPolicyARNs...)
}
nodeManagedPolicies = normalizeDistinctStrings(nodeManagedPolicies)
if ok, err := r.ensureIAMRoleForEC2(ctx, cluster, nodeRoleName, region, nodeRoleAWSName, nodeManagedPolicies); err != nil {
// APTH-1704: read additional inline policies from the ClusterClass topology
// variable (sister to karpenter-node-role-additional-policy-arns above).
// nil map = no inline policies; the writer skips spec.inlinePolicies
// entirely so existing clusters that do not opt in remain byte-identical.
nodeInlinePolicies := map[string]string{}
if additionalInline, ok, err := readTopologyStringMap(cluster, topologyNodeRoleAdditionalInlinePoliciesVariableName); err != nil {
return ctrl.Result{}, err
} else if ok {
for k, v := range additionalInline {
nodeInlinePolicies[k] = v

Check failure on line 469 in internal/controller/plugin/eks/karpenter_bootstrapper_controller.go

View workflow job for this annotation

GitHub Actions / Run on Ubuntu

mapsloop: Replace m[k]=v loop with maps.Copy (modernize)

Check failure on line 469 in internal/controller/plugin/eks/karpenter_bootstrapper_controller.go

View workflow job for this annotation

GitHub Actions / test

mapsloop: Replace m[k]=v loop with maps.Copy (modernize)

Check failure on line 469 in internal/controller/plugin/eks/karpenter_bootstrapper_controller.go

View workflow job for this annotation

GitHub Actions / test

mapsloop: Replace m[k]=v loop with maps.Copy (modernize)

Check failure on line 469 in internal/controller/plugin/eks/karpenter_bootstrapper_controller.go

View workflow job for this annotation

GitHub Actions / Run on Ubuntu

mapsloop: Replace m[k]=v loop with maps.Copy (modernize)
}
}
if ok, err := r.ensureIAMRoleForEC2(ctx, cluster, nodeRoleName, region, nodeRoleAWSName, nodeManagedPolicies, nodeInlinePolicies); err != nil {
return ctrl.Result{}, err
} else if !ok {
msg := fmt.Sprintf("IAM Role %s/%s exists and is not managed by %s", cluster.Namespace, nodeRoleName, karpenterManagedByValue)
Expand Down Expand Up @@ -1239,7 +1251,7 @@
})
}

func (r *EKSKarpenterBootstrapperReconciler) ensureIAMRoleForEC2(ctx context.Context, owner *clusterv1.Cluster, name, region, awsRoleName string, managedPolicyARNs []string) (bool, error) {
func (r *EKSKarpenterBootstrapperReconciler) ensureIAMRoleForEC2(ctx context.Context, owner *clusterv1.Cluster, name, region, awsRoleName string, managedPolicyARNs []string, inlinePolicies map[string]string) (bool, error) {
obj := newUnstructured(ackIAMRoleGVK, owner.Namespace, name)
assume := `{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"Service":"ec2.amazonaws.com"},"Action":"sts:AssumeRole"}]}`
return r.upsertManagedUnstructured(ctx, owner, obj, func(u *unstructured.Unstructured) error {
Expand All @@ -1253,6 +1265,17 @@
policies = append(policies, arn)
}
mustSetNestedSlice(u, policies, "spec", "policies")
// APTH-1704: inline policies become declarative state on the ACK Role
// CR. Without this, ACK + kro reconcile interprets externally attached
// inline policies as drift and issues DeleteRolePolicy on the next
// reconcile cycle.
if len(inlinePolicies) > 0 {
inlineMap := make(map[string]any, len(inlinePolicies))
for k, v := range inlinePolicies {
inlineMap[k] = v
}
mustSetNestedField(u, inlineMap, "spec", "inlinePolicies")
}
mustSetNestedSlice(u, awsTagsAsSlice(bootstrapperResourceTags(owner, nil)), "spec", "tags")
return nil
})
Expand Down Expand Up @@ -2439,6 +2462,30 @@
return nil, false, nil
}

// readTopologyStringMap reads a ClusterClass topology variable that is shaped
// as map[string]string and returns the decoded value. It mirrors
// readTopologyStringSlice for sister variables whose schema is
// (type: object, additionalProperties: type: string) — used by APTH-1704 for
// karpenter-node-role-additional-inline-policies, where each map entry is an
// IAM inline policy name → JSON document string.
func readTopologyStringMap(cluster *clusterv1.Cluster, variableName string) (map[string]string, bool, error) {
if cluster == nil || !cluster.Spec.Topology.IsDefined() {
return nil, false, nil
}
for i := range cluster.Spec.Topology.Variables {
v := cluster.Spec.Topology.Variables[i]
if v.Name != variableName {
continue
}
var out map[string]string
if err := json.Unmarshal(v.Value.Raw, &out); err != nil {
return nil, false, fmt.Errorf("unmarshal topology variable %q: %w", variableName, err)
}
return out, true, nil
}
return nil, false, nil
}

func (r *EKSKarpenterBootstrapperReconciler) ensureTopologyStringSliceVariable(ctx context.Context, cluster *clusterv1.Cluster, variableName string, desired []string) (bool, error) {
if r == nil || cluster == nil {
return false, nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@
t.Fatalf("irsa role spec.policyRefs len = %d, want 1", len(refs))
}

if ok, err := r.ensureIAMRoleForEC2(context.Background(), cluster, "demo-karpenter-node", "ap-northeast-1", testEKSNodeRoleName, []string{"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"}); err != nil {
if ok, err := r.ensureIAMRoleForEC2(context.Background(), cluster, "demo-karpenter-node", "ap-northeast-1", testEKSNodeRoleName, []string{"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"}, nil); err != nil {
t.Fatalf("ensureIAMRoleForEC2() error = %v", err)
} else if !ok {
t.Fatalf("ensureIAMRoleForEC2() managed = false, want true")
Expand All @@ -459,6 +459,14 @@
} else if len(got) != 1 || got[0] != "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" {
t.Fatalf("ec2 role spec.policies = %#v", got)
}
// APTH-1704: with nil inlinePolicies, spec.inlinePolicies must remain
// absent so that existing ACK Role CRs are byte-identical to the v0.3.x
// shape.
if _, found, err := unstructured.NestedMap(ec2Role.Object, "spec", "inlinePolicies"); err != nil {
t.Fatalf("ec2 role spec.inlinePolicies: %v", err)
} else if found {
t.Fatalf("ec2 role spec.inlinePolicies = present, want absent for nil input")
}
if ok, err := r.ensureIAMInstanceProfile(context.Background(), cluster, "demo-karpenter-node-instance-profile", "ap-northeast-1", testEKSNodeRoleName, "demo-karpenter-node"); err != nil {
t.Fatalf("ensureIAMInstanceProfile() error = %v", err)
} else if !ok {
Expand Down Expand Up @@ -554,6 +562,7 @@
"ap-northeast-1",
testEKSNodeRoleName,
[]string{"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"},
nil,
)
if err != nil {
t.Fatalf("ensureIAMRoleForEC2() error = %v", err)
Expand All @@ -573,6 +582,130 @@
}
}

// TestEKSKarpenterBootstrapperReconciler_EnsureIAMRoleForEC2_InlinePoliciesDeclaredOnSpec
// verifies the APTH-1704 root fix: when the caller passes a non-empty
// inlinePolicies map, the writer materializes it into spec.inlinePolicies on
// the ACK Role CR so that ACK + kro reconcile sees the policies as declarative
// state (not drift). Without this assertion, the ACK IAM controller would
// issue DeleteRolePolicy on every reconcile cycle (~6.5h period observed in
// pmc-local CloudTrail).
func TestEKSKarpenterBootstrapperReconciler_EnsureIAMRoleForEC2_InlinePoliciesDeclaredOnSpec(t *testing.T) {
t.Parallel()

scheme := runtime.NewScheme()
utilruntime.Must(clusterv1.AddToScheme(scheme))

cluster := &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: testClusterName,
Namespace: "default",
UID: "cluster-uid",
},
}

c := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cluster).Build()
r := &EKSKarpenterBootstrapperReconciler{Client: c, Scheme: scheme}

certManagerPolicy := `{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Action":["route53:GetChange"],"Resource":"arn:aws:route53:::change/*"}]}`
externalDNSPolicy := `{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Action":["route53:ListHostedZones"],"Resource":"*"}]}`

ok, err := r.ensureIAMRoleForEC2(
context.Background(),
cluster,
"demo-karpenter-node",
"ap-northeast-1",
testEKSNodeRoleName,
[]string{"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"},
map[string]string{
"cert-manager-route53-dns01-txt-only": certManagerPolicy,

Check failure on line 620 in internal/controller/plugin/eks/karpenter_bootstrapper_controller_test.go

View workflow job for this annotation

GitHub Actions / Run on Ubuntu

File is not properly formatted (gofmt)

Check failure on line 620 in internal/controller/plugin/eks/karpenter_bootstrapper_controller_test.go

View workflow job for this annotation

GitHub Actions / Run on Ubuntu

File is not properly formatted (gofmt)
"external-dns-route53-record-management": externalDNSPolicy,
},
)
if err != nil {
t.Fatalf("ensureIAMRoleForEC2() error = %v", err)
}
if !ok {
t.Fatalf("ensureIAMRoleForEC2() managed = false, want true")
}

got := getUnstructured(t, c, ackIAMRoleGVK, "demo-karpenter-node")
// Managed policies (existing path) must still be set independently of
// inlinePolicies — the two fields are independent on the ACK Role CR.
if policies, _, err := unstructured.NestedStringSlice(got.Object, "spec", "policies"); err != nil {
t.Fatalf("role spec.policies: %v", err)
} else if len(policies) != 1 || policies[0] != "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" {
t.Fatalf("role spec.policies = %#v, want managed policy preserved", policies)
}
// spec.inlinePolicies must be present and contain both entries with the
// JSON document strings preserved verbatim (ACK schema = map[string]string).
inline, found, err := unstructured.NestedMap(got.Object, "spec", "inlinePolicies")
if err != nil {
t.Fatalf("role spec.inlinePolicies: %v", err)
}
if !found {
t.Fatalf("role spec.inlinePolicies missing, want both inline entries declared")
}
if got, want := inline["cert-manager-route53-dns01-txt-only"], certManagerPolicy; got != want {
t.Fatalf("inlinePolicies[cert-manager] = %v, want %q", got, want)
}
if got, want := inline["external-dns-route53-record-management"], externalDNSPolicy; got != want {
t.Fatalf("inlinePolicies[external-dns] = %v, want %q", got, want)
}
if len(inline) != 2 {
t.Fatalf("inlinePolicies len = %d, want 2", len(inline))
}
}

// TestReadTopologyStringMap_RoundTripsClusterClassVariable verifies that the
// new helper introduced for APTH-1704 decodes a map[string]string topology
// variable identically to readTopologyStringSlice's []string sister. boot
// scripts (boot-single.sh) emit a YAML object whose values are JSON-document
// strings; the helper must preserve those strings byte-for-byte for the writer
// to forward them to spec.inlinePolicies on the ACK Role CR.
func TestReadTopologyStringMap_RoundTripsClusterClassVariable(t *testing.T) {
t.Parallel()

rawJSON := `{"cert-manager-route53-dns01-txt-only":"{\"Version\":\"2012-10-17\"}","external-dns-route53-record-management":"{\"Version\":\"2012-10-17\",\"Statement\":[]}"}`
inlineVar := clusterv1.ClusterVariable{Name: topologyNodeRoleAdditionalInlinePoliciesVariableName}
inlineVar.Value.Raw = []byte(rawJSON)
cluster := &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{Name: testClusterName, Namespace: "default"},
Spec: clusterv1.ClusterSpec{
Topology: clusterv1.Topology{
ClassRef: clusterv1.ClusterClassRef{Name: "kany8s-eks-byo"},
Variables: []clusterv1.ClusterVariable{inlineVar},
},
},
}

got, ok, err := readTopologyStringMap(cluster, topologyNodeRoleAdditionalInlinePoliciesVariableName)
if err != nil {
t.Fatalf("readTopologyStringMap() error = %v", err)
}
if !ok {
t.Fatalf("readTopologyStringMap() ok = false, want true")
}
if len(got) != 2 {
t.Fatalf("readTopologyStringMap() len = %d, want 2", len(got))
}
if v := got["cert-manager-route53-dns01-txt-only"]; v != `{"Version":"2012-10-17"}` {
t.Fatalf("cert-manager value = %q, want JSON document preserved verbatim", v)
}
if v := got["external-dns-route53-record-management"]; v != `{"Version":"2012-10-17","Statement":[]}` {
t.Fatalf("external-dns value = %q, want JSON document preserved verbatim", v)
}

// Sister assertion: readTopologyStringMap on a non-existent variable name
// returns (nil, false, nil) — same shape as readTopologyStringSlice — so
// production cluster specs that omit the variable see no inline policies
// (= production side-effect-zero invariant).
if missing, ok, err := readTopologyStringMap(cluster, "non-existent-variable"); err != nil {
t.Fatalf("readTopologyStringMap(missing) error = %v", err)
} else if ok || missing != nil {
t.Fatalf("readTopologyStringMap(missing) = (%v, %v), want (nil, false)", missing, ok)
}
}

func TestEKSKarpenterBootstrapperReconciler_EnsureIAMPolicy_InvalidExistingShapeReturnsError(t *testing.T) {
t.Parallel()

Expand Down
Loading