From 8d00d1a33c5e68bd01bec1f222660645ec991c0b Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Tue, 2 Jun 2026 21:06:37 -0400 Subject: [PATCH 01/13] fix(controller): watch downstream WD to propagate status without resync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The WorkloadDeploymentFederator mirrors the downstream Karmada WorkloadDeployment status onto the project (VCP) WorkloadDeployment, but SetupWithManager only watched the project WD via For(). Nothing watched the downstream WD whose status it mirrors, so when Karmada aggregated new status onto the downstream object the federator was not notified — it only caught up on the next informer resync (~10h default) or an incidental project-WD spec write. This is why a freshly created workload's replica counts stayed empty on the VCP long after its projected Instance had already appeared (the InstanceProjector holds the analogous downstream watch and so propagates immediately). Add a downstream watch using the same cross-plane mechanism the InstanceProjector and unikraft-provider use (milosource cluster source + TypedEnqueueRequestsFromMapFunc). The map function correlates a downstream WD event back to its project WD reconcile request: name is stable across planes, namespace comes from the UpstreamOwnerNamespace label the federator stamps, and the project cluster name is recovered by decoding the UpstreamOwnerClusterName label on the downstream namespace (the exact inverse of the encoding applied in ensureDownstreamNamespace). The federation manager already constructed for the InstanceProjector is reused as the watchable source, so there is no additional manager or informer-cache cost beyond the new WD and Namespace informers. Karmada's own status-aggregation interval (edge cell → downstream WD) remains outside this repo; once Karmada writes the aggregated status, the new watch reacts immediately. Co-Authored-By: Claude Sonnet 4.6 --- cmd/main.go | 29 +++-- .../workloaddeployment_federator.go | 111 ++++++++++++++++- .../workloaddeployment_federator_test.go | 117 ++++++++++++++++++ 3 files changed, 245 insertions(+), 12 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 01d3eddd..1d1241be 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -467,7 +467,27 @@ func ignoreCanceled(err error) error { // InstanceProjector). Called only when management controllers are enabled and // a federation REST config is available. func setupManagementControllers(mgr mcmanager.Manager, federationClient client.Client) ([]manager.Runnable, error) { - federator := &controller.WorkloadDeploymentFederator{FederationClient: federationClient} + // The federation manager provides a cached, watchable handle to the Karmada + // federation control plane. It backs the InstanceProjector's Instance watch + // and the WorkloadDeploymentFederator's downstream WorkloadDeployment status + // watch. A manager.Manager embeds a cluster.Cluster, so it can be passed + // directly anywhere a watchable federation cluster source is required. + federationMgr, err := manager.New(federationRestConfig, manager.Options{ + Scheme: scheme, + Metrics: metricsserver.Options{BindAddress: "0"}, + }) + if err != nil { + return nil, fmt.Errorf("federation manager: %w", err) + } + + // The federator watches both the project WD (via the multicluster manager) + // and the downstream Karmada WD (via the federation cluster) so that status + // aggregated downstream by Karmada is mirrored back to the project WD + // immediately instead of on the next informer resync. + federator := &controller.WorkloadDeploymentFederator{ + FederationClient: federationClient, + FederationCluster: federationMgr, + } if err := federator.SetupWithManager(mgr); err != nil { return nil, fmt.Errorf("WorkloadDeploymentFederator: %w", err) } @@ -475,13 +495,6 @@ func setupManagementControllers(mgr mcmanager.Manager, federationClient client.C // InstanceProjector runs in the management plane, watches Instances written // back by POP-cell operators to the Karmada federation control plane, and // projects them into the corresponding project namespaces via the multicluster manager. - federationMgr, err := manager.New(federationRestConfig, manager.Options{ - Scheme: scheme, - Metrics: metricsserver.Options{BindAddress: "0"}, - }) - if err != nil { - return nil, fmt.Errorf("federation manager for InstanceProjector: %w", err) - } if err = (&controller.InstanceProjector{ FederationClient: federationClient, MCManager: mgr, diff --git a/internal/controller/workloaddeployment_federator.go b/internal/controller/workloaddeployment_federator.go index 9c736cf0..53e7e635 100644 --- a/internal/controller/workloaddeployment_federator.go +++ b/internal/controller/workloaddeployment_federator.go @@ -14,17 +14,21 @@ import ( "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/cluster" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/finalizer" "sigs.k8s.io/controller-runtime/pkg/log" mcbuilder "sigs.k8s.io/multicluster-runtime/pkg/builder" mccontext "sigs.k8s.io/multicluster-runtime/pkg/context" + mchandler "sigs.k8s.io/multicluster-runtime/pkg/handler" mcmanager "sigs.k8s.io/multicluster-runtime/pkg/manager" + "sigs.k8s.io/multicluster-runtime/pkg/multicluster" mcreconcile "sigs.k8s.io/multicluster-runtime/pkg/reconcile" karmadapolicyv1alpha1 "github.com/karmada-io/api/policy/v1alpha1" computev1alpha "go.datum.net/compute/api/v1alpha" "go.miloapis.com/milo/pkg/downstreamclient" + milosource "go.miloapis.com/milo/pkg/multicluster-runtime/source" ) const ( @@ -69,7 +73,15 @@ type WorkloadDeploymentFederator struct { // plane (the federation hub that the management controllers read and write // through). The caller (cmd/main.go) constructs it from --federation-kubeconfig. FederationClient client.Client - finalizers finalizer.Finalizers + // FederationCluster is a watchable cluster handle for the same Karmada + // federation control plane that FederationClient talks to. It is used to set + // up an informer-backed watch on the downstream WorkloadDeployment objects so + // that status aggregated by Karmada onto the downstream WD is mirrored back to + // the project-namespace WD immediately, rather than waiting for the next + // informer resync. When nil (e.g. in unit tests), the downstream watch is + // skipped and the controller falls back to watching only the VCP WD. + FederationCluster cluster.Cluster + finalizers finalizer.Finalizers } // +kubebuilder:rbac:groups=compute.datumapis.com,resources=workloaddeployments,verbs=get;list;watch;update;patch @@ -383,16 +395,107 @@ func (r *WorkloadDeploymentFederator) cleanupPropagationPolicyIfUnused( // SetupWithManager registers the controller with the multicluster manager. // It must only be called when FederationClient is non-nil. +// +// The controller watches two control planes: +// +// - The VCP/project WorkloadDeployment (via For), so spec changes in the +// project namespace trigger federation to the downstream control plane. +// - The downstream Karmada WorkloadDeployment (via WatchesRawSource against +// FederationCluster), so when Karmada aggregates new status onto the +// downstream WD the corresponding project WD is reconciled immediately and +// the status is mirrored back. Without this second watch the federator only +// caught up on the next informer resync (~10h), causing status lag. func (r *WorkloadDeploymentFederator) SetupWithManager(mgr mcmanager.Manager) error { r.mgr = mgr r.finalizers = finalizer.NewFinalizers() if err := r.finalizers.Register(federatorFinalizer, r); err != nil { return fmt.Errorf("failed to register federator finalizer: %w", err) } - return mcbuilder.ControllerManagedBy(mgr). + + b := mcbuilder.ControllerManagedBy(mgr). For(&computev1alpha.WorkloadDeployment{}, mcbuilder.WithEngageWithLocalCluster(false)). - Named("workload-deployment-federator"). - Complete(r) + Named("workload-deployment-federator") + + // Watch the downstream Karmada WorkloadDeployment whose status we mirror. + // FederationCluster is a watchable handle for the federation control plane; + // it is nil in unit tests, where only the For watch is exercised. + if r.FederationCluster != nil { + b = b.WatchesRawSource(milosource.MustNewClusterSource( + r.FederationCluster, + &computev1alpha.WorkloadDeployment{}, + mchandler.TypedEnqueueRequestsFromMapFunc(r.mapDownstreamDeploymentToRequest), + )) + } + + return b.Complete(r) +} + +// mapDownstreamDeploymentToRequest maps an event on a downstream Karmada +// WorkloadDeployment to a reconcile request for the corresponding +// project-namespace WorkloadDeployment. +// +// Correlation mirrors the identity the federator establishes when it mirrors the +// object downstream (see upsertDownstreamDeployment / ensureDownstreamNamespace): +// +// - The WD name is stable across all planes, so the request name equals the +// downstream WD name. +// - upsertDownstreamDeployment stamps the downstream WD with +// UpstreamOwnerNamespaceLabel = the project namespace, which becomes the +// request namespace. +// - The project cluster name is not on the WD itself; ensureDownstreamNamespace +// stamps it as UpstreamOwnerClusterNameLabel on the downstream namespace +// (encoded "cluster-" with "/" -> "_"). We read the namespace from the +// federation plane to recover and decode it. +// +// Events lacking the required correlation labels (e.g. WorkloadDeployments not +// created by this federator) are dropped. +func (r *WorkloadDeploymentFederator) mapDownstreamDeploymentToRequest( + ctx context.Context, + downstream *computev1alpha.WorkloadDeployment, +) []mcreconcile.Request { + logger := log.FromContext(ctx) + + projectNamespace := downstream.Labels[downstreamclient.UpstreamOwnerNamespaceLabel] + if projectNamespace == "" { + // Not federated by us (no upstream-namespace label) — nothing to enqueue. + return nil + } + + // Recover the project cluster name from the downstream namespace label. + var ns corev1.Namespace + if err := r.FederationCluster.GetClient().Get(ctx, types.NamespacedName{Name: downstream.Namespace}, &ns); err != nil { + logger.V(1).Info("unable to resolve downstream namespace for status mapping; dropping event", + "downstreamNamespace", downstream.Namespace, "error", err) + return nil + } + encodedClusterName := ns.Labels[downstreamclient.UpstreamOwnerClusterNameLabel] + if encodedClusterName == "" { + logger.V(1).Info("downstream namespace missing upstream-cluster-name label; dropping event", + "downstreamNamespace", downstream.Namespace) + return nil + } + clusterName := decodeUpstreamClusterName(encodedClusterName) + + return []mcreconcile.Request{ + { + ClusterName: multicluster.ClusterName(clusterName), + Request: ctrl.Request{ + NamespacedName: types.NamespacedName{ + Namespace: projectNamespace, + Name: downstream.Name, + }, + }, + }, + } +} + +// decodeUpstreamClusterName reverses the "cluster-" encoding (with "/" +// replaced by "_") that MappedNamespaceResourceStrategy applies to the +// UpstreamOwnerClusterNameLabel value, recovering the original project cluster +// name. +func decodeUpstreamClusterName(encoded string) string { + name := strings.TrimPrefix(encoded, "cluster-") + return strings.ReplaceAll(name, "_", "/") } // propagationPolicyNameFor returns the PropagationPolicy name for a given city diff --git a/internal/controller/workloaddeployment_federator_test.go b/internal/controller/workloaddeployment_federator_test.go index 2bd2169f..e51af553 100644 --- a/internal/controller/workloaddeployment_federator_test.go +++ b/internal/controller/workloaddeployment_federator_test.go @@ -4,6 +4,7 @@ package controller import ( "context" + "strings" "testing" "time" @@ -21,6 +22,7 @@ import ( mcreconcile "sigs.k8s.io/multicluster-runtime/pkg/reconcile" computev1alpha "go.datum.net/compute/api/v1alpha" + "go.miloapis.com/milo/pkg/downstreamclient" ) // ─── Shared test constants ──────────────────────────────────────────────────── @@ -118,6 +120,121 @@ func reconcileRequest() mcreconcile.Request { // ─── Unit tests ─────────────────────────────────────────────────────────────── +// TestMapDownstreamDeploymentToRequest verifies the downstream-WD → project-WD +// mapping used by the cross-plane status watch: the request name equals the +// downstream WD name, the namespace comes from the WD's upstream-namespace label, +// and the cluster name is decoded from the downstream namespace's +// upstream-cluster-name label. Events lacking correlation metadata are dropped. +func TestMapDownstreamDeploymentToRequest(t *testing.T) { + t.Parallel() + + // The encoded cluster name on the downstream namespace decodes to testCluster. + encodedCluster := "cluster-" + strings.ReplaceAll(testCluster, "/", "_") + + downstreamNS := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: testKarmadaNSStr, + Labels: map[string]string{ + downstreamclient.UpstreamOwnerClusterNameLabel: encodedCluster, + }, + }, + } + + newDownstreamWD := func(labels map[string]string) *computev1alpha.WorkloadDeployment { + return &computev1alpha.WorkloadDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: testWDName, + Namespace: testKarmadaNSStr, + Labels: labels, + }, + } + } + + tests := []struct { + name string + karmadaObjs []client.Object + downstreamWD *computev1alpha.WorkloadDeployment + want []mcreconcile.Request + }{ + { + name: "maps to project WD request", + karmadaObjs: []client.Object{downstreamNS}, + downstreamWD: newDownstreamWD(map[string]string{ + downstreamclient.UpstreamOwnerNamespaceLabel: testProjNS, + }), + want: []mcreconcile.Request{ + { + ClusterName: testCluster, + Request: ctrl.Request{ + NamespacedName: types.NamespacedName{ + Namespace: testProjNS, + Name: testWDName, + }, + }, + }, + }, + }, + { + name: "missing upstream-namespace label is dropped", + karmadaObjs: []client.Object{downstreamNS}, + downstreamWD: newDownstreamWD(nil), + want: nil, + }, + { + name: "missing downstream namespace is dropped", + karmadaObjs: nil, // namespace not present in federation cluster + downstreamWD: newDownstreamWD(map[string]string{ + downstreamclient.UpstreamOwnerNamespaceLabel: testProjNS, + }), + want: nil, + }, + { + name: "namespace without cluster label is dropped", + karmadaObjs: []client.Object{&corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{Name: testKarmadaNSStr}, + }}, + downstreamWD: newDownstreamWD(map[string]string{ + downstreamclient.UpstreamOwnerNamespaceLabel: testProjNS, + }), + want: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + karmadaClient := newKarmadaFakeClient(tt.karmadaObjs...) + r := &WorkloadDeploymentFederator{ + FederationClient: karmadaClient, + FederationCluster: newFakeCluster(karmadaClient), + } + + got := r.mapDownstreamDeploymentToRequest(context.Background(), tt.downstreamWD) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestDecodeUpstreamClusterName(t *testing.T) { + t.Parallel() + + tests := []struct { + encoded string + want string + }{ + {"cluster-datum-cloud", "datum-cloud"}, + {"cluster-org_project", "org/project"}, + {"cluster-test-project-cluster", "test-project-cluster"}, + } + for _, tt := range tests { + t.Run(tt.encoded, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tt.want, decodeUpstreamClusterName(tt.encoded)) + }) + } +} + func TestPropagationPolicyNameFor(t *testing.T) { t.Parallel() From 814db3189afd2e0d9a688e97f0116d48932d9b10 Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Tue, 2 Jun 2026 22:02:45 -0400 Subject: [PATCH 02/13] fix(controller): map downstream WD events to bare project cluster name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The downstream WorkloadDeployment status watch mapped events to a reconcile request whose ClusterName was the full decoded org/project path (decodeUpstreamClusterName turned the "cluster-_" namespace label into "/"). But the Milo multicluster provider keys project clusters by bare project name only. As a result every project except the org-less "datum-cloud" failed to resolve: mcmanager routed the unmatched name (ultimately the empty string) to the local host cluster, which has no compute CRDs, so Reconcile failed with "no matches for kind WorkloadDeployment" in a hot loop (~2 errors/sec observed on staging). Extract the bare project name (final path segment) so it matches the provider key, and guard the mapping with GetCluster: if the project cluster isn't engaged yet, drop the event instead of enqueuing a request that falls back to the host cluster and errors. Dropping is safe — once the provider engages the cluster, the For watch reconciles it and the next downstream status event maps cleanly. Rename decodeUpstreamClusterName to projectClusterNameFromLabel to reflect that it now returns the provider cluster key, and add the not-engaged drop case to the mapping test. Co-Authored-By: Claude Sonnet 4.6 --- .../workloaddeployment_federator.go | 46 ++++++++++++++++--- .../workloaddeployment_federator_test.go | 31 +++++++++++-- 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/internal/controller/workloaddeployment_federator.go b/internal/controller/workloaddeployment_federator.go index 53e7e635..2dadb429 100644 --- a/internal/controller/workloaddeployment_federator.go +++ b/internal/controller/workloaddeployment_federator.go @@ -474,7 +474,26 @@ func (r *WorkloadDeploymentFederator) mapDownstreamDeploymentToRequest( "downstreamNamespace", downstream.Namespace) return nil } - clusterName := decodeUpstreamClusterName(encodedClusterName) + clusterName := projectClusterNameFromLabel(encodedClusterName) + if clusterName == "" { + logger.V(1).Info("undecodable upstream-cluster-name label; dropping event", + "downstreamNamespace", downstream.Namespace, "encoded", encodedClusterName) + return nil + } + + // Verify the project cluster is engaged before enqueuing. The Milo + // multicluster provider keys clusters by bare project name, and GetCluster + // returns an error for an unknown name. Without this guard, an unresolvable + // name — or the empty string, which mcmanager routes to the local host + // cluster that has no compute CRDs — would make Reconcile fail with + // "no matches for kind WorkloadDeployment" in a hot loop. Dropping the event + // is safe: once the provider engages the project cluster, the For watch + // reconciles it and the next downstream status event maps cleanly. + if _, err := r.mgr.GetCluster(ctx, multicluster.ClusterName(clusterName)); err != nil { + logger.V(1).Info("project cluster not engaged for downstream status mapping; dropping event", + "clusterName", clusterName, "downstreamNamespace", downstream.Namespace, "error", err) + return nil + } return []mcreconcile.Request{ { @@ -489,13 +508,26 @@ func (r *WorkloadDeploymentFederator) mapDownstreamDeploymentToRequest( } } -// decodeUpstreamClusterName reverses the "cluster-" encoding (with "/" -// replaced by "_") that MappedNamespaceResourceStrategy applies to the -// UpstreamOwnerClusterNameLabel value, recovering the original project cluster -// name. -func decodeUpstreamClusterName(encoded string) string { +// projectClusterNameFromLabel extracts the project cluster name that the Milo +// multicluster provider uses as its cluster key from a downstream namespace's +// UpstreamOwnerClusterNameLabel value. +// +// MappedNamespaceResourceStrategy encodes the label as "cluster-_" +// (with "/" replaced by "_"), e.g. "cluster-datum-cloud" (no org) or +// "cluster-_test-project-abc" (empty org). The provider, however, keys clusters +// by bare project name only (multicluster provider: key = project.Name), so we +// strip the "cluster-" prefix, decode "_" back to "/", and return the final path +// segment — the project name. Examples: +// +// "cluster-datum-cloud" -> "datum-cloud" +// "cluster-_test-project-abc" -> "test-project-abc" +func projectClusterNameFromLabel(encoded string) string { name := strings.TrimPrefix(encoded, "cluster-") - return strings.ReplaceAll(name, "_", "/") + name = strings.ReplaceAll(name, "_", "/") + if i := strings.LastIndex(name, "/"); i >= 0 { + name = name[i+1:] + } + return name } // propagationPolicyNameFor returns the PropagationPolicy name for a given city diff --git a/internal/controller/workloaddeployment_federator_test.go b/internal/controller/workloaddeployment_federator_test.go index e51af553..0fbee7fd 100644 --- a/internal/controller/workloaddeployment_federator_test.go +++ b/internal/controller/workloaddeployment_federator_test.go @@ -140,6 +140,17 @@ func TestMapDownstreamDeploymentToRequest(t *testing.T) { }, } + // A downstream namespace whose cluster label decodes to a project cluster the + // manager has not engaged — used to verify the not-engaged drop path. + unknownClusterNS := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: testKarmadaNSStr, + Labels: map[string]string{ + downstreamclient.UpstreamOwnerClusterNameLabel: "cluster-unregistered-project", + }, + }, + } + newDownstreamWD := func(labels map[string]string) *computev1alpha.WorkloadDeployment { return &computev1alpha.WorkloadDeployment{ ObjectMeta: metav1.ObjectMeta{ @@ -198,6 +209,14 @@ func TestMapDownstreamDeploymentToRequest(t *testing.T) { }), want: nil, }, + { + name: "project cluster not engaged is dropped", + karmadaObjs: []client.Object{unknownClusterNS}, + downstreamWD: newDownstreamWD(map[string]string{ + downstreamclient.UpstreamOwnerNamespaceLabel: testProjNS, + }), + want: nil, + }, } for _, tt := range tests { @@ -206,6 +225,9 @@ func TestMapDownstreamDeploymentToRequest(t *testing.T) { karmadaClient := newKarmadaFakeClient(tt.karmadaObjs...) r := &WorkloadDeploymentFederator{ + // Only testCluster is engaged; the not-engaged case decodes to a + // different project name and must be dropped by the GetCluster guard. + mgr: newFakeMCManager(testCluster, newFakeCluster(karmadaClient)), FederationClient: karmadaClient, FederationCluster: newFakeCluster(karmadaClient), } @@ -216,7 +238,7 @@ func TestMapDownstreamDeploymentToRequest(t *testing.T) { } } -func TestDecodeUpstreamClusterName(t *testing.T) { +func TestProjectClusterNameFromLabel(t *testing.T) { t.Parallel() tests := []struct { @@ -224,13 +246,16 @@ func TestDecodeUpstreamClusterName(t *testing.T) { want string }{ {"cluster-datum-cloud", "datum-cloud"}, - {"cluster-org_project", "org/project"}, + // Org-scoped encodings decode to org/project; the provider keys on the + // bare project name, so only the final path segment is returned. + {"cluster-org_project", "project"}, + {"cluster-_test-project-abc", "test-project-abc"}, {"cluster-test-project-cluster", "test-project-cluster"}, } for _, tt := range tests { t.Run(tt.encoded, func(t *testing.T) { t.Parallel() - assert.Equal(t, tt.want, decodeUpstreamClusterName(tt.encoded)) + assert.Equal(t, tt.want, projectClusterNameFromLabel(tt.encoded)) }) } } From 041151832f853d15e10e1cc668e2fa5f071bb893 Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Tue, 2 Jun 2026 22:40:54 -0400 Subject: [PATCH 03/13] fix(controller): preserve cluster name on downstream WD status watch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The downstream WorkloadDeployment status watch was a complete no-op and the source of a steady ~130 errors/min on the management plane. Two layered causes: milosource.NewClusterSource binds the raw source to the empty cluster name, and the default mchandler.TypedEnqueueRequestsFromMapFunc wraps the map in TypedInjectCluster, which overwrites each request's ClusterName with that bound empty name. So the project cluster name computed by mapDownstreamDeploymentToRequest (and validated by its GetCluster guard) was discarded at enqueue time; every downstream event reached Reconcile with ClusterName="". mcmanager routes the empty name to the local host management cluster, which has no compute CRDs, so the Get failed with "no matches for kind WorkloadDeployment" and requeued in a hot loop — while the watch's actual purpose (immediate status mirror-back) never ran for any project. Switch the handler to TypedEnqueueRequestsFromMapFuncWithClusterPreservation so the map's project cluster name survives to Reconcile, making the downstream watch functional. Add a defensive guard at the top of Reconcile that drops (returns nil, not an error) any request with an empty cluster name, so a host-cluster fallback can never again spin in a requeue loop. Co-Authored-By: Claude Sonnet 4.6 --- .../workloaddeployment_federator.go | 24 ++++++++++++++++++- .../workloaddeployment_federator_test.go | 22 +++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/internal/controller/workloaddeployment_federator.go b/internal/controller/workloaddeployment_federator.go index 2dadb429..651db2e6 100644 --- a/internal/controller/workloaddeployment_federator.go +++ b/internal/controller/workloaddeployment_federator.go @@ -17,6 +17,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/cluster" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/finalizer" + "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/log" mcbuilder "sigs.k8s.io/multicluster-runtime/pkg/builder" mccontext "sigs.k8s.io/multicluster-runtime/pkg/context" @@ -96,6 +97,16 @@ func (r *WorkloadDeploymentFederator) Reconcile(ctx context.Context, req mcrecon logger := log.FromContext(ctx) + // An empty cluster name resolves to the local host management cluster, which + // has no compute CRDs — any Get would fail with "no matches for kind" and + // requeue in a hot loop. The For watch (EngageWithLocalCluster=false) and the + // preservation-wrapped downstream watch both set a real project cluster name, + // so an empty name here is never legitimate. Drop it without erroring. + if req.ClusterName == "" { + logger.V(1).Info("dropping reconcile with empty cluster name") + return ctrl.Result{}, nil + } + cl, err := r.mgr.GetCluster(ctx, req.ClusterName) if err != nil { return ctrl.Result{}, err @@ -419,11 +430,22 @@ func (r *WorkloadDeploymentFederator) SetupWithManager(mgr mcmanager.Manager) er // Watch the downstream Karmada WorkloadDeployment whose status we mirror. // FederationCluster is a watchable handle for the federation control plane; // it is nil in unit tests, where only the For watch is exercised. + // + // The handler MUST preserve the ClusterName that mapDownstreamDeploymentToRequest + // sets. milosource binds the raw source to the empty cluster name, and the + // default TypedEnqueueRequestsFromMapFunc wraps the map in TypedInjectCluster, + // which overwrites each request's ClusterName with that bound empty name — so + // every request would resolve to the local host cluster (no compute CRDs) and + // fail with "no matches for kind WorkloadDeployment". The preservation variant + // skips that injection so our project-cluster ClusterName survives to Reconcile. if r.FederationCluster != nil { + preserveClusterName := func(_ multicluster.ClusterName, _ cluster.Cluster) handler.TypedEventHandler[*computev1alpha.WorkloadDeployment, mcreconcile.Request] { + return mchandler.TypedEnqueueRequestsFromMapFuncWithClusterPreservation(r.mapDownstreamDeploymentToRequest) + } b = b.WatchesRawSource(milosource.MustNewClusterSource( r.FederationCluster, &computev1alpha.WorkloadDeployment{}, - mchandler.TypedEnqueueRequestsFromMapFunc(r.mapDownstreamDeploymentToRequest), + preserveClusterName, )) } diff --git a/internal/controller/workloaddeployment_federator_test.go b/internal/controller/workloaddeployment_federator_test.go index 0fbee7fd..84a6f76d 100644 --- a/internal/controller/workloaddeployment_federator_test.go +++ b/internal/controller/workloaddeployment_federator_test.go @@ -297,6 +297,28 @@ func TestWorkloadDeploymentFederator_NoFederationClient(t *testing.T) { assert.Equal(t, ctrl.Result{}, result) } +// TestWorkloadDeploymentFederator_EmptyClusterNameDropped verifies that a +// reconcile request carrying an empty cluster name is dropped without error +// (and without touching GetCluster), so it can never fall back to the local +// host cluster and spin in a "no matches for kind" requeue loop. +func TestWorkloadDeploymentFederator_EmptyClusterNameDropped(t *testing.T) { + t.Parallel() + + projectClient := newProjectFakeClient(testProjectNamespace(), testWorkloadDeployment()) + karmadaClient := newKarmadaFakeClient() + r := newTestFederator(projectClient, karmadaClient) + + req := mcreconcile.Request{ + ClusterName: "", + Request: ctrl.Request{ + NamespacedName: types.NamespacedName{Name: testWDName, Namespace: testProjNS}, + }, + } + result, err := r.Reconcile(context.Background(), req) + require.NoError(t, err) + assert.Equal(t, ctrl.Result{}, result) +} + // TestWorkloadDeploymentFederator_AddsFinalizerOnFirstSeen verifies that the // first reconcile of a brand-new WorkloadDeployment adds the finalizer and // returns without federating (the finalizer update triggers a re-queue). From 2ef8246fec88399c579ce86390ad584f38cc0af1 Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Wed, 3 Jun 2026 17:43:29 -0400 Subject: [PATCH 04/13] fix(controller): re-enqueue instance on quota grant; key claim by instance name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An Instance could wedge Pending forever (QuotaGranted=Unknown/QuotaNoBudget, Quota scheduling gate never removed) even though its Milo ResourceClaim was granted: the Instance reconciled once while the claim was still pending, and nothing re-triggered it when the grant landed a beat later. The ResourceClaim watch mapped a claim to its Spec.ResourceRef — the Project — so the grant enqueued the project name, never the owning Instance. Fix the watch to enqueue the owning Instance: its namespace is carried on a new compute.datumapis.com/instance-namespace label (the claim lives in the project quota namespace, not the Instance's), and its name is the claim name with the resource-kind prefix stripped. Also name the claim after the Instance (unique among Instances in the project control plane) with an "instance-" prefix so it cannot collide with other resource kinds' claims sharing the quota namespace, replacing the previous "--" scheme. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/controller/instance_controller.go | 41 ++++++++++++++++--- .../controller/instance_controller_test.go | 14 +++---- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/internal/controller/instance_controller.go b/internal/controller/instance_controller.go index f11520a7..15555a69 100644 --- a/internal/controller/instance_controller.go +++ b/internal/controller/instance_controller.go @@ -57,6 +57,19 @@ const ( // the same project control planes. instanceQuotaClaimSourceLabel = "compute.datumapis.com/source-cluster" + // instanceQuotaClaimNamespaceLabel records the source Instance's namespace on + // the ResourceClaim. The claim lives in the project's quota namespace (not the + // Instance's namespace), so the claim watch reads this label to map a grant + // back to the owning Instance. + instanceQuotaClaimNamespaceLabel = "compute.datumapis.com/instance-namespace" + + // instanceQuotaClaimNamePrefix namespaces an Instance's ResourceClaim name by + // resource type. Claims for different resource kinds share the project quota + // namespace, so the Instance name alone (unique among Instances, but not + // across kinds) could collide with another kind's claim — the prefix prevents + // that. The claim watch strips it to recover the Instance name. + instanceQuotaClaimNamePrefix = "instance-" + // quotaResourceTypeInstances is the quota resource type for Instance count. quotaResourceTypeInstances = "compute.datumapis.com/instances" @@ -275,7 +288,7 @@ func (r *InstanceReconciler) reconcileDeletion(ctx context.Context, cl client.Cl if err != nil { return fmt.Errorf("resolving project namespace during deletion: %w", err) } - claimName := fmt.Sprintf("%s--%s", instance.Namespace, instance.Name) + claimName := quotaClaimName(instance) var claim quotav1alpha1.ResourceClaim if err := projectClient.Get(ctx, client.ObjectKey{Namespace: claimNamespace, Name: claimName}, &claim); err != nil { if !apierrors.IsNotFound(err) { @@ -296,6 +309,16 @@ func (r *InstanceReconciler) reconcileDeletion(ctx context.Context, cl client.Cl return nil } +// quotaClaimName returns the name of the ResourceClaim backing an Instance's +// quota: the Instance name (unique among Instances within the project control +// plane) prefixed by instanceQuotaClaimNamePrefix to avoid colliding with other +// resource kinds' claims in the shared quota namespace. The owning Instance's +// namespace is preserved on the claim via instanceQuotaClaimNamespaceLabel so +// the claim watch can map a grant back to the Instance. +func quotaClaimName(instance *computev1alpha.Instance) string { + return instanceQuotaClaimNamePrefix + instance.Name +} + // reconcileQuotaCondition reconciles the ResourceClaim and updates the // InstanceQuotaGranted status condition. It returns (changed, err) where // changed=true means a status update is required, and err non-nil means the @@ -627,7 +650,7 @@ func (r *InstanceReconciler) reconcileQuotaClaim(ctx context.Context, clusterNam return nil, nil } - claimName := fmt.Sprintf("%s--%s", instance.Namespace, instance.Name) + claimName := quotaClaimName(instance) requests := []quotav1alpha1.ResourceRequest{ { @@ -657,7 +680,8 @@ func (r *InstanceReconciler) reconcileQuotaClaim(ctx context.Context, clusterNam Name: claimName, Namespace: claimNamespace, Labels: map[string]string{ - instanceQuotaClaimSourceLabel: r.edgeClusterName, + instanceQuotaClaimSourceLabel: r.edgeClusterName, + instanceQuotaClaimNamespaceLabel: instance.Namespace, }, }, Spec: quotav1alpha1.ResourceClaimSpec{ @@ -1033,15 +1057,20 @@ func (r *InstanceReconciler) SetupWithManager( return handler.TypedEnqueueRequestsFromMapFunc( func(ctx context.Context, obj client.Object) []mcreconcile.Request { claim := obj.(*quotav1alpha1.ResourceClaim) - if claim.Spec.ResourceRef.Name == "" { + // Map the claim back to its owning Instance. The Instance + // namespace is carried on a label (the claim itself lives in + // the project's quota namespace) and the Instance name is the + // claim name with the resource-kind prefix stripped. + instanceNamespace := claim.GetLabels()[instanceQuotaClaimNamespaceLabel] + if instanceNamespace == "" { return nil } return []mcreconcile.Request{ { Request: reconcile.Request{ NamespacedName: types.NamespacedName{ - Namespace: claim.Spec.ResourceRef.Namespace, - Name: claim.Spec.ResourceRef.Name, + Namespace: instanceNamespace, + Name: strings.TrimPrefix(claim.Name, instanceQuotaClaimNamePrefix), }, }, ClusterName: r.resolveClusterNameForProject(claim.Spec.ConsumerRef.Name), diff --git a/internal/controller/instance_controller_test.go b/internal/controller/instance_controller_test.go index 31636c3f..ed3a1ef8 100644 --- a/internal/controller/instance_controller_test.go +++ b/internal/controller/instance_controller_test.go @@ -479,7 +479,7 @@ func TestReconcileQuota(t *testing.T) { instanceName = "my-instance" ) - claimName := namespace + "--" + instanceName + claimName := instanceQuotaClaimNamePrefix + instanceName const deploymentName = "my-deployment" @@ -812,7 +812,7 @@ func TestQuotaGateRemovedInSingleReconcile(t *testing.T) { deploymentName = "my-deployment" ) - claimName := namespace + "--" + instanceName + claimName := instanceQuotaClaimNamePrefix + instanceName tests := []struct { name string @@ -994,9 +994,9 @@ func TestReconcileQuotaSingleMode(t *testing.T) { deploymentName = "my-deployment" ) - // Claim name uses the edge namespace prefix (stable identifier for the claim) - // but the claim object itself lives in projectNS. - claimName := edgeNS + "--" + instanceName + // Claim name is the instance-prefixed Instance name; the claim object itself + // lives in projectNS (the instance's edge namespace is carried on a label). + claimName := instanceQuotaClaimNamePrefix + instanceName s := newTestScheme(t) @@ -1346,7 +1346,7 @@ func TestReconcileQuotaFailureModes(t *testing.T) { s := newTestScheme(t) fakeRecorder := record.NewFakeRecorder(10) - claimName := testNS + "--" + testInstance + claimName := instanceQuotaClaimNamePrefix + testInstance pendingClaim := "av1alpha1.ResourceClaim{ ObjectMeta: metav1.ObjectMeta{Name: claimName, Namespace: testNS}, Spec: quotav1alpha1.ResourceClaimSpec{ @@ -1498,7 +1498,7 @@ func TestReconcileQuotaFailureModes(t *testing.T) { WithStatusSubresource(&computev1alpha.Instance{}). Build() - claimName := testNS + "--" + testInstance + claimName := instanceQuotaClaimNamePrefix + testInstance grantedClaim := "av1alpha1.ResourceClaim{ ObjectMeta: metav1.ObjectMeta{Name: claimName, Namespace: testNS}, Spec: quotav1alpha1.ResourceClaimSpec{ From 8305ef6885c64d387851ca6fa11bef9aae4d28c8 Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Wed, 3 Jun 2026 19:03:49 -0400 Subject: [PATCH 05/13] fix(controller): roll instances by recreate so restart actually rolls them MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A template-hash change (an image update, or a restartedAt annotation from `datumctl compute restart`) previously resolved to an in-place Update of the Instance. The unikraft provider bakes the pod at creation time and never recomputes an existing pod's spec, so the in-place update silently failed to roll the running workload — instances kept their old pod. Emit a delete (recreate) for drifted Ready instances instead. The next reconcile refills the slot via the create path with the new template, and the provider's finalizer-gated teardown plus create-on-new-Instance roll the pod with no provider changes. Ordered one-at-a-time pacing is preserved by the existing descending-ordinal sort, skip-all-but-first, and the DeletionTimestamp WaitAction. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../stateful/stateful_control.go | 33 ++++--- .../stateful/stateful_control_test.go | 90 +++++++++++-------- 2 files changed, 73 insertions(+), 50 deletions(-) diff --git a/internal/controller/instancecontrol/stateful/stateful_control.go b/internal/controller/instancecontrol/stateful/stateful_control.go index 2d2e3073..6dd8934d 100644 --- a/internal/controller/instancecontrol/stateful/stateful_control.go +++ b/internal/controller/instancecontrol/stateful/stateful_control.go @@ -53,8 +53,10 @@ func (c *statefulControl) GetActions( var createActions []instancecontrol.Action var waitActions []instancecontrol.Action - // highest -> lowest - var updateActions []instancecontrol.Action + // highest -> lowest. Instances whose template hash has drifted from the + // desired template are deleted and recreated (not updated in place) so the + // change actually rolls the backing pod — see the recreate branch below. + var recreateActions []instancecontrol.Action // highest -> lowest var deleteActions []instancecontrol.Action @@ -129,14 +131,19 @@ func (c *statefulControl) GetActions( if !apimeta.IsStatusConditionTrue(instance.Status.Conditions, v1alpha.InstanceReady) { waitActions = append(waitActions, instancecontrol.NewWaitAction(instance)) } else if needsUpdate(instance, instanceTemplateHash) { - updatedInstance := instance.DeepCopy() - updatedInstance.Annotations = deployment.Spec.Template.Annotations - updatedInstance.Labels = deployment.Spec.Template.Labels - - addInstanceControllerLabels(updatedInstance, getInstanceOrdinal(updatedInstance.Name), deployment) - - updatedInstance.Spec = deployment.Spec.Template.Spec - updateActions = append(updateActions, instancecontrol.NewUpdateAction(updatedInstance)) + // The instance's template hash no longer matches the desired + // template — e.g. an image change, or a restart requested via the + // RestartedAtAnnotation, which is part of the template hash. The + // unikraft provider bakes the pod's runtime, rootfs, and file + // mounts at pod-creation time and never reconciles an existing + // pod's spec, so an in-place Instance update would silently fail to + // roll the running workload. Delete the instance instead; the next + // reconcile recreates it from the current template via the create + // path above, and the provider tears down the old pod + // (finalizer-gated) and boots a fresh one. Ordered, one-at-a-time + // pacing is preserved by the descending-ordinal sort, the + // skip-all-but-first logic, and the DeletionTimestamp WaitAction. + recreateActions = append(recreateActions, instancecontrol.NewDeleteAction(instance)) } } } @@ -168,10 +175,10 @@ func (c *statefulControl) GetActions( } } - slices.SortFunc(updateActions, descendingOrdinal) + slices.SortFunc(recreateActions, descendingOrdinal) slices.SortFunc(deleteActions, descendingOrdinal) - actions := make([]instancecontrol.Action, 0, len(createActions)+len(waitActions)+len(updateActions)+len(deleteActions)+len(patchLabelActions)) + actions := make([]instancecontrol.Action, 0, len(createActions)+len(waitActions)+len(recreateActions)+len(deleteActions)+len(patchLabelActions)) switch deployment.Spec.ScaleSettings.InstanceManagementPolicy { case v1alpha.OrderedReadyInstanceManagementPolicyType: @@ -186,7 +193,7 @@ func (c *statefulControl) GetActions( slices.SortFunc(actions, ascendingOrdinal) - actions = append(actions, updateActions...) + actions = append(actions, recreateActions...) actions = append(actions, deleteActions...) // Skip all actions except the first one. diff --git a/internal/controller/instancecontrol/stateful/stateful_control_test.go b/internal/controller/instancecontrol/stateful/stateful_control_test.go index ffc04272..985c78b2 100644 --- a/internal/controller/instancecontrol/stateful/stateful_control_test.go +++ b/internal/controller/instancecontrol/stateful/stateful_control_test.go @@ -49,6 +49,11 @@ func TestFreshDeployment(t *testing.T) { assert.True(t, actions[1].IsSkipped()) } +// TestUpdateWithAllReadyInstances verifies that a template change on Ready +// instances rolls them by delete+recreate (not an in-place update), ordered +// highest-ordinal-first with only the first action active. An in-place update +// would never roll the backing pod, since the unikraft provider bakes the pod +// at creation time and ignores spec changes on an existing pod. func TestUpdateWithAllReadyInstances(t *testing.T) { ctx := context.Background() control := New() @@ -67,11 +72,11 @@ func TestUpdateWithAllReadyInstances(t *testing.T) { assert.Len(t, actions, 2) assert.Equal(t, "test-deploy-1", actions[0].Object.GetName()) - assert.Equal(t, instancecontrol.ActionTypeUpdate, actions[0].ActionType()) + assert.Equal(t, instancecontrol.ActionTypeDelete, actions[0].ActionType()) assert.False(t, actions[0].IsSkipped()) assert.Equal(t, "test-deploy-0", actions[1].Object.GetName()) - assert.Equal(t, instancecontrol.ActionTypeUpdate, actions[1].ActionType()) + assert.Equal(t, instancecontrol.ActionTypeDelete, actions[1].ActionType()) assert.True(t, actions[1].IsSkipped()) } @@ -244,38 +249,48 @@ func TestInstanceLabels_FourNewLabelsStamped(t *testing.T) { "PlacementNameLabel must equal deployment.Spec.PlacementName") } -// TestInstanceLabels_PropagatedOnUpdate verifies that when an existing instance -// is updated (rolling update path), the four new labels are refreshed from the -// deployment so they remain accurate after spec changes. -func TestInstanceLabels_PropagatedOnUpdate(t *testing.T) { +// TestInstanceLabels_RefreshedOnRecreate verifies that when a template change +// rolls an instance, the recreated instance carries the four self-describing +// labels sourced from the WorkloadDeployment. A template change no longer +// updates the instance in place; it deletes the drifted instance and recreates +// it via the create path on the following reconcile, which stamps the labels. +func TestInstanceLabels_RefreshedOnRecreate(t *testing.T) { ctx := context.Background() control := New() deployment := getWorkloadDeployment("test-labels-update", 1) - // Build a ready existing instance. + // A ready existing instance on the old template hash. currentInstances := []v1alpha.Instance{*getInstanceForDeployment(deployment, 0)} - // Trigger a rolling update by changing the image. + // Trigger a roll by changing the image. deployment.Spec.Template.Spec.Runtime.Sandbox.Containers[0].Image = "updated-image" + // First reconcile: the drifted instance is deleted (recreate), not updated. actions, err := control.GetActions(ctx, scheme, deployment, currentInstances) + assert.NoError(t, err) + assert.Len(t, actions, 1) + assert.Equal(t, instancecontrol.ActionTypeDelete, actions[0].ActionType()) + assert.Equal(t, "test-labels-update-0", actions[0].Object.GetName()) + // Next reconcile, after the old instance has been fully deleted and is gone: + // the empty slot is refilled by the create path, which stamps the labels. + actions, err = control.GetActions(ctx, scheme, deployment, nil) assert.NoError(t, err) assert.Len(t, actions, 1) - assert.Equal(t, instancecontrol.ActionTypeUpdate, actions[0].ActionType()) + assert.Equal(t, instancecontrol.ActionTypeCreate, actions[0].ActionType()) instance, ok := actions[0].Object.(*v1alpha.Instance) assert.True(t, ok) assert.Equal(t, deployment.GetName(), instance.Labels[v1alpha.WorkloadDeploymentNameLabel], - "WorkloadDeploymentNameLabel must be refreshed on update") + "WorkloadDeploymentNameLabel must be set on the recreated instance") assert.Equal(t, deployment.Spec.CityCode, instance.Labels[v1alpha.CityCodeLabel], - "CityCodeLabel must be refreshed on update") + "CityCodeLabel must be set on the recreated instance") assert.Equal(t, deployment.Spec.WorkloadRef.Name, instance.Labels[v1alpha.WorkloadNameLabel], - "WorkloadNameLabel must be refreshed on update") + "WorkloadNameLabel must be set on the recreated instance") assert.Equal(t, deployment.Spec.PlacementName, instance.Labels[v1alpha.PlacementNameLabel], - "PlacementNameLabel must be refreshed on update") + "PlacementNameLabel must be set on the recreated instance") } // TestInstanceLocation_SetWhenDeploymentStatusLocationPresent verifies that when @@ -331,7 +346,7 @@ func TestInstanceLocation_NilWhenDeploymentStatusLocationAbsent(t *testing.T) { // TestLabelBackfill_NotReadyMatchingHash verifies that a not-Ready instance // with an unchanged template hash receives a PatchLabels action when it is -// missing controller-managed labels. The action must not be a rollout Update, +// missing controller-managed labels. The action must not be a rollout recreate, // must not alter spec/template, and must not block subsequent instances. func TestLabelBackfill_NotReadyMatchingHash(t *testing.T) { ctx := context.Background() @@ -361,15 +376,15 @@ func TestLabelBackfill_NotReadyMatchingHash(t *testing.T) { assert.NoError(t, err) // Collect actions by type. - var waitActions, createActions, updateActions, patchActions []instancecontrol.Action + var waitActions, createActions, recreateActions, patchActions []instancecontrol.Action for _, a := range actions { switch a.ActionType() { case instancecontrol.ActionTypeWait: waitActions = append(waitActions, a) case instancecontrol.ActionTypeCreate: createActions = append(createActions, a) - case instancecontrol.ActionTypeUpdate: - updateActions = append(updateActions, a) + case instancecontrol.ActionTypeDelete: + recreateActions = append(recreateActions, a) case instancecontrol.ActionTypePatchLabels: patchActions = append(patchActions, a) } @@ -383,8 +398,8 @@ func TestLabelBackfill_NotReadyMatchingHash(t *testing.T) { assert.Len(t, createActions, 1, "instance-1 create action must be present") assert.True(t, createActions[0].IsSkipped(), "create for instance-1 must be skipped while instance-0 is waiting") - // No template Update actions must be produced. - assert.Empty(t, updateActions, "no template Update must be produced for a matching-hash instance") + // No rollout recreate actions must be produced. + assert.Empty(t, recreateActions, "no rollout recreate must be produced for a matching-hash instance") // A PatchLabels action must be produced for instance-0. assert.Len(t, patchActions, 1, "exactly one PatchLabels action for the label-drifted instance") @@ -439,7 +454,7 @@ func TestLabelBackfill_Idempotent(t *testing.T) { // TestLabelBackfill_ReadyInstanceCorrected verifies that a Ready instance with // correct template hash but drifted labels receives a PatchLabels action -// without triggering a template rollout Update. +// without triggering a rollout recreate. func TestLabelBackfill_ReadyInstanceCorrected(t *testing.T) { ctx := context.Background() control := New() @@ -456,18 +471,18 @@ func TestLabelBackfill_ReadyInstanceCorrected(t *testing.T) { assert.NoError(t, err) - var updateActions, patchActions []instancecontrol.Action + var recreateActions, patchActions []instancecontrol.Action for _, a := range actions { switch a.ActionType() { - case instancecontrol.ActionTypeUpdate: - updateActions = append(updateActions, a) + case instancecontrol.ActionTypeDelete: + recreateActions = append(recreateActions, a) case instancecontrol.ActionTypePatchLabels: patchActions = append(patchActions, a) } } - // No template Update must be produced — template hash matches. - assert.Empty(t, updateActions, "no template Update must be produced for a matching-hash ready instance") + // No rollout recreate must be produced — template hash matches. + assert.Empty(t, recreateActions, "no rollout recreate must be produced for a matching-hash ready instance") // A PatchLabels action must be produced. assert.Len(t, patchActions, 1, "PatchLabels action must be produced for the label-drifted ready instance") @@ -478,8 +493,9 @@ func TestLabelBackfill_ReadyInstanceCorrected(t *testing.T) { } // TestLabelBackfill_DoesNotAffectRollingUpdate verifies that a genuine template -// change on a Ready instance still produces a normal ordered Update action and -// that the PatchLabels path does not interfere with or duplicate it. +// change on a Ready instance still produces the normal ordered roll (a recreate +// Delete per instance) and that the PatchLabels path does not interfere with or +// duplicate it. func TestLabelBackfill_DoesNotAffectRollingUpdate(t *testing.T) { ctx := context.Background() control := New() @@ -516,23 +532,23 @@ func TestLabelBackfill_DoesNotAffectRollingUpdate(t *testing.T) { assert.NoError(t, err) - var updateActions, patchActions []instancecontrol.Action + var recreateActions, patchActions []instancecontrol.Action for _, a := range actions { switch a.ActionType() { - case instancecontrol.ActionTypeUpdate: - updateActions = append(updateActions, a) + case instancecontrol.ActionTypeDelete: + recreateActions = append(recreateActions, a) case instancecontrol.ActionTypePatchLabels: patchActions = append(patchActions, a) } } - // Two Update actions expected (one per instance), ordered highest-to-lowest. - assert.Len(t, updateActions, 2, "both instances must produce Update actions on template change") - assert.Equal(t, "test-backfill-rolling-1", updateActions[0].Object.GetName(), - "Update actions must be ordered highest ordinal first") - assert.Equal(t, "test-backfill-rolling-0", updateActions[1].Object.GetName()) - assert.False(t, updateActions[0].IsSkipped(), "first Update must be active") - assert.True(t, updateActions[1].IsSkipped(), "second Update must be skipped (ordered rollout)") + // Two recreate (Delete) actions expected (one per instance), ordered highest-to-lowest. + assert.Len(t, recreateActions, 2, "both instances must produce recreate actions on template change") + assert.Equal(t, "test-backfill-rolling-1", recreateActions[0].Object.GetName(), + "recreate actions must be ordered highest ordinal first") + assert.Equal(t, "test-backfill-rolling-0", recreateActions[1].Object.GetName()) + assert.False(t, recreateActions[0].IsSkipped(), "first recreate must be active") + assert.True(t, recreateActions[1].IsSkipped(), "second recreate must be skipped (ordered rollout)") // No PatchLabels — all labels are already correct. assert.Empty(t, patchActions, "no PatchLabels when all labels are already correct") From 24101351bc01e56f77da5287eb878ddac9a16f77 Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Wed, 3 Jun 2026 20:26:08 -0400 Subject: [PATCH 06/13] refactor(api)!: rename Instance "Running" condition to "Available" The Instance "Running" status condition is renamed to "Available" (wire value "Available"). An instance can be available while not actively running a pod (e.g. scaled to zero), so "Running" was misleading as a serving/health signal. Renamed constants: InstanceRunning -> InstanceAvailable ("Available") InstanceReadyReasonRunning -> InstanceReadyReasonAvailable ("Available") InstanceRunningReasonRunning -> InstanceAvailableReasonAvailable ("Available") InstanceRunningReasonStopped -> InstanceAvailableReasonStopped InstanceRunningReasonStarting -> InstanceAvailableReasonStarting InstanceRunningReasonStopping -> InstanceAvailableReasonStopping BREAKING CHANGE: the on-the-wire Instance condition type changes from "Running" to "Available". Consumers reading conditions[type=="Running"] must switch to "Available". Existing Instances self-heal on the next provider reconcile (the provider re-asserts the condition under its new name); the stale "Running" entry lingers cosmetically until then and is no longer read by the Ready derivation. Co-Authored-By: Claude Opus 4.8 (1M context) --- api/v1alpha/instance_types.go | 28 +++++------ .../compute.datumapis.com_instances.yaml | 2 +- internal/controller/instance_controller.go | 24 +++++----- .../controller/instance_controller_test.go | 46 +++++++++---------- .../controller/instance_writeback_test.go | 2 +- 5 files changed, 52 insertions(+), 50 deletions(-) diff --git a/api/v1alpha/instance_types.go b/api/v1alpha/instance_types.go index cb1698b3..3f61955b 100644 --- a/api/v1alpha/instance_types.go +++ b/api/v1alpha/instance_types.go @@ -404,8 +404,10 @@ const ( // InstanceReady indicates that the instance is ready InstanceReady = "Ready" - // InstanceRunning indicates that the instance is running - InstanceRunning = "Running" + // InstanceAvailable indicates that the instance is available. It is True + // when the instance is serving and does not assert that a process is + // actively running at this instant. + InstanceAvailable = "Available" // InstanceProgrammed indicates that the instance has been programmed InstanceProgrammed = "Programmed" @@ -458,20 +460,20 @@ const ( // InstanceReadyReasonSchedulingGatesPresent indicates that the instance is not ready because scheduling gates are present. InstanceReadyReasonSchedulingGatesPresent = "SchedulingGatesPresent" - // InstanceReadyReasonRunning indicates that the instance is running - InstanceReadyReasonRunning = "Running" + // InstanceReadyReasonAvailable indicates that the instance is available + InstanceReadyReasonAvailable = "Available" - // InstanceRunningReasonStopped indicates that the instance is stopped - InstanceRunningReasonStopped = "Stopped" + // InstanceAvailableReasonStopped indicates that the instance is stopped + InstanceAvailableReasonStopped = "Stopped" - // InstanceRunningReasonStarting indicates that the instance is starting - InstanceRunningReasonStarting = "Starting" + // InstanceAvailableReasonStarting indicates that the instance is starting + InstanceAvailableReasonStarting = "Starting" - // InstanceRunningReasonStopping indicates that the instance is stopping - InstanceRunningReasonStopping = "Stopping" + // InstanceAvailableReasonStopping indicates that the instance is stopping + InstanceAvailableReasonStopping = "Stopping" - // InstanceRunningReasonRunning indicates that the instance is running - InstanceRunningReasonRunning = "Running" + // InstanceAvailableReasonAvailable indicates that the instance is available + InstanceAvailableReasonAvailable = "Available" // InstanceProgrammedReasonPendingProgramming indicates that the instance has not been programmed InstanceProgrammedReasonPendingProgramming = "PendingProgramming" @@ -515,7 +517,7 @@ type Instance struct { // Status defines the current state of an Instance. // - // +kubebuilder:default={conditions:{{type:"Programmed",status:"Unknown",reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"},{type:"Running",status:"Unknown",reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"},{type:"Ready",status:"Unknown",reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"},{type:"QuotaGranted",status:"Unknown",reason:"PendingEvaluation",message:"Waiting for quota evaluation",lastTransitionTime:"1970-01-01T00:00:00Z"}}} + // +kubebuilder:default={conditions:{{type:"Programmed",status:"Unknown",reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"},{type:"Available",status:"Unknown",reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"},{type:"Ready",status:"Unknown",reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"},{type:"QuotaGranted",status:"Unknown",reason:"PendingEvaluation",message:"Waiting for quota evaluation",lastTransitionTime:"1970-01-01T00:00:00Z"}}} Status InstanceStatus `json:"status,omitempty"` } diff --git a/config/base/crd/bases/compute.datumapis.com_instances.yaml b/config/base/crd/bases/compute.datumapis.com_instances.yaml index c9301561..a007c0d7 100644 --- a/config/base/crd/bases/compute.datumapis.com_instances.yaml +++ b/config/base/crd/bases/compute.datumapis.com_instances.yaml @@ -887,7 +887,7 @@ spec: message: Waiting for controller reason: Pending status: Unknown - type: Running + type: Available - lastTransitionTime: "1970-01-01T00:00:00Z" message: Waiting for controller reason: Pending diff --git a/internal/controller/instance_controller.go b/internal/controller/instance_controller.go index 15555a69..5dbe3a47 100644 --- a/internal/controller/instance_controller.go +++ b/internal/controller/instance_controller.go @@ -88,8 +88,8 @@ const ( // msgInstanceProgrammed is the human-readable message for the programmed state. msgInstanceProgrammed = "Instance has been programmed" - // msgInstanceRunning is the human-readable message for the running state. - msgInstanceRunning = "Instance is running" + // msgInstanceAvailable is the human-readable message for the available state. + msgInstanceAvailable = "Instance is available" // reasonNetworkFailedToCreate is the reason code for network creation failure. reasonNetworkFailedToCreate = "NetworkFailedToCreate" @@ -830,7 +830,7 @@ func (r *InstanceReconciler) reconcileInstanceReadyCondition( ObservedGeneration: instance.Generation, }) changed = apimeta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{ - Type: computev1alpha.InstanceRunning, + Type: computev1alpha.InstanceAvailable, Status: metav1.ConditionFalse, Reason: computev1alpha.InstanceProgrammedReasonPendingQuota, Message: msg, @@ -903,26 +903,26 @@ func (r *InstanceReconciler) reconcileInstanceReadyCondition( logger.Info("instance is programmed", "instance", instance.Name) - runningCondition := apimeta.FindStatusCondition(instance.Status.Conditions, computev1alpha.InstanceRunning) - if runningCondition == nil || runningCondition.Status != metav1.ConditionTrue { - logger.Info("instance is not running", "instance", instance.Name) + availableCondition := apimeta.FindStatusCondition(instance.Status.Conditions, computev1alpha.InstanceAvailable) + if availableCondition == nil || availableCondition.Status != metav1.ConditionTrue { + logger.Info("instance is not available", "instance", instance.Name) readyCondition.Status = metav1.ConditionFalse readyCondition.Reason = pendingReason - if runningCondition != nil && runningCondition.Reason != pendingReason { - readyCondition.Reason = runningCondition.Reason + if availableCondition != nil && availableCondition.Reason != pendingReason { + readyCondition.Reason = availableCondition.Reason } - readyCondition.Message = "Instance is not running" - if runningCondition != nil && runningCondition.Status != metav1.ConditionUnknown { - readyCondition.Message = runningCondition.Message + readyCondition.Message = "Instance is not available" + if availableCondition != nil && availableCondition.Status != metav1.ConditionUnknown { + readyCondition.Message = availableCondition.Message } return apimeta.SetStatusCondition(&instance.Status.Conditions, *readyCondition), nil } readyCondition.Status = metav1.ConditionTrue - readyCondition.Reason = computev1alpha.InstanceReadyReasonRunning + readyCondition.Reason = computev1alpha.InstanceReadyReasonAvailable readyCondition.Message = msgInstanceReady return apimeta.SetStatusCondition(&instance.Status.Conditions, *readyCondition), nil diff --git a/internal/controller/instance_controller_test.go b/internal/controller/instance_controller_test.go index ed3a1ef8..278998d8 100644 --- a/internal/controller/instance_controller_test.go +++ b/internal/controller/instance_controller_test.go @@ -175,7 +175,7 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { }, }, { - name: "instance programmed but not running should wait for running", + name: "instance programmed but not available should wait for available", instance: &computev1alpha.Instance{ ObjectMeta: metav1.ObjectMeta{ Name: testInstanceName, @@ -191,7 +191,7 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { Message: msgInstanceProgrammed, }, { - Type: computev1alpha.InstanceRunning, + Type: computev1alpha.InstanceAvailable, Status: metav1.ConditionFalse, Reason: testReasonString, Message: testMessageString, @@ -225,10 +225,10 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { Message: msgInstanceProgrammed, }, { - Type: computev1alpha.InstanceRunning, + Type: computev1alpha.InstanceAvailable, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceRunningReasonRunning, - Message: msgInstanceRunning, + Reason: computev1alpha.InstanceAvailableReasonAvailable, + Message: msgInstanceAvailable, }, }, }, @@ -237,7 +237,7 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { expectedCondition: &metav1.Condition{ Type: computev1alpha.InstanceReady, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceReadyReasonRunning, + Reason: computev1alpha.InstanceReadyReasonAvailable, Message: msgInstanceReady, ObservedGeneration: 1, }, @@ -255,7 +255,7 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { { Type: computev1alpha.InstanceReady, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceReadyReasonRunning, + Reason: computev1alpha.InstanceReadyReasonAvailable, Message: msgInstanceReady, ObservedGeneration: 1, LastTransitionTime: metav1.Now(), @@ -267,10 +267,10 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { Message: msgInstanceProgrammed, }, { - Type: computev1alpha.InstanceRunning, + Type: computev1alpha.InstanceAvailable, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceRunningReasonRunning, - Message: msgInstanceRunning, + Reason: computev1alpha.InstanceAvailableReasonAvailable, + Message: msgInstanceAvailable, }, }, }, @@ -279,7 +279,7 @@ func TestReconcileInstanceReadyCondition(t *testing.T) { expectedCondition: &metav1.Condition{ Type: computev1alpha.InstanceReady, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceReadyReasonRunning, + Reason: computev1alpha.InstanceReadyReasonAvailable, Message: msgInstanceReady, ObservedGeneration: 1, }, @@ -352,10 +352,10 @@ func TestReconcileInstanceReadyConditionWithQuota(t *testing.T) { LastTransitionTime: metav1.Now(), }, { - Type: computev1alpha.InstanceRunning, + Type: computev1alpha.InstanceAvailable, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceRunningReasonRunning, - Message: msgInstanceRunning, + Reason: computev1alpha.InstanceAvailableReasonAvailable, + Message: msgInstanceAvailable, LastTransitionTime: metav1.Now(), }, }, @@ -394,10 +394,10 @@ func TestReconcileInstanceReadyConditionWithQuota(t *testing.T) { LastTransitionTime: metav1.Now(), }, { - Type: computev1alpha.InstanceRunning, + Type: computev1alpha.InstanceAvailable, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceRunningReasonRunning, - Message: msgInstanceRunning, + Reason: computev1alpha.InstanceAvailableReasonAvailable, + Message: msgInstanceAvailable, LastTransitionTime: metav1.Now(), }, }, @@ -407,7 +407,7 @@ func TestReconcileInstanceReadyConditionWithQuota(t *testing.T) { expectedCondition: &metav1.Condition{ Type: computev1alpha.InstanceReady, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceReadyReasonRunning, + Reason: computev1alpha.InstanceReadyReasonAvailable, Message: msgInstanceReady, }, }, @@ -647,7 +647,7 @@ func TestReconcileQuota(t *testing.T) { assert.False(t, hasQuotaGate, "QuotaSchedulingGate must be removed in the same reconcile pass as the status update") }) - t.Run("quota exceeded flow: conditions cascade to block Programmed/Running/Ready", func(t *testing.T) { + t.Run("quota exceeded flow: conditions cascade to block Programmed/Available/Ready", func(t *testing.T) { s := newTestScheme(t) instance := makeInstance(s, computev1alpha.SchedulingGate{Name: instancecontrol.NetworkSchedulingGate.String()}, @@ -673,10 +673,10 @@ func TestReconcileQuota(t *testing.T) { assert.Equal(t, metav1.ConditionFalse, programmedCond.Status) assert.Equal(t, computev1alpha.InstanceProgrammedReasonPendingQuota, programmedCond.Reason) - runningCond := apimeta.FindStatusCondition(updated.Status.Conditions, computev1alpha.InstanceRunning) - require.NotNil(t, runningCond) - assert.Equal(t, metav1.ConditionFalse, runningCond.Status) - assert.Equal(t, computev1alpha.InstanceProgrammedReasonPendingQuota, runningCond.Reason) + availableCond := apimeta.FindStatusCondition(updated.Status.Conditions, computev1alpha.InstanceAvailable) + require.NotNil(t, availableCond) + assert.Equal(t, metav1.ConditionFalse, availableCond.Status) + assert.Equal(t, computev1alpha.InstanceProgrammedReasonPendingQuota, availableCond.Reason) readyCond := apimeta.FindStatusCondition(updated.Status.Conditions, computev1alpha.InstanceReady) require.NotNil(t, readyCond) diff --git a/internal/controller/instance_writeback_test.go b/internal/controller/instance_writeback_test.go index 17c522f1..0112a630 100644 --- a/internal/controller/instance_writeback_test.go +++ b/internal/controller/instance_writeback_test.go @@ -92,7 +92,7 @@ func wbTestCellInstance() *computev1alpha.Instance { { Type: computev1alpha.InstanceReady, Status: metav1.ConditionTrue, - Reason: computev1alpha.InstanceReadyReasonRunning, + Reason: computev1alpha.InstanceReadyReasonAvailable, Message: "Instance is ready", LastTransitionTime: metav1.Now(), }, From 8f202c905b2483a15db5659df7ea668f3d461b19 Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Wed, 3 Jun 2026 21:30:29 -0400 Subject: [PATCH 07/13] fix(controller): requeue while quota pending so a missed grant self-heals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The instance controller is re-queued by a ResourceClaim watch when the claim is granted, but that grant event lives on the project control plane and can be missed (informer engagement races, watch relist gaps), wedging the instance at QuotaGranted!=True indefinitely (observed: claim Granted, instance stuck QuotaNoBudget until a manual reconcile cleared it). The pending-quota path returned no RequeueAfter, so there was no safety net. Add a backing-off requeue while QuotaGranted is not True, anchored on the condition's last transition: <60s : 1s (catch a grant landing almost immediately) 60s–5m : 15s 5m–10m : 60s >=10m : 300s Folded into the existing referenced-data requeue (soonest wins). The ResourceClaim watch remains the fast path; this only guarantees a missed grant self-heals instead of wedging. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/controller/instance_controller.go | 52 ++++++++++++++++++- .../controller/instance_controller_test.go | 46 ++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/internal/controller/instance_controller.go b/internal/controller/instance_controller.go index 5dbe3a47..bb274834 100644 --- a/internal/controller/instance_controller.go +++ b/internal/controller/instance_controller.go @@ -7,6 +7,7 @@ import ( "fmt" "maps" "strings" + "time" corev1 "k8s.io/api/core/v1" apiequality "k8s.io/apimachinery/pkg/api/equality" @@ -95,6 +96,28 @@ const ( reasonNetworkFailedToCreate = "NetworkFailedToCreate" ) +// Quota-pending requeue backoff. The instance controller is normally re-queued by +// the ResourceClaim watch when a claim is granted, but that grant event lives on +// the project control plane and can be missed (informer engagement races, watch +// relist gaps), wedging the instance at QuotaGranted!=True indefinitely. While +// quota is pending we requeue on a backing-off schedule as a safety net so a +// missed grant self-heals. The interval lengthens the longer the instance waits: +// +// elapsed < 60s : every 1s (catch a grant landing almost immediately) +// 60s – 5m : every 15s +// 5m – 10m : every 60s +// >= 10m : every 300s +const ( + quotaPendingRequeueFast = 1 * time.Second + quotaPendingRequeueMedium = 15 * time.Second + quotaPendingRequeueSlow = 60 * time.Second + quotaPendingRequeueIdle = 300 * time.Second + + quotaPendingFastWindow = 60 * time.Second + quotaPendingMediumWindow = 5 * time.Minute + quotaPendingSlowWindow = 10 * time.Minute +) + // clusterGetter is the subset of mcmanager.Manager used by InstanceReconciler. // Keeping it narrow allows unit tests to substitute a minimal fake. type clusterGetter interface { @@ -251,7 +274,10 @@ func (r *InstanceReconciler) Reconcile(ctx context.Context, req mcreconcile.Requ return ctrl.Result{}, err } - return ctrl.Result{}, nil + // Safety net: while quota is still pending (claim created but not yet + // granted), requeue on a backing-off schedule so a missed ResourceClaim + // grant event self-heals instead of wedging the instance. + return ctrl.Result{RequeueAfter: quotaPendingRequeueAfter(&instance, time.Now())}, nil } // reconcileDeletion handles quota-claim cleanup when an Instance is being @@ -319,6 +345,30 @@ func quotaClaimName(instance *computev1alpha.Instance) string { return instanceQuotaClaimNamePrefix + instance.Name } +// quotaPendingRequeueAfter returns a safety-net requeue interval while the +// instance's quota is not yet granted, backing off the longer it has waited (see +// the quotaPendingRequeue* constants). It anchors elapsed time on the +// QuotaGranted condition's last transition (when the instance entered the pending +// state). It returns 0 when quota is already granted (QuotaGranted=True) or the +// condition is absent, so a granted/normal instance is not needlessly requeued. +func quotaPendingRequeueAfter(instance *computev1alpha.Instance, now time.Time) time.Duration { + cond := apimeta.FindStatusCondition(instance.Status.Conditions, computev1alpha.InstanceQuotaGranted) + if cond == nil || cond.Status == metav1.ConditionTrue { + return 0 + } + elapsed := now.Sub(cond.LastTransitionTime.Time) + switch { + case elapsed < quotaPendingFastWindow: + return quotaPendingRequeueFast + case elapsed < quotaPendingMediumWindow: + return quotaPendingRequeueMedium + case elapsed < quotaPendingSlowWindow: + return quotaPendingRequeueSlow + default: + return quotaPendingRequeueIdle + } +} + // reconcileQuotaCondition reconciles the ResourceClaim and updates the // InstanceQuotaGranted status condition. It returns (changed, err) where // changed=true means a status update is required, and err non-nil means the diff --git a/internal/controller/instance_controller_test.go b/internal/controller/instance_controller_test.go index 278998d8..6d0c2d08 100644 --- a/internal/controller/instance_controller_test.go +++ b/internal/controller/instance_controller_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -1570,3 +1571,48 @@ func TestReconcileQuotaFailureModes(t *testing.T) { assert.Equal(t, int64(2), cond.ObservedGeneration, "condition must reflect current generation") }) } + +// TestQuotaPendingRequeueAfter verifies the backing-off safety-net requeue used +// while an instance's quota claim is still pending: 1s for the first minute, then +// 15s, then 60s after 5m, then 300s after 10m; and no requeue once granted. +func TestQuotaPendingRequeueAfter(t *testing.T) { + base := time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) + + withQuota := func(s metav1.ConditionStatus, transitioned time.Time) *computev1alpha.Instance { + return &computev1alpha.Instance{ + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{{ + Type: computev1alpha.InstanceQuotaGranted, + Status: s, + Reason: "PendingEvaluation", + LastTransitionTime: metav1.NewTime(transitioned), + }}, + }, + } + } + + tests := []struct { + name string + inst *computev1alpha.Instance + now time.Time + want time.Duration + }{ + {"granted -> no requeue", withQuota(metav1.ConditionTrue, base), base.Add(time.Hour), 0}, + {"no quota condition -> no requeue", &computev1alpha.Instance{}, base, 0}, + {"just pending -> 1s", withQuota(metav1.ConditionUnknown, base), base.Add(5 * time.Second), quotaPendingRequeueFast}, + {"59s -> 1s", withQuota(metav1.ConditionUnknown, base), base.Add(59 * time.Second), quotaPendingRequeueFast}, + {"60s boundary -> 15s", withQuota(metav1.ConditionUnknown, base), base.Add(60 * time.Second), quotaPendingRequeueMedium}, + {"3m -> 15s", withQuota(metav1.ConditionUnknown, base), base.Add(3 * time.Minute), quotaPendingRequeueMedium}, + {"5m boundary -> 60s", withQuota(metav1.ConditionUnknown, base), base.Add(5 * time.Minute), quotaPendingRequeueSlow}, + {"8m -> 60s", withQuota(metav1.ConditionUnknown, base), base.Add(8 * time.Minute), quotaPendingRequeueSlow}, + {"10m boundary -> 300s", withQuota(metav1.ConditionUnknown, base), base.Add(10 * time.Minute), quotaPendingRequeueIdle}, + {"1h -> 300s", withQuota(metav1.ConditionUnknown, base), base.Add(time.Hour), quotaPendingRequeueIdle}, + {"denied(False) still polls", withQuota(metav1.ConditionFalse, base), base.Add(2 * time.Minute), quotaPendingRequeueMedium}, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.want, quotaPendingRequeueAfter(tc.inst, tc.now)) + }) + } +} From 029dafddbb28c3025abad93d08545fd674d3faa9 Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Wed, 3 Jun 2026 22:23:37 -0400 Subject: [PATCH 08/13] fix(controller): make quota-pending requeue observable and conflict-proof MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pending-quota safety-net requeue was wired only at the tail of Reconcile, so an early return during the pending window (a status-update or upstream-writeback conflict) silently dropped it onto controller- runtime's exponential error-backoff — which can stretch to minutes, leaving an instance wedged at QuotaGranted!=True even though its ResourceClaim was granted (observed: the 2nd instance in a rapid burst consistently wedged). - Compute the requeue once, up front, so every return path honors it. - On a Conflict during the pending window, requeue at the bounded quota interval instead of returning the error (which would back off). - Log the requeue decision (and conflict-driven requeues) so the path is observable: a re-firing requeue prints every pass while pending, a dropped one does not. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/controller/instance_controller.go | 32 +++++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/internal/controller/instance_controller.go b/internal/controller/instance_controller.go index bb274834..d734e016 100644 --- a/internal/controller/instance_controller.go +++ b/internal/controller/instance_controller.go @@ -237,6 +237,18 @@ func (r *InstanceReconciler) Reconcile(ctx context.Context, req mcreconcile.Requ statusChanged, quotaErr := r.reconcileQuotaCondition(ctx, req.ClusterName, &instance) + // Safety-net requeue while quota is not yet granted, computed up front so + // every return path below honors it. A conflict during the pending window + // must not drop the instance onto controller-runtime's exponential + // error-backoff (which can stretch to minutes), which would defeat recovery + // from a missed ResourceClaim grant event. Logged so the requeue is + // observable: a re-firing requeue prints this every pass while pending. + quotaReq := quotaPendingRequeueAfter(&instance, time.Now()) + if quotaReq > 0 { + logger.Info("quota pending; scheduling safety-net requeue", + "after", quotaReq.String(), "cluster", req.ClusterName.String(), "instance", instance.Name) + } + // Even when reconcileQuotaCondition returns a transient error, persist any // condition change first so the failure reason is visible on the Instance. // We return the error afterwards so controller-runtime requeues with backoff. @@ -247,6 +259,11 @@ func (r *InstanceReconciler) Reconcile(ctx context.Context, req mcreconcile.Requ if statusChanged || readyChanged { if err := cl.GetClient().Status().Update(ctx, &instance); err != nil { + if quotaReq > 0 && apierrors.IsConflict(err) { + logger.Info("status update conflicted while quota pending; requeuing instead of error-backoff", + "after", quotaReq.String(), "instance", instance.Name) + return ctrl.Result{RequeueAfter: quotaReq}, nil + } return ctrl.Result{}, err } // Return with the quota error (nil or transient) so controller-runtime @@ -271,13 +288,20 @@ func (r *InstanceReconciler) Reconcile(ctx context.Context, req mcreconcile.Requ } if err := r.writeBackToUpstream(ctx, req.ClusterName, &instance); err != nil { + if quotaReq > 0 && apierrors.IsConflict(err) { + logger.Info("upstream writeback conflicted while quota pending; requeuing instead of error-backoff", + "after", quotaReq.String(), "instance", instance.Name) + return ctrl.Result{RequeueAfter: quotaReq}, nil + } return ctrl.Result{}, err } - // Safety net: while quota is still pending (claim created but not yet - // granted), requeue on a backing-off schedule so a missed ResourceClaim - // grant event self-heals instead of wedging the instance. - return ctrl.Result{RequeueAfter: quotaPendingRequeueAfter(&instance, time.Now())}, nil + if quotaReq > 0 { + logger.Info("requeuing instance", "after", quotaReq.String(), + "cluster", req.ClusterName.String(), "instance", instance.Name) + } + + return ctrl.Result{RequeueAfter: quotaReq}, nil } // reconcileDeletion handles quota-claim cleanup when an Instance is being From 5458503188272b375299b54412ca9921a0fba2f9 Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Wed, 3 Jun 2026 22:33:08 -0400 Subject: [PATCH 09/13] fix(controller): anchor quota requeue on creation time, not condition LTT Observability revealed the safety-net requeue was firing every reconcile but always at the slowest tier (300s): elapsed was measured from the QuotaGranted condition's LastTransitionTime, which stays at the 1970-01-01 CRD default while quota is pending (PendingEvaluation and NoBudget are both Unknown, so SetStatusCondition never bumps it). Result: a watch-missed instance waited up to 5 minutes for the safety net instead of ~1s, appearing wedged. Anchor elapsed on instance.CreationTimestamp, which reflects actual wait time, so the fast tiers (1s/15s) apply early as intended. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/controller/instance_controller.go | 15 ++++++++++----- internal/controller/instance_controller_test.go | 16 +++++++++++----- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/internal/controller/instance_controller.go b/internal/controller/instance_controller.go index d734e016..b4b069a4 100644 --- a/internal/controller/instance_controller.go +++ b/internal/controller/instance_controller.go @@ -371,16 +371,21 @@ func quotaClaimName(instance *computev1alpha.Instance) string { // quotaPendingRequeueAfter returns a safety-net requeue interval while the // instance's quota is not yet granted, backing off the longer it has waited (see -// the quotaPendingRequeue* constants). It anchors elapsed time on the -// QuotaGranted condition's last transition (when the instance entered the pending -// state). It returns 0 when quota is already granted (QuotaGranted=True) or the -// condition is absent, so a granted/normal instance is not needlessly requeued. +// the quotaPendingRequeue* constants). It returns 0 when quota is already granted +// (QuotaGranted=True) or the condition is absent, so a granted/normal instance is +// not needlessly requeued. +// +// Elapsed time is anchored on the instance's creation timestamp, NOT the +// QuotaGranted condition's LastTransitionTime: while quota is pending the +// condition stays Unknown (PendingEvaluation and NoBudget are both Unknown), so +// SetStatusCondition never bumps LastTransitionTime off its 1970-01-01 CRD +// default — which would peg every pending instance to the slowest tier. func quotaPendingRequeueAfter(instance *computev1alpha.Instance, now time.Time) time.Duration { cond := apimeta.FindStatusCondition(instance.Status.Conditions, computev1alpha.InstanceQuotaGranted) if cond == nil || cond.Status == metav1.ConditionTrue { return 0 } - elapsed := now.Sub(cond.LastTransitionTime.Time) + elapsed := now.Sub(instance.CreationTimestamp.Time) switch { case elapsed < quotaPendingFastWindow: return quotaPendingRequeueFast diff --git a/internal/controller/instance_controller_test.go b/internal/controller/instance_controller_test.go index 6d0c2d08..331cae15 100644 --- a/internal/controller/instance_controller_test.go +++ b/internal/controller/instance_controller_test.go @@ -1578,14 +1578,20 @@ func TestReconcileQuotaFailureModes(t *testing.T) { func TestQuotaPendingRequeueAfter(t *testing.T) { base := time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) - withQuota := func(s metav1.ConditionStatus, transitioned time.Time) *computev1alpha.Instance { + // created is the instance creation time; quota elapsed is measured from it + // (NOT the condition's LastTransitionTime, which stays at the 1970 default + // while quota is pending). The condition LastTransitionTime here is + // deliberately left at the 1970 zero value to mirror that production reality. + withQuota := func(s metav1.ConditionStatus, created time.Time) *computev1alpha.Instance { return &computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + CreationTimestamp: metav1.NewTime(created), + }, Status: computev1alpha.InstanceStatus{ Conditions: []metav1.Condition{{ - Type: computev1alpha.InstanceQuotaGranted, - Status: s, - Reason: "PendingEvaluation", - LastTransitionTime: metav1.NewTime(transitioned), + Type: computev1alpha.InstanceQuotaGranted, + Status: s, + Reason: "PendingEvaluation", }}, }, } From 53e182a3afd43a2dacb0b1c9e7d4a5a901a54dfb Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Wed, 3 Jun 2026 23:07:48 -0400 Subject: [PATCH 10/13] fix(rbac): grant the instance controller permission to emit events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The instance controller emits Warning events on Instances (QuotaNoBudget, ImageUnavailable, InstanceCrashing, ConfigurationError, NetworkFailedToCreate, …) via the event recorder, but no RBAC rule granted it. Every write was rejected — "events is forbidden: ... cannot create resource events in API group \"\" in the namespace ns-" — so the user-facing signals explaining why an instance is stuck never reached the Instance (kubectl describe / activity timeline). Reconciliation was unaffected; this is an observability gap. Add the kubebuilder marker and regenerate the role. The regen also syncs a pre-existing work.karmada.io/resourcebindings rule (from an existing marker that wasn't reflected in the committed role). Co-Authored-By: Claude Opus 4.8 (1M context) --- config/components/controller_rbac/role.yaml | 7 +++++++ internal/controller/instance_controller.go | 1 + 2 files changed, 8 insertions(+) diff --git a/config/components/controller_rbac/role.yaml b/config/components/controller_rbac/role.yaml index e8721899..a634f512 100644 --- a/config/components/controller_rbac/role.yaml +++ b/config/components/controller_rbac/role.yaml @@ -4,6 +4,13 @@ kind: ClusterRole metadata: name: compute rules: +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch - apiGroups: - "" resources: diff --git a/internal/controller/instance_controller.go b/internal/controller/instance_controller.go index b4b069a4..b2b0af1f 100644 --- a/internal/controller/instance_controller.go +++ b/internal/controller/instance_controller.go @@ -189,6 +189,7 @@ type InstanceReconciler struct { // +kubebuilder:rbac:groups=compute.datumapis.com,resources=instances/finalizers,verbs=update // +kubebuilder:rbac:groups=quota.miloapis.com,resources=resourceclaims,verbs=get;list;watch;create;delete // +kubebuilder:rbac:groups="",resources=namespaces,verbs=get +// +kubebuilder:rbac:groups="",resources=events,verbs=create;patch func (r *InstanceReconciler) Reconcile(ctx context.Context, req mcreconcile.Request) (_ ctrl.Result, err error) { logger := log.FromContext(ctx) From 4e87c30a495fd56daa894c739908f30a6d5c7aac Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Wed, 3 Jun 2026 19:48:57 -0400 Subject: [PATCH 11/13] feat(controller): surface rollout progress via UpdatedReplicas + ObservedGeneration A restart/rolling update was invisible from the project plane: there was no status field representing how many instances are on the new template revision. Add UpdatedReplicas (instances whose observed template hash matches the desired template, regardless of readiness) and ObservedGeneration to both WorkloadDeployment and Workload (plus placement) status. UpdatedReplicas is computed on the cell WD reconcile alongside CurrentReplicas (which is now its Programmed subset), aggregated up into the Workload, and rides the existing status sync to the project plane. Repoint the "Up-to-date" printcolumn to .status.updatedReplicas to match `kubectl get deployment` semantics, so a roll is visible as the count dips below Replicas and recovers. Co-Authored-By: Claude Opus 4.8 (1M context) --- api/v1alpha/workload_types.go | 23 +++++++++++-- api/v1alpha/workloaddeployment_types.go | 18 +++++++++-- ...ute.datumapis.com_workloaddeployments.yaml | 23 +++++++++++-- .../compute.datumapis.com_workloads.yaml | 32 ++++++++++++++++--- internal/controller/workload_controller.go | 7 ++++ .../workloaddeployment_controller.go | 30 ++++++++++++----- 6 files changed, 112 insertions(+), 21 deletions(-) diff --git a/api/v1alpha/workload_types.go b/api/v1alpha/workload_types.go index 617172d4..e3e9b04e 100644 --- a/api/v1alpha/workload_types.go +++ b/api/v1alpha/workload_types.go @@ -58,15 +58,27 @@ type WorkloadStatus struct { // The number of instances that currently exist Replicas int32 `json:"replicas"` - // The number of instances which have the latest workload settings applied. + // The number of instances which have the latest workload settings applied + // and are programmed (a subset of UpdatedReplicas that are ready to serve). CurrentReplicas int32 `json:"currentReplicas"` + // The number of instances updated to the latest template revision (their + // observed template hash matches the desired template), regardless of + // readiness. Lags Replicas during a rolling update or restart, then catches + // back up — making an in-progress roll observable. + UpdatedReplicas int32 `json:"updatedReplicas"` + // The desired number of instances DesiredReplicas int32 `json:"desiredReplicas"` // The number of instances which are ready. ReadyReplicas int32 `json:"readyReplicas"` + // The most recent generation observed by the workload controller. + // + // +kubebuilder:validation:Optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + // The current status of placemetns in a workload. Placements []WorkloadPlacementStatus `json:"placements,omitempty"` @@ -99,7 +111,7 @@ type WorkloadGatewayStatus struct { // +kubebuilder:printcolumn:name="Replicas",type=string,JSONPath=`.status.replicas` // +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.readyReplicas` // +kubebuilder:printcolumn:name="Desired",type=string,JSONPath=`.status.desiredReplicas` -// +kubebuilder:printcolumn:name="Up-to-date",type=string,JSONPath=`.status.currentReplicas` +// +kubebuilder:printcolumn:name="Up-to-date",type=string,JSONPath=`.status.updatedReplicas` type Workload struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` @@ -146,9 +158,14 @@ type WorkloadPlacementStatus struct { // The number of instances that currently exist Replicas int32 `json:"replicas"` - // The number of instances which have the latest workload settings applied. + // The number of instances which have the latest workload settings applied + // and are programmed (a subset of UpdatedReplicas that are ready to serve). CurrentReplicas int32 `json:"currentReplicas"` + // The number of instances updated to the latest template revision, regardless + // of readiness. Lags Replicas during a rolling update or restart. + UpdatedReplicas int32 `json:"updatedReplicas"` + // The desired number of instances DesiredReplicas int32 `json:"desiredReplicas"` diff --git a/api/v1alpha/workloaddeployment_types.go b/api/v1alpha/workloaddeployment_types.go index 7da27c89..7da6bf45 100644 --- a/api/v1alpha/workloaddeployment_types.go +++ b/api/v1alpha/workloaddeployment_types.go @@ -49,14 +49,28 @@ type WorkloadDeploymentStatus struct { // The number of instances created Replicas int32 `json:"replicas"` - // The number of instances which have the latest workload settings applied. + // The number of instances which have the latest workload settings applied + // and are programmed (a subset of UpdatedReplicas that are ready to serve). CurrentReplicas int32 `json:"currentReplicas"` + // The number of instances updated to the latest template revision, i.e. + // whose observed template hash matches the desired template, regardless of + // readiness. Lags Replicas during a rolling update or restart, then catches + // back up — making an in-progress roll observable. + UpdatedReplicas int32 `json:"updatedReplicas"` + // The desired number of instances DesiredReplicas int32 `json:"desiredReplicas"` // The number of instances which are ready. ReadyReplicas int32 `json:"readyReplicas"` + + // The most recent generation observed by the deployment controller. When + // this matches metadata.generation, the controller has reconciled the + // latest spec (e.g. a restart request). + // + // +kubebuilder:validation:Optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` } const ( @@ -79,7 +93,7 @@ const ( // +kubebuilder:printcolumn:name="Replicas",type=string,JSONPath=`.status.replicas` // +kubebuilder:printcolumn:name="Ready",type=string,JSONPath=`.status.readyReplicas` // +kubebuilder:printcolumn:name="Desired",type=string,JSONPath=`.status.desiredReplicas` -// +kubebuilder:printcolumn:name="Up-to-date",type=string,JSONPath=`.status.currentReplicas` +// +kubebuilder:printcolumn:name="Up-to-date",type=string,JSONPath=`.status.updatedReplicas` // +kubebuilder:printcolumn:name="Location Namespace",type=string,JSONPath=`.status.location.namespace`,priority=1 // +kubebuilder:printcolumn:name="Location Name",type=string,JSONPath=`.status.location.name`,priority=1 type WorkloadDeployment struct { diff --git a/config/base/crd/bases/compute.datumapis.com_workloaddeployments.yaml b/config/base/crd/bases/compute.datumapis.com_workloaddeployments.yaml index 48a2501d..e584af9f 100644 --- a/config/base/crd/bases/compute.datumapis.com_workloaddeployments.yaml +++ b/config/base/crd/bases/compute.datumapis.com_workloaddeployments.yaml @@ -34,7 +34,7 @@ spec: - jsonPath: .status.desiredReplicas name: Desired type: string - - jsonPath: .status.currentReplicas + - jsonPath: .status.updatedReplicas name: Up-to-date type: string - jsonPath: .status.location.namespace @@ -1087,8 +1087,9 @@ spec: type: object type: array currentReplicas: - description: The number of instances which have the latest workload - settings applied. + description: |- + The number of instances which have the latest workload settings applied + and are programmed (a subset of UpdatedReplicas that are ready to serve). format: int32 type: integer desiredReplicas: @@ -1109,6 +1110,13 @@ spec: - name - namespace type: object + observedGeneration: + description: |- + The most recent generation observed by the deployment controller. When + this matches metadata.generation, the controller has reconciled the + latest spec (e.g. a restart request). + format: int64 + type: integer readyReplicas: description: The number of instances which are ready. format: int32 @@ -1117,11 +1125,20 @@ spec: description: The number of instances created format: int32 type: integer + updatedReplicas: + description: |- + The number of instances updated to the latest template revision, i.e. + whose observed template hash matches the desired template, regardless of + readiness. Lags Replicas during a rolling update or restart, then catches + back up — making an in-progress roll observable. + format: int32 + type: integer required: - currentReplicas - desiredReplicas - readyReplicas - replicas + - updatedReplicas type: object type: object served: true diff --git a/config/base/crd/bases/compute.datumapis.com_workloads.yaml b/config/base/crd/bases/compute.datumapis.com_workloads.yaml index c452910f..c1c8efd9 100644 --- a/config/base/crd/bases/compute.datumapis.com_workloads.yaml +++ b/config/base/crd/bases/compute.datumapis.com_workloads.yaml @@ -37,7 +37,7 @@ spec: - jsonPath: .status.desiredReplicas name: Desired type: string - - jsonPath: .status.currentReplicas + - jsonPath: .status.updatedReplicas name: Up-to-date type: string name: v1alpha @@ -1081,8 +1081,9 @@ spec: type: object type: array currentReplicas: - description: The number of instances which have the latest workload - settings applied. + description: |- + The number of instances which have the latest workload settings applied + and are programmed (a subset of UpdatedReplicas that are ready to serve). format: int32 type: integer deployments: @@ -1367,6 +1368,10 @@ spec: - name x-kubernetes-list-type: map type: object + observedGeneration: + description: The most recent generation observed by the workload controller. + format: int64 + type: integer placements: description: The current status of placemetns in a workload. items: @@ -1432,8 +1437,9 @@ spec: type: object type: array currentReplicas: - description: The number of instances which have the latest workload - settings applied. + description: |- + The number of instances which have the latest workload settings applied + and are programmed (a subset of UpdatedReplicas that are ready to serve). format: int32 type: integer desiredReplicas: @@ -1451,12 +1457,19 @@ spec: description: The number of instances that currently exist format: int32 type: integer + updatedReplicas: + description: |- + The number of instances updated to the latest template revision, regardless + of readiness. Lags Replicas during a rolling update or restart. + format: int32 + type: integer required: - currentReplicas - desiredReplicas - name - readyReplicas - replicas + - updatedReplicas type: object type: array readyReplicas: @@ -1467,12 +1480,21 @@ spec: description: The number of instances that currently exist format: int32 type: integer + updatedReplicas: + description: |- + The number of instances updated to the latest template revision (their + observed template hash matches the desired template), regardless of + readiness. Lags Replicas during a rolling update or restart, then catches + back up — making an in-progress roll observable. + format: int32 + type: integer required: - currentReplicas - deployments - desiredReplicas - readyReplicas - replicas + - updatedReplicas type: object required: - spec diff --git a/internal/controller/workload_controller.go b/internal/controller/workload_controller.go index 6ca92e03..34f55def 100644 --- a/internal/controller/workload_controller.go +++ b/internal/controller/workload_controller.go @@ -220,6 +220,7 @@ func (r *WorkloadReconciler) reconcileWorkloadStatus( newWorkloadStatus := workload.Status.DeepCopy() totalReplicas := int32(0) totalCurrentReplicas := int32(0) + totalUpdatedReplicas := int32(0) totalDesiredReplicas := int32(0) totalReadyReplicas := int32(0) totalDeployments := int32(0) @@ -251,12 +252,14 @@ func (r *WorkloadReconciler) reconcileWorkloadStatus( foundAvailableDeployment := false replicas := int32(0) currentReplicas := int32(0) + updatedReplicas := int32(0) desiredReplicas := int32(0) readyReplicas := int32(0) totalDeployments += int32(len(placementDeployments)) for _, deployment := range placementDeployments { replicas += deployment.Status.Replicas currentReplicas += deployment.Status.CurrentReplicas + updatedReplicas += deployment.Status.UpdatedReplicas desiredReplicas += deployment.Status.DesiredReplicas readyReplicas += deployment.Status.ReadyReplicas @@ -266,11 +269,13 @@ func (r *WorkloadReconciler) reconcileWorkloadStatus( } totalReplicas += replicas totalCurrentReplicas += currentReplicas + totalUpdatedReplicas += updatedReplicas totalDesiredReplicas += desiredReplicas totalReadyReplicas += readyReplicas placementStatus.Replicas = replicas placementStatus.CurrentReplicas = currentReplicas + placementStatus.UpdatedReplicas = updatedReplicas placementStatus.DesiredReplicas = desiredReplicas placementStatus.ReadyReplicas = readyReplicas @@ -304,8 +309,10 @@ func (r *WorkloadReconciler) reconcileWorkloadStatus( newWorkloadStatus.Deployments = totalDeployments newWorkloadStatus.Replicas = totalReplicas newWorkloadStatus.CurrentReplicas = totalCurrentReplicas + newWorkloadStatus.UpdatedReplicas = totalUpdatedReplicas newWorkloadStatus.DesiredReplicas = totalDesiredReplicas newWorkloadStatus.ReadyReplicas = totalReadyReplicas + newWorkloadStatus.ObservedGeneration = workload.Generation if equality.Semantic.DeepEqual(workload.Status, newWorkloadStatus) { return nil diff --git a/internal/controller/workloaddeployment_controller.go b/internal/controller/workloaddeployment_controller.go index 9b17266e..d4fd9919 100644 --- a/internal/controller/workloaddeployment_controller.go +++ b/internal/controller/workloaddeployment_controller.go @@ -171,15 +171,17 @@ func (r *WorkloadDeploymentReconciler) Reconcile(ctx context.Context, req mcreco desiredReplicas = 0 } - currentReplicas, readyReplicas, quotaBlockedReplicas, err := r.reconcileInstanceGates(ctx, cl.GetClient(), &deployment, instances.Items, networkReady) + currentReplicas, updatedReplicas, readyReplicas, quotaBlockedReplicas, err := r.reconcileInstanceGates(ctx, cl.GetClient(), &deployment, instances.Items, networkReady) if err != nil { return ctrl.Result{}, err } deployment.Status.Replicas = int32(replicas) deployment.Status.CurrentReplicas = int32(currentReplicas) + deployment.Status.UpdatedReplicas = int32(updatedReplicas) deployment.Status.DesiredReplicas = desiredReplicas deployment.Status.ReadyReplicas = int32(readyReplicas) + deployment.Status.ObservedGeneration = deployment.Generation if quotaBlockedReplicas > 0 { apimeta.SetStatusCondition(&deployment.Status.Conditions, metav1.Condition{ @@ -239,7 +241,7 @@ func (r *WorkloadDeploymentReconciler) reconcileInstanceGates( deployment *computev1alpha.WorkloadDeployment, instances []computev1alpha.Instance, networkReady bool, -) (currentReplicas, readyReplicas, quotaBlockedReplicas int, err error) { +) (currentReplicas, updatedReplicas, readyReplicas, quotaBlockedReplicas int, err error) { templateHash := instancecontrol.ComputeHash(deployment.Spec.Template) for _, instance := range instances { if apimeta.IsStatusConditionPresentAndEqual(instance.Status.Conditions, computev1alpha.InstanceQuotaGranted, metav1.ConditionFalse) { @@ -255,22 +257,34 @@ func (r *WorkloadDeploymentReconciler) reconcileInstanceGates( instance.Spec.Controller.SchedulingGates = newGates return nil }); patchErr != nil { - return 0, 0, 0, fmt.Errorf("failed updating instance: %w", patchErr) + return 0, 0, 0, 0, fmt.Errorf("failed updating instance: %w", patchErr) } } } - if apimeta.IsStatusConditionTrue(instance.Status.Conditions, computev1alpha.InstanceProgrammed) { - if instance.Status.Controller.ObservedTemplateHash == templateHash { - currentReplicas++ - } + // An instance is "updated" once it has observed the desired template + // revision, regardless of readiness. Counting these (even before they are + // Programmed) makes a rolling update / restart observable: UpdatedReplicas + // dips below Replicas while the recreated instance comes up, then recovers. + // Status.Controller is a pointer the infra provider may not have populated + // yet; guard the deref to avoid a panic that would abort the reconcile. + onLatestRevision := instance.Status.Controller != nil && + instance.Status.Controller.ObservedTemplateHash == templateHash + if onLatestRevision { + updatedReplicas++ + } + + // CurrentReplicas is the Programmed subset of UpdatedReplicas — updated + // instances that are ready to serve. + if onLatestRevision && apimeta.IsStatusConditionTrue(instance.Status.Conditions, computev1alpha.InstanceProgrammed) { + currentReplicas++ } if apimeta.IsStatusConditionTrue(instance.Status.Conditions, computev1alpha.InstanceReady) { readyReplicas++ } } - return currentReplicas, readyReplicas, quotaBlockedReplicas, nil + return currentReplicas, updatedReplicas, readyReplicas, quotaBlockedReplicas, nil } // writeStatusToKarmada copies the WorkloadDeployment status to the matching From 184ae3bdcad9bc548a2b88be836f8b1e99c39d48 Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Tue, 2 Jun 2026 20:58:50 -0500 Subject: [PATCH 12/13] feat: surface instance blocking reasons and claim instanceType vCPU/memory Two Instance-controller correctness changes: - Blocking-reason rollup: surface the most specific provider sub-condition (ImageUnavailable, InstanceCrashing, ConfigurationError, Provisioning) and its message onto the Instance Ready condition instead of a generic "Instance has not been programmed", so e.g. an image-pull failure reads as ImageUnavailable with the real message. Adds the reason constants and ranks them in the blocking-reason priority. - Quota sizing: resolve vCPU/memory for instanceType-sized instances from a new instanceTypeCatalog (datumcloud/d1-standard-2 = 1 vCPU / 2 GiB) so the quota ResourceClaim requests vcpus + memory, not just instance count. Explicit container limits / instance requests still take precedence. Co-Authored-By: Claude Opus 4.8 (1M context) --- api/v1alpha/instance_types.go | 22 + internal/controller/instance_controller.go | 198 +++++- .../controller/instance_controller_test.go | 599 ++++++++++++++++++ 3 files changed, 799 insertions(+), 20 deletions(-) diff --git a/api/v1alpha/instance_types.go b/api/v1alpha/instance_types.go index 3f61955b..457537a4 100644 --- a/api/v1alpha/instance_types.go +++ b/api/v1alpha/instance_types.go @@ -463,6 +463,28 @@ const ( // InstanceReadyReasonAvailable indicates that the instance is available InstanceReadyReasonAvailable = "Available" + // InstanceReadyReasonImageUnavailable indicates the provider could not pull + // the instance image (bad name, missing credentials, registry unreachable). + // This matches the reason written by translateWaitingReason in the unikraft + // provider when the container enters an image-pull waiting state. + InstanceReadyReasonImageUnavailable = "ImageUnavailable" + + // InstanceReadyReasonInstanceCrashing indicates the instance process started + // but is repeatedly exiting and being restarted (CrashLoopBackOff in the + // underlying runtime). This is user-actionable: the application itself is + // failing, not the platform. + InstanceReadyReasonInstanceCrashing = "InstanceCrashing" + + // InstanceReadyReasonConfigurationError indicates the runtime rejected the + // instance configuration before the process could start (e.g. invalid env + // variable injection, missing device). User must correct the workload spec. + InstanceReadyReasonConfigurationError = "ConfigurationError" + + // InstanceReadyReasonProvisioning indicates the instance runtime is still + // setting up the execution environment (container being created, image being + // unpacked). This is a transient, non-actionable state. + InstanceReadyReasonProvisioning = "Provisioning" + // InstanceAvailableReasonStopped indicates that the instance is stopped InstanceAvailableReasonStopped = "Stopped" diff --git a/internal/controller/instance_controller.go b/internal/controller/instance_controller.go index b2b0af1f..23b2c46b 100644 --- a/internal/controller/instance_controller.go +++ b/internal/controller/instance_controller.go @@ -96,6 +96,30 @@ const ( reasonNetworkFailedToCreate = "NetworkFailedToCreate" ) +// instanceTypeResources holds the vCPU and memory for a named instance type. +type instanceTypeResources struct { + // CPUMillicores is the number of CPU millicores (1000 = 1 vCPU). + CPUMillicores int64 + // MemoryMiB is the amount of RAM in mebibytes. + MemoryMiB int64 +} + +// instanceTypeCatalog maps platform instance type names to their resource +// dimensions used for quota accounting when the instance spec carries only an +// instanceType and no explicit container Limits or instance-level Requests. +// +// These are the platform-declared quota sizes for the instance type, not a +// derivation of any infra provider's machine type. (infra-provider-gcp separately +// maps datumcloud/d1-standard-2 to the GCP n2-standard-2 machine type for VM +// provisioning; that mapping does not define the quota size here.) When new +// instance types are added, add them here with their vCPU/memory values. +var instanceTypeCatalog = map[string]instanceTypeResources{ + "datumcloud/d1-standard-2": { + CPUMillicores: 1000, // 1 vCPU + MemoryMiB: 2048, // 2 GiB + }, +} + // Quota-pending requeue backoff. The instance controller is normally re-queued by // the ResourceClaim watch when a claim is granted, but that grant event lives on // the project control plane and can be missed (informer engagement races, watch @@ -852,8 +876,24 @@ func (r *InstanceReconciler) classifyCreateError( }, fmt.Errorf("failed creating resource claim: %w", err) } +// resolveInstanceResources determines the vCPU and memory amounts to claim +// for an instance. Explicit sizing always takes precedence over the instance +// type catalog, so a workload that overrides container limits is accounted at +// its actual resource footprint rather than the catalog baseline. +// +// Precedence order: +// 1. Sandbox container Limits (sum across all containers) — all containers +// must have both cpu and memory Limits for this path to succeed. +// 2. Instance-level Resources.Requests — both cpu and memory must be present. +// 3. instanceTypeCatalog lookup by instanceType — used for the common case +// where a workload is sized only by instanceType with no explicit limits. +// +// Returns (0, 0, false) when none of the above yield a complete sizing, so +// the caller falls back to claiming only the instance count. func resolveInstanceResources(instance *computev1alpha.Instance) (cpuMillicores int64, memMiB int64, resolved bool) { rt := instance.Spec.Runtime + + // Path 1: explicit per-container Limits — most specific, wins if fully set. if rt.Sandbox != nil { var totalCPU resource.Quantity var totalMem resource.Quantity @@ -872,18 +912,59 @@ func resolveInstanceResources(instance *computev1alpha.Instance) (cpuMillicores totalCPU.Add(cpu) totalMem.Add(mem) } - if !allSet || len(rt.Sandbox.Containers) == 0 { - return 0, 0, false + if allSet && len(rt.Sandbox.Containers) > 0 { + return totalCPU.MilliValue(), totalMem.Value() / (1024 * 1024), true } - return totalCPU.MilliValue(), totalMem.Value() / (1024 * 1024), true + // Containers exist but limits are incomplete — fall through to catalog + // rather than returning false, because instanceType is still set. } + // Path 2: instance-level resource requests. cpu, hasCPU := rt.Resources.Requests[corev1.ResourceCPU] mem, hasMem := rt.Resources.Requests[corev1.ResourceMemory] - if !hasCPU || !hasMem { - return 0, 0, false + if hasCPU && hasMem { + return cpu.MilliValue(), mem.Value() / (1024 * 1024), true + } + + // Path 3: instanceType catalog — handles the typical production case where + // instanceType is the only sizing signal and no explicit limits are set. + if rt.Resources.InstanceType != "" { + if spec, ok := instanceTypeCatalog[rt.Resources.InstanceType]; ok { + return spec.CPUMillicores, spec.MemoryMiB, true + } + } + + return 0, 0, false +} + +// instanceBlockingReasonPriority ranks Instance blocking reasons so the most +// specific, user-actionable cause wins when several conditions are unsatisfied. +// Higher numbers are more specific. Reasons absent from the table rank 0. +// +// 0 - unknown/default +// 1 - Provisioning (transient runtime startup) +// 3 - PendingQuota (operator action may be needed) +// 5 - ImageUnavailable / InstanceCrashing / ConfigurationError +// (hard runtime error, user-actionable) +// 7 - NetworkFailedToCreate (hard infra error) +func instanceBlockingReasonPriority(reason string) int { + switch reason { + case computev1alpha.InstanceReadyReasonProvisioning: + return 1 + case computev1alpha.InstanceProgrammedReasonPendingQuota: + return 3 + case computev1alpha.InstanceReadyReasonImageUnavailable, + computev1alpha.InstanceReadyReasonInstanceCrashing, + computev1alpha.InstanceReadyReasonConfigurationError: + // Hard runtime errors are user-actionable (wrong image, crashing app, bad + // config) and rank highest among non-infra reasons so they are not buried + // under transient startup/quota reasons. + return 5 + case reasonNetworkFailedToCreate: + return 7 + default: + return 0 } - return cpu.MilliValue(), mem.Value() / (1024 * 1024), true } // networkFailureChecker is a function that checks if a network creation failure @@ -967,16 +1048,88 @@ func (r *InstanceReconciler) reconcileInstanceReadyCondition( if programmedCondition == nil || programmedCondition.Status != metav1.ConditionTrue { logger.Info("instance is not programmed", "instance", instance.Name) - readyCondition.Status = metav1.ConditionFalse - readyCondition.Reason = computev1alpha.InstanceProgrammedReasonPendingProgramming - if programmedCondition != nil && programmedCondition.Reason != pendingReason { - readyCondition.Reason = programmedCondition.Reason + // Surface the most specific provider sub-condition rather than a generic + // "Instance has not been programmed". A provider reason like + // ImageUnavailable (set on the Available condition while Programmed is + // still Unknown) must surface on Ready with its actionable message. + // + // Two tiers are tracked: + // - bestKnown: the best candidate from the priority table (ranked 1-7). + // - fallback: the Programmed condition's own reason/message when it has + // one but it is not in the priority table (e.g. a provider + // writes a custom Programmed reason otherwise unknown to + // this controller). Preserves Programmed.Reason → Ready.Reason + // pass-through behavior. + type candidate struct { + status metav1.ConditionStatus + reason string + message string + priority int + } + + // Generic default — used only when nothing better is found. + fallbackCandidate := candidate{ + status: metav1.ConditionFalse, + reason: computev1alpha.InstanceProgrammedReasonPendingProgramming, + message: msgNotProgrammed, + priority: -1, + } + // Promote the Programmed condition's own reason as a fallback when it is + // more specific than PendingProgramming/Pending but not in the priority + // table. Preserves pass-through for provider-written Programmed reasons. + if programmedCondition != nil && programmedCondition.Reason != pendingReason && + programmedCondition.Reason != computev1alpha.InstanceProgrammedReasonPendingProgramming { + fallbackCandidate = candidate{ + status: programmedCondition.Status, + reason: programmedCondition.Reason, + message: programmedCondition.Message, + priority: 0, + } + } + + best := fallbackCandidate + consider := func(status metav1.ConditionStatus, reason, message string) { + // A generic "Pending" reason carries no actionable signal; skip it so + // it cannot displace an already-set specific reason from the provider. + if reason == pendingReason { + return + } + p := instanceBlockingReasonPriority(reason) + if p > best.priority { + best = candidate{status: status, reason: reason, message: message, priority: p} + } } - readyCondition.Message = msgNotProgrammed - if programmedCondition != nil && programmedCondition.Status != metav1.ConditionUnknown { - readyCondition.Message = programmedCondition.Message + // Sub-conditions set by the provider (e.g. Available=Unknown/ImageUnavailable) + // may be more specific than the Programmed condition. Consult each one so + // the highest-priority reason wins, regardless of which condition carries it. + for _, cond := range instance.Status.Conditions { + if cond.Status == metav1.ConditionTrue { + // Satisfied conditions are not blocking; skip them. + continue + } + switch cond.Type { + case computev1alpha.InstanceProgrammed, + computev1alpha.InstanceReady, + computev1alpha.InstanceQuotaGranted: + // InstanceProgrammed is handled below; InstanceReady is being set + // now. InstanceQuotaGranted is a gate-level signal evaluated before + // this branch is reached — including it here would let a transient + // PendingEvaluation reason displace the generic not-programmed + // fallback when no provider sub-condition is set yet. + continue + } + consider(cond.Status, cond.Reason, cond.Message) } + // Also let the Programmed condition itself compete through the priority table + // in case it carries a known reason (e.g. PendingQuota). + if programmedCondition != nil { + consider(programmedCondition.Status, programmedCondition.Reason, programmedCondition.Message) + } + + readyCondition.Status = best.status + readyCondition.Reason = best.reason + readyCondition.Message = best.message return apimeta.SetStatusCondition(&instance.Status.Conditions, *readyCondition), nil } @@ -987,16 +1140,21 @@ func (r *InstanceReconciler) reconcileInstanceReadyCondition( if availableCondition == nil || availableCondition.Status != metav1.ConditionTrue { logger.Info("instance is not available", "instance", instance.Name) - readyCondition.Status = metav1.ConditionFalse - readyCondition.Reason = pendingReason + // Propagate the Available condition's reason and message directly — + // including when the status is Unknown — so provider-set reasons like + // ImageUnavailable surface on Ready rather than a generic message. + readyStatus := metav1.ConditionFalse + readyReason := pendingReason + readyMessage := "Instance is not available" if availableCondition != nil && availableCondition.Reason != pendingReason { - readyCondition.Reason = availableCondition.Reason + readyStatus = availableCondition.Status + readyReason = availableCondition.Reason + readyMessage = availableCondition.Message } - readyCondition.Message = "Instance is not available" - if availableCondition != nil && availableCondition.Status != metav1.ConditionUnknown { - readyCondition.Message = availableCondition.Message - } + readyCondition.Status = readyStatus + readyCondition.Reason = readyReason + readyCondition.Message = readyMessage return apimeta.SetStatusCondition(&instance.Status.Conditions, *readyCondition), nil } diff --git a/internal/controller/instance_controller_test.go b/internal/controller/instance_controller_test.go index 331cae15..6674e701 100644 --- a/internal/controller/instance_controller_test.go +++ b/internal/controller/instance_controller_test.go @@ -8,8 +8,10 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" apimeta "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" @@ -1622,3 +1624,600 @@ func TestQuotaPendingRequeueAfter(t *testing.T) { }) } } + + +// TestReconcileInstanceReadyCondition_ProviderSubConditionSurfacing verifies +// that provider-set sub-condition reasons (e.g. ImageUnavailable written by the +// unikraft provider onto the Running condition) surface on Ready with both the +// reason AND the message preserved — even when the sub-condition status is +// Unknown (the normal state for a retriable image-pull failure). +// +// This is the primary regression-prevention test for the "generic message +// discards actionable reason" bug described in the status-blocking-reason RFC. +func TestReconcileInstanceReadyCondition_ProviderSubConditionSurfacing(t *testing.T) { + // These messages mirror the exact strings that translateWaitingReason in the + // unikraft provider writes. Both the reason AND the message must reach Ready. + const ( + msgImageUnavailable = "The instance image could not be pulled" + msgInstanceCrashing = "The instance is repeatedly failing to start" + msgConfigError = "The instance could not be started due to a configuration error" + msgProvisioning = "Instance is provisioning" + msgProgrammingInProgress = "Instance is being programmed" + ) + + noGates := func(inst *computev1alpha.Instance) *computev1alpha.Instance { return inst } + withQuotaGranted := func(inst *computev1alpha.Instance) *computev1alpha.Instance { + inst.Status.Conditions = append(inst.Status.Conditions, metav1.Condition{ + Type: computev1alpha.InstanceQuotaGranted, + Status: metav1.ConditionTrue, + Reason: computev1alpha.InstanceQuotaGrantedReasonQuotaAvailable, + Message: "Quota allocated", + }) + return inst + } + + tests := []struct { + name string + instance *computev1alpha.Instance + wantStatus metav1.ConditionStatus + wantReason string + wantMessage string + }{ + { + // The key scenario from the design: provider writes Running=Unknown/ + // ImageUnavailable while Programmed is still Unknown/ProgrammingInProgress. + // Ready must carry ImageUnavailable + the actionable message, NOT the + // generic "Instance has not been programmed". + name: "image_pull_failure_surfaces_on_ready", + instance: withQuotaGranted(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceProgrammedReasonProgrammingInProgress, + Message: msgProgrammingInProgress, + }, + { + // Provider sets Running=Unknown/ImageUnavailable when the + // container enters an image-pull waiting state. + Type: computev1alpha.InstanceAvailable, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonImageUnavailable, + Message: msgImageUnavailable, + }, + }, + }, + }), + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceReadyReasonImageUnavailable, + wantMessage: msgImageUnavailable, + }, + { + // Demonstrate the OLD (broken) behavior: if we used the pre-fix logic, + // the generic message would be emitted instead. This case would have + // FAILED before the fix, proving the test catches the regression. + // + // The old code: "if programmedCondition.Status != Unknown { copy message }" + // — since Programmed IS Unknown, message was locked to msgNotProgrammed. + // The test now asserts the NEW, correct output. + name: "old_behavior_generic_message_would_fail_this_assertion", + instance: withQuotaGranted(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceProgrammedReasonProgrammingInProgress, + Message: msgProgrammingInProgress, + }, + { + Type: computev1alpha.InstanceAvailable, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonImageUnavailable, + Message: msgImageUnavailable, + }, + }, + }, + }), + // OLD code would produce: wantReason="PendingProgramming", wantMessage=msgNotProgrammed. + // The correct new behavior surfaces the actionable reason+message instead. + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceReadyReasonImageUnavailable, + wantMessage: msgImageUnavailable, + }, + { + // When both a transient Provisioning and ImageUnavailable are present, + // ImageUnavailable (priority 5) must win over Provisioning (priority 1). + name: "image_unavailable_beats_transient_provisioning", + instance: noGates(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonProvisioning, + Message: msgProvisioning, + }, + { + Type: computev1alpha.InstanceAvailable, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonImageUnavailable, + Message: msgImageUnavailable, + }, + }, + }, + }), + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceReadyReasonImageUnavailable, + wantMessage: msgImageUnavailable, + }, + { + // When no specific provider sub-condition exists but Programmed carries + // a specific reason (ProgrammingInProgress), that reason should + // pass-through to Ready. The generic msgNotProgrammed fallback is only + // used when Programmed is absent or carries only a generic "Pending" reason. + name: "programmed_in_progress_passes_through_when_no_provider_sub_condition", + instance: noGates(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceProgrammedReasonProgrammingInProgress, + Message: msgProgrammingInProgress, + }, + }, + }, + }), + // ProgrammingInProgress is more specific than PendingProgramming and + // passes through from Programmed → Ready. + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceProgrammedReasonProgrammingInProgress, + wantMessage: msgProgrammingInProgress, + }, + { + // True generic fallback: no Programmed condition at all. The default + // PendingProgramming/msgNotProgrammed must be emitted. + name: "generic_fallback_when_programmed_condition_absent", + instance: noGates(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + }, + }), + wantStatus: metav1.ConditionFalse, + wantReason: computev1alpha.InstanceProgrammedReasonPendingProgramming, + wantMessage: msgNotProgrammed, + }, + { + // InstanceCrashing: terminal-ish (not retried indefinitely by the user, + // they must fix the app). Status=Unknown from provider → Ready=Unknown. + name: "instance_crashing_surfaces_on_ready", + instance: withQuotaGranted(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceProgrammedReasonProgrammingInProgress, + Message: msgProgrammingInProgress, + }, + { + Type: computev1alpha.InstanceAvailable, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonInstanceCrashing, + Message: msgInstanceCrashing, + }, + }, + }, + }), + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceReadyReasonInstanceCrashing, + wantMessage: msgInstanceCrashing, + }, + { + // ConfigurationError: provider could not start the container due to a + // spec/config issue. User must correct the workload. + name: "configuration_error_surfaces_on_ready", + instance: withQuotaGranted(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceProgrammedReasonProgrammingInProgress, + Message: msgProgrammingInProgress, + }, + { + Type: computev1alpha.InstanceAvailable, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonConfigurationError, + Message: msgConfigError, + }, + }, + }, + }), + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceReadyReasonConfigurationError, + wantMessage: msgConfigError, + }, + { + // When Programmed=True but Running=Unknown/ImageUnavailable, the + // running-not-true branch must also propagate the provider reason+message. + name: "image_unavailable_on_running_condition_programmed_true", + instance: withQuotaGranted(&computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: testInstanceName, + Namespace: testDefaultNamespace, + Generation: 1, + }, + Status: computev1alpha.InstanceStatus{ + Conditions: []metav1.Condition{ + { + Type: computev1alpha.InstanceProgrammed, + Status: metav1.ConditionTrue, + Reason: computev1alpha.InstanceProgrammedReasonProgrammed, + Message: msgInstanceProgrammed, + }, + { + Type: computev1alpha.InstanceAvailable, + Status: metav1.ConditionUnknown, + Reason: computev1alpha.InstanceReadyReasonImageUnavailable, + Message: msgImageUnavailable, + }, + }, + }, + }), + wantStatus: metav1.ConditionUnknown, + wantReason: computev1alpha.InstanceReadyReasonImageUnavailable, + wantMessage: msgImageUnavailable, + }, + } + + noNetworkFailure := func(_ context.Context, _ client.Client, _ *computev1alpha.Instance) (bool, string, error) { + return false, "", nil + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := &InstanceReconciler{} + _, err := r.reconcileInstanceReadyCondition(context.Background(), nil, tt.instance, noNetworkFailure) + require.NoError(t, err) + + ready := apimeta.FindStatusCondition(tt.instance.Status.Conditions, computev1alpha.InstanceReady) + require.NotNil(t, ready, "Ready condition must be set") + assert.Equal(t, tt.wantStatus, ready.Status, "Ready.Status mismatch") + assert.Equal(t, tt.wantReason, ready.Reason, "Ready.Reason mismatch") + assert.Equal(t, tt.wantMessage, ready.Message, "Ready.Message mismatch") + }) + } +} + +// TestResolveInstanceResources verifies the three-tier sizing precedence: +// explicit container Limits > instance-level Requests > instanceType catalog. +func TestResolveInstanceResources(t *testing.T) { + // d1Standard2 is the canonical catalog entry for datumcloud/d1-standard-2 + // (1 vCPU = 1000 millicores, 2 GiB = 2048 MiB) — the platform-declared quota + // size for the instance type. + const ( + d1CPUMillicores = int64(1000) + d1MemMiB = int64(2048) + ) + + cpu500m := resource.MustParse("500m") + cpu1 := resource.MustParse("1") + mem256Mi := resource.MustParse("256Mi") + mem512Mi := resource.MustParse("512Mi") + + makeContainerResources := func(cpu, mem resource.Quantity) *computev1alpha.ContainerResourceRequirements { + return &computev1alpha.ContainerResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: cpu, + corev1.ResourceMemory: mem, + }, + } + } + + tests := []struct { + name string + instance *computev1alpha.Instance + wantCPU int64 + wantMem int64 + wantResolved bool + }{ + { + // Common production case: instanceType only, no explicit limits. + // resolveInstanceResources must consult the catalog and return the + // d1-standard-2 values so vcpus + memory are included in the claim. + name: "instanceType only: d1-standard-2 resolves from catalog", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{ + InstanceType: "datumcloud/d1-standard-2", + }, + }, + }, + }, + wantCPU: d1CPUMillicores, + wantMem: d1MemMiB, + wantResolved: true, + }, + { + // Explicit container Limits take precedence over the catalog so that + // a workload with custom sizing is accounted at its actual footprint. + name: "explicit container limits override catalog", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{ + InstanceType: "datumcloud/d1-standard-2", + }, + Sandbox: &computev1alpha.SandboxRuntime{ + Containers: []computev1alpha.SandboxContainer{ + { + Name: "app", + Image: "test/image:latest", + Resources: makeContainerResources(cpu500m, mem256Mi), + }, + { + Name: "sidecar", + Image: "test/sidecar:latest", + Resources: makeContainerResources(cpu500m, mem256Mi), + }, + }, + }, + }, + }, + }, + // Two containers each contributing 500m CPU + 256 MiB → 1000m + 512 MiB. + wantCPU: 1000, + wantMem: 512, + wantResolved: true, + }, + { + // A single container with full cpu+memory Limits; no instanceType needed. + name: "single container limits, no instanceType", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Sandbox: &computev1alpha.SandboxRuntime{ + Containers: []computev1alpha.SandboxContainer{ + { + Name: "app", + Image: "test/image:latest", + Resources: makeContainerResources(cpu1, mem512Mi), + }, + }, + }, + }, + }, + }, + wantCPU: 1000, + wantMem: 512, + wantResolved: true, + }, + { + // Instance-level Requests (no sandbox, no instanceType) use path 2. + name: "instance-level resources.requests resolve correctly", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{ + Requests: corev1.ResourceList{ + corev1.ResourceCPU: cpu1, + corev1.ResourceMemory: mem512Mi, + }, + }, + }, + }, + }, + wantCPU: 1000, + wantMem: 512, + wantResolved: true, + }, + { + // An unknown instanceType with no explicit sizing must not fabricate + // values; the caller falls back to claiming instance count only. + name: "unknown instanceType, no explicit limits: unresolved", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{ + InstanceType: "datumcloud/unknown-type-99", + }, + }, + }, + }, + wantCPU: 0, + wantMem: 0, + wantResolved: false, + }, + { + // Empty instanceType and no explicit sizing: unresolved. + name: "empty instanceType, nothing explicit: unresolved", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{}, + }, + }, + }, + wantCPU: 0, + wantMem: 0, + wantResolved: false, + }, + { + // Sandbox containers without any Limits fall through to the catalog + // when an instanceType is set — partial container specs must not block + // catalog resolution. + name: "sandbox containers without limits fall through to catalog", + instance: &computev1alpha.Instance{ + Spec: computev1alpha.InstanceSpec{ + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{ + InstanceType: "datumcloud/d1-standard-2", + }, + Sandbox: &computev1alpha.SandboxRuntime{ + Containers: []computev1alpha.SandboxContainer{ + { + Name: "app", + Image: "test/image:latest", + // No Resources.Limits set — common for UKC workloads. + }, + }, + }, + }, + }, + }, + wantCPU: d1CPUMillicores, + wantMem: d1MemMiB, + wantResolved: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cpu, mem, resolved := resolveInstanceResources(tt.instance) + assert.Equal(t, tt.wantResolved, resolved, "resolved mismatch") + assert.Equal(t, tt.wantCPU, cpu, "cpuMillicores mismatch") + assert.Equal(t, tt.wantMem, mem, "memMiB mismatch") + }) + } +} + +// TestReconcileQuotaClaim_RequestsIncludeVCPUsAndMemory confirms that when an +// instance is sized by instanceType alone (the typical production shape), the +// ResourceClaim created by reconcileQuotaClaim includes vcpus and memory +// requests in addition to the instance count, so the AllowanceBuckets are fed. +func TestReconcileQuotaClaim_RequestsIncludeVCPUsAndMemory(t *testing.T) { + const ( + clusterName = "test-project" + namespace = "default" + instanceName = "claim-resources-test" + ) + + claimName := instanceQuotaClaimNamePrefix + instanceName + + s := newTestScheme(t) + + // Instance sized by instanceType only — no container limits, no explicit + // instance-level requests. This is the common production workload shape. + instance := &computev1alpha.Instance{ + ObjectMeta: metav1.ObjectMeta{ + Name: instanceName, + Namespace: namespace, + Finalizers: []string{instanceQuotaFinalizer, instanceControllerFinalizer}, + OwnerReferences: []metav1.OwnerReference{ + { + APIVersion: testComputeAPIVersion, + Kind: kindWorkloadDeploymentTest, + Name: "owner-deployment", + UID: testUIDString, + Controller: func() *bool { b := true; return &b }(), + }, + }, + }, + Spec: computev1alpha.InstanceSpec{ + Controller: &computev1alpha.InstanceController{ + SchedulingGates: []computev1alpha.SchedulingGate{ + {Name: instancecontrol.QuotaSchedulingGate.String()}, + }, + }, + Runtime: computev1alpha.InstanceRuntimeSpec{ + Resources: computev1alpha.InstanceRuntimeResources{ + // No Requests, no container Limits — catalog must supply the values. + InstanceType: "datumcloud/d1-standard-2", + }, + }, + NetworkInterfaces: []computev1alpha.InstanceNetworkInterface{}, + }, + } + + deployment := &computev1alpha.WorkloadDeployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: "owner-deployment", + Namespace: namespace, + UID: testUIDString, + }, + } + + projectClient := fake.NewClientBuilder(). + WithScheme(s). + WithObjects(instance, deployment). + WithStatusSubresource(&computev1alpha.Instance{}). + Build() + + quotaClient := fake.NewClientBuilder(). + WithScheme(s). + WithStatusSubresource("av1alpha1.ResourceClaim{}). + Build() + + qm := quota.New(nil) + qm.StoreClient(clusterName, quotaClient) + + r := &InstanceReconciler{ + mgr: &fakeMCManager{clusters: map[string]cluster.Cluster{clusterName: newFakeCluster(projectClient)}}, + scheme: s, + quotaClientManager: qm, + edgeClusterName: testEdgeClusterName, + projectIDForInstance: func(_ context.Context, cn multicluster.ClusterName, _ *computev1alpha.Instance) (string, error) { + return string(cn), nil + }, + recorder: &record.FakeRecorder{}, + } + r.finalizers = finalizer.NewFinalizers() + require.NoError(t, r.finalizers.Register(instanceControllerFinalizer, r)) + + _, err := r.Reconcile(context.Background(), mcreconcile.Request{ + Request: reconcile.Request{NamespacedName: types.NamespacedName{Namespace: namespace, Name: instanceName}}, + ClusterName: clusterName, + }) + require.NoError(t, err) + + // Verify the created ResourceClaim carries vcpus and memory requests. + var createdClaim quotav1alpha1.ResourceClaim + require.NoError(t, quotaClient.Get(context.Background(), types.NamespacedName{Namespace: namespace, Name: claimName}, &createdClaim)) + + byType := make(map[string]int64, len(createdClaim.Spec.Requests)) + for _, req := range createdClaim.Spec.Requests { + byType[req.ResourceType] = req.Amount + } + + assert.Equal(t, int64(1), byType[quotaResourceTypeInstances], "instance count must be 1") + assert.Equal(t, int64(1000), byType["compute.datumapis.com/vcpus"], + "d1-standard-2 must claim 1000 millicores (1 vCPU)") + assert.Equal(t, int64(2048), byType["compute.datumapis.com/memory"], + "d1-standard-2 must claim 2048 MiB (2 GiB)") +} From a67b32c3221d0b86184ffcd4a893f32f0e388bca Mon Sep 17 00:00:00 2001 From: Scot Wells Date: Thu, 4 Jun 2026 16:19:50 -0500 Subject: [PATCH 13/13] fix(lint): extract goconst constants and gofmt the bundled controller tests Make the cherry-picked instanceType-sizing and blocking-reason tests lint-clean: hoist the repeated "datumcloud/d1-standard-2", "app", and "test/image:latest" literals into named constants (goconst) and apply gofmt. No behavior change. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/controller/instance_controller.go | 6 +++- .../controller/instance_controller_test.go | 36 +++++++++++-------- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/internal/controller/instance_controller.go b/internal/controller/instance_controller.go index 23b2c46b..86ab5572 100644 --- a/internal/controller/instance_controller.go +++ b/internal/controller/instance_controller.go @@ -96,6 +96,10 @@ const ( reasonNetworkFailedToCreate = "NetworkFailedToCreate" ) +// instanceTypeD1Standard2 is the platform instance type name for the +// 1 vCPU / 2 GiB size used as the catalog baseline for quota accounting. +const instanceTypeD1Standard2 = "datumcloud/d1-standard-2" + // instanceTypeResources holds the vCPU and memory for a named instance type. type instanceTypeResources struct { // CPUMillicores is the number of CPU millicores (1000 = 1 vCPU). @@ -114,7 +118,7 @@ type instanceTypeResources struct { // provisioning; that mapping does not define the quota size here.) When new // instance types are added, add them here with their vCPU/memory values. var instanceTypeCatalog = map[string]instanceTypeResources{ - "datumcloud/d1-standard-2": { + instanceTypeD1Standard2: { CPUMillicores: 1000, // 1 vCPU MemoryMiB: 2048, // 2 GiB }, diff --git a/internal/controller/instance_controller_test.go b/internal/controller/instance_controller_test.go index 6674e701..0d55dfd5 100644 --- a/internal/controller/instance_controller_test.go +++ b/internal/controller/instance_controller_test.go @@ -1625,6 +1625,12 @@ func TestQuotaPendingRequeueAfter(t *testing.T) { } } +// Shared literals for the instance-sizing / blocking-reason tests below +// (extracted to satisfy goconst). +const ( + testContainerName = "app" + testContainerImage = "test/image:latest" +) // TestReconcileInstanceReadyCondition_ProviderSubConditionSurfacing verifies // that provider-set sub-condition reasons (e.g. ImageUnavailable written by the @@ -1950,11 +1956,11 @@ func TestResolveInstanceResources(t *testing.T) { } tests := []struct { - name string - instance *computev1alpha.Instance - wantCPU int64 - wantMem int64 - wantResolved bool + name string + instance *computev1alpha.Instance + wantCPU int64 + wantMem int64 + wantResolved bool }{ { // Common production case: instanceType only, no explicit limits. @@ -1965,7 +1971,7 @@ func TestResolveInstanceResources(t *testing.T) { Spec: computev1alpha.InstanceSpec{ Runtime: computev1alpha.InstanceRuntimeSpec{ Resources: computev1alpha.InstanceRuntimeResources{ - InstanceType: "datumcloud/d1-standard-2", + InstanceType: instanceTypeD1Standard2, }, }, }, @@ -1982,13 +1988,13 @@ func TestResolveInstanceResources(t *testing.T) { Spec: computev1alpha.InstanceSpec{ Runtime: computev1alpha.InstanceRuntimeSpec{ Resources: computev1alpha.InstanceRuntimeResources{ - InstanceType: "datumcloud/d1-standard-2", + InstanceType: instanceTypeD1Standard2, }, Sandbox: &computev1alpha.SandboxRuntime{ Containers: []computev1alpha.SandboxContainer{ { - Name: "app", - Image: "test/image:latest", + Name: testContainerName, + Image: testContainerImage, Resources: makeContainerResources(cpu500m, mem256Mi), }, { @@ -2015,8 +2021,8 @@ func TestResolveInstanceResources(t *testing.T) { Sandbox: &computev1alpha.SandboxRuntime{ Containers: []computev1alpha.SandboxContainer{ { - Name: "app", - Image: "test/image:latest", + Name: testContainerName, + Image: testContainerImage, Resources: makeContainerResources(cpu1, mem512Mi), }, }, @@ -2087,13 +2093,13 @@ func TestResolveInstanceResources(t *testing.T) { Spec: computev1alpha.InstanceSpec{ Runtime: computev1alpha.InstanceRuntimeSpec{ Resources: computev1alpha.InstanceRuntimeResources{ - InstanceType: "datumcloud/d1-standard-2", + InstanceType: instanceTypeD1Standard2, }, Sandbox: &computev1alpha.SandboxRuntime{ Containers: []computev1alpha.SandboxContainer{ { - Name: "app", - Image: "test/image:latest", + Name: testContainerName, + Image: testContainerImage, // No Resources.Limits set — common for UKC workloads. }, }, @@ -2158,7 +2164,7 @@ func TestReconcileQuotaClaim_RequestsIncludeVCPUsAndMemory(t *testing.T) { Runtime: computev1alpha.InstanceRuntimeSpec{ Resources: computev1alpha.InstanceRuntimeResources{ // No Requests, no container Limits — catalog must supply the values. - InstanceType: "datumcloud/d1-standard-2", + InstanceType: instanceTypeD1Standard2, }, }, NetworkInterfaces: []computev1alpha.InstanceNetworkInterface{},