diff --git a/cmd/thv-operator/api/v1alpha1/mcpserver_types.go b/cmd/thv-operator/api/v1alpha1/mcpserver_types.go index a540c819b..ec44474a7 100644 --- a/cmd/thv-operator/api/v1alpha1/mcpserver_types.go +++ b/cmd/thv-operator/api/v1alpha1/mcpserver_types.go @@ -107,6 +107,12 @@ type MCPServerSpec struct { // +optional Resources ResourceRequirements `json:"resources,omitempty"` + // ProxyRunnerResources defines the resource requirements for the ToolHive proxy runner container. + // The proxy runner is the infrastructure container that manages the MCP server container. + // If not specified, defaults to 50m/200m CPU and 64Mi/256Mi memory (lighter than MCP server defaults). + // +optional + ProxyRunnerResources ResourceRequirements `json:"proxyRunnerResources,omitempty"` + // Secrets are references to secrets to mount in the MCP server container // +optional Secrets []SecretRef `json:"secrets,omitempty"` diff --git a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go index 693152c3b..5561764a6 100644 --- a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go @@ -1146,6 +1146,7 @@ func (in *MCPServerSpec) DeepCopyInto(out *MCPServerSpec) { copy(*out, *in) } out.Resources = in.Resources + out.ProxyRunnerResources = in.ProxyRunnerResources if in.Secrets != nil { in, out := &in.Secrets, &out.Secrets *out = make([]SecretRef, len(*in)) diff --git a/cmd/thv-operator/controllers/mcpserver_controller.go b/cmd/thv-operator/controllers/mcpserver_controller.go index 99660d85f..1030cbffd 100644 --- a/cmd/thv-operator/controllers/mcpserver_controller.go +++ b/cmd/thv-operator/controllers/mcpserver_controller.go @@ -18,7 +18,6 @@ import ( rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" 
"k8s.io/apimachinery/pkg/types" @@ -928,9 +927,13 @@ func (r *MCPServerReconciler) deploymentForMCPServer( defaultSA := mcpServerServiceAccountName(m.Name) serviceAccount = &defaultSA } + // Convert MCP server resources to Kubernetes format + mcpResources := resourceRequirementsForMCPServer(m) + finalPodTemplateSpec := builder. WithServiceAccount(serviceAccount). WithSecrets(m.Spec.Secrets). + WithResources(mcpResources). Build() // Add pod template patch if we have one if finalPodTemplateSpec != nil { @@ -1062,26 +1065,9 @@ func (r *MCPServerReconciler) deploymentForMCPServer( volumes = append(volumes, *authzVolume) } - // Prepare container resources - resources := corev1.ResourceRequirements{} - if m.Spec.Resources.Limits.CPU != "" || m.Spec.Resources.Limits.Memory != "" { - resources.Limits = corev1.ResourceList{} - if m.Spec.Resources.Limits.CPU != "" { - resources.Limits[corev1.ResourceCPU] = resource.MustParse(m.Spec.Resources.Limits.CPU) - } - if m.Spec.Resources.Limits.Memory != "" { - resources.Limits[corev1.ResourceMemory] = resource.MustParse(m.Spec.Resources.Limits.Memory) - } - } - if m.Spec.Resources.Requests.CPU != "" || m.Spec.Resources.Requests.Memory != "" { - resources.Requests = corev1.ResourceList{} - if m.Spec.Resources.Requests.CPU != "" { - resources.Requests[corev1.ResourceCPU] = resource.MustParse(m.Spec.Resources.Requests.CPU) - } - if m.Spec.Resources.Requests.Memory != "" { - resources.Requests[corev1.ResourceMemory] = resource.MustParse(m.Spec.Resources.Requests.Memory) - } - } + // Prepare container resources for the proxy runner container + // Note: This is separate from MCP server container resources (which are in the pod template patch) + resources := resourceRequirementsForProxyRunner(m) // Prepare deployment metadata with overrides deploymentLabels := ls @@ -1528,9 +1514,13 @@ func (r *MCPServerReconciler) deploymentNeedsUpdate( return true } + // Convert MCP server resources to Kubernetes format (same as in 
deploymentForMCPServer) + mcpResources := resourceRequirementsForMCPServer(mcpServer) + expectedPodTemplateSpec := builder. WithServiceAccount(serviceAccount). WithSecrets(mcpServer.Spec.Secrets). + WithResources(mcpResources). Build() // Find the current pod template patch in the container args @@ -1560,8 +1550,8 @@ func (r *MCPServerReconciler) deploymentNeedsUpdate( return true } - // Check if the resource requirements have changed - if !reflect.DeepEqual(container.Resources, resourceRequirementsForMCPServer(mcpServer)) { + // Check if the proxy runner container resource requirements have changed + if !reflect.DeepEqual(container.Resources, resourceRequirementsForProxyRunner(mcpServer)) { return true } } @@ -1656,28 +1646,25 @@ func serviceNeedsUpdate(service *corev1.Service, mcpServer *mcpv1alpha1.MCPServe return false } -// resourceRequirementsForMCPServer returns the resource requirements for the MCPServer +// resourceRequirementsForMCPServer returns the resource requirements for the MCP server container +// with intelligent merging of defaults and user-provided values func resourceRequirementsForMCPServer(m *mcpv1alpha1.MCPServer) corev1.ResourceRequirements { - resources := corev1.ResourceRequirements{} - if m.Spec.Resources.Limits.CPU != "" || m.Spec.Resources.Limits.Memory != "" { - resources.Limits = corev1.ResourceList{} - if m.Spec.Resources.Limits.CPU != "" { - resources.Limits[corev1.ResourceCPU] = resource.MustParse(m.Spec.Resources.Limits.CPU) - } - if m.Spec.Resources.Limits.Memory != "" { - resources.Limits[corev1.ResourceMemory] = resource.MustParse(m.Spec.Resources.Limits.Memory) - } - } - if m.Spec.Resources.Requests.CPU != "" || m.Spec.Resources.Requests.Memory != "" { - resources.Requests = corev1.ResourceList{} - if m.Spec.Resources.Requests.CPU != "" { - resources.Requests[corev1.ResourceCPU] = resource.MustParse(m.Spec.Resources.Requests.CPU) - } - if m.Spec.Resources.Requests.Memory != "" { - resources.Requests[corev1.ResourceMemory] = 
resource.MustParse(m.Spec.Resources.Requests.Memory) - } - } - return resources + defaultResources := ctrlutil.BuildDefaultResourceRequirements() + userResources := ctrlutil.BuildResourceRequirements(m.Spec.Resources) + return ctrlutil.MergeResourceRequirements(defaultResources, userResources) +} + +// resourceRequirementsForProxyRunner returns the resource requirements for the proxy runner container +// with intelligent merging of defaults and user-provided values +func resourceRequirementsForProxyRunner(m *mcpv1alpha1.MCPServer) corev1.ResourceRequirements { + // Get default proxy runner resources (lighter than MCP server defaults) + defaultProxyResources := ctrlutil.BuildDefaultProxyRunnerResourceRequirements() + + // Get user-defined proxy resources from the dedicated field + userProxyResources := ctrlutil.BuildResourceRequirements(m.Spec.ProxyRunnerResources) + + // Merge defaults with user-defined values + return ctrlutil.MergeResourceRequirements(defaultProxyResources, userProxyResources) } // mcpServerServiceAccountName returns the service account name for the mcp server diff --git a/cmd/thv-operator/controllers/mcpserver_deployment_update_test.go b/cmd/thv-operator/controllers/mcpserver_deployment_update_test.go new file mode 100644 index 000000000..20f51f797 --- /dev/null +++ b/cmd/thv-operator/controllers/mcpserver_deployment_update_test.go @@ -0,0 +1,292 @@ +package controllers + +import ( + "context" + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" + ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil" +) + +func 
TestMCPServerReconciler_deploymentNeedsUpdate_Resources(t *testing.T) { + t.Parallel() + + // Register scheme + s := scheme.Scheme + _ = mcpv1alpha1.AddToScheme(s) + + // Create fake client + fakeClient := fake.NewClientBuilder().WithScheme(s).Build() + + reconciler := &MCPServerReconciler{ + Client: fakeClient, + } + + tests := []struct { + name string + existingDeployment *appsv1.Deployment + mcpServer *mcpv1alpha1.MCPServer + expectNeedsUpdate bool + description string + }{ + { + name: "no update needed - default resources match", + existingDeployment: createDeploymentWithResources( + "test-server", + // Proxy runner resources (default) + ctrlutil.BuildDefaultProxyRunnerResourceRequirements(), + // MCP server resources in pod patch (default) + ctrlutil.BuildDefaultResourceRequirements(), + ), + mcpServer: &mcpv1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-server", + Namespace: "default", + }, + Spec: mcpv1alpha1.MCPServerSpec{ + Image: "test:latest", + Transport: "stdio", + // No resources specified, so defaults will be used + }, + }, + expectNeedsUpdate: false, + description: "When resources match defaults, no update is needed", + }, + { + name: "update needed - MCP server resources changed", + existingDeployment: createDeploymentWithResources( + "test-server", + ctrlutil.BuildDefaultProxyRunnerResourceRequirements(), + ctrlutil.BuildDefaultResourceRequirements(), + ), + mcpServer: &mcpv1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-server", + Namespace: "default", + }, + Spec: mcpv1alpha1.MCPServerSpec{ + Image: "test:latest", + Transport: "stdio", + Resources: mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "1000m", + Memory: "1Gi", + }, + }, + }, + }, + expectNeedsUpdate: true, + description: "When MCP server resources change, update is needed", + }, + { + name: "update needed - proxy runner resources changed", + existingDeployment: createDeploymentWithResources( + "test-server", + 
ctrlutil.BuildDefaultProxyRunnerResourceRequirements(), + ctrlutil.BuildDefaultResourceRequirements(), + ), + mcpServer: &mcpv1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-server", + Namespace: "default", + }, + Spec: mcpv1alpha1.MCPServerSpec{ + Image: "test:latest", + Transport: "stdio", + ProxyRunnerResources: mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "300m", + Memory: "256Mi", + }, + }, + }, + }, + expectNeedsUpdate: true, + description: "When proxy runner resources change, update is needed", + }, + { + name: "no update needed - custom resources match", + existingDeployment: createDeploymentWithResources( + "test-server", + // Proxy runner with custom resources + corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("300m"), + corev1.ResourceMemory: resource.MustParse("256Mi"), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("100m"), + corev1.ResourceMemory: resource.MustParse("128Mi"), + }, + }, + // MCP server with custom resources + corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + }, + }, + ), + mcpServer: &mcpv1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-server", + Namespace: "default", + }, + Spec: mcpv1alpha1.MCPServerSpec{ + Image: "test:latest", + Transport: "stdio", + Resources: mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "2000m", + Memory: "2Gi", + }, + Requests: mcpv1alpha1.ResourceList{ + CPU: "1000m", + Memory: "1Gi", + }, + }, + ProxyRunnerResources: mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "300m", + Memory: "256Mi", + }, + Requests: mcpv1alpha1.ResourceList{ + CPU: "100m", + Memory: 
"128Mi", + }, + }, + }, + }, + expectNeedsUpdate: false, + description: "When both proxy and MCP resources match, no update is needed", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + ctx := context.Background() + needsUpdate := reconciler.deploymentNeedsUpdate(ctx, tt.existingDeployment, tt.mcpServer, "test-checksum") + + assert.Equal(t, tt.expectNeedsUpdate, needsUpdate, tt.description) + }) + } +} + +// createDeploymentWithResources creates a deployment with specified proxy and MCP server resources +func createDeploymentWithResources(name string, proxyResources, mcpResources corev1.ResourceRequirements) *appsv1.Deployment { + // Create pod template spec with service account and MCP container resources + // This matches what deploymentForMCPServer creates via PodTemplateSpecBuilder + // Note: Image is NOT included in the patch - only resources, secrets, and service account + defaultSA := mcpServerServiceAccountName(name) + podTemplate := &corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + ServiceAccountName: defaultSA, + Containers: []corev1.Container{ + { + Name: mcpContainerName, + Resources: mcpResources, + }, + }, + }, + } + + // Marshal pod template to JSON for the --k8s-pod-patch argument + podTemplateJSON, _ := json.Marshal(podTemplate) + + // Create labels for the deployment (matches labelsForMCPServer) + labels := map[string]string{ + "app": "mcpserver", + "app.kubernetes.io/name": "mcpserver", + "app.kubernetes.io/instance": name, + "toolhive": "true", + "toolhive-name": name, + } + + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: "default", + Labels: labels, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labels, + Annotations: map[string]string{ + "toolhive.stacklok.dev/runconfig-checksum": "test-checksum", + }, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: 
ctrlutil.ProxyRunnerServiceAccountName(name), + Containers: []corev1.Container{ + { + Name: "proxy-runner", + Image: "ghcr.io/stacklok/toolhive/proxyrunner:latest", + Resources: proxyResources, + Args: []string{ + "run", + "--k8s-pod-patch=" + string(podTemplateJSON), + "test:latest", + }, + Ports: []corev1.ContainerPort{ + { + ContainerPort: 8080, + Name: "http", + Protocol: corev1.ProtocolTCP, + }, + }, + Env: []corev1.EnvVar{ + {Name: "XDG_CONFIG_HOME", Value: "/tmp"}, + {Name: "HOME", Value: "/tmp"}, + {Name: "TOOLHIVE_RUNTIME", Value: "kubernetes"}, + {Name: "UNSTRUCTURED_LOGS", Value: "false"}, + }, + }, + }, + }, + }, + }, + } +} + +func TestMCPServerReconciler_deploymentNeedsUpdate_EdgeCases(t *testing.T) { + t.Parallel() + + reconciler := &MCPServerReconciler{} + ctx := context.Background() + + t.Run("nil deployment returns true", func(t *testing.T) { + t.Parallel() + mcpServer := &mcpv1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}, + Spec: mcpv1alpha1.MCPServerSpec{Image: "test:latest", Transport: "stdio"}, + } + require.True(t, reconciler.deploymentNeedsUpdate(ctx, nil, mcpServer, "checksum")) + }) + + t.Run("nil mcpServer returns true", func(t *testing.T) { + t.Parallel() + deployment := createDeploymentWithResources( + "test", + ctrlutil.BuildDefaultProxyRunnerResourceRequirements(), + ctrlutil.BuildDefaultResourceRequirements(), + ) + require.True(t, reconciler.deploymentNeedsUpdate(ctx, deployment, nil, "checksum")) + }) +} diff --git a/cmd/thv-operator/controllers/virtualmcpserver_controller.go b/cmd/thv-operator/controllers/virtualmcpserver_controller.go index 6a6058ff8..036190bc4 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_controller.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_controller.go @@ -849,7 +849,9 @@ func (r *VirtualMCPServerReconciler) containerNeedsUpdate( return true } - return false + // Check if resources have changed (important for upgrades adding default resources) 
+ expectedResources := r.buildMergedResourcesForVmcp(vmcp) + return !reflect.DeepEqual(container.Resources, expectedResources) } // deploymentMetadataNeedsUpdate checks if deployment-level metadata has changed diff --git a/cmd/thv-operator/controllers/virtualmcpserver_controller_test.go b/cmd/thv-operator/controllers/virtualmcpserver_controller_test.go index 0c9328f74..ea1f0877e 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_controller_test.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_controller_test.go @@ -1504,8 +1504,9 @@ func TestVirtualMCPServerContainerNeedsUpdate(t *testing.T) { Ports: []corev1.ContainerPort{ {ContainerPort: 4483}, }, - Args: reconciler.buildContainerArgsForVmcp(vmcp), - Env: reconciler.buildEnvVarsForVmcp(context.Background(), vmcp, []workloads.TypedWorkload{}), + Args: reconciler.buildContainerArgsForVmcp(vmcp), + Env: reconciler.buildEnvVarsForVmcp(context.Background(), vmcp, []workloads.TypedWorkload{}), + Resources: reconciler.buildMergedResourcesForVmcp(vmcp), }, }, ServiceAccountName: vmcpServiceAccountName(vmcp.Name), @@ -1898,8 +1899,9 @@ func TestVirtualMCPServerDeploymentNeedsUpdate(t *testing.T) { Ports: []corev1.ContainerPort{ {ContainerPort: 4483}, }, - Args: reconciler.buildContainerArgsForVmcp(vmcp), - Env: reconciler.buildEnvVarsForVmcp(context.Background(), vmcp, []workloads.TypedWorkload{}), + Args: reconciler.buildContainerArgsForVmcp(vmcp), + Env: reconciler.buildEnvVarsForVmcp(context.Background(), vmcp, []workloads.TypedWorkload{}), + Resources: reconciler.buildMergedResourcesForVmcp(vmcp), }, }, ServiceAccountName: vmcpServiceAccountName(vmcp.Name), diff --git a/cmd/thv-operator/controllers/virtualmcpserver_deployment.go b/cmd/thv-operator/controllers/virtualmcpserver_deployment.go index a07293e6c..8fc068fb6 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_deployment.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_deployment.go @@ -127,13 +127,16 @@ func (r 
*VirtualMCPServerReconciler) deploymentForVirtualMCPServer( } // Apply user-provided PodTemplateSpec customizations if present - if vmcp.Spec.PodTemplateSpec != nil && vmcp.Spec.PodTemplateSpec.Raw != nil { + if vmcp.Spec.PodTemplateSpec != nil && len(vmcp.Spec.PodTemplateSpec.Raw) > 0 { if err := r.applyPodTemplateSpecToDeployment(ctx, vmcp, dep); err != nil { ctxLogger := log.FromContext(ctx) ctxLogger.Error(err, "Failed to apply PodTemplateSpec to Deployment") // Return nil to block deployment creation until PodTemplateSpec is fixed return nil } + } else { + // No PodTemplateSpec, so just set default resources + dep.Spec.Template.Spec.Containers[0].Resources = r.buildDefaultResourcesForVmcp() } if err := controllerutil.SetControllerReference(vmcp, dep, r.Scheme); err != nil { @@ -529,6 +532,50 @@ func (*VirtualMCPServerReconciler) buildContainerPortsForVmcp( }} } +// buildDefaultResourcesForVmcp builds default resource requirements for the vmcp container. +// These defaults provide reasonable CPU and memory limits to prevent resource monopolization +// while allowing user customization through PodTemplateSpec. +func (*VirtualMCPServerReconciler) buildDefaultResourcesForVmcp() corev1.ResourceRequirements { + return ctrlutil.BuildDefaultResourceRequirements() +} + +// buildMergedResourcesForVmcp extracts user-provided resources from PodTemplateSpec (if any), +// applies intelligent merging with defaults, and returns the final resource requirements. +// This implements the same intelligent merging strategy as MCPServer: +// 1. User provides both requests and limits → use user's values +// 2. User only provides limits → use limits for both requests and limits +// 3. User only provides requests → compare with defaults and choose appropriately +// 4. 
User provides nothing → use defaults +func (*VirtualMCPServerReconciler) buildMergedResourcesForVmcp( + vmcp *mcpv1alpha1.VirtualMCPServer, +) corev1.ResourceRequirements { + defaultResources := ctrlutil.BuildDefaultResourceRequirements() + + // If no PodTemplateSpec provided, use defaults + if vmcp.Spec.PodTemplateSpec == nil || len(vmcp.Spec.PodTemplateSpec.Raw) == 0 { + return defaultResources + } + + // Parse PodTemplateSpec to extract user-provided resources + var podTemplateSpec corev1.PodTemplateSpec + if err := json.Unmarshal(vmcp.Spec.PodTemplateSpec.Raw, &podTemplateSpec); err != nil { + // If we can't parse, use defaults + return defaultResources + } + + // Find the vmcp container in user's PodTemplateSpec + var userResources corev1.ResourceRequirements + for _, container := range podTemplateSpec.Spec.Containers { + if container.Name == "vmcp" { + userResources = container.Resources + break + } + } + + // Apply intelligent merging + return ctrlutil.MergeResourceRequirements(defaultResources, userResources) +} + // serviceForVirtualMCPServer returns a VirtualMCPServer Service object func (r *VirtualMCPServerReconciler) serviceForVirtualMCPServer( ctx context.Context, @@ -693,18 +740,13 @@ func (r *VirtualMCPServerReconciler) validateSecretKeyRef( // - User-provided fields override controller-generated defaults // - Arrays are merged based on strategic merge patch rules (e.g., containers merged by name) // - The "vmcp" container is preserved from the controller-generated spec -func (*VirtualMCPServerReconciler) applyPodTemplateSpecToDeployment( +func (r *VirtualMCPServerReconciler) applyPodTemplateSpecToDeployment( ctx context.Context, vmcp *mcpv1alpha1.VirtualMCPServer, deployment *appsv1.Deployment, ) error { ctxLogger := log.FromContext(ctx) - // Early return if no PodTemplateSpec provided - if vmcp.Spec.PodTemplateSpec == nil || len(vmcp.Spec.PodTemplateSpec.Raw) == 0 { - return nil - } - // Validate the PodTemplateSpec and check if there are 
meaningful customizations builder, err := ctrlutil.NewPodTemplateSpecBuilder(vmcp.Spec.PodTemplateSpec, "vmcp") if err != nil { @@ -712,7 +754,12 @@ func (*VirtualMCPServerReconciler) applyPodTemplateSpecToDeployment( } if builder.Build() == nil { - // No meaningful customizations to apply + // No meaningful customizations to apply via strategic merge patch + // Set default resources and return + deployment.Spec.Template.Spec.Containers[0].Resources = r.buildDefaultResourcesForVmcp() + ctxLogger.V(1).Info("No PodTemplateSpec customizations to apply, using defaults", + "virtualmcpserver", vmcp.Name, + "namespace", vmcp.Namespace) return nil } @@ -740,6 +787,16 @@ func (*VirtualMCPServerReconciler) applyPodTemplateSpecToDeployment( deployment.Spec.Template = patchedPodTemplateSpec + // After strategic merge, set the intelligently merged resources on vmcp container + // This ensures our intelligent merging logic takes precedence over any user-specified resources + mergedResources := r.buildMergedResourcesForVmcp(vmcp) + for i := range deployment.Spec.Template.Spec.Containers { + if deployment.Spec.Template.Spec.Containers[i].Name == "vmcp" { + deployment.Spec.Template.Spec.Containers[i].Resources = mergedResources + break + } + } + ctxLogger.V(1).Info("Applied PodTemplateSpec customizations to deployment", "virtualmcpserver", vmcp.Name, "namespace", vmcp.Namespace) diff --git a/cmd/thv-operator/controllers/virtualmcpserver_deployment_test.go b/cmd/thv-operator/controllers/virtualmcpserver_deployment_test.go index e82f310ea..df76ecc7f 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_deployment_test.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_deployment_test.go @@ -21,6 +21,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -430,3 +431,655 @@ func TestServiceNeedsUpdate(t 
*testing.T) { } assert.True(t, r.serviceNeedsUpdate(service, vmcp)) } + +// TestBuildMergedResourcesForVmcp tests intelligent resource merging +func TestBuildMergedResourcesForVmcp(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + podTemplateSpecJSON string + expectedCPURequest string + expectedCPULimit string + expectedMemoryRequest string + expectedMemoryLimit string + }{ + { + name: "no PodTemplateSpec - use defaults", + podTemplateSpecJSON: "", + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides both requests and limits - use user values", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "200m", + "memory": "256Mi" + }, + "limits": { + "cpu": "1000m", + "memory": "1Gi" + } + } + }] + } + }`, + expectedCPURequest: "200m", + expectedCPULimit: "1", // 1000m is normalized to 1 core + expectedMemoryRequest: "256Mi", + expectedMemoryLimit: "1Gi", + }, + { + name: "user provides only limits - use default requests + user limits", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "limits": { + "cpu": "800m", + "memory": "768Mi" + } + } + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: "800m", + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: "768Mi", + }, + { + name: "user provides only requests (below defaults) - use requests + default limits", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "50m", + "memory": "64Mi" + } + } + }] + } + }`, + expectedCPURequest: "50m", + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: "64Mi", + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides 
only requests (above defaults) - use requests for both", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "2000m", + "memory": "2Gi" + } + } + }] + } + }`, + expectedCPURequest: "2", // 2000m is normalized to 2 cores + expectedCPULimit: "2", // 2000m is normalized to 2 cores + expectedMemoryRequest: "2Gi", + expectedMemoryLimit: "2Gi", + }, + { + name: "user provides empty resources - use defaults", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": {} + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides vmcp container without resources - use defaults", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp" + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides other containers - use defaults for vmcp", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "sidecar", + "resources": { + "requests": { + "cpu": "100m", + "memory": "128Mi" + } + } + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides limit below default request - use limit for both", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "limits": { + "cpu": "50m", + "memory": "64Mi" + } + } + }] + } + }`, + expectedCPURequest: "50m", + expectedCPULimit: "50m", + expectedMemoryRequest: "64Mi", + expectedMemoryLimit: "64Mi", + }, + { + name: "user 
provides mixed partial resources - CPU limit and Memory request", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "memory": "256Mi" + }, + "limits": { + "cpu": "800m" + } + } + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: "800m", + expectedMemoryRequest: "256Mi", + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides limit equal to default request - use default request and user limit", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "limits": { + "cpu": "100m", + "memory": "128Mi" + } + } + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: "100m", + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: "128Mi", + }, + { + name: "user provides request equal to default limit - use request for both", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "500m", + "memory": "512Mi" + } + } + }] + } + }`, + expectedCPURequest: "500m", + expectedCPULimit: "500m", + expectedMemoryRequest: "512Mi", + expectedMemoryLimit: "512Mi", + }, + { + name: "user provides only CPU request above default limit - CPU uses request for both, Memory uses all defaults", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "1000m" + } + } + }] + } + }`, + expectedCPURequest: "1", + expectedCPULimit: "1", + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides only CPU limit below default request - CPU uses limit for both, Memory uses all defaults", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "limits": { + "cpu": "50m" + } + } + }] + } + }`, + expectedCPURequest: "50m", + expectedCPULimit: "50m", + 
expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides only Memory request above default limit - Memory uses request for both, CPU uses all defaults", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "memory": "1Gi" + } + } + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: "1Gi", + expectedMemoryLimit: "1Gi", + }, + { + name: "user provides only Memory limit below default request - Memory uses limit for both, CPU uses all defaults", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "limits": { + "memory": "64Mi" + } + } + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: "64Mi", + expectedMemoryLimit: "64Mi", + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + }, + } + + if tt.podTemplateSpecJSON != "" { + vmcp.Spec.PodTemplateSpec = &runtime.RawExtension{ + Raw: []byte(tt.podTemplateSpecJSON), + } + } + + r := &VirtualMCPServerReconciler{} + resources := r.buildMergedResourcesForVmcp(vmcp) + + assert.Equal(t, tt.expectedCPURequest, resources.Requests.Cpu().String()) + assert.Equal(t, tt.expectedCPULimit, resources.Limits.Cpu().String()) + assert.Equal(t, tt.expectedMemoryRequest, resources.Requests.Memory().String()) + assert.Equal(t, tt.expectedMemoryLimit, resources.Limits.Memory().String()) + }) + } +} + +// TestDeploymentUsesIntelligentResourceMerging tests that deployments use merged resources +func 
TestDeploymentUsesIntelligentResourceMerging(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "200m", + "memory": "256Mi" + }, + "limits": { + "cpu": "1000m", + "memory": "1Gi" + } + } + }] + } + }`), + }, + }, + } + + scheme := runtime.NewScheme() + _ = mcpv1alpha1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + r := &VirtualMCPServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + deployment := r.deploymentForVirtualMCPServer(context.Background(), vmcp, "test-checksum", []workloads.TypedWorkload{}) + + require.NotNil(t, deployment) + require.Len(t, deployment.Spec.Template.Spec.Containers, 1) + + container := deployment.Spec.Template.Spec.Containers[0] + assert.Equal(t, "200m", container.Resources.Requests.Cpu().String()) + assert.Equal(t, "1", container.Resources.Limits.Cpu().String()) // 1000m normalized to 1 + assert.Equal(t, "256Mi", container.Resources.Requests.Memory().String()) + assert.Equal(t, "1Gi", container.Resources.Limits.Memory().String()) +} + +// TestApplyPodTemplateSpecStripsResources tests that resources are stripped before strategic merge +func TestApplyPodTemplateSpecStripsResources(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "300m" + } + } + }], + "nodeSelector": { + 
"disk": "ssd" + } + } + }`), + }, + }, + } + + scheme := runtime.NewScheme() + _ = mcpv1alpha1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + r := &VirtualMCPServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + deployment := r.deploymentForVirtualMCPServer(context.Background(), vmcp, "test-checksum", []workloads.TypedWorkload{}) + + require.NotNil(t, deployment) + + // Verify nodeSelector was applied (from strategic merge) + assert.Equal(t, "ssd", deployment.Spec.Template.Spec.NodeSelector["disk"]) + + // Verify resources use intelligent merging, not raw user values + // User only provided 300m CPU request, so intelligent merge should add default limit + container := deployment.Spec.Template.Spec.Containers[0] + assert.Equal(t, "300m", container.Resources.Requests.Cpu().String()) + assert.Equal(t, ctrlutil.DefaultCPULimit, container.Resources.Limits.Cpu().String()) + // Memory should be all defaults since user didn't specify + assert.Equal(t, ctrlutil.DefaultMemoryRequest, container.Resources.Requests.Memory().String()) + assert.Equal(t, ctrlutil.DefaultMemoryLimit, container.Resources.Limits.Memory().String()) +} + +// TestDeploymentWithEmptyJSONPodTemplateSpec tests that defaults are applied when PodTemplateSpec is empty JSON "{}" +func TestDeploymentWithEmptyJSONPodTemplateSpec(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + // Empty JSON object - valid JSON but no meaningful fields + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + } + + scheme := runtime.NewScheme() + _ = mcpv1alpha1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + r := &VirtualMCPServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + deployment := 
r.deploymentForVirtualMCPServer(context.Background(), vmcp, "test-checksum", []workloads.TypedWorkload{}) + + require.NotNil(t, deployment) + require.Len(t, deployment.Spec.Template.Spec.Containers, 1) + + container := deployment.Spec.Template.Spec.Containers[0] + assert.Equal(t, ctrlutil.DefaultCPURequest, container.Resources.Requests.Cpu().String(), + "CPU request should be default when PodTemplateSpec is empty JSON") + assert.Equal(t, ctrlutil.DefaultCPULimit, container.Resources.Limits.Cpu().String(), + "CPU limit should be default when PodTemplateSpec is empty JSON") + assert.Equal(t, ctrlutil.DefaultMemoryRequest, container.Resources.Requests.Memory().String(), + "Memory request should be default when PodTemplateSpec is empty JSON") + assert.Equal(t, ctrlutil.DefaultMemoryLimit, container.Resources.Limits.Memory().String(), + "Memory limit should be default when PodTemplateSpec is empty JSON") +} + +// TestDeploymentWithEmptyRawExtension tests that defaults are applied when RawExtension has empty Raw +func TestDeploymentWithEmptyRawExtension(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + // RawExtension exists but Raw is empty slice + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte{}, + }, + }, + } + + scheme := runtime.NewScheme() + _ = mcpv1alpha1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + r := &VirtualMCPServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + deployment := r.deploymentForVirtualMCPServer(context.Background(), vmcp, "test-checksum", []workloads.TypedWorkload{}) + + require.NotNil(t, deployment) + require.Len(t, deployment.Spec.Template.Spec.Containers, 1) + + container := deployment.Spec.Template.Spec.Containers[0] + assert.Equal(t, ctrlutil.DefaultCPURequest, 
container.Resources.Requests.Cpu().String(), + "CPU request should be default when Raw is empty slice") + assert.Equal(t, ctrlutil.DefaultCPULimit, container.Resources.Limits.Cpu().String(), + "CPU limit should be default when Raw is empty slice") + assert.Equal(t, ctrlutil.DefaultMemoryRequest, container.Resources.Requests.Memory().String(), + "Memory request should be default when Raw is empty slice") + assert.Equal(t, ctrlutil.DefaultMemoryLimit, container.Resources.Limits.Memory().String(), + "Memory limit should be default when Raw is empty slice") +} + +// TestDeploymentWithEmptySpecFields tests that defaults are applied when PodTemplateSpec +// has structure but all fields are empty (triggers builder.Build() == nil) +func TestDeploymentWithEmptySpecFields(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + // Valid JSON structure with empty spec - triggers builder.Build() == nil + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{"spec": {}}`), + }, + }, + } + + scheme := runtime.NewScheme() + _ = mcpv1alpha1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + r := &VirtualMCPServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + deployment := r.deploymentForVirtualMCPServer(context.Background(), vmcp, "test-checksum", []workloads.TypedWorkload{}) + + require.NotNil(t, deployment) + require.Len(t, deployment.Spec.Template.Spec.Containers, 1) + + container := deployment.Spec.Template.Spec.Containers[0] + assert.Equal(t, ctrlutil.DefaultCPURequest, container.Resources.Requests.Cpu().String(), + "CPU request should be default when spec fields are empty") + assert.Equal(t, ctrlutil.DefaultCPULimit, container.Resources.Limits.Cpu().String(), + "CPU limit should be default when spec fields are empty") 
+ assert.Equal(t, ctrlutil.DefaultMemoryRequest, container.Resources.Requests.Memory().String(), + "Memory request should be default when spec fields are empty") + assert.Equal(t, ctrlutil.DefaultMemoryLimit, container.Resources.Limits.Memory().String(), + "Memory limit should be default when spec fields are empty") +} + +// TestContainerNeedsUpdateDetectsResourceChanges tests that existing deployments without resources +// will trigger an update when the operator is upgraded to include default resources +func TestContainerNeedsUpdateDetectsResourceChanges(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + // No PodTemplateSpec - simulates pre-upgrade deployment + }, + } + + scheme := runtime.NewScheme() + _ = mcpv1alpha1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + r := &VirtualMCPServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + // Simulate a pre-upgrade deployment with no resources set + deploymentWithoutResources := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: vmcp.Name, + Namespace: vmcp.Namespace, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + ServiceAccountName: vmcpServiceAccountName(vmcp.Name), + Containers: []corev1.Container{{ + Name: "vmcp", + Image: getVmcpImage(), + Ports: []corev1.ContainerPort{{ContainerPort: vmcpDefaultPort}}, + Env: r.buildEnvVarsForVmcp(context.Background(), vmcp, []workloads.TypedWorkload{}), + // No Resources set - simulates pre-upgrade state + }}, + }, + }, + }, + } + + // containerNeedsUpdate should return true because resources are missing + needsUpdate := r.containerNeedsUpdate(context.Background(), deploymentWithoutResources, vmcp, []workloads.TypedWorkload{}) + assert.True(t, 
needsUpdate, "containerNeedsUpdate should detect missing resources") +} diff --git a/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder.go b/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder.go index ee0573634..95f4435b5 100644 --- a/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder.go +++ b/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder.go @@ -106,6 +106,39 @@ func (b *PodTemplateSpecBuilder) WithSecrets(secrets []mcpv1alpha1.SecretRef) *P return b } +// WithResources adds resource requirements to the target container. +// The target container is specified by containerName in the constructor. +// The resources parameter should already be validated and converted to proper K8s format by the controller. +func (b *PodTemplateSpecBuilder) WithResources(k8sResources corev1.ResourceRequirements) *PodTemplateSpecBuilder { + // Check if resources are empty + if len(k8sResources.Limits) == 0 && len(k8sResources.Requests) == 0 { + return b + } + + // Find existing container or create new one + containerIndex := -1 + for i, container := range b.spec.Spec.Containers { + if container.Name == b.containerName { + containerIndex = i + break + } + } + + if containerIndex >= 0 { + // Merge resources with existing container (existing takes precedence) + existingResources := b.spec.Spec.Containers[containerIndex].Resources + mergedResources := MergeResourceRequirements(k8sResources, existingResources) + b.spec.Spec.Containers[containerIndex].Resources = mergedResources + } else { + // Add new container with resources + b.spec.Spec.Containers = append(b.spec.Spec.Containers, corev1.Container{ + Name: b.containerName, + Resources: k8sResources, + }) + } + return b +} + // Build returns the final PodTemplateSpec, or nil if no customizations were made. 
func (b *PodTemplateSpecBuilder) Build() *corev1.PodTemplateSpec { if b.isEmpty() { diff --git a/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder_test.go b/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder_test.go index d684cfcd0..8b1ed030a 100644 --- a/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder_test.go +++ b/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder_test.go @@ -10,7 +10,10 @@ import ( mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" ) -const testContainerName = "test-container" +const ( + testContainerName = "test-container" + testServiceAccount = "my-sa" +) func TestNewPodTemplateSpecBuilder(t *testing.T) { t.Parallel() @@ -70,7 +73,7 @@ func TestPodTemplateSpecBuilder_Build(t *testing.T) { { name: "with service account returns spec", setup: func(b *PodTemplateSpecBuilder) { - sa := "my-sa" + sa := testServiceAccount b.WithServiceAccount(&sa) }, expectNil: false, @@ -238,18 +241,233 @@ func TestPodTemplateSpecBuilder_Chaining(t *testing.T) { builder, err := NewPodTemplateSpecBuilder(nil, testContainerName) require.NoError(t, err) - sa := "my-sa" + sa := testServiceAccount result := builder. WithServiceAccount(&sa). WithSecrets([]mcpv1alpha1.SecretRef{{Name: "secret", Key: "KEY"}}). 
Build() require.NotNil(t, result) - assert.Equal(t, "my-sa", result.Spec.ServiceAccountName) + assert.Equal(t, testServiceAccount, result.Spec.ServiceAccountName) require.Len(t, result.Spec.Containers, 1) assert.Equal(t, testContainerName, result.Spec.Containers[0].Name) } +func TestPodTemplateSpecBuilder_WithResources(t *testing.T) { + t.Parallel() + + t.Run("empty resources does nothing", func(t *testing.T) { + t.Parallel() + builder, err := NewPodTemplateSpecBuilder(nil, testContainerName) + require.NoError(t, err) + + // Empty resources should not create a container + emptyResources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{}) + builder.WithResources(emptyResources) + + assert.Empty(t, builder.spec.Spec.Containers) + }) + + t.Run("creates container with resources", func(t *testing.T) { + t.Parallel() + builder, err := NewPodTemplateSpecBuilder(nil, testContainerName) + require.NoError(t, err) + + resources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "500m", + Memory: "512Mi", + }, + Requests: mcpv1alpha1.ResourceList{ + CPU: "100m", + Memory: "128Mi", + }, + }) + builder.WithResources(resources) + + require.Len(t, builder.spec.Spec.Containers, 1) + container := builder.spec.Spec.Containers[0] + assert.Equal(t, testContainerName, container.Name) + + // Verify limits + assert.Equal(t, "500m", container.Resources.Limits.Cpu().String()) + assert.Equal(t, "512Mi", container.Resources.Limits.Memory().String()) + + // Verify requests + assert.Equal(t, "100m", container.Resources.Requests.Cpu().String()) + assert.Equal(t, "128Mi", container.Resources.Requests.Memory().String()) + }) + + t.Run("merges into existing container", func(t *testing.T) { + t.Parallel() + raw := &runtime.RawExtension{ + Raw: []byte(`{"spec":{"containers":[{"name":"test-container","image":"test:latest"}]}}`), + } + builder, err := NewPodTemplateSpecBuilder(raw, testContainerName) + require.NoError(t, err) + + 
resources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "200m", + }, + }) + builder.WithResources(resources) + + require.Len(t, builder.spec.Spec.Containers, 1) + container := builder.spec.Spec.Containers[0] + assert.Equal(t, "test:latest", container.Image) + assert.Equal(t, "200m", container.Resources.Limits.Cpu().String()) + }) + + t.Run("adds to different container", func(t *testing.T) { + t.Parallel() + raw := &runtime.RawExtension{ + Raw: []byte(`{"spec":{"containers":[{"name":"other-container"}]}}`), + } + builder, err := NewPodTemplateSpecBuilder(raw, testContainerName) + require.NoError(t, err) + + resources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{ + Requests: mcpv1alpha1.ResourceList{ + Memory: "64Mi", + }, + }) + builder.WithResources(resources) + + require.Len(t, builder.spec.Spec.Containers, 2) + assert.Equal(t, "other-container", builder.spec.Spec.Containers[0].Name) + assert.Equal(t, testContainerName, builder.spec.Spec.Containers[1].Name) + assert.Equal(t, "64Mi", builder.spec.Spec.Containers[1].Resources.Requests.Memory().String()) + }) +} + +func TestPodTemplateSpecBuilder_WithResourcesChaining(t *testing.T) { + t.Parallel() + + builder, err := NewPodTemplateSpecBuilder(nil, testContainerName) + require.NoError(t, err) + + sa := testServiceAccount + resources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "1000m", + Memory: "1Gi", + }, + }) + + result := builder. + WithServiceAccount(&sa). + WithSecrets([]mcpv1alpha1.SecretRef{{Name: "secret", Key: "KEY"}}). + WithResources(resources). 
+ Build() + + require.NotNil(t, result) + assert.Equal(t, testServiceAccount, result.Spec.ServiceAccountName) + require.Len(t, result.Spec.Containers, 1) + + container := result.Spec.Containers[0] + assert.Equal(t, testContainerName, container.Name) + assert.Len(t, container.Env, 1) + // 1000m is normalized to 1 CPU core + assert.Equal(t, "1", container.Resources.Limits.Cpu().String()) + assert.Equal(t, "1Gi", container.Resources.Limits.Memory().String()) +} + +func TestPodTemplateSpecBuilder_WithResourcesMerging(t *testing.T) { + t.Parallel() + + t.Run("existing resources take precedence over defaults", func(t *testing.T) { + t.Parallel() + // Create pod template with existing resources + raw := &runtime.RawExtension{ + Raw: []byte(`{ + "spec": { + "containers": [{ + "name": "test-container", + "resources": { + "limits": {"cpu": "2", "memory": "2Gi"}, + "requests": {"cpu": "500m"} + } + }] + } + }`), + } + builder, err := NewPodTemplateSpecBuilder(raw, testContainerName) + require.NoError(t, err) + + // Try to set different resources (these should be overridden by existing) + defaultResources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "1", + Memory: "1Gi", + }, + Requests: mcpv1alpha1.ResourceList{ + CPU: "100m", + Memory: "128Mi", + }, + }) + + builder.WithResources(defaultResources) + result := builder.Build() + + require.NotNil(t, result) + require.Len(t, result.Spec.Containers, 1) + container := result.Spec.Containers[0] + + // Existing values should be preserved + assert.Equal(t, "2", container.Resources.Limits.Cpu().String()) + assert.Equal(t, "2Gi", container.Resources.Limits.Memory().String()) + assert.Equal(t, "500m", container.Resources.Requests.Cpu().String()) + // Memory request was not in existing, so default should be used + assert.Equal(t, "128Mi", container.Resources.Requests.Memory().String()) + }) + + t.Run("defaults fill in missing resources", func(t *testing.T) { + t.Parallel() + // 
Create pod template with partial resources (only CPU limit) + raw := &runtime.RawExtension{ + Raw: []byte(`{ + "spec": { + "containers": [{ + "name": "test-container", + "resources": { + "limits": {"cpu": "3"} + } + }] + } + }`), + } + builder, err := NewPodTemplateSpecBuilder(raw, testContainerName) + require.NoError(t, err) + + // Set complete resource defaults + defaultResources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "1", + Memory: "1Gi", + }, + Requests: mcpv1alpha1.ResourceList{ + CPU: "100m", + Memory: "128Mi", + }, + }) + + builder.WithResources(defaultResources) + result := builder.Build() + + require.NotNil(t, result) + require.Len(t, result.Spec.Containers, 1) + container := result.Spec.Containers[0] + + // Existing CPU limit should be preserved + assert.Equal(t, "3", container.Resources.Limits.Cpu().String()) + // Other resources should come from defaults + assert.Equal(t, "1Gi", container.Resources.Limits.Memory().String()) + assert.Equal(t, "100m", container.Resources.Requests.Cpu().String()) + assert.Equal(t, "128Mi", container.Resources.Requests.Memory().String()) + }) +} + // ptr is a helper to create a pointer to a string. func ptr(s string) *string { return &s diff --git a/cmd/thv-operator/pkg/controllerutil/resources.go b/cmd/thv-operator/pkg/controllerutil/resources.go index 6dcbe784c..bae1cd1aa 100644 --- a/cmd/thv-operator/pkg/controllerutil/resources.go +++ b/cmd/thv-operator/pkg/controllerutil/resources.go @@ -12,6 +12,62 @@ import ( mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" ) +const ( + // DefaultCPURequest is the default CPU request for MCP server containers. + // These values provide reasonable limits to prevent resource monopolization + // while allowing sufficient resources for typical MCP server workloads. + DefaultCPURequest = "100m" + // DefaultCPULimit is the default CPU limit for MCP server containers. 
+ DefaultCPULimit = "500m" + // DefaultMemoryRequest is the default memory request for MCP server containers. + DefaultMemoryRequest = "128Mi" + // DefaultMemoryLimit is the default memory limit for MCP server containers. + DefaultMemoryLimit = "512Mi" + + // DefaultProxyRunnerCPURequest is the default CPU request for proxy runner containers. + // The proxy runner is a lightweight Go process that manages the connection + // to the MCP server container, so it needs fewer resources. + DefaultProxyRunnerCPURequest = "50m" + // DefaultProxyRunnerCPULimit is the default CPU limit for proxy runner containers. + DefaultProxyRunnerCPULimit = "200m" + // DefaultProxyRunnerMemoryRequest is the default memory request for proxy runner containers. + DefaultProxyRunnerMemoryRequest = "64Mi" + // DefaultProxyRunnerMemoryLimit is the default memory limit for proxy runner containers. + DefaultProxyRunnerMemoryLimit = "256Mi" +) + +// BuildDefaultResourceRequirements returns standard default resource requirements +// for MCP server containers (MCPServer, VirtualMCPServer, MCPRemoteProxy). +// These defaults prevent resource monopolization while allowing user customization. +func BuildDefaultResourceRequirements() corev1.ResourceRequirements { + return corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(DefaultCPULimit), + corev1.ResourceMemory: resource.MustParse(DefaultMemoryLimit), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(DefaultCPURequest), + corev1.ResourceMemory: resource.MustParse(DefaultMemoryRequest), + }, + } +} + +// BuildDefaultProxyRunnerResourceRequirements returns default resource requirements +// for the ToolHive proxy runner container (the container running `thv run`). +// The proxy runner is lighter weight than the MCP server container it manages. 
+func BuildDefaultProxyRunnerResourceRequirements() corev1.ResourceRequirements { + return corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(DefaultProxyRunnerCPULimit), + corev1.ResourceMemory: resource.MustParse(DefaultProxyRunnerMemoryLimit), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(DefaultProxyRunnerCPURequest), + corev1.ResourceMemory: resource.MustParse(DefaultProxyRunnerMemoryRequest), + }, + } +} + // BuildResourceRequirements builds Kubernetes resource requirements from CRD spec // Shared between MCPServer and MCPRemoteProxy func BuildResourceRequirements(resourceSpec mcpv1alpha1.ResourceRequirements) corev1.ResourceRequirements { @@ -40,6 +96,70 @@ func BuildResourceRequirements(resourceSpec mcpv1alpha1.ResourceRequirements) co return resources } +// MergeResourceRequirements intelligently merges user-provided resources with default resources. +// For each resource type (CPU, Memory), handles request and limit independently: +// - If user provides both → use user values +// - If user provides limit only: +// - If limit >= default request → use default request + user limit +// - If limit < default request → use user limit for both +// +// - If user provides request only: +// - If request >= default limit → use user request for both +// - If request < default limit → use user request + default limit +// +// - If user provides neither → use defaults +func MergeResourceRequirements(defaults, user corev1.ResourceRequirements) corev1.ResourceRequirements { + result := corev1.ResourceRequirements{ + Requests: defaults.Requests.DeepCopy(), + Limits: defaults.Limits.DeepCopy(), + } + + mergeResourceType(corev1.ResourceCPU, &result, defaults, user) + mergeResourceType(corev1.ResourceMemory, &result, defaults, user) + + return result +} + +// mergeResourceType handles merging logic for a single resource type (CPU or Memory) +func mergeResourceType( + resourceName 
corev1.ResourceName, + result *corev1.ResourceRequirements, + defaults, user corev1.ResourceRequirements, +) { + userLimit, hasUserLimit := user.Limits[resourceName] + userRequest, hasUserRequest := user.Requests[resourceName] + defaultRequest := defaults.Requests[resourceName] + defaultLimit := defaults.Limits[resourceName] + + // Process user-provided limit + if hasUserLimit { + result.Limits[resourceName] = userLimit.DeepCopy() + // If no request provided, check against default request + if !hasUserRequest { + if userLimit.Cmp(defaultRequest) >= 0 { + // User limit >= default request, use default request + result.Requests[resourceName] = defaultRequest.DeepCopy() + } else { + // User limit < default request, use user limit for both + result.Requests[resourceName] = userLimit.DeepCopy() + } + } + } + + // Process user-provided request + if hasUserRequest { + result.Requests[resourceName] = userRequest.DeepCopy() + // If no limit provided, compare request with default limit + if !hasUserLimit { + if userRequest.Cmp(defaultLimit) >= 0 { + result.Limits[resourceName] = userRequest.DeepCopy() + } else { + result.Limits[resourceName] = defaultLimit.DeepCopy() + } + } + } +} + // BuildHealthProbe builds a Kubernetes health probe configuration // Shared between MCPServer and MCPRemoteProxy func BuildHealthProbe( diff --git a/cmd/thv-operator/pkg/controllerutil/resources_test.go b/cmd/thv-operator/pkg/controllerutil/resources_test.go new file mode 100644 index 000000000..603240363 --- /dev/null +++ b/cmd/thv-operator/pkg/controllerutil/resources_test.go @@ -0,0 +1,133 @@ +package controllerutil + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +// mustParseQuantity is a test helper that panics if parsing fails +func mustParseQuantity(s string) resource.Quantity { + q := resource.MustParse(s) + return q +} + +func TestBuildDefaultProxyRunnerResourceRequirements(t *testing.T) { + 
t.Parallel() + + resources := BuildDefaultProxyRunnerResourceRequirements() + + // Verify limits + assert.Equal(t, DefaultProxyRunnerCPULimit, resources.Limits.Cpu().String()) + assert.Equal(t, DefaultProxyRunnerMemoryLimit, resources.Limits.Memory().String()) + + // Verify requests + assert.Equal(t, DefaultProxyRunnerCPURequest, resources.Requests.Cpu().String()) + assert.Equal(t, DefaultProxyRunnerMemoryRequest, resources.Requests.Memory().String()) +} + +func TestBuildDefaultResourceRequirements(t *testing.T) { + t.Parallel() + + resources := BuildDefaultResourceRequirements() + + // Verify limits + assert.Equal(t, DefaultCPULimit, resources.Limits.Cpu().String()) + assert.Equal(t, DefaultMemoryLimit, resources.Limits.Memory().String()) + + // Verify requests + assert.Equal(t, DefaultCPURequest, resources.Requests.Cpu().String()) + assert.Equal(t, DefaultMemoryRequest, resources.Requests.Memory().String()) +} + +func TestProxyRunnerResourcesAreLighterThanMCPServerResources(t *testing.T) { + t.Parallel() + + proxyResources := BuildDefaultProxyRunnerResourceRequirements() + mcpResources := BuildDefaultResourceRequirements() + + // Proxy runner should use less CPU than MCP server + assert.True(t, proxyResources.Limits.Cpu().Cmp(*mcpResources.Limits.Cpu()) < 0, + "Proxy runner CPU limit should be less than MCP server CPU limit") + assert.True(t, proxyResources.Requests.Cpu().Cmp(*mcpResources.Requests.Cpu()) < 0, + "Proxy runner CPU request should be less than MCP server CPU request") + + // Proxy runner should use less memory than MCP server + assert.True(t, proxyResources.Limits.Memory().Cmp(*mcpResources.Limits.Memory()) < 0, + "Proxy runner memory limit should be less than MCP server memory limit") + assert.True(t, proxyResources.Requests.Memory().Cmp(*mcpResources.Requests.Memory()) < 0, + "Proxy runner memory request should be less than MCP server memory request") +} + +func TestMergeResourceRequirements_ProxyRunner(t *testing.T) { + t.Parallel() + + tests := 
[]struct { + name string + defaults corev1.ResourceRequirements + user corev1.ResourceRequirements + expected map[string]string // resource name -> expected value + }{ + { + name: "empty user resources uses defaults", + defaults: BuildDefaultProxyRunnerResourceRequirements(), + user: corev1.ResourceRequirements{}, + expected: map[string]string{ + "cpu_limit": DefaultProxyRunnerCPULimit, + "memory_limit": DefaultProxyRunnerMemoryLimit, + "cpu_request": DefaultProxyRunnerCPURequest, + "memory_request": DefaultProxyRunnerMemoryRequest, + }, + }, + { + name: "user overrides only CPU limit", + defaults: BuildDefaultProxyRunnerResourceRequirements(), + user: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: mustParseQuantity("300m"), + }, + }, + expected: map[string]string{ + "cpu_limit": "300m", + "memory_limit": DefaultProxyRunnerMemoryLimit, + "cpu_request": DefaultProxyRunnerCPURequest, + "memory_request": DefaultProxyRunnerMemoryRequest, + }, + }, + { + name: "user overrides all resources", + defaults: BuildDefaultProxyRunnerResourceRequirements(), + user: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: mustParseQuantity("500m"), + corev1.ResourceMemory: mustParseQuantity("512Mi"), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: mustParseQuantity("100m"), + corev1.ResourceMemory: mustParseQuantity("128Mi"), + }, + }, + expected: map[string]string{ + "cpu_limit": "500m", + "memory_limit": "512Mi", + "cpu_request": "100m", + "memory_request": "128Mi", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + result := MergeResourceRequirements(tt.defaults, tt.user) + + assert.Equal(t, tt.expected["cpu_limit"], result.Limits.Cpu().String()) + assert.Equal(t, tt.expected["memory_limit"], result.Limits.Memory().String()) + assert.Equal(t, tt.expected["cpu_request"], result.Requests.Cpu().String()) + assert.Equal(t, tt.expected["memory_request"], 
result.Requests.Memory().String()) + }) + } +} diff --git a/cmd/thv-operator/test-integration/mcp-server/mcpserver_controller_integration_test.go b/cmd/thv-operator/test-integration/mcp-server/mcpserver_controller_integration_test.go index 36b6f1675..84b43c8a8 100644 --- a/cmd/thv-operator/test-integration/mcp-server/mcpserver_controller_integration_test.go +++ b/cmd/thv-operator/test-integration/mcp-server/mcpserver_controller_integration_test.go @@ -82,6 +82,16 @@ var _ = Describe("MCPServer Controller Integration Tests", func() { Memory: "128Mi", }, }, + ProxyRunnerResources: mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "500m", + Memory: "1Gi", + }, + Requests: mcpv1alpha1.ResourceList{ + CPU: "100m", + Memory: "128Mi", + }, + }, ResourceOverrides: &mcpv1alpha1.ResourceOverrides{ ProxyDeployment: &mcpv1alpha1.ProxyDeploymentOverrides{ PodTemplateMetadataOverrides: &mcpv1alpha1.ResourceMetadataOverrides{ diff --git a/deploy/charts/operator-crds/Chart.yaml b/deploy/charts/operator-crds/Chart.yaml index 9221a2fa2..926a86e73 100644 --- a/deploy/charts/operator-crds/Chart.yaml +++ b/deploy/charts/operator-crds/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: toolhive-operator-crds description: A Helm chart for installing the ToolHive Operator CRDs into Kubernetes. 
type: application -version: 0.0.85 +version: 0.0.86 appVersion: "0.0.1" diff --git a/deploy/charts/operator-crds/README.md b/deploy/charts/operator-crds/README.md index dd9ecfc9e..2d88b4c06 100644 --- a/deploy/charts/operator-crds/README.md +++ b/deploy/charts/operator-crds/README.md @@ -1,6 +1,6 @@ # ToolHive Operator CRDs Helm Chart -![Version: 0.0.85](https://img.shields.io/badge/Version-0.0.85-informational?style=flat-square) +![Version: 0.0.86](https://img.shields.io/badge/Version-0.0.86-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) A Helm chart for installing the ToolHive Operator CRDs into Kubernetes. @@ -51,7 +51,7 @@ However, placing CRDs in `templates/` means they would be deleted when the Helm ## Values | Key | Type | Default | Description | -|-----|------|---------|-------------| +|-----|-------------|------|---------| | crds | object | `{"install":{"registry":true,"server":true,"virtualMcp":true},"keep":true}` | CRD installation configuration | | crds.install | object | `{"registry":true,"server":true,"virtualMcp":true}` | Feature flags for CRD groups | | crds.install.registry | bool | `true` | Install Registry CRDs (mcpregistries) | diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml index 62dcd9956..820dcb8b3 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml @@ -372,6 +372,39 @@ spec: maximum: 65535 minimum: 1 type: integer + proxyRunnerResources: + description: |- + ProxyRunnerResources defines the resource requirements for the ToolHive proxy runner container. + The proxy runner is the infrastructure container that manages the MCP server container. 
+ If not specified, defaults to 50m/200m CPU and 64Mi/256Mi memory (lighter than MCP server defaults). + properties: + limits: + description: Limits describes the maximum amount of compute resources + allowed + properties: + cpu: + description: CPU is the CPU limit in cores (e.g., "500m" for + 0.5 cores) + type: string + memory: + description: Memory is the memory limit in bytes (e.g., "64Mi" + for 64 megabytes) + type: string + type: object + requests: + description: Requests describes the minimum amount of compute + resources required + properties: + cpu: + description: CPU is the CPU limit in cores (e.g., "500m" for + 0.5 cores) + type: string + memory: + description: Memory is the memory limit in bytes (e.g., "64Mi" + for 64 megabytes) + type: string + type: object + type: object resourceOverrides: description: ResourceOverrides allows overriding annotations and labels for resources created by the operator diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml index 6a2ab9d76..934fc49f7 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml @@ -375,6 +375,39 @@ spec: maximum: 65535 minimum: 1 type: integer + proxyRunnerResources: + description: |- + ProxyRunnerResources defines the resource requirements for the ToolHive proxy runner container. + The proxy runner is the infrastructure container that manages the MCP server container. + If not specified, defaults to 50m/200m CPU and 64Mi/256Mi memory (lighter than MCP server defaults). 
+ properties: + limits: + description: Limits describes the maximum amount of compute resources + allowed + properties: + cpu: + description: CPU is the CPU limit in cores (e.g., "500m" for + 0.5 cores) + type: string + memory: + description: Memory is the memory limit in bytes (e.g., "64Mi" + for 64 megabytes) + type: string + type: object + requests: + description: Requests describes the minimum amount of compute + resources required + properties: + cpu: + description: CPU is the CPU limit in cores (e.g., "500m" for + 0.5 cores) + type: string + memory: + description: Memory is the memory limit in bytes (e.g., "64Mi" + for 64 megabytes) + type: string + type: object + type: object resourceOverrides: description: ResourceOverrides allows overriding annotations and labels for resources created by the operator diff --git a/deploy/charts/operator/Chart.yaml b/deploy/charts/operator/Chart.yaml index 55e7800d2..6888a19bd 100644 --- a/deploy/charts/operator/Chart.yaml +++ b/deploy/charts/operator/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: toolhive-operator description: A Helm chart for deploying the ToolHive Operator into Kubernetes. type: application -version: 0.5.14 +version: 0.5.15 appVersion: "v0.6.13" diff --git a/deploy/charts/operator/README.md b/deploy/charts/operator/README.md index aed44f378..85b7e25b2 100644 --- a/deploy/charts/operator/README.md +++ b/deploy/charts/operator/README.md @@ -1,6 +1,6 @@ # ToolHive Operator Helm Chart -![Version: 0.5.14](https://img.shields.io/badge/Version-0.5.14-informational?style=flat-square) +![Version: 0.5.15](https://img.shields.io/badge/Version-0.5.15-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) A Helm chart for deploying the ToolHive Operator into Kubernetes. 
@@ -49,7 +49,7 @@ The command removes all the Kubernetes components associated with the chart and ## Values | Key | Type | Default | Description | -|-----|------|---------|-------------| +|-----|------|---------|-------------| | fullnameOverride | string | `"toolhive-operator"` | Provide a fully-qualified name override for resources | | nameOverride | string | `""` | Override the name of the chart | | operator | object | `{"affinity":{},"autoscaling":{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80},"containerSecurityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"runAsUser":1000,"seccompProfile":{"type":"RuntimeDefault"}},"env":{},"features":{"experimental":false},"gc":{"gogc":75,"gomeglimit":"150MiB"},"image":"ghcr.io/stacklok/toolhive/operator:v0.6.13","imagePullPolicy":"IfNotPresent","imagePullSecrets":[],"leaderElectionRole":{"binding":{"name":"toolhive-operator-leader-election-rolebinding"},"name":"toolhive-operator-leader-election-role","rules":[{"apiGroups":[""],"resources":["configmaps"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":["coordination.k8s.io"],"resources":["leases"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":[""],"resources":["events"],"verbs":["create","patch"]}]},"livenessProbe":{"httpGet":{"path":"/healthz","port":"health"},"initialDelaySeconds":15,"periodSeconds":20},"nodeSelector":{},"podAnnotations":{},"podLabels":{},"podSecurityContext":{"runAsNonRoot":true},"ports":[{"containerPort":8080,"name":"metrics","protocol":"TCP"},{"containerPort":8081,"name":"health","protocol":"TCP"}],"proxyHost":"0.0.0.0","rbac":{"allowedNamespaces":[],"scope":"cluster"},"readinessProbe":{"httpGet":{"path":"/readyz","port":"health"},"initialDelaySeconds":5,"periodSeconds":10},"replicaCount":1,"resources":{"limits":{"cpu":"500m","memory":"128Mi"},"requests":{"cpu":"10m","memor
y":"64Mi"}},"serviceAccount":{"annotations":{},"automountServiceAccountToken":true,"create":true,"labels":{},"name":"toolhive-operator"},"tolerations":[],"toolhiveRunnerImage":"ghcr.io/stacklok/toolhive/proxyrunner:v0.6.13","vmcpImage":"ghcr.io/stacklok/toolhive/vmcp:v0.6.13","volumeMounts":[],"volumes":[]}` | All values for the operator deployment and associated resources | diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md index 5ba19498e..a9c5550ac 100644 --- a/docs/operator/crd-api.md +++ b/docs/operator/crd-api.md @@ -1075,6 +1075,7 @@ _Appears in:_ | `env` _[EnvVar](#envvar) array_ | Env are environment variables to set in the MCP server container | | | | `volumes` _[Volume](#volume) array_ | Volumes are volumes to mount in the MCP server container | | | | `resources` _[ResourceRequirements](#resourcerequirements)_ | Resources defines the resource requirements for the MCP server container | | | +| `proxyRunnerResources` _[ResourceRequirements](#resourcerequirements)_ | ProxyRunnerResources defines the resource requirements for the ToolHive proxy runner container.
The proxy runner is the infrastructure container that manages the MCP server container.
If not specified, defaults to 50m/200m CPU and 64Mi/256Mi memory (lighter than MCP server defaults). | | | | `secrets` _[SecretRef](#secretref) array_ | Secrets are references to secrets to mount in the MCP server container | | | | `serviceAccount` _string_ | ServiceAccount is the name of an already existing service account to use by the MCP server.
If not specified, a ServiceAccount will be created automatically and used by the MCP server. | | | | `permissionProfile` _[PermissionProfileRef](#permissionprofileref)_ | PermissionProfile defines the permission profile to use | | | diff --git a/examples/operator/mcp-servers/mcpserver_with_resource_overrides.yaml b/examples/operator/mcp-servers/mcpserver_with_resource_overrides.yaml index 5b3b22b3b..1f9a93d68 100644 --- a/examples/operator/mcp-servers/mcpserver_with_resource_overrides.yaml +++ b/examples/operator/mcp-servers/mcpserver_with_resource_overrides.yaml @@ -15,6 +15,7 @@ spec: value: https://api.github.com - name: LOG_LEVEL value: info + # Resource requirements for the MCP server container (the actual workload) resources: limits: cpu: "200m" @@ -22,6 +23,18 @@ spec: requests: cpu: "100m" memory: "128Mi" + + # Resource requirements for the ToolHive proxy runner container (infrastructure) + # The proxy runner is lighter weight and typically needs fewer resources + # Defaults: 50m/200m CPU, 64Mi/256Mi memory + proxyRunnerResources: + limits: + cpu: "150m" # Override default limit of 200m + memory: "192Mi" # Override default limit of 256Mi + requests: + cpu: "75m" # Override default request of 50m + memory: "96Mi" # Override default request of 64Mi + resourceOverrides: proxyDeployment: # Annotations and labels for the proxy deployment