From 48387c9add6be5c4163003c9371f6168f8f79484 Mon Sep 17 00:00:00 2001 From: 4t8dd Date: Wed, 10 Dec 2025 21:16:36 +0800 Subject: [PATCH] Add default resource limits and separate proxy runner resource configuration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements default resource limits for MCP server and VirtualMCP server containers to prevent resource monopolization, with intelligent merging of user-provided values and separate resource configuration for proxy runner containers. Per issue #2873, vmcp and MCP server containers lacked default resource limits, creating security and operational risks where misbehaving containers could exhaust node resources and affect other workloads. Additionally, MCPServer deployments have two containers (MCP server + proxy runner) but only had one resource field (`spec.resources`), creating ambiguity about which container the resources applied to. - MCP server container: 500m/100m CPU, 512Mi/128Mi memory - Proxy runner container: 200m/50m CPU, 256Mi/64Mi memory (lighter weight) - VirtualMCP server container: 500m/100m CPU, 512Mi/128Mi memory - Defaults are intelligently merged with user-provided values - Add `spec.proxyRunnerResources` field for proxy runner container configuration - Existing `spec.resources` applies to MCP server container - Clear separation eliminates ambiguity between the two containers MCP server resources are composed from three sources: - Defaults → `spec.resources` → `spec.podTemplateSpec` (highest precedence) Proxy runner resources are composed from: - Defaults → `spec.proxyRunnerResources` - Add `resourceRequirementsForMCPServer()`: Merges defaults with spec.resources - Add `resourceRequirementsForProxyRunner()`: Merges defaults with spec.proxyRunnerResources - Update `deploymentNeedsUpdate()`: Checks both MCP and proxy resources separately - Add `PodTemplateSpecBuilder.WithResources()`: Merges resources intelligently - Add shared utility functions in `pkg/controllerutil/resources.go` - Add regression tests for `deploymentNeedsUpdate()` resource changes (4 tests) - Add tests for resource merging in PodTemplateSpecBuilder (2 tests) - Add tests for default resources and merging (3 tests) - Add VirtualMCPServer deployment tests (10 tests) - Total: 19 new tests ensuring no reconciliation loops - Add `proxyRunnerResources` field to MCPServer CRD - Regenerate deepcopy methods and CRD manifests - Bump Helm chart version: 0.0.75 → 0.0.76 - Update example YAML with clear comments - Document default values and composition precedence **MCPServer (2 containers):** - `spec.resources` → MCP server container (via pod template patch) - `spec.proxyRunnerResources` → Proxy runner container (in deployment) **VirtualMCPServer (1 container):** - `spec.resources` → VirtualMCP server container - Prevents resource exhaustion by providing sensible defaults - Clear separation: separate fields for separate containers in MCPServer - User can customize each container independently - Intelligent merging allows partial overrides - Prevents reconciliation loops through proper update detection Fixes #2873 Signed-off-by: 4t8dd --- .../api/v1alpha1/mcpserver_types.go | 6 + .../api/v1alpha1/zz_generated.deepcopy.go | 1 + .../controllers/mcpserver_controller.go | 75 +- .../mcpserver_deployment_update_test.go | 292 ++++++++ .../virtualmcpserver_controller.go | 4 +- .../virtualmcpserver_controller_test.go | 10 +- .../virtualmcpserver_deployment.go | 73 +- .../virtualmcpserver_deployment_test.go | 653 ++++++++++++++++++ .../controllerutil/podtemplatespec_builder.go | 33 + .../podtemplatespec_builder_test.go | 226 +++++- .../pkg/controllerutil/resources.go | 120 ++++ .../pkg/controllerutil/resources_test.go | 133 ++++ .../mcpserver_controller_integration_test.go | 10 + deploy/charts/operator-crds/Chart.yaml | 2 +- deploy/charts/operator-crds/README.md | 4 +- .../toolhive.stacklok.dev_mcpservers.yaml | 33 + .../toolhive.stacklok.dev_mcpservers.yaml | 33 + deploy/charts/operator/Chart.yaml | 2 +- deploy/charts/operator/README.md | 4 +- docs/operator/crd-api.md | 1 + .../mcpserver_with_resource_overrides.yaml | 13 + 21 files changed, 1661 insertions(+), 67 deletions(-) create mode 100644 cmd/thv-operator/controllers/mcpserver_deployment_update_test.go create mode 100644 cmd/thv-operator/pkg/controllerutil/resources_test.go diff --git a/cmd/thv-operator/api/v1alpha1/mcpserver_types.go b/cmd/thv-operator/api/v1alpha1/mcpserver_types.go index a540c819b..ec44474a7 100644 --- a/cmd/thv-operator/api/v1alpha1/mcpserver_types.go +++ b/cmd/thv-operator/api/v1alpha1/mcpserver_types.go @@ -107,6 +107,12 @@ type MCPServerSpec struct { // +optional Resources ResourceRequirements `json:"resources,omitempty"` + // ProxyRunnerResources defines the resource requirements for the ToolHive proxy runner container. + // The proxy runner is the infrastructure container that manages the MCP server container. + // If not specified, defaults to 50m/200m CPU and 64Mi/256Mi memory (lighter than MCP server defaults). + // +optional + ProxyRunnerResources ResourceRequirements `json:"proxyRunnerResources,omitempty"` + // Secrets are references to secrets to mount in the MCP server container // +optional Secrets []SecretRef `json:"secrets,omitempty"` diff --git a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go index 693152c3b..5561764a6 100644 --- a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go @@ -1146,6 +1146,7 @@ func (in *MCPServerSpec) DeepCopyInto(out *MCPServerSpec) { copy(*out, *in) } out.Resources = in.Resources + out.ProxyRunnerResources = in.ProxyRunnerResources if in.Secrets != nil { in, out := &in.Secrets, &out.Secrets *out = make([]SecretRef, len(*in)) diff --git a/cmd/thv-operator/controllers/mcpserver_controller.go b/cmd/thv-operator/controllers/mcpserver_controller.go index 99660d85f..1030cbffd 100644 --- a/cmd/thv-operator/controllers/mcpserver_controller.go +++ b/cmd/thv-operator/controllers/mcpserver_controller.go @@ -18,7 +18,6 @@ import ( rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" @@ -928,9 +927,13 @@ func (r *MCPServerReconciler) deploymentForMCPServer( defaultSA := mcpServerServiceAccountName(m.Name) serviceAccount = &defaultSA } + // Convert MCP server resources to Kubernetes format + mcpResources := resourceRequirementsForMCPServer(m) + finalPodTemplateSpec := builder. WithServiceAccount(serviceAccount). WithSecrets(m.Spec.Secrets). + WithResources(mcpResources). Build() // Add pod template patch if we have one if finalPodTemplateSpec != nil { @@ -1062,26 +1065,9 @@ func (r *MCPServerReconciler) deploymentForMCPServer( volumes = append(volumes, *authzVolume) } - // Prepare container resources - resources := corev1.ResourceRequirements{} - if m.Spec.Resources.Limits.CPU != "" || m.Spec.Resources.Limits.Memory != "" { - resources.Limits = corev1.ResourceList{} - if m.Spec.Resources.Limits.CPU != "" { - resources.Limits[corev1.ResourceCPU] = resource.MustParse(m.Spec.Resources.Limits.CPU) - } - if m.Spec.Resources.Limits.Memory != "" { - resources.Limits[corev1.ResourceMemory] = resource.MustParse(m.Spec.Resources.Limits.Memory) - } - } - if m.Spec.Resources.Requests.CPU != "" || m.Spec.Resources.Requests.Memory != "" { - resources.Requests = corev1.ResourceList{} - if m.Spec.Resources.Requests.CPU != "" { - resources.Requests[corev1.ResourceCPU] = resource.MustParse(m.Spec.Resources.Requests.CPU) - } - if m.Spec.Resources.Requests.Memory != "" { - resources.Requests[corev1.ResourceMemory] = resource.MustParse(m.Spec.Resources.Requests.Memory) - } - } + // Prepare container resources for the proxy runner container + // Note: This is separate from MCP server container resources (which are in the pod template patch) + resources := resourceRequirementsForProxyRunner(m) // Prepare deployment metadata with overrides deploymentLabels := ls @@ -1528,9 +1514,13 @@ func (r *MCPServerReconciler) deploymentNeedsUpdate( return true } + // Convert MCP server resources to Kubernetes format (same as in deploymentForMCPServer) + mcpResources := resourceRequirementsForMCPServer(mcpServer) + expectedPodTemplateSpec := builder. WithServiceAccount(serviceAccount). WithSecrets(mcpServer.Spec.Secrets). + WithResources(mcpResources). Build() // Find the current pod template patch in the container args @@ -1560,8 +1550,8 @@ func (r *MCPServerReconciler) deploymentNeedsUpdate( return true } - // Check if the resource requirements have changed - if !reflect.DeepEqual(container.Resources, resourceRequirementsForMCPServer(mcpServer)) { + // Check if the proxy runner container resource requirements have changed + if !reflect.DeepEqual(container.Resources, resourceRequirementsForProxyRunner(mcpServer)) { return true } } @@ -1656,28 +1646,25 @@ func serviceNeedsUpdate(service *corev1.Service, mcpServer *mcpv1alpha1.MCPServe return false } -// resourceRequirementsForMCPServer returns the resource requirements for the MCPServer +// resourceRequirementsForMCPServer returns the resource requirements for the MCP server container +// with intelligent merging of defaults and user-provided values func resourceRequirementsForMCPServer(m *mcpv1alpha1.MCPServer) corev1.ResourceRequirements { - resources := corev1.ResourceRequirements{} - if m.Spec.Resources.Limits.CPU != "" || m.Spec.Resources.Limits.Memory != "" { - resources.Limits = corev1.ResourceList{} - if m.Spec.Resources.Limits.CPU != "" { - resources.Limits[corev1.ResourceCPU] = resource.MustParse(m.Spec.Resources.Limits.CPU) - } - if m.Spec.Resources.Limits.Memory != "" { - resources.Limits[corev1.ResourceMemory] = resource.MustParse(m.Spec.Resources.Limits.Memory) - } - } - if m.Spec.Resources.Requests.CPU != "" || m.Spec.Resources.Requests.Memory != "" { - resources.Requests = corev1.ResourceList{} - if m.Spec.Resources.Requests.CPU != "" { - resources.Requests[corev1.ResourceCPU] = resource.MustParse(m.Spec.Resources.Requests.CPU) - } - if m.Spec.Resources.Requests.Memory != "" { - resources.Requests[corev1.ResourceMemory] = resource.MustParse(m.Spec.Resources.Requests.Memory) - } - } - return resources + defaultResources := ctrlutil.BuildDefaultResourceRequirements() + userResources := ctrlutil.BuildResourceRequirements(m.Spec.Resources) + return ctrlutil.MergeResourceRequirements(defaultResources, userResources) +} + +// resourceRequirementsForProxyRunner returns the resource requirements for the proxy runner container +// with intelligent merging of defaults and user-provided values +func resourceRequirementsForProxyRunner(m *mcpv1alpha1.MCPServer) corev1.ResourceRequirements { + // Get default proxy runner resources (lighter than MCP server defaults) + defaultProxyResources := ctrlutil.BuildDefaultProxyRunnerResourceRequirements() + + // Get user-defined proxy resources from the dedicated field + userProxyResources := ctrlutil.BuildResourceRequirements(m.Spec.ProxyRunnerResources) + + // Merge defaults with user-defined values + return ctrlutil.MergeResourceRequirements(defaultProxyResources, userProxyResources) } // mcpServerServiceAccountName returns the service account name for the mcp server diff --git a/cmd/thv-operator/controllers/mcpserver_deployment_update_test.go b/cmd/thv-operator/controllers/mcpserver_deployment_update_test.go new file mode 100644 index 000000000..20f51f797 --- /dev/null +++ b/cmd/thv-operator/controllers/mcpserver_deployment_update_test.go @@ -0,0 +1,292 @@ +package controllers + +import ( + "context" + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/scheme" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" + ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil" +) + +func TestMCPServerReconciler_deploymentNeedsUpdate_Resources(t *testing.T) { + t.Parallel() + + // Register scheme + s := scheme.Scheme + _ = mcpv1alpha1.AddToScheme(s) + + // Create fake client + fakeClient := fake.NewClientBuilder().WithScheme(s).Build() + + reconciler := &MCPServerReconciler{ + Client: fakeClient, + } + + tests := []struct { + name string + existingDeployment *appsv1.Deployment + mcpServer *mcpv1alpha1.MCPServer + expectNeedsUpdate bool + description string + }{ + { + name: "no update needed - default resources match", + existingDeployment: createDeploymentWithResources( + "test-server", + // Proxy runner resources (default) + ctrlutil.BuildDefaultProxyRunnerResourceRequirements(), + // MCP server resources in pod patch (default) + ctrlutil.BuildDefaultResourceRequirements(), + ), + mcpServer: &mcpv1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-server", + Namespace: "default", + }, + Spec: mcpv1alpha1.MCPServerSpec{ + Image: "test:latest", + Transport: "stdio", + // No resources specified, so defaults will be used + }, + }, + expectNeedsUpdate: false, + description: "When resources match defaults, no update is needed", + }, + { + name: "update needed - MCP server resources changed", + existingDeployment: createDeploymentWithResources( + "test-server", + ctrlutil.BuildDefaultProxyRunnerResourceRequirements(), + ctrlutil.BuildDefaultResourceRequirements(), + ), + mcpServer: &mcpv1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-server", + Namespace: "default", + }, + Spec: mcpv1alpha1.MCPServerSpec{ + Image: "test:latest", + Transport: "stdio", + Resources: mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "1000m", + Memory: "1Gi", + }, + }, + }, + }, + expectNeedsUpdate: true, + description: "When MCP server resources change, update is needed", + }, + { + name: "update needed - proxy runner resources changed", + existingDeployment: createDeploymentWithResources( + "test-server", + ctrlutil.BuildDefaultProxyRunnerResourceRequirements(), + ctrlutil.BuildDefaultResourceRequirements(), + ), + mcpServer: &mcpv1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-server", + Namespace: "default", + }, + Spec: mcpv1alpha1.MCPServerSpec{ + Image: "test:latest", + Transport: "stdio", + ProxyRunnerResources: mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "300m", + Memory: "256Mi", + }, + }, + }, + }, + expectNeedsUpdate: true, + description: "When proxy runner resources change, update is needed", + }, + { + name: "no update needed - custom resources match", + existingDeployment: createDeploymentWithResources( + "test-server", + // Proxy runner with custom resources + corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("300m"), + corev1.ResourceMemory: resource.MustParse("256Mi"), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("100m"), + corev1.ResourceMemory: resource.MustParse("128Mi"), + }, + }, + // MCP server with custom resources + corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("1Gi"), + }, + }, + ), + mcpServer: &mcpv1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-server", + Namespace: "default", + }, + Spec: mcpv1alpha1.MCPServerSpec{ + Image: "test:latest", + Transport: "stdio", + Resources: mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "2000m", + Memory: "2Gi", + }, + Requests: mcpv1alpha1.ResourceList{ + CPU: "1000m", + Memory: "1Gi", + }, + }, + ProxyRunnerResources: mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "300m", + Memory: "256Mi", + }, + Requests: mcpv1alpha1.ResourceList{ + CPU: "100m", + Memory: "128Mi", + }, + }, + }, + }, + expectNeedsUpdate: false, + description: "When both proxy and MCP resources match, no update is needed", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + ctx := context.Background() + needsUpdate := reconciler.deploymentNeedsUpdate(ctx, tt.existingDeployment, tt.mcpServer, "test-checksum") + + assert.Equal(t, tt.expectNeedsUpdate, needsUpdate, tt.description) + }) + } +} + +// createDeploymentWithResources creates a deployment with specified proxy and MCP server resources +func createDeploymentWithResources(name string, proxyResources, mcpResources corev1.ResourceRequirements) *appsv1.Deployment { + // Create pod template spec with service account and MCP container resources + // This matches what deploymentForMCPServer creates via PodTemplateSpecBuilder + // Note: Image is NOT included in the patch - only resources, secrets, and service account + defaultSA := mcpServerServiceAccountName(name) + podTemplate := &corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + ServiceAccountName: defaultSA, + Containers: []corev1.Container{ + { + Name: mcpContainerName, + Resources: mcpResources, + }, + }, + }, + } + + // Marshal pod template to JSON for the --k8s-pod-patch argument + podTemplateJSON, _ := json.Marshal(podTemplate) + + // Create labels for the deployment (matches labelsForMCPServer) + labels := map[string]string{ + "app": "mcpserver", + "app.kubernetes.io/name": "mcpserver", + "app.kubernetes.io/instance": name, + "toolhive": "true", + "toolhive-name": name, + } + + return &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: "default", + Labels: labels, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labels, + Annotations: map[string]string{ + "toolhive.stacklok.dev/runconfig-checksum": "test-checksum", + }, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: ctrlutil.ProxyRunnerServiceAccountName(name), + Containers: []corev1.Container{ + { + Name: "proxy-runner", + Image: "ghcr.io/stacklok/toolhive/proxyrunner:latest", + Resources: proxyResources, + Args: []string{ + "run", + "--k8s-pod-patch=" + string(podTemplateJSON), + "test:latest", + }, + Ports: []corev1.ContainerPort{ + { + ContainerPort: 8080, + Name: "http", + Protocol: corev1.ProtocolTCP, + }, + }, + Env: []corev1.EnvVar{ + {Name: "XDG_CONFIG_HOME", Value: "/tmp"}, + {Name: "HOME", Value: "/tmp"}, + {Name: "TOOLHIVE_RUNTIME", Value: "kubernetes"}, + {Name: "UNSTRUCTURED_LOGS", Value: "false"}, + }, + }, + }, + }, + }, + }, + } +} + +func TestMCPServerReconciler_deploymentNeedsUpdate_EdgeCases(t *testing.T) { + t.Parallel() + + reconciler := &MCPServerReconciler{} + ctx := context.Background() + + t.Run("nil deployment returns true", func(t *testing.T) { + t.Parallel() + mcpServer := &mcpv1alpha1.MCPServer{ + ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}, + Spec: mcpv1alpha1.MCPServerSpec{Image: "test:latest", Transport: "stdio"}, + } + require.True(t, reconciler.deploymentNeedsUpdate(ctx, nil, mcpServer, "checksum")) + }) + + t.Run("nil mcpServer returns true", func(t *testing.T) { + t.Parallel() + deployment := createDeploymentWithResources( + "test", + ctrlutil.BuildDefaultProxyRunnerResourceRequirements(), + ctrlutil.BuildDefaultResourceRequirements(), + ) + require.True(t, reconciler.deploymentNeedsUpdate(ctx, deployment, nil, "checksum")) + }) +} diff --git a/cmd/thv-operator/controllers/virtualmcpserver_controller.go b/cmd/thv-operator/controllers/virtualmcpserver_controller.go index 6a6058ff8..036190bc4 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_controller.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_controller.go @@ -849,7 +849,9 @@ func (r *VirtualMCPServerReconciler) containerNeedsUpdate( return true } - return false + // Check if resources have changed (important for upgrades adding default resources) + expectedResources := r.buildMergedResourcesForVmcp(vmcp) + return !reflect.DeepEqual(container.Resources, expectedResources) } // deploymentMetadataNeedsUpdate checks if deployment-level metadata has changed diff --git a/cmd/thv-operator/controllers/virtualmcpserver_controller_test.go b/cmd/thv-operator/controllers/virtualmcpserver_controller_test.go index 0c9328f74..ea1f0877e 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_controller_test.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_controller_test.go @@ -1504,8 +1504,9 @@ func TestVirtualMCPServerContainerNeedsUpdate(t *testing.T) { Ports: []corev1.ContainerPort{ {ContainerPort: 4483}, }, - Args: reconciler.buildContainerArgsForVmcp(vmcp), - Env: reconciler.buildEnvVarsForVmcp(context.Background(), vmcp, []workloads.TypedWorkload{}), + Args: reconciler.buildContainerArgsForVmcp(vmcp), + Env: reconciler.buildEnvVarsForVmcp(context.Background(), vmcp, []workloads.TypedWorkload{}), + Resources: reconciler.buildMergedResourcesForVmcp(vmcp), }, }, ServiceAccountName: vmcpServiceAccountName(vmcp.Name), @@ -1898,8 +1899,9 @@ func TestVirtualMCPServerDeploymentNeedsUpdate(t *testing.T) { Ports: []corev1.ContainerPort{ {ContainerPort: 4483}, }, - Args: reconciler.buildContainerArgsForVmcp(vmcp), - Env: reconciler.buildEnvVarsForVmcp(context.Background(), vmcp, []workloads.TypedWorkload{}), + Args: reconciler.buildContainerArgsForVmcp(vmcp), + Env: reconciler.buildEnvVarsForVmcp(context.Background(), vmcp, []workloads.TypedWorkload{}), + Resources: reconciler.buildMergedResourcesForVmcp(vmcp), }, }, ServiceAccountName: vmcpServiceAccountName(vmcp.Name), diff --git a/cmd/thv-operator/controllers/virtualmcpserver_deployment.go b/cmd/thv-operator/controllers/virtualmcpserver_deployment.go index a07293e6c..8fc068fb6 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_deployment.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_deployment.go @@ -127,13 +127,16 @@ func (r *VirtualMCPServerReconciler) deploymentForVirtualMCPServer( } // Apply user-provided PodTemplateSpec customizations if present - if vmcp.Spec.PodTemplateSpec != nil && vmcp.Spec.PodTemplateSpec.Raw != nil { + if vmcp.Spec.PodTemplateSpec != nil && len(vmcp.Spec.PodTemplateSpec.Raw) > 0 { if err := r.applyPodTemplateSpecToDeployment(ctx, vmcp, dep); err != nil { ctxLogger := log.FromContext(ctx) ctxLogger.Error(err, "Failed to apply PodTemplateSpec to Deployment") // Return nil to block deployment creation until PodTemplateSpec is fixed return nil } + } else { + // No PodTemplateSpec, so just set default resources + dep.Spec.Template.Spec.Containers[0].Resources = r.buildDefaultResourcesForVmcp() } if err := controllerutil.SetControllerReference(vmcp, dep, r.Scheme); err != nil { @@ -529,6 +532,50 @@ func (*VirtualMCPServerReconciler) buildContainerPortsForVmcp( }} } +// buildDefaultResourcesForVmcp builds default resource requirements for the vmcp container. +// These defaults provide reasonable CPU and memory limits to prevent resource monopolization +// while allowing user customization through PodTemplateSpec. +func (*VirtualMCPServerReconciler) buildDefaultResourcesForVmcp() corev1.ResourceRequirements { + return ctrlutil.BuildDefaultResourceRequirements() +} + +// buildMergedResourcesForVmcp extracts user-provided resources from PodTemplateSpec (if any), +// applies intelligent merging with defaults, and returns the final resource requirements. +// This implements the same intelligent merging strategy as MCPServer: +// 1. User provides both requests and limits → use user's values +// 2. User only provides limits → use limits for both requests and limits +// 3. User only provides requests → compare with defaults and choose appropriately +// 4. User provides nothing → use defaults +func (*VirtualMCPServerReconciler) buildMergedResourcesForVmcp( + vmcp *mcpv1alpha1.VirtualMCPServer, +) corev1.ResourceRequirements { + defaultResources := ctrlutil.BuildDefaultResourceRequirements() + + // If no PodTemplateSpec provided, use defaults + if vmcp.Spec.PodTemplateSpec == nil || len(vmcp.Spec.PodTemplateSpec.Raw) == 0 { + return defaultResources + } + + // Parse PodTemplateSpec to extract user-provided resources + var podTemplateSpec corev1.PodTemplateSpec + if err := json.Unmarshal(vmcp.Spec.PodTemplateSpec.Raw, &podTemplateSpec); err != nil { + // If we can't parse, use defaults + return defaultResources + } + + // Find the vmcp container in user's PodTemplateSpec + var userResources corev1.ResourceRequirements + for _, container := range podTemplateSpec.Spec.Containers { + if container.Name == "vmcp" { + userResources = container.Resources + break + } + } + + // Apply intelligent merging + return ctrlutil.MergeResourceRequirements(defaultResources, userResources) +} + // serviceForVirtualMCPServer returns a VirtualMCPServer Service object func (r *VirtualMCPServerReconciler) serviceForVirtualMCPServer( ctx context.Context, @@ -693,18 +740,13 @@ func (r *VirtualMCPServerReconciler) validateSecretKeyRef( // - User-provided fields override controller-generated defaults // - Arrays are merged based on strategic merge patch rules (e.g., containers merged by name) // - The "vmcp" container is preserved from the controller-generated spec -func (*VirtualMCPServerReconciler) applyPodTemplateSpecToDeployment( +func (r *VirtualMCPServerReconciler) applyPodTemplateSpecToDeployment( ctx context.Context, vmcp *mcpv1alpha1.VirtualMCPServer, deployment *appsv1.Deployment, ) error { ctxLogger := log.FromContext(ctx) - // Early return if no PodTemplateSpec provided - if vmcp.Spec.PodTemplateSpec == nil || len(vmcp.Spec.PodTemplateSpec.Raw) == 0 { - return nil - } - // Validate the PodTemplateSpec and check if there are meaningful customizations builder, err := ctrlutil.NewPodTemplateSpecBuilder(vmcp.Spec.PodTemplateSpec, "vmcp") if err != nil { @@ -712,7 +754,12 @@ func (*VirtualMCPServerReconciler) applyPodTemplateSpecToDeployment( } if builder.Build() == nil { - // No meaningful customizations to apply + // No meaningful customizations to apply via strategic merge patch + // Set default resources and return + deployment.Spec.Template.Spec.Containers[0].Resources = r.buildDefaultResourcesForVmcp() + ctxLogger.V(1).Info("No PodTemplateSpec customizations to apply, using defaults", + "virtualmcpserver", vmcp.Name, + "namespace", vmcp.Namespace) return nil } @@ -740,6 +787,16 @@ func (*VirtualMCPServerReconciler) applyPodTemplateSpecToDeployment( deployment.Spec.Template = patchedPodTemplateSpec + // After strategic merge, set the intelligently merged resources on vmcp container + // This ensures our intelligent merging logic takes precedence over any user-specified resources + mergedResources := r.buildMergedResourcesForVmcp(vmcp) + for i := range deployment.Spec.Template.Spec.Containers { + if deployment.Spec.Template.Spec.Containers[i].Name == "vmcp" { + deployment.Spec.Template.Spec.Containers[i].Resources = mergedResources + break + } + } + ctxLogger.V(1).Info("Applied PodTemplateSpec customizations to deployment", "virtualmcpserver", vmcp.Name, "namespace", vmcp.Namespace) diff --git a/cmd/thv-operator/controllers/virtualmcpserver_deployment_test.go b/cmd/thv-operator/controllers/virtualmcpserver_deployment_test.go index e82f310ea..df76ecc7f 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_deployment_test.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_deployment_test.go @@ -21,6 +21,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -430,3 +431,655 @@ func TestServiceNeedsUpdate(t *testing.T) { } assert.True(t, r.serviceNeedsUpdate(service, vmcp)) } + +// TestBuildMergedResourcesForVmcp tests intelligent resource merging +func TestBuildMergedResourcesForVmcp(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + podTemplateSpecJSON string + expectedCPURequest string + expectedCPULimit string + expectedMemoryRequest string + expectedMemoryLimit string + }{ + { + name: "no PodTemplateSpec - use defaults", + podTemplateSpecJSON: "", + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides both requests and limits - use user values", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "200m", + "memory": "256Mi" + }, + "limits": { + "cpu": "1000m", + "memory": "1Gi" + } + } + }] + } + }`, + expectedCPURequest: "200m", + expectedCPULimit: "1", // 1000m is normalized to 1 core + expectedMemoryRequest: "256Mi", + expectedMemoryLimit: "1Gi", + }, + { + name: "user provides only limits - use default requests + user limits", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "limits": { + "cpu": "800m", + "memory": "768Mi" + } + } + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: "800m", + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: "768Mi", + }, + { + name: "user provides only requests (below defaults) - use requests + default limits", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "50m", + "memory": "64Mi" + } + } + }] + } + }`, + expectedCPURequest: "50m", + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: "64Mi", + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides only requests (above defaults) - use requests for both", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "2000m", + "memory": "2Gi" + } + } + }] + } + }`, + expectedCPURequest: "2", // 2000m is normalized to 2 cores + expectedCPULimit: "2", // 2000m is normalized to 2 cores + expectedMemoryRequest: "2Gi", + expectedMemoryLimit: "2Gi", + }, + { + name: "user provides empty resources - use defaults", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": {} + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides vmcp container without resources - use defaults", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp" + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides other containers - use defaults for vmcp", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "sidecar", + "resources": { + "requests": { + "cpu": "100m", + "memory": "128Mi" + } + } + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides limit below default request - use limit for both", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "limits": { + "cpu": "50m", + "memory": "64Mi" + } + } + }] + } + }`, + expectedCPURequest: "50m", + expectedCPULimit: "50m", + expectedMemoryRequest: "64Mi", + expectedMemoryLimit: "64Mi", + }, + { + name: "user provides mixed partial resources - CPU limit and Memory request", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "memory": "256Mi" + }, + "limits": { + "cpu": "800m" + } + } + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: "800m", + expectedMemoryRequest: "256Mi", + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides limit equal to default request - use default request and user limit", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "limits": { + "cpu": "100m", + "memory": "128Mi" + } + } + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: "100m", + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: "128Mi", + }, + { + name: "user provides request equal to default limit - use request for both", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "500m", + "memory": "512Mi" + } + } + }] + } + }`, + expectedCPURequest: "500m", + expectedCPULimit: "500m", + expectedMemoryRequest: "512Mi", + expectedMemoryLimit: "512Mi", + }, + { + name: "user provides only CPU request above default limit - CPU uses request for both, Memory uses all defaults", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "1000m" + } + } + }] + } + }`, + expectedCPURequest: "1", + expectedCPULimit: "1", + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides only CPU limit below default request - CPU uses limit for both, Memory uses all defaults", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "limits": { + "cpu": "50m" + } + } + }] + } + }`, + expectedCPURequest: "50m", + expectedCPULimit: "50m", + expectedMemoryRequest: ctrlutil.DefaultMemoryRequest, + expectedMemoryLimit: ctrlutil.DefaultMemoryLimit, + }, + { + name: "user provides only Memory request above default limit - Memory uses request for both, CPU uses all defaults", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "memory": "1Gi" + } + } + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: "1Gi", + expectedMemoryLimit: "1Gi", + }, + { + name: "user provides only Memory limit below default request - Memory uses limit for both, CPU uses all defaults", + podTemplateSpecJSON: `{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "limits": { + "memory": "64Mi" + } + } + }] + } + }`, + expectedCPURequest: ctrlutil.DefaultCPURequest, + expectedCPULimit: ctrlutil.DefaultCPULimit, + expectedMemoryRequest: "64Mi", + expectedMemoryLimit: "64Mi", + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + }, + } + + if tt.podTemplateSpecJSON != "" { + vmcp.Spec.PodTemplateSpec = &runtime.RawExtension{ + Raw: []byte(tt.podTemplateSpecJSON), + } + } + + r := &VirtualMCPServerReconciler{} + resources := r.buildMergedResourcesForVmcp(vmcp) + + assert.Equal(t, tt.expectedCPURequest, resources.Requests.Cpu().String()) + assert.Equal(t, tt.expectedCPULimit, resources.Limits.Cpu().String()) + assert.Equal(t, tt.expectedMemoryRequest, resources.Requests.Memory().String()) + assert.Equal(t, tt.expectedMemoryLimit, resources.Limits.Memory().String()) + }) + } +} + +// TestDeploymentUsesIntelligentResourceMerging tests that deployments use merged resources +func TestDeploymentUsesIntelligentResourceMerging(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "200m", + "memory": "256Mi" + }, + "limits": { + "cpu": "1000m", + "memory": "1Gi" + } + } + }] + } + }`), + }, + }, + } + + scheme := runtime.NewScheme() + _ = mcpv1alpha1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + r := &VirtualMCPServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + deployment := r.deploymentForVirtualMCPServer(context.Background(), vmcp, "test-checksum", []workloads.TypedWorkload{}) + + require.NotNil(t, deployment) + require.Len(t, deployment.Spec.Template.Spec.Containers, 1) + + container := deployment.Spec.Template.Spec.Containers[0] + assert.Equal(t, "200m", container.Resources.Requests.Cpu().String()) + assert.Equal(t, "1", container.Resources.Limits.Cpu().String()) // 1000m normalized to 1 + assert.Equal(t, "256Mi", container.Resources.Requests.Memory().String()) + assert.Equal(t, "1Gi", container.Resources.Limits.Memory().String()) +} + +// TestApplyPodTemplateSpecStripsResources tests that resources are stripped before strategic merge +func TestApplyPodTemplateSpecStripsResources(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{ + "spec": { + "containers": [{ + "name": "vmcp", + "resources": { + "requests": { + "cpu": "300m" + } + } + }], + "nodeSelector": { + "disk": "ssd" + } + } + }`), + }, + }, + } + + scheme := runtime.NewScheme() + _ = mcpv1alpha1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + r := &VirtualMCPServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + deployment := r.deploymentForVirtualMCPServer(context.Background(), vmcp, "test-checksum", []workloads.TypedWorkload{}) + + require.NotNil(t, deployment) + + // Verify nodeSelector was applied (from strategic merge) + assert.Equal(t, "ssd", deployment.Spec.Template.Spec.NodeSelector["disk"]) + + // Verify resources use intelligent merging, not raw user values + // User only provided 300m CPU request, so intelligent merge should add default limit + container := deployment.Spec.Template.Spec.Containers[0] + assert.Equal(t, "300m", container.Resources.Requests.Cpu().String()) + assert.Equal(t, ctrlutil.DefaultCPULimit, container.Resources.Limits.Cpu().String()) + // Memory should be all defaults since user didn't specify + assert.Equal(t, ctrlutil.DefaultMemoryRequest, container.Resources.Requests.Memory().String()) + assert.Equal(t, ctrlutil.DefaultMemoryLimit, container.Resources.Limits.Memory().String()) +} + +// TestDeploymentWithEmptyJSONPodTemplateSpec tests that defaults are applied when PodTemplateSpec is empty JSON "{}" +func TestDeploymentWithEmptyJSONPodTemplateSpec(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + // Empty JSON object - valid JSON but no meaningful fields + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{}`), + }, + }, + } + + scheme := runtime.NewScheme() + _ = mcpv1alpha1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + r := &VirtualMCPServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + deployment := r.deploymentForVirtualMCPServer(context.Background(), vmcp, "test-checksum", []workloads.TypedWorkload{}) + + require.NotNil(t, deployment) + require.Len(t, deployment.Spec.Template.Spec.Containers, 1) + + container := deployment.Spec.Template.Spec.Containers[0] + assert.Equal(t, ctrlutil.DefaultCPURequest, container.Resources.Requests.Cpu().String(), + "CPU request should be default when PodTemplateSpec is empty JSON") + assert.Equal(t, ctrlutil.DefaultCPULimit, container.Resources.Limits.Cpu().String(), + "CPU limit should be default when PodTemplateSpec is empty JSON") + assert.Equal(t, ctrlutil.DefaultMemoryRequest, container.Resources.Requests.Memory().String(), + "Memory request should be default when PodTemplateSpec is empty JSON") + assert.Equal(t, ctrlutil.DefaultMemoryLimit, container.Resources.Limits.Memory().String(), + "Memory limit should be default when PodTemplateSpec is empty JSON") +} + +// TestDeploymentWithEmptyRawExtension tests that defaults are applied when RawExtension has empty Raw +func TestDeploymentWithEmptyRawExtension(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + // RawExtension exists but Raw is empty slice + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte{}, + }, + }, + } + + scheme := runtime.NewScheme() + _ = mcpv1alpha1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + r := &VirtualMCPServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + deployment := r.deploymentForVirtualMCPServer(context.Background(), vmcp, "test-checksum", []workloads.TypedWorkload{}) + + require.NotNil(t, deployment) + require.Len(t, deployment.Spec.Template.Spec.Containers, 1) + + container := deployment.Spec.Template.Spec.Containers[0] + assert.Equal(t, ctrlutil.DefaultCPURequest, container.Resources.Requests.Cpu().String(), + "CPU request should be default when Raw is empty slice") + assert.Equal(t, ctrlutil.DefaultCPULimit, container.Resources.Limits.Cpu().String(), + "CPU limit should be default when Raw is empty slice") + assert.Equal(t, ctrlutil.DefaultMemoryRequest, container.Resources.Requests.Memory().String(), + "Memory request should be default when Raw is empty slice") + assert.Equal(t, ctrlutil.DefaultMemoryLimit, container.Resources.Limits.Memory().String(), + "Memory limit should be default when Raw is empty slice") +} + +// TestDeploymentWithEmptySpecFields tests that defaults are applied when PodTemplateSpec +// has structure but all fields are empty (triggers builder.Build() == nil) +func TestDeploymentWithEmptySpecFields(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + // Valid JSON structure with empty spec - triggers builder.Build() == nil + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{"spec": {}}`), + }, + }, + } + + scheme := runtime.NewScheme() + _ = mcpv1alpha1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + r := &VirtualMCPServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + deployment := r.deploymentForVirtualMCPServer(context.Background(), vmcp, "test-checksum", []workloads.TypedWorkload{}) + + require.NotNil(t, deployment) + require.Len(t, deployment.Spec.Template.Spec.Containers, 1) + + container := deployment.Spec.Template.Spec.Containers[0] + assert.Equal(t, ctrlutil.DefaultCPURequest, container.Resources.Requests.Cpu().String(), + "CPU request should be default when spec fields are empty") + assert.Equal(t, ctrlutil.DefaultCPULimit, container.Resources.Limits.Cpu().String(), + "CPU limit should be default when spec fields are empty") + assert.Equal(t, ctrlutil.DefaultMemoryRequest, container.Resources.Requests.Memory().String(), + "Memory request should be default when spec fields are empty") + assert.Equal(t, ctrlutil.DefaultMemoryLimit, container.Resources.Limits.Memory().String(), + "Memory limit should be default when spec fields are empty") +} + +// TestContainerNeedsUpdateDetectsResourceChanges tests that existing deployments without resources +// will trigger an update when the operator is upgraded to include default resources +func TestContainerNeedsUpdateDetectsResourceChanges(t *testing.T) { + t.Parallel() + + vmcp := &mcpv1alpha1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1alpha1.VirtualMCPServerSpec{ + GroupRef: mcpv1alpha1.GroupRef{ + Name: "test-group", + }, + // No PodTemplateSpec - simulates pre-upgrade deployment + }, + } + + scheme := runtime.NewScheme() + _ = mcpv1alpha1.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + r := &VirtualMCPServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + // Simulate a pre-upgrade deployment with no resources set + deploymentWithoutResources := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: vmcp.Name, + Namespace: vmcp.Namespace, + }, + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + ServiceAccountName: vmcpServiceAccountName(vmcp.Name), + Containers: []corev1.Container{{ + Name: "vmcp", + Image: getVmcpImage(), + Ports: []corev1.ContainerPort{{ContainerPort: vmcpDefaultPort}}, + Env: r.buildEnvVarsForVmcp(context.Background(), vmcp, []workloads.TypedWorkload{}), + // No Resources set - simulates pre-upgrade state + }}, + }, + }, + }, + } + + // containerNeedsUpdate should return true because resources are missing + needsUpdate := r.containerNeedsUpdate(context.Background(), deploymentWithoutResources, vmcp, []workloads.TypedWorkload{}) + assert.True(t, needsUpdate, "containerNeedsUpdate should detect missing resources") +} diff --git a/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder.go b/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder.go index ee0573634..95f4435b5 100644 --- a/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder.go +++ b/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder.go @@ -106,6 +106,39 @@ func (b *PodTemplateSpecBuilder) WithSecrets(secrets []mcpv1alpha1.SecretRef) *P return b } +// WithResources adds resource requirements to the target container. +// The target container is specified by containerName in the constructor. +// The resources parameter should already be validated and converted to proper K8s format by the controller. +func (b *PodTemplateSpecBuilder) WithResources(k8sResources corev1.ResourceRequirements) *PodTemplateSpecBuilder { + // Check if resources are empty + if len(k8sResources.Limits) == 0 && len(k8sResources.Requests) == 0 { + return b + } + + // Find existing container or create new one + containerIndex := -1 + for i, container := range b.spec.Spec.Containers { + if container.Name == b.containerName { + containerIndex = i + break + } + } + + if containerIndex >= 0 { + // Merge resources with existing container (existing takes precedence) + existingResources := b.spec.Spec.Containers[containerIndex].Resources + mergedResources := MergeResourceRequirements(k8sResources, existingResources) + b.spec.Spec.Containers[containerIndex].Resources = mergedResources + } else { + // Add new container with resources + b.spec.Spec.Containers = append(b.spec.Spec.Containers, corev1.Container{ + Name: b.containerName, + Resources: k8sResources, + }) + } + return b +} + // Build returns the final PodTemplateSpec, or nil if no customizations were made. func (b *PodTemplateSpecBuilder) Build() *corev1.PodTemplateSpec { if b.isEmpty() { diff --git a/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder_test.go b/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder_test.go index d684cfcd0..8b1ed030a 100644 --- a/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder_test.go +++ b/cmd/thv-operator/pkg/controllerutil/podtemplatespec_builder_test.go @@ -10,7 +10,10 @@ import ( mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" ) -const testContainerName = "test-container" +const ( + testContainerName = "test-container" + testServiceAccount = "my-sa" +) func TestNewPodTemplateSpecBuilder(t *testing.T) { t.Parallel() @@ -70,7 +73,7 @@ func TestPodTemplateSpecBuilder_Build(t *testing.T) { { name: "with service account returns spec", setup: func(b *PodTemplateSpecBuilder) { - sa := "my-sa" + sa := testServiceAccount b.WithServiceAccount(&sa) }, expectNil: false, @@ -238,18 +241,233 @@ func TestPodTemplateSpecBuilder_Chaining(t *testing.T) { builder, err := NewPodTemplateSpecBuilder(nil, testContainerName) require.NoError(t, err) - sa := "my-sa" + sa := testServiceAccount result := builder. WithServiceAccount(&sa). WithSecrets([]mcpv1alpha1.SecretRef{{Name: "secret", Key: "KEY"}}). Build() require.NotNil(t, result) - assert.Equal(t, "my-sa", result.Spec.ServiceAccountName) + assert.Equal(t, testServiceAccount, result.Spec.ServiceAccountName) require.Len(t, result.Spec.Containers, 1) assert.Equal(t, testContainerName, result.Spec.Containers[0].Name) } +func TestPodTemplateSpecBuilder_WithResources(t *testing.T) { + t.Parallel() + + t.Run("empty resources does nothing", func(t *testing.T) { + t.Parallel() + builder, err := NewPodTemplateSpecBuilder(nil, testContainerName) + require.NoError(t, err) + + // Empty resources should not create a container + emptyResources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{}) + builder.WithResources(emptyResources) + + assert.Empty(t, builder.spec.Spec.Containers) + }) + + t.Run("creates container with resources", func(t *testing.T) { + t.Parallel() + builder, err := NewPodTemplateSpecBuilder(nil, testContainerName) + require.NoError(t, err) + + resources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "500m", + Memory: "512Mi", + }, + Requests: mcpv1alpha1.ResourceList{ + CPU: "100m", + Memory: "128Mi", + }, + }) + builder.WithResources(resources) + + require.Len(t, builder.spec.Spec.Containers, 1) + container := builder.spec.Spec.Containers[0] + assert.Equal(t, testContainerName, container.Name) + + // Verify limits + assert.Equal(t, "500m", container.Resources.Limits.Cpu().String()) + assert.Equal(t, "512Mi", container.Resources.Limits.Memory().String()) + + // Verify requests + assert.Equal(t, "100m", container.Resources.Requests.Cpu().String()) + assert.Equal(t, "128Mi", container.Resources.Requests.Memory().String()) + }) + + t.Run("merges into existing container", func(t *testing.T) { + t.Parallel() + raw := &runtime.RawExtension{ + Raw: []byte(`{"spec":{"containers":[{"name":"test-container","image":"test:latest"}]}}`), + } + builder, err := NewPodTemplateSpecBuilder(raw, testContainerName) + require.NoError(t, err) + + resources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "200m", + }, + }) + builder.WithResources(resources) + + require.Len(t, builder.spec.Spec.Containers, 1) + container := builder.spec.Spec.Containers[0] + assert.Equal(t, "test:latest", container.Image) + assert.Equal(t, "200m", container.Resources.Limits.Cpu().String()) + }) + + t.Run("adds to different container", func(t *testing.T) { + t.Parallel() + raw := &runtime.RawExtension{ + Raw: []byte(`{"spec":{"containers":[{"name":"other-container"}]}}`), + } + builder, err := NewPodTemplateSpecBuilder(raw, testContainerName) + require.NoError(t, err) + + resources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{ + Requests: mcpv1alpha1.ResourceList{ + Memory: "64Mi", + }, + }) + builder.WithResources(resources) + + require.Len(t, builder.spec.Spec.Containers, 2) + assert.Equal(t, "other-container", builder.spec.Spec.Containers[0].Name) + assert.Equal(t, testContainerName, builder.spec.Spec.Containers[1].Name) + assert.Equal(t, "64Mi", builder.spec.Spec.Containers[1].Resources.Requests.Memory().String()) + }) +} + +func TestPodTemplateSpecBuilder_WithResourcesChaining(t *testing.T) { + t.Parallel() + + builder, err := NewPodTemplateSpecBuilder(nil, testContainerName) + require.NoError(t, err) + + sa := testServiceAccount + resources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "1000m", + Memory: "1Gi", + }, + }) + + result := builder. + WithServiceAccount(&sa). + WithSecrets([]mcpv1alpha1.SecretRef{{Name: "secret", Key: "KEY"}}). + WithResources(resources). + Build() + + require.NotNil(t, result) + assert.Equal(t, testServiceAccount, result.Spec.ServiceAccountName) + require.Len(t, result.Spec.Containers, 1) + + container := result.Spec.Containers[0] + assert.Equal(t, testContainerName, container.Name) + assert.Len(t, container.Env, 1) + // 1000m is normalized to 1 CPU core + assert.Equal(t, "1", container.Resources.Limits.Cpu().String()) + assert.Equal(t, "1Gi", container.Resources.Limits.Memory().String()) +} + +func TestPodTemplateSpecBuilder_WithResourcesMerging(t *testing.T) { + t.Parallel() + + t.Run("existing resources take precedence over defaults", func(t *testing.T) { + t.Parallel() + // Create pod template with existing resources + raw := &runtime.RawExtension{ + Raw: []byte(`{ + "spec": { + "containers": [{ + "name": "test-container", + "resources": { + "limits": {"cpu": "2", "memory": "2Gi"}, + "requests": {"cpu": "500m"} + } + }] + } + }`), + } + builder, err := NewPodTemplateSpecBuilder(raw, testContainerName) + require.NoError(t, err) + + // Try to set different resources (these should be overridden by existing) + defaultResources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "1", + Memory: "1Gi", + }, + Requests: mcpv1alpha1.ResourceList{ + CPU: "100m", + Memory: "128Mi", + }, + }) + + builder.WithResources(defaultResources) + result := builder.Build() + + require.NotNil(t, result) + require.Len(t, result.Spec.Containers, 1) + container := result.Spec.Containers[0] + + // Existing values should be preserved + assert.Equal(t, "2", container.Resources.Limits.Cpu().String()) + assert.Equal(t, "2Gi", container.Resources.Limits.Memory().String()) + assert.Equal(t, "500m", container.Resources.Requests.Cpu().String()) + // Memory request was not in existing, so default should be used + assert.Equal(t, "128Mi", container.Resources.Requests.Memory().String()) + }) + + t.Run("defaults fill in missing resources", func(t *testing.T) { + t.Parallel() + // Create pod template with partial resources (only CPU limit) + raw := &runtime.RawExtension{ + Raw: []byte(`{ + "spec": { + "containers": [{ + "name": "test-container", + "resources": { + "limits": {"cpu": "3"} + } + }] + } + }`), + } + builder, err := NewPodTemplateSpecBuilder(raw, testContainerName) + require.NoError(t, err) + + // Set complete resource defaults + defaultResources := BuildResourceRequirements(mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "1", + Memory: "1Gi", + }, + Requests: mcpv1alpha1.ResourceList{ + CPU: "100m", + Memory: "128Mi", + }, + }) + + builder.WithResources(defaultResources) + result := builder.Build() + + require.NotNil(t, result) + require.Len(t, result.Spec.Containers, 1) + container := result.Spec.Containers[0] + + // Existing CPU limit should be preserved + assert.Equal(t, "3", container.Resources.Limits.Cpu().String()) + // Other resources should come from defaults + assert.Equal(t, "1Gi", container.Resources.Limits.Memory().String()) + assert.Equal(t, "100m", container.Resources.Requests.Cpu().String()) + assert.Equal(t, "128Mi", container.Resources.Requests.Memory().String()) + }) +} + // ptr is a helper to create a pointer to a string. func ptr(s string) *string { return &s diff --git a/cmd/thv-operator/pkg/controllerutil/resources.go b/cmd/thv-operator/pkg/controllerutil/resources.go index 6dcbe784c..bae1cd1aa 100644 --- a/cmd/thv-operator/pkg/controllerutil/resources.go +++ b/cmd/thv-operator/pkg/controllerutil/resources.go @@ -12,6 +12,62 @@ import ( mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" ) +const ( + // DefaultCPURequest is the default CPU request for MCP server containers. + // These values provide reasonable limits to prevent resource monopolization + // while allowing sufficient resources for typical MCP server workloads. + DefaultCPURequest = "100m" + // DefaultCPULimit is the default CPU limit for MCP server containers. + DefaultCPULimit = "500m" + // DefaultMemoryRequest is the default memory request for MCP server containers. + DefaultMemoryRequest = "128Mi" + // DefaultMemoryLimit is the default memory limit for MCP server containers. + DefaultMemoryLimit = "512Mi" + + // DefaultProxyRunnerCPURequest is the default CPU request for proxy runner containers. + // The proxy runner is a lightweight Go process that manages the connection + // to the MCP server container, so it needs fewer resources. + DefaultProxyRunnerCPURequest = "50m" + // DefaultProxyRunnerCPULimit is the default CPU limit for proxy runner containers. + DefaultProxyRunnerCPULimit = "200m" + // DefaultProxyRunnerMemoryRequest is the default memory request for proxy runner containers. + DefaultProxyRunnerMemoryRequest = "64Mi" + // DefaultProxyRunnerMemoryLimit is the default memory limit for proxy runner containers. + DefaultProxyRunnerMemoryLimit = "256Mi" +) + +// BuildDefaultResourceRequirements returns standard default resource requirements +// for MCP server containers (MCPServer, VirtualMCPServer, MCPRemoteProxy). +// These defaults prevent resource monopolization while allowing user customization. +func BuildDefaultResourceRequirements() corev1.ResourceRequirements { + return corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(DefaultCPULimit), + corev1.ResourceMemory: resource.MustParse(DefaultMemoryLimit), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(DefaultCPURequest), + corev1.ResourceMemory: resource.MustParse(DefaultMemoryRequest), + }, + } +} + +// BuildDefaultProxyRunnerResourceRequirements returns default resource requirements +// for the ToolHive proxy runner container (the container running `thv run`). +// The proxy runner is lighter weight than the MCP server container it manages. +func BuildDefaultProxyRunnerResourceRequirements() corev1.ResourceRequirements { + return corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(DefaultProxyRunnerCPULimit), + corev1.ResourceMemory: resource.MustParse(DefaultProxyRunnerMemoryLimit), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse(DefaultProxyRunnerCPURequest), + corev1.ResourceMemory: resource.MustParse(DefaultProxyRunnerMemoryRequest), + }, + } +} + // BuildResourceRequirements builds Kubernetes resource requirements from CRD spec // Shared between MCPServer and MCPRemoteProxy func BuildResourceRequirements(resourceSpec mcpv1alpha1.ResourceRequirements) corev1.ResourceRequirements { @@ -40,6 +96,70 @@ func BuildResourceRequirements(resourceSpec mcpv1alpha1.ResourceRequirements) co return resources } +// MergeResourceRequirements intelligently merges user-provided resources with default resources. +// For each resource type (CPU, Memory), handles request and limit independently: +// - If user provides both → use user values +// - If user provides limit only: +// - If limit >= default request → use default request + user limit +// - If limit < default request → use user limit for both +// +// - If user provides request only: +// - If request >= default limit → use user request for both +// - If request < default limit → use user request + default limit +// +// - If user provides neither → use defaults +func MergeResourceRequirements(defaults, user corev1.ResourceRequirements) corev1.ResourceRequirements { + result := corev1.ResourceRequirements{ + Requests: defaults.Requests.DeepCopy(), + Limits: defaults.Limits.DeepCopy(), + } + + mergeResourceType(corev1.ResourceCPU, &result, defaults, user) + mergeResourceType(corev1.ResourceMemory, &result, defaults, user) + + return result +} + +// mergeResourceType handles merging logic for a single resource type (CPU or Memory) +func mergeResourceType( + resourceName corev1.ResourceName, + result *corev1.ResourceRequirements, + defaults, user corev1.ResourceRequirements, +) { + userLimit, hasUserLimit := user.Limits[resourceName] + userRequest, hasUserRequest := user.Requests[resourceName] + defaultRequest := defaults.Requests[resourceName] + defaultLimit := defaults.Limits[resourceName] + + // Process user-provided limit + if hasUserLimit { + result.Limits[resourceName] = userLimit.DeepCopy() + // If no request provided, check against default request + if !hasUserRequest { + if userLimit.Cmp(defaultRequest) >= 0 { + // User limit >= default request, use default request + result.Requests[resourceName] = defaultRequest.DeepCopy() + } else { + // User limit < default request, use user limit for both + result.Requests[resourceName] = userLimit.DeepCopy() + } + } + } + + // Process user-provided request + if hasUserRequest { + result.Requests[resourceName] = userRequest.DeepCopy() + // If no limit provided, compare request with default limit + if !hasUserLimit { + if userRequest.Cmp(defaultLimit) >= 0 { + result.Limits[resourceName] = userRequest.DeepCopy() + } else { + result.Limits[resourceName] = defaultLimit.DeepCopy() + } + } + } +} + // BuildHealthProbe builds a Kubernetes health probe configuration // Shared between MCPServer and MCPRemoteProxy func BuildHealthProbe( diff --git a/cmd/thv-operator/pkg/controllerutil/resources_test.go b/cmd/thv-operator/pkg/controllerutil/resources_test.go new file mode 100644 index 000000000..603240363 --- /dev/null +++ b/cmd/thv-operator/pkg/controllerutil/resources_test.go @@ -0,0 +1,133 @@ +package controllerutil + +import ( + "testing" + + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" +) + +// mustParseQuantity is a test helper that panics if parsing fails +func mustParseQuantity(s string) resource.Quantity { + q := resource.MustParse(s) + return q +} + +func TestBuildDefaultProxyRunnerResourceRequirements(t *testing.T) { + t.Parallel() + + resources := BuildDefaultProxyRunnerResourceRequirements() + + // Verify limits + assert.Equal(t, DefaultProxyRunnerCPULimit, resources.Limits.Cpu().String()) + assert.Equal(t, DefaultProxyRunnerMemoryLimit, resources.Limits.Memory().String()) + + // Verify requests + assert.Equal(t, DefaultProxyRunnerCPURequest, resources.Requests.Cpu().String()) + assert.Equal(t, DefaultProxyRunnerMemoryRequest, resources.Requests.Memory().String()) +} + +func TestBuildDefaultResourceRequirements(t *testing.T) { + t.Parallel() + + resources := BuildDefaultResourceRequirements() + + // Verify limits + assert.Equal(t, DefaultCPULimit, resources.Limits.Cpu().String()) + assert.Equal(t, DefaultMemoryLimit, resources.Limits.Memory().String()) + + // Verify requests + assert.Equal(t, DefaultCPURequest, resources.Requests.Cpu().String()) + assert.Equal(t, DefaultMemoryRequest, resources.Requests.Memory().String()) +} + +func TestProxyRunnerResourcesAreLighterThanMCPServerResources(t *testing.T) { + t.Parallel() + + proxyResources := BuildDefaultProxyRunnerResourceRequirements() + mcpResources := BuildDefaultResourceRequirements() + + // Proxy runner should use less CPU than MCP server + assert.True(t, proxyResources.Limits.Cpu().Cmp(*mcpResources.Limits.Cpu()) < 0, + "Proxy runner CPU limit should be less than MCP server CPU limit") + assert.True(t, proxyResources.Requests.Cpu().Cmp(*mcpResources.Requests.Cpu()) < 0, + "Proxy runner CPU request should be less than MCP server CPU request") + + // Proxy runner should use less memory than MCP server + assert.True(t, proxyResources.Limits.Memory().Cmp(*mcpResources.Limits.Memory()) < 0, + "Proxy runner memory limit should be less than MCP server memory limit") + assert.True(t, proxyResources.Requests.Memory().Cmp(*mcpResources.Requests.Memory()) < 0, + "Proxy runner memory request should be less than MCP server memory request") +} + +func TestMergeResourceRequirements_ProxyRunner(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + defaults corev1.ResourceRequirements + user corev1.ResourceRequirements + expected map[string]string // resource name -> expected value + }{ + { + name: "empty user resources uses defaults", + defaults: BuildDefaultProxyRunnerResourceRequirements(), + user: corev1.ResourceRequirements{}, + expected: map[string]string{ + "cpu_limit": DefaultProxyRunnerCPULimit, + "memory_limit": DefaultProxyRunnerMemoryLimit, + "cpu_request": DefaultProxyRunnerCPURequest, + "memory_request": DefaultProxyRunnerMemoryRequest, + }, + }, + { + name: "user overrides only CPU limit", + defaults: BuildDefaultProxyRunnerResourceRequirements(), + user: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: mustParseQuantity("300m"), + }, + }, + expected: map[string]string{ + "cpu_limit": "300m", + "memory_limit": DefaultProxyRunnerMemoryLimit, + "cpu_request": DefaultProxyRunnerCPURequest, + "memory_request": DefaultProxyRunnerMemoryRequest, + }, + }, + { + name: "user overrides all resources", + defaults: BuildDefaultProxyRunnerResourceRequirements(), + user: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: mustParseQuantity("500m"), + corev1.ResourceMemory: mustParseQuantity("512Mi"), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: mustParseQuantity("100m"), + corev1.ResourceMemory: mustParseQuantity("128Mi"), + }, + }, + expected: map[string]string{ + "cpu_limit": "500m", + "memory_limit": "512Mi", + "cpu_request": "100m", + "memory_request": "128Mi", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + result := MergeResourceRequirements(tt.defaults, tt.user) + + assert.Equal(t, tt.expected["cpu_limit"], result.Limits.Cpu().String()) + assert.Equal(t, tt.expected["memory_limit"], result.Limits.Memory().String()) + assert.Equal(t, tt.expected["cpu_request"], result.Requests.Cpu().String()) + assert.Equal(t, tt.expected["memory_request"], result.Requests.Memory().String()) + }) + } +} diff --git a/cmd/thv-operator/test-integration/mcp-server/mcpserver_controller_integration_test.go b/cmd/thv-operator/test-integration/mcp-server/mcpserver_controller_integration_test.go index 36b6f1675..84b43c8a8 100644 --- a/cmd/thv-operator/test-integration/mcp-server/mcpserver_controller_integration_test.go +++ b/cmd/thv-operator/test-integration/mcp-server/mcpserver_controller_integration_test.go @@ -82,6 +82,16 @@ var _ = Describe("MCPServer Controller Integration Tests", func() { Memory: "128Mi", }, }, + ProxyRunnerResources: mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{ + CPU: "500m", + Memory: "1Gi", + }, + Requests: mcpv1alpha1.ResourceList{ + CPU: "100m", + Memory: "128Mi", + }, + }, ResourceOverrides: &mcpv1alpha1.ResourceOverrides{ ProxyDeployment: &mcpv1alpha1.ProxyDeploymentOverrides{ PodTemplateMetadataOverrides: &mcpv1alpha1.ResourceMetadataOverrides{ diff --git a/deploy/charts/operator-crds/Chart.yaml b/deploy/charts/operator-crds/Chart.yaml index 9221a2fa2..926a86e73 100644 --- a/deploy/charts/operator-crds/Chart.yaml +++ b/deploy/charts/operator-crds/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: toolhive-operator-crds description: A Helm chart for installing the ToolHive Operator CRDs into Kubernetes. type: application -version: 0.0.85 +version: 0.0.86 appVersion: "0.0.1" diff --git a/deploy/charts/operator-crds/README.md b/deploy/charts/operator-crds/README.md index dd9ecfc9e..2d88b4c06 100644 --- a/deploy/charts/operator-crds/README.md +++ b/deploy/charts/operator-crds/README.md @@ -1,6 +1,6 @@ # ToolHive Operator CRDs Helm Chart -![Version: 0.0.85](https://img.shields.io/badge/Version-0.0.85-informational?style=flat-square) +![Version: 0.0.86](https://img.shields.io/badge/Version-0.0.86-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) A Helm chart for installing the ToolHive Operator CRDs into Kubernetes. @@ -51,7 +51,7 @@ However, placing CRDs in `templates/` means they would be deleted when the Helm ## Values | Key | Type | Default | Description | -|-----|------|---------|-------------| +|-----|-------------|------|---------| | crds | object | `{"install":{"registry":true,"server":true,"virtualMcp":true},"keep":true}` | CRD installation configuration | | crds.install | object | `{"registry":true,"server":true,"virtualMcp":true}` | Feature flags for CRD groups | | crds.install.registry | bool | `true` | Install Registry CRDs (mcpregistries) | diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml index 62dcd9956..820dcb8b3 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_mcpservers.yaml @@ -372,6 +372,39 @@ spec: maximum: 65535 minimum: 1 type: integer + proxyRunnerResources: + description: |- + ProxyRunnerResources defines the resource requirements for the ToolHive proxy runner container. + The proxy runner is the infrastructure container that manages the MCP server container. + If not specified, defaults to 50m/200m CPU and 64Mi/256Mi memory (lighter than MCP server defaults). + properties: + limits: + description: Limits describes the maximum amount of compute resources + allowed + properties: + cpu: + description: CPU is the CPU limit in cores (e.g., "500m" for + 0.5 cores) + type: string + memory: + description: Memory is the memory limit in bytes (e.g., "64Mi" + for 64 megabytes) + type: string + type: object + requests: + description: Requests describes the minimum amount of compute + resources required + properties: + cpu: + description: CPU is the CPU limit in cores (e.g., "500m" for + 0.5 cores) + type: string + memory: + description: Memory is the memory limit in bytes (e.g., "64Mi" + for 64 megabytes) + type: string + type: object + type: object resourceOverrides: description: ResourceOverrides allows overriding annotations and labels for resources created by the operator diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml index 6a2ab9d76..934fc49f7 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_mcpservers.yaml @@ -375,6 +375,39 @@ spec: maximum: 65535 minimum: 1 type: integer + proxyRunnerResources: + description: |- + ProxyRunnerResources defines the resource requirements for the ToolHive proxy runner container. + The proxy runner is the infrastructure container that manages the MCP server container. + If not specified, defaults to 50m/200m CPU and 64Mi/256Mi memory (lighter than MCP server defaults). + properties: + limits: + description: Limits describes the maximum amount of compute resources + allowed + properties: + cpu: + description: CPU is the CPU limit in cores (e.g., "500m" for + 0.5 cores) + type: string + memory: + description: Memory is the memory limit in bytes (e.g., "64Mi" + for 64 megabytes) + type: string + type: object + requests: + description: Requests describes the minimum amount of compute + resources required + properties: + cpu: + description: CPU is the CPU limit in cores (e.g., "500m" for + 0.5 cores) + type: string + memory: + description: Memory is the memory limit in bytes (e.g., "64Mi" + for 64 megabytes) + type: string + type: object + type: object resourceOverrides: description: ResourceOverrides allows overriding annotations and labels for resources created by the operator diff --git a/deploy/charts/operator/Chart.yaml b/deploy/charts/operator/Chart.yaml index 55e7800d2..6888a19bd 100644 --- a/deploy/charts/operator/Chart.yaml +++ b/deploy/charts/operator/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: toolhive-operator description: A Helm chart for deploying the ToolHive Operator into Kubernetes. type: application -version: 0.5.14 +version: 0.5.15 appVersion: "v0.6.13" diff --git a/deploy/charts/operator/README.md b/deploy/charts/operator/README.md index aed44f378..85b7e25b2 100644 --- a/deploy/charts/operator/README.md +++ b/deploy/charts/operator/README.md @@ -1,6 +1,6 @@ # ToolHive Operator Helm Chart -![Version: 0.5.14](https://img.shields.io/badge/Version-0.5.14-informational?style=flat-square) +![Version: 0.5.15](https://img.shields.io/badge/Version-0.5.15-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) A Helm chart for deploying the ToolHive Operator into Kubernetes. @@ -49,7 +49,7 @@ The command removes all the Kubernetes components associated with the chart and ## Values | Key | Type | Default | Description | -|-----|------|---------|-------------| +|-----|-------------|------|---------| | fullnameOverride | string | `"toolhive-operator"` | Provide a fully-qualified name override for resources | | nameOverride | string | `""` | Override the name of the chart | | operator | object | `{"affinity":{},"autoscaling":{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80},"containerSecurityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["ALL"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true,"runAsUser":1000,"seccompProfile":{"type":"RuntimeDefault"}},"env":{},"features":{"experimental":false},"gc":{"gogc":75,"gomeglimit":"150MiB"},"image":"ghcr.io/stacklok/toolhive/operator:v0.6.13","imagePullPolicy":"IfNotPresent","imagePullSecrets":[],"leaderElectionRole":{"binding":{"name":"toolhive-operator-leader-election-rolebinding"},"name":"toolhive-operator-leader-election-role","rules":[{"apiGroups":[""],"resources":["configmaps"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":["coordination.k8s.io"],"resources":["leases"],"verbs":["get","list","watch","create","update","patch","delete"]},{"apiGroups":[""],"resources":["events"],"verbs":["create","patch"]}]},"livenessProbe":{"httpGet":{"path":"/healthz","port":"health"},"initialDelaySeconds":15,"periodSeconds":20},"nodeSelector":{},"podAnnotations":{},"podLabels":{},"podSecurityContext":{"runAsNonRoot":true},"ports":[{"containerPort":8080,"name":"metrics","protocol":"TCP"},{"containerPort":8081,"name":"health","protocol":"TCP"}],"proxyHost":"0.0.0.0","rbac":{"allowedNamespaces":[],"scope":"cluster"},"readinessProbe":{"httpGet":{"path":"/readyz","port":"health"},"initialDelaySeconds":5,"periodSeconds":10},"replicaCount":1,"resources":{"limits":{"cpu":"500m","memory":"128Mi"},"requests":{"cpu":"10m","memory":"64Mi"}},"serviceAccount":{"annotations":{},"automountServiceAccountToken":true,"create":true,"labels":{},"name":"toolhive-operator"},"tolerations":[],"toolhiveRunnerImage":"ghcr.io/stacklok/toolhive/proxyrunner:v0.6.13","vmcpImage":"ghcr.io/stacklok/toolhive/vmcp:v0.6.13","volumeMounts":[],"volumes":[]}` | All values for the operator deployment and associated resources | diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md index 5ba19498e..a9c5550ac 100644 --- a/docs/operator/crd-api.md +++ b/docs/operator/crd-api.md @@ -1075,6 +1075,7 @@ _Appears in:_ | `env` _[EnvVar](#envvar) array_ | Env are environment variables to set in the MCP server container | | | | `volumes` _[Volume](#volume) array_ | Volumes are volumes to mount in the MCP server container | | | | `resources` _[ResourceRequirements](#resourcerequirements)_ | Resources defines the resource requirements for the MCP server container | | | +| `proxyRunnerResources` _[ResourceRequirements](#resourcerequirements)_ | ProxyRunnerResources defines the resource requirements for the ToolHive proxy runner container.
The proxy runner is the infrastructure container that manages the MCP server container.
If not specified, defaults to 50m/200m CPU and 64Mi/256Mi memory (lighter than MCP server defaults). | | | | `secrets` _[SecretRef](#secretref) array_ | Secrets are references to secrets to mount in the MCP server container | | | | `serviceAccount` _string_ | ServiceAccount is the name of an already existing service account to use by the MCP server.
If not specified, a ServiceAccount will be created automatically and used by the MCP server. | | | | `permissionProfile` _[PermissionProfileRef](#permissionprofileref)_ | PermissionProfile defines the permission profile to use | | | diff --git a/examples/operator/mcp-servers/mcpserver_with_resource_overrides.yaml b/examples/operator/mcp-servers/mcpserver_with_resource_overrides.yaml index 5b3b22b3b..1f9a93d68 100644 --- a/examples/operator/mcp-servers/mcpserver_with_resource_overrides.yaml +++ b/examples/operator/mcp-servers/mcpserver_with_resource_overrides.yaml @@ -15,6 +15,7 @@ spec: value: https://api.github.com - name: LOG_LEVEL value: info + # Resource requirements for the MCP server container (the actual workload) resources: limits: cpu: "200m" @@ -22,6 +23,18 @@ spec: requests: cpu: "100m" memory: "128Mi" + + # Resource requirements for the ToolHive proxy runner container (infrastructure) + # The proxy runner is lighter weight and typically needs fewer resources + # Defaults: 50m/200m CPU, 64Mi/256Mi memory + proxyRunnerResources: + limits: + cpu: "150m" # Override default limit of 200m + memory: "192Mi" # Override default limit of 256Mi + requests: + cpu: "75m" # Override default request of 50m + memory: "96Mi" # Override default request of 64Mi + resourceOverrides: proxyDeployment: # Annotations and labels for the proxy deployment