diff --git a/cmd/thv-operator/api/v1beta1/mcpserver_types.go b/cmd/thv-operator/api/v1beta1/mcpserver_types.go index 200874bc95..c980d168fb 100644 --- a/cmd/thv-operator/api/v1beta1/mcpserver_types.go +++ b/cmd/thv-operator/api/v1beta1/mcpserver_types.go @@ -518,18 +518,18 @@ type SessionStorageConfig struct { // // +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser) || (has(self.tools) && size(self.tools) > 0)",message="at least one of shared, perUser, or tools must be configured" // -//nolint:lll // CEL validation rules exceed line length limit +//nolint:lll // kubebuilder marker exceeds line length type RateLimitConfig struct { // Shared is a token bucket shared across all users for the entire server. // +optional - Shared *RateLimitBucket `json:"shared,omitempty"` + Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"` // PerUser is a token bucket applied independently to each authenticated user // at the server level. Requires authentication to be enabled. // Each unique userID creates Redis keys that expire after 2x refillPeriod. // Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. // +optional - PerUser *RateLimitBucket `json:"perUser,omitempty"` + PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"` // Tools defines per-tool rate limit overrides. // Each entry applies additional rate limits to calls targeting a specific tool name. @@ -537,24 +537,24 @@ type RateLimitConfig struct { // +listType=map // +listMapKey=name // +optional - Tools []ToolRateLimitConfig `json:"tools,omitempty"` + Tools []ToolRateLimitConfig `json:"tools,omitempty" yaml:"tools,omitempty"` } // RateLimitBucket defines a token bucket configuration with a maximum capacity -// and a refill period. Used by both shared (global) and per-user rate limits. +// and a refill period. Used by both shared and per-user rate limits. type RateLimitBucket struct { // MaxTokens is the maximum number of tokens (bucket capacity). // This is also the burst size: the maximum number of requests that can be served // instantaneously before the bucket is depleted. // +kubebuilder:validation:Required // +kubebuilder:validation:Minimum=1 - MaxTokens int32 `json:"maxTokens"` + MaxTokens int32 `json:"maxTokens" yaml:"maxTokens"` // RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. // The effective refill rate is maxTokens / refillPeriod tokens per second. // Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). // +kubebuilder:validation:Required - RefillPeriod metav1.Duration `json:"refillPeriod"` + RefillPeriod metav1.Duration `json:"refillPeriod" yaml:"refillPeriod"` } // ToolRateLimitConfig defines rate limits for a specific tool. @@ -567,15 +567,15 @@ type ToolRateLimitConfig struct { // Name is the MCP tool name this limit applies to. // +kubebuilder:validation:Required // +kubebuilder:validation:MinLength=1 - Name string `json:"name"` + Name string `json:"name" yaml:"name"` // Shared token bucket for this specific tool. // +optional - Shared *RateLimitBucket `json:"shared,omitempty"` + Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"` // PerUser token bucket configuration for this tool. // +optional - PerUser *RateLimitBucket `json:"perUser,omitempty"` + PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"` } // Permission profile types diff --git a/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go b/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go index 0e69f33836..70830c8578 100644 --- a/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go +++ b/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go @@ -11,6 +11,8 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + vmcpconfig "github.com/stacklok/toolhive/pkg/vmcp/config" ) func TestSessionStorageConfigJSONRoundtrip(t *testing.T) { @@ -116,6 +118,46 @@ func TestRateLimitConfigJSONRoundtrip(t *testing.T) { } } +func TestVirtualMCPServerSpecRateLimitingJSONRoundtrip(t *testing.T) { + t.Parallel() + + spec := VirtualMCPServerSpec{ + IncomingAuth: &IncomingAuthConfig{Type: "oidc"}, + GroupRef: &MCPGroupRef{Name: "group-a"}, + SessionStorage: &SessionStorageConfig{ + Provider: "redis", + Address: "redis.default.svc.cluster.local:6379", + }, + Config: vmcpconfig.Config{ + RateLimiting: &vmcpconfig.RateLimitConfig{ + Shared: &vmcpconfig.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + PerUser: &vmcpconfig.RateLimitBucket{ + MaxTokens: 2, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + Tools: []vmcpconfig.ToolRateLimitConfig{ + { + Name: "backend_a_echo", + Shared: &vmcpconfig.RateLimitBucket{ + MaxTokens: 5, + RefillPeriod: metav1.Duration{Duration: 30 * time.Second}, + }, + }, + }, + }, + }, + } + + b, err := json.Marshal(spec) + require.NoError(t, err) + out := string(b) + assert.Contains(t, out, `"rateLimiting"`) + assert.Contains(t, out, `"shared"`) + assert.Contains(t, out, `"perUser"`) + assert.Contains(t, out, `"backend_a_echo"`) + assert.Contains(t, out, `"config":{"rateLimiting"`) +} + func TestMCPServerSpecScalingFieldsJSONRoundtrip(t *testing.T) { t.Parallel() diff --git a/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go b/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go index c63139b133..4975c88286 100644 --- a/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go +++ b/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go @@ -16,6 +16,10 @@ import ( // VirtualMCPServerSpec defines the desired state of VirtualMCPServer // +// +kubebuilder:validation:XValidation:rule="!has(self.config) || !has(self.config.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider == 'redis')",message="config.rateLimiting requires sessionStorage with provider 'redis'" +// +kubebuilder:validation:XValidation:rule="!(has(self.config) && has(self.config.rateLimiting) && has(self.config.rateLimiting.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type == 'oidc')",message="config.rateLimiting.perUser requires incomingAuth.type oidc" +// +kubebuilder:validation:XValidation:rule="!has(self.config) || !has(self.config.rateLimiting) || !has(self.config.rateLimiting.tools) || self.config.rateLimiting.tools.all(t, !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type == 'oidc')",message="per-tool perUser rate limiting requires incomingAuth.type oidc" +// //nolint:lll // CEL validation rules exceed line length limit type VirtualMCPServerSpec struct { // IncomingAuth configures authentication for clients connecting to the Virtual MCP server. diff --git a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go index 5d0fe5efde..2da5a24a45 100644 --- a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go +++ b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go @@ -507,6 +507,11 @@ func TestEnsureVmcpConfigConfigMap(t *testing.T) { assert.Equal(t, "test-vmcp-vmcp-config", cm.Name) assert.Contains(t, cm.Data, "config.yaml") assert.NotEmpty(t, cm.Annotations["toolhive.stacklok.dev/content-checksum"]) + + var cfg vmcpconfig.Config + require.NoError(t, yaml.Unmarshal([]byte(cm.Data["config.yaml"]), &cfg)) + assert.Equal(t, "test-vmcp", cfg.Name) + assert.Equal(t, "test-group", cfg.Group) } // TestSetAuthConfigConditions tests that auth config conditions reflect the current state diff --git a/cmd/thv-operator/pkg/vmcpconfig/converter_test.go b/cmd/thv-operator/pkg/vmcpconfig/converter_test.go index cee72256af..a100d380f5 100644 --- a/cmd/thv-operator/pkg/vmcpconfig/converter_test.go +++ b/cmd/thv-operator/pkg/vmcpconfig/converter_test.go @@ -1601,6 +1601,51 @@ func TestConverter_SessionStorage(t *testing.T) { } } +func TestConverter_RateLimitingPassThrough(t *testing.T) { + t.Parallel() + + vmcpServer := &mcpv1beta1.VirtualMCPServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-vmcp", + Namespace: "default", + }, + Spec: mcpv1beta1.VirtualMCPServerSpec{ + GroupRef: &mcpv1beta1.MCPGroupRef{Name: "test-group"}, + Config: vmcpconfig.Config{ + RateLimiting: &vmcpconfig.RateLimitConfig{ + PerUser: &vmcpconfig.RateLimitBucket{ + MaxTokens: 2, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + Tools: []vmcpconfig.ToolRateLimitConfig{ + { + Name: "backend_a_echo", + Shared: &vmcpconfig.RateLimitBucket{ + MaxTokens: 5, + RefillPeriod: metav1.Duration{Duration: 30 * time.Second}, + }, + }, + }, + }, + }, + }, + } + + converter := newTestConverter(t, newNoOpMockResolver(t)) + ctx := log.IntoContext(context.Background(), logr.Discard()) + + config, _, err := converter.Convert(ctx, vmcpServer, nil) + require.NoError(t, err) + require.NotNil(t, config) + require.NotNil(t, config.RateLimiting) + + assert.EqualValues(t, 2, config.RateLimiting.PerUser.MaxTokens) + require.Len(t, config.RateLimiting.Tools, 1) + assert.Equal(t, "backend_a_echo", config.RateLimiting.Tools[0].Name) + require.NotNil(t, config.RateLimiting.Tools[0].Shared) + assert.EqualValues(t, 5, config.RateLimiting.Tools[0].Shared.MaxTokens) +} + func TestDeriveAllowedAudiences(t *testing.T) { t.Parallel() diff --git a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go index b91f9d021d..c4a3484bd2 100644 --- a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go +++ b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go @@ -466,6 +466,7 @@ var _ = Describe("EmbeddingServer Controller Update Tests", func() { Expect(k8sClient.Create(ctx, embeddingServer)).To(Succeed()) Eventually(func(g Gomega) { g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), &appsv1.StatefulSet{})).To(Succeed()) + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), &corev1.Service{})).To(Succeed()) }, timeout, interval).Should(Succeed()) }) diff --git a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go index 45b6043196..6499c3df4d 100644 --- a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go +++ b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go @@ -5,6 +5,8 @@ package controllers import ( + "time" + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -106,4 +108,60 @@ var _ = Describe("CEL Validation for SessionStorageConfig on VirtualMCPServer", Expect(err).To(HaveOccurred()) }) }) + + Context("rateLimiting", func() { + It("should reject rate limiting without redis session storage", func() { + vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-no-redis", nil) + vmcp.Spec.Config.RateLimiting = &vmcpconfig.RateLimitConfig{ + Shared: &vmcpconfig.RateLimitBucket{ + MaxTokens: 1, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + } + + err := k8sClient.Create(ctx, vmcp) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("config.rateLimiting requires sessionStorage with provider 'redis'")) + }) + + It("should reject perUser rate limiting with anonymous auth", func() { + vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-peruser-anon", &mcpv1beta1.SessionStorageConfig{ + Provider: "redis", + Address: "redis:6379", + }) + vmcp.Spec.Config.RateLimiting = &vmcpconfig.RateLimitConfig{ + PerUser: &vmcpconfig.RateLimitBucket{ + MaxTokens: 1, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + } + + err := k8sClient.Create(ctx, vmcp) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("config.rateLimiting.perUser requires incomingAuth.type oidc")) + }) + + It("should accept perUser rate limiting with oidc auth and redis session storage", func() { + vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-peruser-oidc", &mcpv1beta1.SessionStorageConfig{ + Provider: "redis", + Address: "redis:6379", + }) + vmcp.Spec.IncomingAuth = &mcpv1beta1.IncomingAuthConfig{ + Type: "oidc", + OIDCConfigRef: &mcpv1beta1.MCPOIDCConfigReference{ + Name: "oidc", + Audience: "test-audience", + }, + } + vmcp.Spec.Config.RateLimiting = &vmcpconfig.RateLimitConfig{ + PerUser: &vmcpconfig.RateLimitBucket{ + MaxTokens: 1, + RefillPeriod: metav1.Duration{Duration: time.Minute}, + }, + } + + err := k8sClient.Create(ctx, vmcp) + Expect(err).NotTo(HaveOccurred()) + }) + }) }) diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml index a51fe4b5bd..584a6215b2 100644 --- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml @@ -1844,6 +1844,131 @@ spec: required: - source type: object + rateLimiting: + description: |- + RateLimiting defines rate limiting configuration for the Virtual MCP server. + Requires Redis session storage to be configured for distributed rate limiting. + properties: + perUser: + description: |- + PerUser is a token bucket applied independently to each authenticated user + at the server level. Requires authentication to be enabled. + Each unique userID creates Redis keys that expire after 2x refillPeriod. + Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + tools: + description: |- + Tools defines per-tool rate limit overrides. + Each entry applies additional rate limits to calls targeting a specific tool name. + A request must pass both the server-level limit and the per-tool limit. + items: + description: |- + ToolRateLimitConfig defines rate limits for a specific tool. + At least one of shared or perUser must be configured. + properties: + name: + description: Name is the MCP tool name this limit applies + to. + minLength: 1 + type: string + perUser: + description: PerUser token bucket configuration for + this tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + x-kubernetes-validations: + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) sessionStorage: description: |- SessionStorage configures session storage for stateful horizontal scaling. @@ -2347,6 +2472,17 @@ spec: - groupRef - incomingAuth type: object + x-kubernetes-validations: + - message: config.rateLimiting requires sessionStorage with provider 'redis' + rule: '!has(self.config) || !has(self.config.rateLimiting) || (has(self.sessionStorage) + && self.sessionStorage.provider == ''redis'')' + - message: config.rateLimiting.perUser requires incomingAuth.type oidc + rule: '!(has(self.config) && has(self.config.rateLimiting) && has(self.config.rateLimiting.perUser)) + || (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')' + - message: per-tool perUser rate limiting requires incomingAuth.type oidc + rule: '!has(self.config) || !has(self.config.rateLimiting) || !has(self.config.rateLimiting.tools) + || self.config.rateLimiting.tools.all(t, !has(t.perUser)) || (has(self.incomingAuth) + && self.incomingAuth.type == ''oidc'')' status: description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer properties: @@ -4340,6 +4476,131 @@ spec: required: - source type: object + rateLimiting: + description: |- + RateLimiting defines rate limiting configuration for the Virtual MCP server. + Requires Redis session storage to be configured for distributed rate limiting. + properties: + perUser: + description: |- + PerUser is a token bucket applied independently to each authenticated user + at the server level. Requires authentication to be enabled. + Each unique userID creates Redis keys that expire after 2x refillPeriod. + Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + tools: + description: |- + Tools defines per-tool rate limit overrides. + Each entry applies additional rate limits to calls targeting a specific tool name. + A request must pass both the server-level limit and the per-tool limit. + items: + description: |- + ToolRateLimitConfig defines rate limits for a specific tool. + At least one of shared or perUser must be configured. + properties: + name: + description: Name is the MCP tool name this limit applies + to. + minLength: 1 + type: string + perUser: + description: PerUser token bucket configuration for + this tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + x-kubernetes-validations: + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) sessionStorage: description: |- SessionStorage configures session storage for stateful horizontal scaling. @@ -4843,6 +5104,17 @@ spec: - groupRef - incomingAuth type: object + x-kubernetes-validations: + - message: config.rateLimiting requires sessionStorage with provider 'redis' + rule: '!has(self.config) || !has(self.config.rateLimiting) || (has(self.sessionStorage) + && self.sessionStorage.provider == ''redis'')' + - message: config.rateLimiting.perUser requires incomingAuth.type oidc + rule: '!(has(self.config) && has(self.config.rateLimiting) && has(self.config.rateLimiting.perUser)) + || (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')' + - message: per-tool perUser rate limiting requires incomingAuth.type oidc + rule: '!has(self.config) || !has(self.config.rateLimiting) || !has(self.config.rateLimiting.tools) + || self.config.rateLimiting.tools.all(t, !has(t.perUser)) || (has(self.incomingAuth) + && self.incomingAuth.type == ''oidc'')' status: description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer properties: diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml index 6078670479..fa25125884 100644 --- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml @@ -1847,6 +1847,131 @@ spec: required: - source type: object + rateLimiting: + description: |- + RateLimiting defines rate limiting configuration for the Virtual MCP server. + Requires Redis session storage to be configured for distributed rate limiting. + properties: + perUser: + description: |- + PerUser is a token bucket applied independently to each authenticated user + at the server level. Requires authentication to be enabled. + Each unique userID creates Redis keys that expire after 2x refillPeriod. + Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + tools: + description: |- + Tools defines per-tool rate limit overrides. + Each entry applies additional rate limits to calls targeting a specific tool name. + A request must pass both the server-level limit and the per-tool limit. + items: + description: |- + ToolRateLimitConfig defines rate limits for a specific tool. + At least one of shared or perUser must be configured. + properties: + name: + description: Name is the MCP tool name this limit applies + to. + minLength: 1 + type: string + perUser: + description: PerUser token bucket configuration for + this tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + x-kubernetes-validations: + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) sessionStorage: description: |- SessionStorage configures session storage for stateful horizontal scaling. @@ -2350,6 +2475,17 @@ spec: - groupRef - incomingAuth type: object + x-kubernetes-validations: + - message: config.rateLimiting requires sessionStorage with provider 'redis' + rule: '!has(self.config) || !has(self.config.rateLimiting) || (has(self.sessionStorage) + && self.sessionStorage.provider == ''redis'')' + - message: config.rateLimiting.perUser requires incomingAuth.type oidc + rule: '!(has(self.config) && has(self.config.rateLimiting) && has(self.config.rateLimiting.perUser)) + || (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')' + - message: per-tool perUser rate limiting requires incomingAuth.type oidc + rule: '!has(self.config) || !has(self.config.rateLimiting) || !has(self.config.rateLimiting.tools) + || self.config.rateLimiting.tools.all(t, !has(t.perUser)) || (has(self.incomingAuth) + && self.incomingAuth.type == ''oidc'')' status: description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer properties: @@ -4343,6 +4479,131 @@ spec: required: - source type: object + rateLimiting: + description: |- + RateLimiting defines rate limiting configuration for the Virtual MCP server. + Requires Redis session storage to be configured for distributed rate limiting. + properties: + perUser: + description: |- + PerUser is a token bucket applied independently to each authenticated user + at the server level. Requires authentication to be enabled. + Each unique userID creates Redis keys that expire after 2x refillPeriod. + Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared is a token bucket shared across all users + for the entire server. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + tools: + description: |- + Tools defines per-tool rate limit overrides. + Each entry applies additional rate limits to calls targeting a specific tool name. + A request must pass both the server-level limit and the per-tool limit. + items: + description: |- + ToolRateLimitConfig defines rate limits for a specific tool. + At least one of shared or perUser must be configured. + properties: + name: + description: Name is the MCP tool name this limit applies + to. + minLength: 1 + type: string + perUser: + description: PerUser token bucket configuration for + this tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + shared: + description: Shared token bucket for this specific tool. + properties: + maxTokens: + description: |- + MaxTokens is the maximum number of tokens (bucket capacity). + This is also the burst size: the maximum number of requests that can be served + instantaneously before the bucket is depleted. + format: int32 + minimum: 1 + type: integer + refillPeriod: + description: |- + RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + The effective refill rate is maxTokens / refillPeriod tokens per second. + Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + type: string + required: + - maxTokens + - refillPeriod + type: object + required: + - name + type: object + x-kubernetes-validations: + - message: at least one of shared or perUser must be configured + rule: has(self.shared) || has(self.perUser) + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + type: object + x-kubernetes-validations: + - message: at least one of shared, perUser, or tools must be configured + rule: has(self.shared) || has(self.perUser) || (has(self.tools) + && size(self.tools) > 0) sessionStorage: description: |- SessionStorage configures session storage for stateful horizontal scaling. @@ -4846,6 +5107,17 @@ spec: - groupRef - incomingAuth type: object + x-kubernetes-validations: + - message: config.rateLimiting requires sessionStorage with provider 'redis' + rule: '!has(self.config) || !has(self.config.rateLimiting) || (has(self.sessionStorage) + && self.sessionStorage.provider == ''redis'')' + - message: config.rateLimiting.perUser requires incomingAuth.type oidc + rule: '!(has(self.config) && has(self.config.rateLimiting) && has(self.config.rateLimiting.perUser)) + || (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')' + - message: per-tool perUser rate limiting requires incomingAuth.type oidc + rule: '!has(self.config) || !has(self.config.rateLimiting) || !has(self.config.rateLimiting.tools) + || self.config.rateLimiting.tools.all(t, !has(t.perUser)) || (has(self.incomingAuth) + && self.incomingAuth.type == ''oidc'')' status: description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer properties: diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md index e60f05015e..306d140554 100644 --- a/docs/operator/crd-api.md +++ b/docs/operator/crd-api.md @@ -323,6 +323,7 @@ _Appears in:_ | `audit` _[pkg.audit.Config](#pkgauditconfig)_ | Audit configures audit logging for the Virtual MCP server.
When present, audit logs include MCP protocol operations.
See audit.Config for available configuration options. | | Optional: \{\}
| | `optimizer` _[vmcp.config.OptimizerConfig](#vmcpconfigoptimizerconfig)_ | Optimizer configures the MCP optimizer for context optimization on large toolsets.
When enabled, vMCP exposes only find_tool and call_tool operations to clients
instead of all backend tools directly. This reduces token usage by allowing
LLMs to discover relevant tools on demand rather than receiving all tool definitions. | | Optional: \{\}
| | `sessionStorage` _[vmcp.config.SessionStorageConfig](#vmcpconfigsessionstorageconfig)_ | SessionStorage configures session storage for stateful horizontal scaling.
When provider is "redis", the operator injects Redis connection parameters
(address, db, keyPrefix) here. The Redis password is provided separately via
the THV_SESSION_REDIS_PASSWORD environment variable. | | Optional: \{\}
| +| `rateLimiting` _[vmcp.config.RateLimitConfig](#vmcpconfigratelimitconfig)_ | RateLimiting defines rate limiting configuration for the Virtual MCP server.
Requires Redis session storage to be configured for distributed rate limiting. | | Optional: \{\}
| #### vmcp.config.ConflictResolutionConfig @@ -546,6 +547,44 @@ _Appears in:_ | `default` _[pkg.json.Any](#pkgjsonany)_ | Default is the fallback value if template expansion fails.
Type coercion is applied to match the declared Type. | | Schemaless: \{\}
Optional: \{\}
| +#### vmcp.config.RateLimitBucket + + + +RateLimitBucket defines a token bucket configuration with a maximum capacity +and a refill period. Used by both shared and per-user rate limits. + + + +_Appears in:_ +- [vmcp.config.RateLimitConfig](#vmcpconfigratelimitconfig) +- [vmcp.config.ToolRateLimitConfig](#vmcpconfigtoolratelimitconfig) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `maxTokens` _integer_ | MaxTokens is the maximum number of tokens (bucket capacity).
This is also the burst size: the maximum number of requests that can be served
instantaneously before the bucket is depleted. | | Minimum: 1
Required: \{\}
| +| `refillPeriod` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#duration-v1-meta)_ | RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
The effective refill rate is maxTokens / refillPeriod tokens per second.
Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). | | Pattern: `^([0-9]+(\.[0-9]+)?(ns\|us\|µs\|ms\|s\|m\|h))+$`
Required: \{\}
Type: string
| + + +#### vmcp.config.RateLimitConfig + + + +RateLimitConfig defines rate limiting configuration for a Virtual MCP server. +At least one of shared, perUser, or tools must be configured. + + + +_Appears in:_ +- [vmcp.config.Config](#vmcpconfigconfig) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `shared` _[vmcp.config.RateLimitBucket](#vmcpconfigratelimitbucket)_ | Shared is a token bucket shared across all users for the entire server. | | Optional: \{\}
| +| `perUser` _[vmcp.config.RateLimitBucket](#vmcpconfigratelimitbucket)_ | PerUser is a token bucket applied independently to each authenticated user
at the server level. Requires authentication to be enabled.
Each unique userID creates Redis keys that expire after 2x refillPeriod.
Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. | | Optional: \{\}
| +| `tools` _[vmcp.config.ToolRateLimitConfig](#vmcpconfigtoolratelimitconfig) array_ | Tools defines per-tool rate limit overrides.
Each entry applies additional rate limits to calls targeting a specific tool name.
A request must pass both the server-level limit and the per-tool limit. | | Optional: \{\}
| + + #### vmcp.config.SessionStorageConfig @@ -675,6 +714,25 @@ _Appears in:_ | `annotations` _[vmcp.config.ToolAnnotationsOverride](#vmcpconfigtoolannotationsoverride)_ | Annotations overrides specific tool annotation fields.
Only specified fields are overridden; others pass through from the backend. | | Optional: \{\}
| +#### vmcp.config.ToolRateLimitConfig + + + +ToolRateLimitConfig defines rate limits for a specific tool. +At least one of shared or perUser must be configured. + + + +_Appears in:_ +- [vmcp.config.RateLimitConfig](#vmcpconfigratelimitconfig) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `name` _string_ | Name is the MCP tool name this limit applies to. | | MinLength: 1
Required: \{\}
| +| `shared` _[vmcp.config.RateLimitBucket](#vmcpconfigratelimitbucket)_ | Shared token bucket for this specific tool. | | Optional: \{\}
| +| `perUser` _[vmcp.config.RateLimitBucket](#vmcpconfigratelimitbucket)_ | PerUser token bucket configuration for this tool. | | Optional: \{\}
| + + #### vmcp.config.WorkflowStepConfig @@ -2729,7 +2787,7 @@ _Appears in:_ RateLimitBucket defines a token bucket configuration with a maximum capacity -and a refill period. Used by both shared (global) and per-user rate limits. +and a refill period. Used by both shared and per-user rate limits. @@ -2740,7 +2798,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | | `maxTokens` _integer_ | MaxTokens is the maximum number of tokens (bucket capacity).
This is also the burst size: the maximum number of requests that can be served
instantaneously before the bucket is depleted. | | Minimum: 1
Required: \{\}
| -| `refillPeriod` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#duration-v1-meta)_ | RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
The effective refill rate is maxTokens / refillPeriod tokens per second.
Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). | | Required: \{\}
| +| `refillPeriod` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#duration-v1-meta)_ | RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
The effective refill rate is maxTokens / refillPeriod tokens per second.
Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). | | Pattern: `^([0-9]+(\.[0-9]+)?(ns\|us\|µs\|ms\|s\|m\|h))+$`
Required: \{\}
Type: string
| #### api.v1beta1.RateLimitConfig @@ -3577,7 +3635,7 @@ _Appears in:_ | --- | --- | --- | --- | | `name` _string_ | Name is a unique identifier for this webhook | | MaxLength: 63
MinLength: 1
| | `url` _string_ | URL is the endpoint to call for this webhook. Must be an HTTP/HTTPS URL. | | Format: uri
| -| `timeout` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#duration-v1-meta)_ | Timeout configures the maximum time to wait for the webhook to respond.
Defaults to 10s if not specified. Maximum is 30s. | | Format: duration
Type: string
Optional: \{\}
| +| `timeout` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#duration-v1-meta)_ | Timeout configures the maximum time to wait for the webhook to respond.
Defaults to 10s if not specified. Maximum is 30s. | | Format: duration
Pattern: `^([0-9]+(\.[0-9]+)?(ns\|us\|µs\|ms\|s\|m\|h))+$`
Type: string
Optional: \{\}
| | `failurePolicy` _[api.v1beta1.WebhookFailurePolicy](#apiv1beta1webhookfailurepolicy)_ | FailurePolicy defines how to handle errors when communicating with the webhook.
Supported values: "fail", "ignore". Defaults to "fail". | fail | Enum: [fail ignore]
Optional: \{\}
| | `tlsConfig` _[api.v1beta1.WebhookTLSConfig](#apiv1beta1webhooktlsconfig)_ | TLSConfig contains optional TLS configuration for the webhook connection. | | Optional: \{\}
| | `hmacSecretRef` _[api.v1beta1.SecretKeyRef](#apiv1beta1secretkeyref)_ | HMACSecretRef references a Kubernetes Secret containing the HMAC signing key
used to sign the webhook payload. If set, the X-Toolhive-Signature header will be injected. | | Optional: \{\}
| diff --git a/pkg/ratelimit/internal/bucket/bucket.go b/pkg/ratelimit/internal/bucket/bucket.go index 28903bcdcd..d68a1709c8 100644 --- a/pkg/ratelimit/internal/bucket/bucket.go +++ b/pkg/ratelimit/internal/bucket/bucket.go @@ -90,7 +90,7 @@ type TokenBucket struct { } // New creates a TokenBucket. The Redis key is derived from namespace, server -// name, and suffix (e.g., "global" or "global:tool:search"). +// name, and suffix (e.g., "shared" or "shared:tool:search"). func New(namespace, serverName, suffix string, maxTokens int32, refillPeriod time.Duration) *TokenBucket { refillSec := refillPeriod.Seconds() return &TokenBucket{ diff --git a/pkg/ratelimit/limiter_test.go b/pkg/ratelimit/limiter_test.go index a007191d3c..147e22cd7e 100644 --- a/pkg/ratelimit/limiter_test.go +++ b/pkg/ratelimit/limiter_test.go @@ -70,7 +70,7 @@ func TestNewLimiter_ZeroDuration(t *testing.T) { assert.Contains(t, err.Error(), "refillPeriod must be positive") } -func TestLimiter_ServerGlobalExhausted(t *testing.T) { +func TestLimiter_ServerSharedExhausted(t *testing.T) { t.Parallel() client, _ := newTestClient(t) ctx := t.Context() @@ -93,6 +93,35 @@ func TestLimiter_ServerGlobalExhausted(t *testing.T) { assert.Greater(t, d.RetryAfter, time.Duration(0)) } +func TestLimiter_SharedUsesRedisKeys(t *testing.T) { + t.Parallel() + client, _ := newTestClient(t) + ctx := t.Context() + + crd := &v1beta1.RateLimitConfig{ + Shared: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + Tools: []v1beta1.ToolRateLimitConfig{ + { + Name: "search", + Shared: &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}, + }, + }, + } + l, err := NewLimiter(client, "ns", "srv", crd) + require.NoError(t, err) + + d, err := l.Allow(ctx, "search", "") + require.NoError(t, err) + require.True(t, d.Allowed) + + serverKey := "thv:rl:{ns:srv}:shared" + toolKey := "thv:rl:{ns:srv}:shared:tool:search" + + exists, err := client.Exists(ctx, serverKey, toolKey).Result() + require.NoError(t, err) + assert.Equal(t, int64(2), exists) +} + func TestLimiter_PerToolIsolation(t *testing.T) { t.Parallel() client, _ := newTestClient(t) diff --git a/pkg/vmcp/config/config.go b/pkg/vmcp/config/config.go index 7a2c699290..1ab2422591 100644 --- a/pkg/vmcp/config/config.go +++ b/pkg/vmcp/config/config.go @@ -13,6 +13,8 @@ import ( "fmt" "time" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/stacklok/toolhive/pkg/audit" thvjson "github.com/stacklok/toolhive/pkg/json" "github.com/stacklok/toolhive/pkg/telemetry" @@ -173,6 +175,84 @@ type Config struct { // the THV_SESSION_REDIS_PASSWORD environment variable. // +optional SessionStorage *SessionStorageConfig `json:"sessionStorage,omitempty" yaml:"sessionStorage,omitempty"` + + // RateLimiting defines rate limiting configuration for the Virtual MCP server. + // Requires Redis session storage to be configured for distributed rate limiting. + // +optional + RateLimiting *RateLimitConfig `json:"rateLimiting,omitempty" yaml:"rateLimiting,omitempty"` +} + +// RateLimitConfig defines rate limiting configuration for a Virtual MCP server. +// At least one of shared, perUser, or tools must be configured. +// +// +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser) || (has(self.tools) && size(self.tools) > 0)",message="at least one of shared, perUser, or tools must be configured" +// +// +kubebuilder:object:generate=true +// +gendoc +// +//nolint:lll // kubebuilder marker exceeds line length +type RateLimitConfig struct { + // Shared is a token bucket shared across all users for the entire server. + // +optional + Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"` + + // PerUser is a token bucket applied independently to each authenticated user + // at the server level. Requires authentication to be enabled. + // Each unique userID creates Redis keys that expire after 2x refillPeriod. + // Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. + // +optional + PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"` + + // Tools defines per-tool rate limit overrides. + // Each entry applies additional rate limits to calls targeting a specific tool name. + // A request must pass both the server-level limit and the per-tool limit. + // +listType=map + // +listMapKey=name + // +optional + Tools []ToolRateLimitConfig `json:"tools,omitempty" yaml:"tools,omitempty"` +} + +// RateLimitBucket defines a token bucket configuration with a maximum capacity +// and a refill period. Used by both shared and per-user rate limits. +// +kubebuilder:object:generate=true +// +gendoc +type RateLimitBucket struct { + // MaxTokens is the maximum number of tokens (bucket capacity). + // This is also the burst size: the maximum number of requests that can be served + // instantaneously before the bucket is depleted. + // +kubebuilder:validation:Required + // +kubebuilder:validation:Minimum=1 + MaxTokens int32 `json:"maxTokens" yaml:"maxTokens"` + + // RefillPeriod is the duration to fully refill the bucket from zero to maxTokens. + // The effective refill rate is maxTokens / refillPeriod tokens per second. + // Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). + // +kubebuilder:validation:Required + RefillPeriod metav1.Duration `json:"refillPeriod" yaml:"refillPeriod"` +} + +// ToolRateLimitConfig defines rate limits for a specific tool. +// At least one of shared or perUser must be configured. +// +// +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser)",message="at least one of shared or perUser must be configured" +// +// +kubebuilder:object:generate=true +// +gendoc +// +//nolint:lll // kubebuilder marker exceeds line length +type ToolRateLimitConfig struct { + // Name is the MCP tool name this limit applies to. + // +kubebuilder:validation:Required + // +kubebuilder:validation:MinLength=1 + Name string `json:"name" yaml:"name"` + + // Shared token bucket for this specific tool. + // +optional + Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"` + + // PerUser token bucket configuration for this tool. + // +optional + PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"` } // IncomingAuthConfig configures client authentication to the virtual MCP server. diff --git a/pkg/vmcp/config/zz_generated.deepcopy.go b/pkg/vmcp/config/zz_generated.deepcopy.go index 80861bff11..5a7a60ef45 100644 --- a/pkg/vmcp/config/zz_generated.deepcopy.go +++ b/pkg/vmcp/config/zz_generated.deepcopy.go @@ -204,6 +204,11 @@ func (in *Config) DeepCopyInto(out *Config) { *out = new(SessionStorageConfig) **out = **in } + if in.RateLimiting != nil { + in, out := &in.RateLimiting, &out.RateLimiting + *out = new(RateLimitConfig) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Config. @@ -442,6 +447,54 @@ func (in *OutputProperty) DeepCopy() *OutputProperty { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RateLimitBucket) DeepCopyInto(out *RateLimitBucket) { + *out = *in + out.RefillPeriod = in.RefillPeriod +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitBucket. +func (in *RateLimitBucket) DeepCopy() *RateLimitBucket { + if in == nil { + return nil + } + out := new(RateLimitBucket) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RateLimitConfig) DeepCopyInto(out *RateLimitConfig) { + *out = *in + if in.Shared != nil { + in, out := &in.Shared, &out.Shared + *out = new(RateLimitBucket) + **out = **in + } + if in.PerUser != nil { + in, out := &in.PerUser, &out.PerUser + *out = new(RateLimitBucket) + **out = **in + } + if in.Tools != nil { + in, out := &in.Tools, &out.Tools + *out = make([]ToolRateLimitConfig, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitConfig. +func (in *RateLimitConfig) DeepCopy() *RateLimitConfig { + if in == nil { + return nil + } + out := new(RateLimitConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *SessionStorageConfig) DeepCopyInto(out *SessionStorageConfig) { *out = *in @@ -591,6 +644,31 @@ func (in *ToolOverride) DeepCopy() *ToolOverride { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ToolRateLimitConfig) DeepCopyInto(out *ToolRateLimitConfig) { + *out = *in + if in.Shared != nil { + in, out := &in.Shared, &out.Shared + *out = new(RateLimitBucket) + **out = **in + } + if in.PerUser != nil { + in, out := &in.PerUser, &out.PerUser + *out = new(RateLimitBucket) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ToolRateLimitConfig. +func (in *ToolRateLimitConfig) DeepCopy() *ToolRateLimitConfig { + if in == nil { + return nil + } + out := new(ToolRateLimitConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *WorkflowStepConfig) DeepCopyInto(out *WorkflowStepConfig) { *out = *in diff --git a/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go b/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go index 12cebf47d2..61a24c86be 100644 --- a/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go +++ b/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go @@ -10,6 +10,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -460,9 +461,30 @@ var _ = Describe("VirtualMCPServer Circuit Breaker Lifecycle", Ordered, func() { backend.Spec.Image = images.YardstickServerImage Expect(k8sClient.Update(ctx, backend)).To(Succeed()) + By("Waiting for backend StatefulSet template to use the fixed image") + Eventually(func() error { + sts := &appsv1.StatefulSet{} + if err := k8sClient.Get(ctx, types.NamespacedName{ + Name: backend2Name, + Namespace: testNamespace, + }, sts); err != nil { + return err + } + for _, container := range sts.Spec.Template.Spec.Containers { + if container.Name == "mcp" { + if container.Image != images.YardstickServerImage { + return fmt.Errorf("statefulset still has image %q", container.Image) + } + return nil + } + } + return fmt.Errorf("mcp container not found in statefulset template") + }, timeout, pollingInterval).Should(Succeed()) + By("Deleting stuck pods to force recreation with fixed image") // Pods in ImagePullBackOff don't automatically recreate when image is fixed - // Delete them to force the statefulset to create new pods with the correct image + // Delete them after the statefulset template is updated, otherwise the old template + // can immediately recreate the pod with the broken image again. podList := &corev1.PodList{} Expect(k8sClient.List(ctx, podList, client.InNamespace(testNamespace),