diff --git a/cmd/thv-operator/api/v1beta1/mcpserver_types.go b/cmd/thv-operator/api/v1beta1/mcpserver_types.go
index 200874bc95..c980d168fb 100644
--- a/cmd/thv-operator/api/v1beta1/mcpserver_types.go
+++ b/cmd/thv-operator/api/v1beta1/mcpserver_types.go
@@ -518,18 +518,18 @@ type SessionStorageConfig struct {
 //
 // +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser) || (has(self.tools) && size(self.tools) > 0)",message="at least one of shared, perUser, or tools must be configured"
 //
-//nolint:lll // CEL validation rules exceed line length limit
+//nolint:lll // kubebuilder marker exceeds line length
 type RateLimitConfig struct {
 	// Shared is a token bucket shared across all users for the entire server.
 	// +optional
-	Shared *RateLimitBucket `json:"shared,omitempty"`
+	Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"`
 
 	// PerUser is a token bucket applied independently to each authenticated user
 	// at the server level. Requires authentication to be enabled.
 	// Each unique userID creates Redis keys that expire after 2x refillPeriod.
 	// Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
 	// +optional
-	PerUser *RateLimitBucket `json:"perUser,omitempty"`
+	PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"`
 
 	// Tools defines per-tool rate limit overrides.
 	// Each entry applies additional rate limits to calls targeting a specific tool name.
@@ -537,24 +537,24 @@ type RateLimitConfig struct {
 	// +listType=map
 	// +listMapKey=name
 	// +optional
-	Tools []ToolRateLimitConfig `json:"tools,omitempty"`
+	Tools []ToolRateLimitConfig `json:"tools,omitempty" yaml:"tools,omitempty"`
 }
 
 // RateLimitBucket defines a token bucket configuration with a maximum capacity
-// and a refill period. Used by both shared (global) and per-user rate limits.
+// and a refill period. Used by both shared and per-user rate limits.
 type RateLimitBucket struct {
 	// MaxTokens is the maximum number of tokens (bucket capacity).
 	// This is also the burst size: the maximum number of requests that can be served
 	// instantaneously before the bucket is depleted.
 	// +kubebuilder:validation:Required
 	// +kubebuilder:validation:Minimum=1
-	MaxTokens int32 `json:"maxTokens"`
+	MaxTokens int32 `json:"maxTokens" yaml:"maxTokens"`
 
 	// RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
 	// The effective refill rate is maxTokens / refillPeriod tokens per second.
 	// Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
 	// +kubebuilder:validation:Required
-	RefillPeriod metav1.Duration `json:"refillPeriod"`
+	RefillPeriod metav1.Duration `json:"refillPeriod" yaml:"refillPeriod"`
 }
 
 // ToolRateLimitConfig defines rate limits for a specific tool.
@@ -567,15 +567,15 @@ type ToolRateLimitConfig struct {
 	// Name is the MCP tool name this limit applies to.
 	// +kubebuilder:validation:Required
 	// +kubebuilder:validation:MinLength=1
-	Name string `json:"name"`
+	Name string `json:"name" yaml:"name"`
 
 	// Shared token bucket for this specific tool.
 	// +optional
-	Shared *RateLimitBucket `json:"shared,omitempty"`
+	Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"`
 
 	// PerUser token bucket configuration for this tool.
 	// +optional
-	PerUser *RateLimitBucket `json:"perUser,omitempty"`
+	PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"`
 }
 
 // Permission profile types
diff --git a/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go b/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go
index 0e69f33836..70830c8578 100644
--- a/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go
+++ b/cmd/thv-operator/api/v1beta1/mcpserver_types_test.go
@@ -11,6 +11,8 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	vmcpconfig "github.com/stacklok/toolhive/pkg/vmcp/config"
 )
 
 func TestSessionStorageConfigJSONRoundtrip(t *testing.T) {
@@ -116,6 +118,46 @@ func TestRateLimitConfigJSONRoundtrip(t *testing.T) {
 	}
 }
 
+// TestVirtualMCPServerSpecRateLimitingJSONRoundtrip checks that config.rateLimiting round-trips through JSON.
+func TestVirtualMCPServerSpecRateLimitingJSONRoundtrip(t *testing.T) {
+	t.Parallel()
+
+	spec := VirtualMCPServerSpec{
+		IncomingAuth: &IncomingAuthConfig{Type: "oidc"},
+		GroupRef:     &MCPGroupRef{Name: "group-a"},
+		SessionStorage: &SessionStorageConfig{
+			Provider: "redis",
+			Address:  "redis.default.svc.cluster.local:6379",
+		},
+		Config: vmcpconfig.Config{
+			RateLimiting: &vmcpconfig.RateLimitConfig{
+				Shared:  &vmcpconfig.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}},
+				PerUser: &vmcpconfig.RateLimitBucket{MaxTokens: 2, RefillPeriod: metav1.Duration{Duration: time.Minute}},
+				Tools: []vmcpconfig.ToolRateLimitConfig{
+					{
+						Name: "backend_a_echo",
+						Shared: &vmcpconfig.RateLimitBucket{
+							MaxTokens:    5,
+							RefillPeriod: metav1.Duration{Duration: 30 * time.Second},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	b, err := json.Marshal(spec)
+	require.NoError(t, err)
+	for _, want := range []string{`"shared"`, `"perUser"`, `"backend_a_echo"`, `"config":{"rateLimiting"`} {
+		assert.Contains(t, string(b), want)
+	}
+
+	// Decode again: the substring checks above only exercise the encode half of the round trip.
+	var decoded VirtualMCPServerSpec
+	require.NoError(t, json.Unmarshal(b, &decoded))
+	assert.Equal(t, spec.Config.RateLimiting, decoded.Config.RateLimiting)
+}
+
 func TestMCPServerSpecScalingFieldsJSONRoundtrip(t *testing.T) {
 	t.Parallel()
 
diff --git a/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go b/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go
index c63139b133..4975c88286 100644
--- a/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go
+++ b/cmd/thv-operator/api/v1beta1/virtualmcpserver_types.go
@@ -16,6 +16,10 @@ import (
 
 // VirtualMCPServerSpec defines the desired state of VirtualMCPServer
 //
+// +kubebuilder:validation:XValidation:rule="!has(self.config) || !has(self.config.rateLimiting) || (has(self.sessionStorage) && self.sessionStorage.provider == 'redis')",message="config.rateLimiting requires sessionStorage with provider 'redis'"
+// +kubebuilder:validation:XValidation:rule="!(has(self.config) && has(self.config.rateLimiting) && has(self.config.rateLimiting.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type == 'oidc')",message="config.rateLimiting.perUser requires incomingAuth.type oidc"
+// +kubebuilder:validation:XValidation:rule="!has(self.config) || !has(self.config.rateLimiting) || !has(self.config.rateLimiting.tools) || self.config.rateLimiting.tools.all(t, !has(t.perUser)) || (has(self.incomingAuth) && self.incomingAuth.type == 'oidc')",message="per-tool perUser rate limiting requires incomingAuth.type oidc"
+//
 //nolint:lll // CEL validation rules exceed line length limit
 type VirtualMCPServerSpec struct {
 	// IncomingAuth configures authentication for clients connecting to the Virtual MCP server.
diff --git a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go
index 5d0fe5efde..2da5a24a45 100644
--- a/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go
+++ b/cmd/thv-operator/controllers/virtualmcpserver_vmcpconfig_test.go
@@ -507,6 +507,11 @@ func TestEnsureVmcpConfigConfigMap(t *testing.T) {
 	assert.Equal(t, "test-vmcp-vmcp-config", cm.Name)
 	assert.Contains(t, cm.Data, "config.yaml")
 	assert.NotEmpty(t, cm.Annotations["toolhive.stacklok.dev/content-checksum"])
+
+	var cfg vmcpconfig.Config
+	require.NoError(t, yaml.Unmarshal([]byte(cm.Data["config.yaml"]), &cfg))
+	assert.Equal(t, "test-vmcp", cfg.Name)
+	assert.Equal(t, "test-group", cfg.Group)
 }
 
 // TestSetAuthConfigConditions tests that auth config conditions reflect the current state
diff --git a/cmd/thv-operator/pkg/vmcpconfig/converter_test.go b/cmd/thv-operator/pkg/vmcpconfig/converter_test.go
index cee72256af..a100d380f5 100644
--- a/cmd/thv-operator/pkg/vmcpconfig/converter_test.go
+++ b/cmd/thv-operator/pkg/vmcpconfig/converter_test.go
@@ -1601,6 +1601,51 @@ func TestConverter_SessionStorage(t *testing.T) {
 	}
 }
 
+// TestConverter_RateLimitingPassThrough checks that Convert carries spec.config.rateLimiting into the resulting config.
+func TestConverter_RateLimitingPassThrough(t *testing.T) {
+	t.Parallel()
+
+	vmcpServer := &mcpv1beta1.VirtualMCPServer{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-vmcp",
+			Namespace: "default",
+		},
+		Spec: mcpv1beta1.VirtualMCPServerSpec{
+			GroupRef: &mcpv1beta1.MCPGroupRef{Name: "test-group"},
+			Config: vmcpconfig.Config{
+				RateLimiting: &vmcpconfig.RateLimitConfig{
+					PerUser: &vmcpconfig.RateLimitBucket{
+						MaxTokens:    2,
+						RefillPeriod: metav1.Duration{Duration: time.Minute},
+					},
+					Tools: []vmcpconfig.ToolRateLimitConfig{
+						{
+							Name: "backend_a_echo",
+							Shared: &vmcpconfig.RateLimitBucket{
+								MaxTokens:    5,
+								RefillPeriod: metav1.Duration{Duration: 30 * time.Second},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	converter := newTestConverter(t, newNoOpMockResolver(t))
+	ctx := log.IntoContext(context.Background(), logr.Discard())
+	config, _, err := converter.Convert(ctx, vmcpServer, nil)
+	require.NoError(t, err)
+	require.NotNil(t, config)
+	require.NotNil(t, config.RateLimiting)
+	require.NotNil(t, config.RateLimiting.PerUser) // guards the dereference on the next line
+	assert.EqualValues(t, 2, config.RateLimiting.PerUser.MaxTokens)
+	require.Len(t, config.RateLimiting.Tools, 1)
+	assert.Equal(t, "backend_a_echo", config.RateLimiting.Tools[0].Name)
+	require.NotNil(t, config.RateLimiting.Tools[0].Shared)
+	assert.EqualValues(t, 5, config.RateLimiting.Tools[0].Shared.MaxTokens)
+}
+
 func TestDeriveAllowedAudiences(t *testing.T) {
 	t.Parallel()
 
diff --git a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go
index b91f9d021d..c4a3484bd2 100644
--- a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go
+++ b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go
@@ -466,6 +466,7 @@ var _ = Describe("EmbeddingServer Controller Update Tests", func() {
 				Expect(k8sClient.Create(ctx, embeddingServer)).To(Succeed())
 				Eventually(func(g Gomega) {
 					g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), &appsv1.StatefulSet{})).To(Succeed())
+					g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), &corev1.Service{})).To(Succeed())
 				}, timeout, interval).Should(Succeed())
 			})
 
diff --git a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go
index 45b6043196..6499c3df4d 100644
--- a/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go
+++ b/cmd/thv-operator/test-integration/virtualmcp/virtualmcpserver_sessionstorage_cel_test.go
@@ -5,6 +5,8 @@
 package controllers
 
 import (
+	"time"
+
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -106,4 +108,60 @@ var _ = Describe("CEL Validation for SessionStorageConfig on VirtualMCPServer",
 				Expect(err).To(HaveOccurred())
 			})
 		})
+
+		// These specs exercise the CEL rules tying config.rateLimiting to sessionStorage and incomingAuth.
+		Context("rateLimiting", func() {
+			// minimalBucket returns a fresh one-token-per-minute bucket so specs never share a pointer.
+			minimalBucket := func() *vmcpconfig.RateLimitBucket {
+				return &vmcpconfig.RateLimitBucket{MaxTokens: 1, RefillPeriod: metav1.Duration{Duration: time.Minute}}
+			}
+			// redisStorage returns session storage that satisfies the redis-provider rule.
+			redisStorage := func() *mcpv1beta1.SessionStorageConfig {
+				return &mcpv1beta1.SessionStorageConfig{Provider: "redis", Address: "redis:6379"}
+			}
+
+			It("should reject rate limiting without redis session storage", func() {
+				vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-no-redis", nil)
+				vmcp.Spec.Config.RateLimiting = &vmcpconfig.RateLimitConfig{Shared: minimalBucket()}
+
+				err := k8sClient.Create(ctx, vmcp)
+				Expect(err).To(HaveOccurred())
+				Expect(err.Error()).To(ContainSubstring("config.rateLimiting requires sessionStorage with provider 'redis'"))
+			})
+
+			It("should reject perUser rate limiting with anonymous auth", func() {
+				vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-peruser-anon", redisStorage())
+				vmcp.Spec.Config.RateLimiting = &vmcpconfig.RateLimitConfig{PerUser: minimalBucket()}
+
+				err := k8sClient.Create(ctx, vmcp)
+				Expect(err).To(HaveOccurred())
+				Expect(err.Error()).To(ContainSubstring("config.rateLimiting.perUser requires incomingAuth.type oidc"))
+			})
+
+			It("should reject per-tool perUser rate limiting with anonymous auth", func() {
+				vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-tool-peruser-anon", redisStorage())
+				vmcp.Spec.Config.RateLimiting = &vmcpconfig.RateLimitConfig{
+					Tools: []vmcpconfig.ToolRateLimitConfig{{Name: "echo", PerUser: minimalBucket()}},
+				}
+
+				err := k8sClient.Create(ctx, vmcp)
+				Expect(err).To(HaveOccurred())
+				Expect(err.Error()).To(ContainSubstring("per-tool perUser rate limiting requires incomingAuth.type oidc"))
+			})
+
+			It("should accept perUser rate limiting with oidc auth and redis session storage", func() {
+				vmcp := newVirtualMCPServerWithSessionStorage("vmcp-rl-peruser-oidc", redisStorage())
+				vmcp.Spec.IncomingAuth = &mcpv1beta1.IncomingAuthConfig{
+					Type: "oidc",
+					OIDCConfigRef: &mcpv1beta1.MCPOIDCConfigReference{
+						Name:     "oidc",
+						Audience: "test-audience",
+					},
+				}
+				vmcp.Spec.Config.RateLimiting = &vmcpconfig.RateLimitConfig{PerUser: minimalBucket()}
+
+				err := k8sClient.Create(ctx, vmcp)
+				Expect(err).NotTo(HaveOccurred())
+			})
+		})
 	})
diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml
index a51fe4b5bd..584a6215b2 100644
--- a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml
+++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_virtualmcpservers.yaml
@@ -1844,6 +1844,131 @@ spec:
                     required:
                     - source
                     type: object
+                  rateLimiting:
+                    description: |-
+                      RateLimiting defines rate limiting configuration for the Virtual MCP server.
+                      Requires Redis session storage to be configured for distributed rate limiting.
+                    properties:
+                      perUser:
+                        description: |-
+                          PerUser is a token bucket applied independently to each authenticated user
+                          at the server level. Requires authentication to be enabled.
+                          Each unique userID creates Redis keys that expire after 2x refillPeriod.
+                          Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
+                        properties:
+                          maxTokens:
+                            description: |-
+                              MaxTokens is the maximum number of tokens (bucket capacity).
+                              This is also the burst size: the maximum number of requests that can be served
+                              instantaneously before the bucket is depleted.
+                            format: int32
+                            minimum: 1
+                            type: integer
+                          refillPeriod:
+                            description: |-
+                              RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                              The effective refill rate is maxTokens / refillPeriod tokens per second.
+                              Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                            type: string
+                        required:
+                        - maxTokens
+                        - refillPeriod
+                        type: object
+                      shared:
+                        description: Shared is a token bucket shared across all users
+                          for the entire server.
+                        properties:
+                          maxTokens:
+                            description: |-
+                              MaxTokens is the maximum number of tokens (bucket capacity).
+                              This is also the burst size: the maximum number of requests that can be served
+                              instantaneously before the bucket is depleted.
+                            format: int32
+                            minimum: 1
+                            type: integer
+                          refillPeriod:
+                            description: |-
+                              RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                              The effective refill rate is maxTokens / refillPeriod tokens per second.
+                              Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                            type: string
+                        required:
+                        - maxTokens
+                        - refillPeriod
+                        type: object
+                      tools:
+                        description: |-
+                          Tools defines per-tool rate limit overrides.
+                          Each entry applies additional rate limits to calls targeting a specific tool name.
+                          A request must pass both the server-level limit and the per-tool limit.
+                        items:
+                          description: |-
+                            ToolRateLimitConfig defines rate limits for a specific tool.
+                            At least one of shared or perUser must be configured.
+                          properties:
+                            name:
+                              description: Name is the MCP tool name this limit applies
+                                to.
+                              minLength: 1
+                              type: string
+                            perUser:
+                              description: PerUser token bucket configuration for
+                                this tool.
+                              properties:
+                                maxTokens:
+                                  description: |-
+                                    MaxTokens is the maximum number of tokens (bucket capacity).
+                                    This is also the burst size: the maximum number of requests that can be served
+                                    instantaneously before the bucket is depleted.
+                                  format: int32
+                                  minimum: 1
+                                  type: integer
+                                refillPeriod:
+                                  description: |-
+                                    RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                    The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                    Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                                  type: string
+                              required:
+                              - maxTokens
+                              - refillPeriod
+                              type: object
+                            shared:
+                              description: Shared token bucket for this specific tool.
+                              properties:
+                                maxTokens:
+                                  description: |-
+                                    MaxTokens is the maximum number of tokens (bucket capacity).
+                                    This is also the burst size: the maximum number of requests that can be served
+                                    instantaneously before the bucket is depleted.
+                                  format: int32
+                                  minimum: 1
+                                  type: integer
+                                refillPeriod:
+                                  description: |-
+                                    RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                    The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                    Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                                  type: string
+                              required:
+                              - maxTokens
+                              - refillPeriod
+                              type: object
+                          required:
+                          - name
+                          type: object
+                          x-kubernetes-validations:
+                          - message: at least one of shared or perUser must be configured
+                            rule: has(self.shared) || has(self.perUser)
+                        type: array
+                        x-kubernetes-list-map-keys:
+                        - name
+                        x-kubernetes-list-type: map
+                    type: object
+                    x-kubernetes-validations:
+                    - message: at least one of shared, perUser, or tools must be configured
+                      rule: has(self.shared) || has(self.perUser) || (has(self.tools)
+                        && size(self.tools) > 0)
                   sessionStorage:
                     description: |-
                       SessionStorage configures session storage for stateful horizontal scaling.
@@ -2347,6 +2472,17 @@ spec:
             - groupRef
             - incomingAuth
             type: object
+            x-kubernetes-validations:
+            - message: config.rateLimiting requires sessionStorage with provider 'redis'
+              rule: '!has(self.config) || !has(self.config.rateLimiting) || (has(self.sessionStorage)
+                && self.sessionStorage.provider == ''redis'')'
+            - message: config.rateLimiting.perUser requires incomingAuth.type oidc
+              rule: '!(has(self.config) && has(self.config.rateLimiting) && has(self.config.rateLimiting.perUser))
+                || (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')'
+            - message: per-tool perUser rate limiting requires incomingAuth.type oidc
+              rule: '!has(self.config) || !has(self.config.rateLimiting) || !has(self.config.rateLimiting.tools)
+                || self.config.rateLimiting.tools.all(t, !has(t.perUser)) || (has(self.incomingAuth)
+                && self.incomingAuth.type == ''oidc'')'
           status:
             description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer
             properties:
@@ -4340,6 +4476,131 @@ spec:
                     required:
                     - source
                     type: object
+                  rateLimiting:
+                    description: |-
+                      RateLimiting defines rate limiting configuration for the Virtual MCP server.
+                      Requires Redis session storage to be configured for distributed rate limiting.
+                    properties:
+                      perUser:
+                        description: |-
+                          PerUser is a token bucket applied independently to each authenticated user
+                          at the server level. Requires authentication to be enabled.
+                          Each unique userID creates Redis keys that expire after 2x refillPeriod.
+                          Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
+                        properties:
+                          maxTokens:
+                            description: |-
+                              MaxTokens is the maximum number of tokens (bucket capacity).
+                              This is also the burst size: the maximum number of requests that can be served
+                              instantaneously before the bucket is depleted.
+                            format: int32
+                            minimum: 1
+                            type: integer
+                          refillPeriod:
+                            description: |-
+                              RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                              The effective refill rate is maxTokens / refillPeriod tokens per second.
+                              Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                            type: string
+                        required:
+                        - maxTokens
+                        - refillPeriod
+                        type: object
+                      shared:
+                        description: Shared is a token bucket shared across all users
+                          for the entire server.
+                        properties:
+                          maxTokens:
+                            description: |-
+                              MaxTokens is the maximum number of tokens (bucket capacity).
+                              This is also the burst size: the maximum number of requests that can be served
+                              instantaneously before the bucket is depleted.
+                            format: int32
+                            minimum: 1
+                            type: integer
+                          refillPeriod:
+                            description: |-
+                              RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                              The effective refill rate is maxTokens / refillPeriod tokens per second.
+                              Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                            type: string
+                        required:
+                        - maxTokens
+                        - refillPeriod
+                        type: object
+                      tools:
+                        description: |-
+                          Tools defines per-tool rate limit overrides.
+                          Each entry applies additional rate limits to calls targeting a specific tool name.
+                          A request must pass both the server-level limit and the per-tool limit.
+                        items:
+                          description: |-
+                            ToolRateLimitConfig defines rate limits for a specific tool.
+                            At least one of shared or perUser must be configured.
+                          properties:
+                            name:
+                              description: Name is the MCP tool name this limit applies
+                                to.
+                              minLength: 1
+                              type: string
+                            perUser:
+                              description: PerUser token bucket configuration for
+                                this tool.
+                              properties:
+                                maxTokens:
+                                  description: |-
+                                    MaxTokens is the maximum number of tokens (bucket capacity).
+                                    This is also the burst size: the maximum number of requests that can be served
+                                    instantaneously before the bucket is depleted.
+                                  format: int32
+                                  minimum: 1
+                                  type: integer
+                                refillPeriod:
+                                  description: |-
+                                    RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                    The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                    Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                                  type: string
+                              required:
+                              - maxTokens
+                              - refillPeriod
+                              type: object
+                            shared:
+                              description: Shared token bucket for this specific tool.
+                              properties:
+                                maxTokens:
+                                  description: |-
+                                    MaxTokens is the maximum number of tokens (bucket capacity).
+                                    This is also the burst size: the maximum number of requests that can be served
+                                    instantaneously before the bucket is depleted.
+                                  format: int32
+                                  minimum: 1
+                                  type: integer
+                                refillPeriod:
+                                  description: |-
+                                    RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                    The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                    Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                                  type: string
+                              required:
+                              - maxTokens
+                              - refillPeriod
+                              type: object
+                          required:
+                          - name
+                          type: object
+                          x-kubernetes-validations:
+                          - message: at least one of shared or perUser must be configured
+                            rule: has(self.shared) || has(self.perUser)
+                        type: array
+                        x-kubernetes-list-map-keys:
+                        - name
+                        x-kubernetes-list-type: map
+                    type: object
+                    x-kubernetes-validations:
+                    - message: at least one of shared, perUser, or tools must be configured
+                      rule: has(self.shared) || has(self.perUser) || (has(self.tools)
+                        && size(self.tools) > 0)
                   sessionStorage:
                     description: |-
                       SessionStorage configures session storage for stateful horizontal scaling.
@@ -4843,6 +5104,17 @@ spec:
             - groupRef
             - incomingAuth
             type: object
+            x-kubernetes-validations:
+            - message: config.rateLimiting requires sessionStorage with provider 'redis'
+              rule: '!has(self.config) || !has(self.config.rateLimiting) || (has(self.sessionStorage)
+                && self.sessionStorage.provider == ''redis'')'
+            - message: config.rateLimiting.perUser requires incomingAuth.type oidc
+              rule: '!(has(self.config) && has(self.config.rateLimiting) && has(self.config.rateLimiting.perUser))
+                || (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')'
+            - message: per-tool perUser rate limiting requires incomingAuth.type oidc
+              rule: '!has(self.config) || !has(self.config.rateLimiting) || !has(self.config.rateLimiting.tools)
+                || self.config.rateLimiting.tools.all(t, !has(t.perUser)) || (has(self.incomingAuth)
+                && self.incomingAuth.type == ''oidc'')'
           status:
             description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer
             properties:
diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml
index 6078670479..fa25125884 100644
--- a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml
+++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_virtualmcpservers.yaml
@@ -1847,6 +1847,131 @@ spec:
                     required:
                     - source
                     type: object
+                  rateLimiting:
+                    description: |-
+                      RateLimiting defines rate limiting configuration for the Virtual MCP server.
+                      Requires Redis session storage to be configured for distributed rate limiting.
+                    properties:
+                      perUser:
+                        description: |-
+                          PerUser is a token bucket applied independently to each authenticated user
+                          at the server level. Requires authentication to be enabled.
+                          Each unique userID creates Redis keys that expire after 2x refillPeriod.
+                          Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
+                        properties:
+                          maxTokens:
+                            description: |-
+                              MaxTokens is the maximum number of tokens (bucket capacity).
+                              This is also the burst size: the maximum number of requests that can be served
+                              instantaneously before the bucket is depleted.
+                            format: int32
+                            minimum: 1
+                            type: integer
+                          refillPeriod:
+                            description: |-
+                              RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                              The effective refill rate is maxTokens / refillPeriod tokens per second.
+                              Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                            type: string
+                        required:
+                        - maxTokens
+                        - refillPeriod
+                        type: object
+                      shared:
+                        description: Shared is a token bucket shared across all users
+                          for the entire server.
+                        properties:
+                          maxTokens:
+                            description: |-
+                              MaxTokens is the maximum number of tokens (bucket capacity).
+                              This is also the burst size: the maximum number of requests that can be served
+                              instantaneously before the bucket is depleted.
+                            format: int32
+                            minimum: 1
+                            type: integer
+                          refillPeriod:
+                            description: |-
+                              RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                              The effective refill rate is maxTokens / refillPeriod tokens per second.
+                              Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                            type: string
+                        required:
+                        - maxTokens
+                        - refillPeriod
+                        type: object
+                      tools:
+                        description: |-
+                          Tools defines per-tool rate limit overrides.
+                          Each entry applies additional rate limits to calls targeting a specific tool name.
+                          A request must pass both the server-level limit and the per-tool limit.
+                        items:
+                          description: |-
+                            ToolRateLimitConfig defines rate limits for a specific tool.
+                            At least one of shared or perUser must be configured.
+                          properties:
+                            name:
+                              description: Name is the MCP tool name this limit applies
+                                to.
+                              minLength: 1
+                              type: string
+                            perUser:
+                              description: PerUser token bucket configuration for
+                                this tool.
+                              properties:
+                                maxTokens:
+                                  description: |-
+                                    MaxTokens is the maximum number of tokens (bucket capacity).
+                                    This is also the burst size: the maximum number of requests that can be served
+                                    instantaneously before the bucket is depleted.
+                                  format: int32
+                                  minimum: 1
+                                  type: integer
+                                refillPeriod:
+                                  description: |-
+                                    RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                    The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                    Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                                  type: string
+                              required:
+                              - maxTokens
+                              - refillPeriod
+                              type: object
+                            shared:
+                              description: Shared token bucket for this specific tool.
+                              properties:
+                                maxTokens:
+                                  description: |-
+                                    MaxTokens is the maximum number of tokens (bucket capacity).
+                                    This is also the burst size: the maximum number of requests that can be served
+                                    instantaneously before the bucket is depleted.
+                                  format: int32
+                                  minimum: 1
+                                  type: integer
+                                refillPeriod:
+                                  description: |-
+                                    RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                    The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                    Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                                  type: string
+                              required:
+                              - maxTokens
+                              - refillPeriod
+                              type: object
+                          required:
+                          - name
+                          type: object
+                          x-kubernetes-validations:
+                          - message: at least one of shared or perUser must be configured
+                            rule: has(self.shared) || has(self.perUser)
+                        type: array
+                        x-kubernetes-list-map-keys:
+                        - name
+                        x-kubernetes-list-type: map
+                    type: object
+                    x-kubernetes-validations:
+                    - message: at least one of shared, perUser, or tools must be configured
+                      rule: has(self.shared) || has(self.perUser) || (has(self.tools)
+                        && size(self.tools) > 0)
                   sessionStorage:
                     description: |-
                       SessionStorage configures session storage for stateful horizontal scaling.
@@ -2350,6 +2475,17 @@ spec:
             - groupRef
             - incomingAuth
             type: object
+            x-kubernetes-validations:
+            - message: config.rateLimiting requires sessionStorage with provider 'redis'
+              rule: '!has(self.config) || !has(self.config.rateLimiting) || (has(self.sessionStorage)
+                && self.sessionStorage.provider == ''redis'')'
+            - message: config.rateLimiting.perUser requires incomingAuth.type oidc
+              rule: '!(has(self.config) && has(self.config.rateLimiting) && has(self.config.rateLimiting.perUser))
+                || (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')'
+            - message: per-tool perUser rate limiting requires incomingAuth.type oidc
+              rule: '!has(self.config) || !has(self.config.rateLimiting) || !has(self.config.rateLimiting.tools)
+                || self.config.rateLimiting.tools.all(t, !has(t.perUser)) || (has(self.incomingAuth)
+                && self.incomingAuth.type == ''oidc'')'
           status:
             description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer
             properties:
@@ -4343,6 +4479,131 @@ spec:
                     required:
                     - source
                     type: object
+                  rateLimiting:
+                    description: |-
+                      RateLimiting defines rate limiting configuration for the Virtual MCP server.
+                      Requires Redis session storage to be configured for distributed rate limiting.
+                    properties:
+                      perUser:
+                        description: |-
+                          PerUser is a token bucket applied independently to each authenticated user
+                          at the server level. Requires authentication to be enabled.
+                          Each unique userID creates Redis keys that expire after 2x refillPeriod.
+                          Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
+                        properties:
+                          maxTokens:
+                            description: |-
+                              MaxTokens is the maximum number of tokens (bucket capacity).
+                              This is also the burst size: the maximum number of requests that can be served
+                              instantaneously before the bucket is depleted.
+                            format: int32
+                            minimum: 1
+                            type: integer
+                          refillPeriod:
+                            description: |-
+                              RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                              The effective refill rate is maxTokens / refillPeriod tokens per second.
+                              Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                            type: string
+                        required:
+                        - maxTokens
+                        - refillPeriod
+                        type: object
+                      shared:
+                        description: Shared is a token bucket shared across all users
+                          for the entire server.
+                        properties:
+                          maxTokens:
+                            description: |-
+                              MaxTokens is the maximum number of tokens (bucket capacity).
+                              This is also the burst size: the maximum number of requests that can be served
+                              instantaneously before the bucket is depleted.
+                            format: int32
+                            minimum: 1
+                            type: integer
+                          refillPeriod:
+                            description: |-
+                              RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                              The effective refill rate is maxTokens / refillPeriod tokens per second.
+                              Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                            type: string
+                        required:
+                        - maxTokens
+                        - refillPeriod
+                        type: object
+                      tools:
+                        description: |-
+                          Tools defines per-tool rate limit overrides.
+                          Each entry applies additional rate limits to calls targeting a specific tool name.
+                          A request must pass both the server-level limit and the per-tool limit.
+                        items:
+                          description: |-
+                            ToolRateLimitConfig defines rate limits for a specific tool.
+                            At least one of shared or perUser must be configured.
+                          properties:
+                            name:
+                              description: Name is the MCP tool name this limit applies
+                                to.
+                              minLength: 1
+                              type: string
+                            perUser:
+                              description: PerUser token bucket configuration for
+                                this tool.
+                              properties:
+                                maxTokens:
+                                  description: |-
+                                    MaxTokens is the maximum number of tokens (bucket capacity).
+                                    This is also the burst size: the maximum number of requests that can be served
+                                    instantaneously before the bucket is depleted.
+                                  format: int32
+                                  minimum: 1
+                                  type: integer
+                                refillPeriod:
+                                  description: |-
+                                    RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                    The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                    Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                                  type: string
+                              required:
+                              - maxTokens
+                              - refillPeriod
+                              type: object
+                            shared:
+                              description: Shared token bucket for this specific tool.
+                              properties:
+                                maxTokens:
+                                  description: |-
+                                    MaxTokens is the maximum number of tokens (bucket capacity).
+                                    This is also the burst size: the maximum number of requests that can be served
+                                    instantaneously before the bucket is depleted.
+                                  format: int32
+                                  minimum: 1
+                                  type: integer
+                                refillPeriod:
+                                  description: |-
+                                    RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+                                    The effective refill rate is maxTokens / refillPeriod tokens per second.
+                                    Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+                                  type: string
+                              required:
+                              - maxTokens
+                              - refillPeriod
+                              type: object
+                          required:
+                          - name
+                          type: object
+                          x-kubernetes-validations:
+                          - message: at least one of shared or perUser must be configured
+                            rule: has(self.shared) || has(self.perUser)
+                        type: array
+                        x-kubernetes-list-map-keys:
+                        - name
+                        x-kubernetes-list-type: map
+                    type: object
+                    x-kubernetes-validations:
+                    - message: at least one of shared, perUser, or tools must be configured
+                      rule: has(self.shared) || has(self.perUser) || (has(self.tools)
+                        && size(self.tools) > 0)
                   sessionStorage:
                     description: |-
                       SessionStorage configures session storage for stateful horizontal scaling.
@@ -4846,6 +5107,17 @@ spec:
             - groupRef
             - incomingAuth
             type: object
+            x-kubernetes-validations:
+            - message: config.rateLimiting requires sessionStorage with provider 'redis'
+              rule: '!has(self.config) || !has(self.config.rateLimiting) || (has(self.sessionStorage)
+                && self.sessionStorage.provider == ''redis'')'
+            - message: config.rateLimiting.perUser requires incomingAuth.type oidc
+              rule: '!(has(self.config) && has(self.config.rateLimiting) && has(self.config.rateLimiting.perUser))
+                || (has(self.incomingAuth) && self.incomingAuth.type == ''oidc'')'
+            - message: per-tool perUser rate limiting requires incomingAuth.type oidc
+              rule: '!has(self.config) || !has(self.config.rateLimiting) || !has(self.config.rateLimiting.tools)
+                || self.config.rateLimiting.tools.all(t, !has(t.perUser)) || (has(self.incomingAuth)
+                && self.incomingAuth.type == ''oidc'')'
           status:
             description: VirtualMCPServerStatus defines the observed state of VirtualMCPServer
             properties:
diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md
index e60f05015e..306d140554 100644
--- a/docs/operator/crd-api.md
+++ b/docs/operator/crd-api.md
@@ -323,6 +323,7 @@ _Appears in:_
 | `audit` _[pkg.audit.Config](#pkgauditconfig)_ | Audit configures audit logging for the Virtual MCP server.<br />When present, audit logs include MCP protocol operations.<br />See audit.Config for available configuration options. |  | Optional: \{\} <br /> |
 | `optimizer` _[vmcp.config.OptimizerConfig](#vmcpconfigoptimizerconfig)_ | Optimizer configures the MCP optimizer for context optimization on large toolsets.<br />When enabled, vMCP exposes only find_tool and call_tool operations to clients<br />instead of all backend tools directly. This reduces token usage by allowing<br />LLMs to discover relevant tools on demand rather than receiving all tool definitions. |  | Optional: \{\} <br /> |
 | `sessionStorage` _[vmcp.config.SessionStorageConfig](#vmcpconfigsessionstorageconfig)_ | SessionStorage configures session storage for stateful horizontal scaling.<br />When provider is "redis", the operator injects Redis connection parameters<br />(address, db, keyPrefix) here. The Redis password is provided separately via<br />the THV_SESSION_REDIS_PASSWORD environment variable. |  | Optional: \{\} <br /> |
+| `rateLimiting` _[vmcp.config.RateLimitConfig](#vmcpconfigratelimitconfig)_ | RateLimiting defines rate limiting configuration for the Virtual MCP server.<br />Requires Redis session storage to be configured for distributed rate limiting. |  | Optional: \{\} <br /> |
 
 
 #### vmcp.config.ConflictResolutionConfig
@@ -546,6 +547,44 @@ _Appears in:_
 | `default` _[pkg.json.Any](#pkgjsonany)_ | Default is the fallback value if template expansion fails.<br />Type coercion is applied to match the declared Type. |  | Schemaless: \{\} <br />Optional: \{\} <br /> |
 
 
+#### vmcp.config.RateLimitBucket
+
+
+
+RateLimitBucket defines a token bucket configuration with a maximum capacity
+and a refill period. Used by both shared and per-user rate limits.
+
+
+
+_Appears in:_
+- [vmcp.config.RateLimitConfig](#vmcpconfigratelimitconfig)
+- [vmcp.config.ToolRateLimitConfig](#vmcpconfigtoolratelimitconfig)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `maxTokens` _integer_ | MaxTokens is the maximum number of tokens (bucket capacity).<br />This is also the burst size: the maximum number of requests that can be served<br />instantaneously before the bucket is depleted. |  | Minimum: 1 <br />Required: \{\} <br /> |
+| `refillPeriod` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#duration-v1-meta)_ | RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.<br />The effective refill rate is maxTokens / refillPeriod tokens per second.<br />Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). |  | Pattern: `^([0-9]+(\.[0-9]+)?(ns\|us\|µs\|ms\|s\|m\|h))+$` <br />Required: \{\} <br />Type: string <br /> |
+
+
+#### vmcp.config.RateLimitConfig
+
+
+
+RateLimitConfig defines rate limiting configuration for a Virtual MCP server.
+At least one of shared, perUser, or tools must be configured.
+
+
+
+_Appears in:_
+- [vmcp.config.Config](#vmcpconfigconfig)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `shared` _[vmcp.config.RateLimitBucket](#vmcpconfigratelimitbucket)_ | Shared is a token bucket shared across all users for the entire server. |  | Optional: \{\} <br /> |
+| `perUser` _[vmcp.config.RateLimitBucket](#vmcpconfigratelimitbucket)_ | PerUser is a token bucket applied independently to each authenticated user<br />at the server level. Requires authentication to be enabled.<br />Each unique userID creates Redis keys that expire after 2x refillPeriod.<br />Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys. |  | Optional: \{\} <br /> |
+| `tools` _[vmcp.config.ToolRateLimitConfig](#vmcpconfigtoolratelimitconfig) array_ | Tools defines per-tool rate limit overrides.<br />Each entry applies additional rate limits to calls targeting a specific tool name.<br />A request must pass both the server-level limit and the per-tool limit. |  | Optional: \{\} <br /> |
+
+
 #### vmcp.config.SessionStorageConfig
 
 
@@ -675,6 +714,25 @@ _Appears in:_
 | `annotations` _[vmcp.config.ToolAnnotationsOverride](#vmcpconfigtoolannotationsoverride)_ | Annotations overrides specific tool annotation fields.<br />Only specified fields are overridden; others pass through from the backend. |  | Optional: \{\} <br /> |
 
 
+#### vmcp.config.ToolRateLimitConfig
+
+
+
+ToolRateLimitConfig defines rate limits for a specific tool.
+At least one of shared or perUser must be configured.
+
+
+
+_Appears in:_
+- [vmcp.config.RateLimitConfig](#vmcpconfigratelimitconfig)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `name` _string_ | Name is the MCP tool name this limit applies to. |  | MinLength: 1 <br />Required: \{\} <br /> |
+| `shared` _[vmcp.config.RateLimitBucket](#vmcpconfigratelimitbucket)_ | Shared token bucket for this specific tool. |  | Optional: \{\} <br /> |
+| `perUser` _[vmcp.config.RateLimitBucket](#vmcpconfigratelimitbucket)_ | PerUser token bucket configuration for this tool. |  | Optional: \{\} <br /> |
+
+
 
 
 #### vmcp.config.WorkflowStepConfig
@@ -2729,7 +2787,7 @@ _Appears in:_
 
 
 RateLimitBucket defines a token bucket configuration with a maximum capacity
-and a refill period. Used by both shared (global) and per-user rate limits.
+and a refill period. Used by both shared and per-user rate limits.
 
 
 
@@ -2740,7 +2798,7 @@ _Appears in:_
 | Field | Description | Default | Validation |
 | --- | --- | --- | --- |
 | `maxTokens` _integer_ | MaxTokens is the maximum number of tokens (bucket capacity).<br />This is also the burst size: the maximum number of requests that can be served<br />instantaneously before the bucket is depleted. |  | Minimum: 1 <br />Required: \{\} <br /> |
-| `refillPeriod` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#duration-v1-meta)_ | RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.<br />The effective refill rate is maxTokens / refillPeriod tokens per second.<br />Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). |  | Required: \{\} <br /> |
+| `refillPeriod` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#duration-v1-meta)_ | RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.<br />The effective refill rate is maxTokens / refillPeriod tokens per second.<br />Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s"). |  | Pattern: `^([0-9]+(\.[0-9]+)?(ns\|us\|µs\|ms\|s\|m\|h))+$` <br />Required: \{\} <br />Type: string <br /> |
 
 
 #### api.v1beta1.RateLimitConfig
@@ -3577,7 +3635,7 @@ _Appears in:_
 | --- | --- | --- | --- |
 | `name` _string_ | Name is a unique identifier for this webhook |  | MaxLength: 63 <br />MinLength: 1 <br /> |
 | `url` _string_ | URL is the endpoint to call for this webhook. Must be an HTTP/HTTPS URL. |  | Format: uri <br /> |
-| `timeout` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#duration-v1-meta)_ | Timeout configures the maximum time to wait for the webhook to respond.<br />Defaults to 10s if not specified. Maximum is 30s. |  | Format: duration <br />Type: string <br />Optional: \{\} <br /> |
+| `timeout` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#duration-v1-meta)_ | Timeout configures the maximum time to wait for the webhook to respond.<br />Defaults to 10s if not specified. Maximum is 30s. |  | Format: duration <br />Pattern: `^([0-9]+(\.[0-9]+)?(ns\|us\|µs\|ms\|s\|m\|h))+$` <br />Type: string <br />Optional: \{\} <br /> |
 | `failurePolicy` _[api.v1beta1.WebhookFailurePolicy](#apiv1beta1webhookfailurepolicy)_ | FailurePolicy defines how to handle errors when communicating with the webhook.<br />Supported values: "fail", "ignore". Defaults to "fail". | fail | Enum: [fail ignore] <br />Optional: \{\} <br /> |
 | `tlsConfig` _[api.v1beta1.WebhookTLSConfig](#apiv1beta1webhooktlsconfig)_ | TLSConfig contains optional TLS configuration for the webhook connection. |  | Optional: \{\} <br /> |
 | `hmacSecretRef` _[api.v1beta1.SecretKeyRef](#apiv1beta1secretkeyref)_ | HMACSecretRef references a Kubernetes Secret containing the HMAC signing key<br />used to sign the webhook payload. If set, the X-Toolhive-Signature header will be injected. |  | Optional: \{\} <br /> |
diff --git a/pkg/ratelimit/internal/bucket/bucket.go b/pkg/ratelimit/internal/bucket/bucket.go
index 28903bcdcd..d68a1709c8 100644
--- a/pkg/ratelimit/internal/bucket/bucket.go
+++ b/pkg/ratelimit/internal/bucket/bucket.go
@@ -90,7 +90,7 @@ type TokenBucket struct {
 }
 
 // New creates a TokenBucket. The Redis key is derived from namespace, server
-// name, and suffix (e.g., "global" or "global:tool:search").
+// name, and suffix (e.g., "shared" or "shared:tool:search").
 func New(namespace, serverName, suffix string, maxTokens int32, refillPeriod time.Duration) *TokenBucket {
 	refillSec := refillPeriod.Seconds()
 	return &TokenBucket{
diff --git a/pkg/ratelimit/limiter_test.go b/pkg/ratelimit/limiter_test.go
index a007191d3c..147e22cd7e 100644
--- a/pkg/ratelimit/limiter_test.go
+++ b/pkg/ratelimit/limiter_test.go
@@ -70,7 +70,7 @@ func TestNewLimiter_ZeroDuration(t *testing.T) {
 	assert.Contains(t, err.Error(), "refillPeriod must be positive")
 }
 
-func TestLimiter_ServerGlobalExhausted(t *testing.T) {
+func TestLimiter_ServerSharedExhausted(t *testing.T) {
 	t.Parallel()
 	client, _ := newTestClient(t)
 	ctx := t.Context()
@@ -93,6 +93,35 @@ func TestLimiter_ServerGlobalExhausted(t *testing.T) {
 	assert.Greater(t, d.RetryAfter, time.Duration(0))
 }
 
+// TestLimiter_SharedUsesRedisKeys checks that one allowed call leaves both the
+// server-level and the per-tool "shared" keys present in Redis.
+func TestLimiter_SharedUsesRedisKeys(t *testing.T) {
+	t.Parallel()
+	client, _ := newTestClient(t)
+	ctx := t.Context()
+
+	bucket := func() *v1beta1.RateLimitBucket {
+		return &v1beta1.RateLimitBucket{MaxTokens: 10, RefillPeriod: metav1.Duration{Duration: time.Minute}}
+	}
+	cfg := &v1beta1.RateLimitConfig{
+		Shared: bucket(),
+		Tools:  []v1beta1.ToolRateLimitConfig{{Name: "search", Shared: bucket()}},
+	}
+	limiter, err := NewLimiter(client, "ns", "srv", cfg)
+	require.NoError(t, err)
+
+	decision, err := limiter.Allow(ctx, "search", "")
+	require.NoError(t, err)
+	require.True(t, decision.Allowed)
+
+	// Two keys are queried, so the expected existence count is two.
+	serverKey := "thv:rl:{ns:srv}:shared"
+	toolKey := "thv:rl:{ns:srv}:shared:tool:search"
+	found, err := client.Exists(ctx, serverKey, toolKey).Result()
+	require.NoError(t, err)
+	assert.Equal(t, int64(2), found)
+}
+
 func TestLimiter_PerToolIsolation(t *testing.T) {
 	t.Parallel()
 	client, _ := newTestClient(t)
diff --git a/pkg/vmcp/config/config.go b/pkg/vmcp/config/config.go
index 7a2c699290..1ab2422591 100644
--- a/pkg/vmcp/config/config.go
+++ b/pkg/vmcp/config/config.go
@@ -13,6 +13,8 @@ import (
 	"fmt"
 	"time"
 
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
 	"github.com/stacklok/toolhive/pkg/audit"
 	thvjson "github.com/stacklok/toolhive/pkg/json"
 	"github.com/stacklok/toolhive/pkg/telemetry"
@@ -173,6 +175,84 @@ type Config struct {
 	// the THV_SESSION_REDIS_PASSWORD environment variable.
 	// +optional
 	SessionStorage *SessionStorageConfig `json:"sessionStorage,omitempty" yaml:"sessionStorage,omitempty"`
+
+	// RateLimiting defines rate limiting configuration for the Virtual MCP server.
+	// Requires Redis session storage to be configured for distributed rate limiting.
+	// +optional
+	RateLimiting *RateLimitConfig `json:"rateLimiting,omitempty" yaml:"rateLimiting,omitempty"`
+}
+
+// RateLimitConfig defines rate limiting configuration for a Virtual MCP server.
+// At least one of shared, perUser, or tools must be configured.
+//
+// +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser) || (has(self.tools) && size(self.tools) > 0)",message="at least one of shared, perUser, or tools must be configured"
+//
+// +kubebuilder:object:generate=true
+// +gendoc
+//
+//nolint:lll // kubebuilder marker exceeds line length
+type RateLimitConfig struct {
+	// Shared is a token bucket shared across all users for the entire server.
+	// +optional
+	Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"`
+
+	// PerUser is a token bucket applied independently to each authenticated user
+	// at the server level. Requires authentication to be enabled.
+	// Each unique userID creates Redis keys that expire after 2x refillPeriod.
+	// Memory formula: unique_users_per_TTL_window * (1 + num_tools_with_per_user_limits) keys.
+	// +optional
+	PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"`
+
+	// Tools defines per-tool rate limit overrides.
+	// Each entry applies additional rate limits to calls targeting a specific tool name.
+	// A request must pass both the server-level limit and the per-tool limit.
+	// +listType=map
+	// +listMapKey=name
+	// +optional
+	Tools []ToolRateLimitConfig `json:"tools,omitempty" yaml:"tools,omitempty"`
+}
+
+// RateLimitBucket defines a token bucket configuration with a maximum capacity
+// and a refill period. Used by both shared and per-user rate limits.
+// +kubebuilder:object:generate=true
+// +gendoc
+type RateLimitBucket struct {
+	// MaxTokens is the maximum number of tokens (bucket capacity).
+	// This is also the burst size: the maximum number of requests that can be served
+	// instantaneously before the bucket is depleted.
+	// +kubebuilder:validation:Required
+	// +kubebuilder:validation:Minimum=1
+	MaxTokens int32 `json:"maxTokens" yaml:"maxTokens"`
+
+	// RefillPeriod is the duration to fully refill the bucket from zero to maxTokens.
+	// The effective refill rate is maxTokens / refillPeriod tokens per second.
+	// Format: Go duration string (e.g., "1m0s", "30s", "1h0m0s").
+	// +kubebuilder:validation:Required
+	RefillPeriod metav1.Duration `json:"refillPeriod" yaml:"refillPeriod"`
+}
+
+// ToolRateLimitConfig defines rate limits for a specific tool.
+// At least one of shared or perUser must be configured.
+//
+// +kubebuilder:validation:XValidation:rule="has(self.shared) || has(self.perUser)",message="at least one of shared or perUser must be configured"
+//
+// +kubebuilder:object:generate=true
+// +gendoc
+//
+//nolint:lll // kubebuilder marker exceeds line length
+type ToolRateLimitConfig struct {
+	// Name is the MCP tool name this limit applies to.
+	// +kubebuilder:validation:Required
+	// +kubebuilder:validation:MinLength=1
+	Name string `json:"name" yaml:"name"`
+
+	// Shared token bucket for this specific tool.
+	// +optional
+	Shared *RateLimitBucket `json:"shared,omitempty" yaml:"shared,omitempty"`
+
+	// PerUser token bucket configuration for this tool.
+	// +optional
+	PerUser *RateLimitBucket `json:"perUser,omitempty" yaml:"perUser,omitempty"`
 }
 
 // IncomingAuthConfig configures client authentication to the virtual MCP server.
diff --git a/pkg/vmcp/config/zz_generated.deepcopy.go b/pkg/vmcp/config/zz_generated.deepcopy.go
index 80861bff11..5a7a60ef45 100644
--- a/pkg/vmcp/config/zz_generated.deepcopy.go
+++ b/pkg/vmcp/config/zz_generated.deepcopy.go
@@ -204,6 +204,11 @@ func (in *Config) DeepCopyInto(out *Config) {
 		*out = new(SessionStorageConfig)
 		**out = **in
 	}
+	if in.RateLimiting != nil {
+		in, out := &in.RateLimiting, &out.RateLimiting
+		*out = new(RateLimitConfig)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Config.
@@ -442,6 +447,54 @@ func (in *OutputProperty) DeepCopy() *OutputProperty {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *RateLimitBucket) DeepCopyInto(out *RateLimitBucket) {
+	*out = *in
+	out.RefillPeriod = in.RefillPeriod // repeats the copy the struct assignment above already made
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitBucket.
+func (in *RateLimitBucket) DeepCopy() *RateLimitBucket {
+	if in == nil { // a nil receiver yields nil rather than a panic
+		return nil
+	}
+	out := new(RateLimitBucket)
+	in.DeepCopyInto(out) // fills the freshly allocated value
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *RateLimitConfig) DeepCopyInto(out *RateLimitConfig) {
+	*out = *in // shallow copy; pointer and slice fields are re-allocated below
+	if in.Shared != nil {
+		in, out := &in.Shared, &out.Shared // shadows in/out with pointers to the field itself
+		*out = new(RateLimitBucket)
+		**out = **in // plain value copy of the bucket
+	}
+	if in.PerUser != nil {
+		in, out := &in.PerUser, &out.PerUser
+		*out = new(RateLimitBucket)
+		**out = **in // plain value copy of the bucket
+	}
+	if in.Tools != nil {
+		in, out := &in.Tools, &out.Tools
+		*out = make([]ToolRateLimitConfig, len(*in)) // new backing array of the same length
+		for i := range *in {
+			(*in)[i].DeepCopyInto(&(*out)[i]) // deep-copies each entry into its slot
+		}
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitConfig.
+func (in *RateLimitConfig) DeepCopy() *RateLimitConfig {
+	if in == nil { // a nil receiver yields nil rather than a panic
+		return nil
+	}
+	out := new(RateLimitConfig)
+	in.DeepCopyInto(out) // fills the freshly allocated value
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *SessionStorageConfig) DeepCopyInto(out *SessionStorageConfig) {
 	*out = *in
@@ -591,6 +644,31 @@ func (in *ToolOverride) DeepCopy() *ToolOverride {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ToolRateLimitConfig) DeepCopyInto(out *ToolRateLimitConfig) {
+	*out = *in // shallow copy; the two bucket pointers are re-allocated below
+	if in.Shared != nil {
+		in, out := &in.Shared, &out.Shared // shadows in/out with pointers to the field itself
+		*out = new(RateLimitBucket)
+		**out = **in // plain value copy of the bucket
+	}
+	if in.PerUser != nil {
+		in, out := &in.PerUser, &out.PerUser
+		*out = new(RateLimitBucket)
+		**out = **in // plain value copy of the bucket
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ToolRateLimitConfig.
+func (in *ToolRateLimitConfig) DeepCopy() *ToolRateLimitConfig {
+	if in == nil { // a nil receiver yields nil rather than a panic
+		return nil
+	}
+	out := new(ToolRateLimitConfig)
+	in.DeepCopyInto(out) // fills the freshly allocated value
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *WorkflowStepConfig) DeepCopyInto(out *WorkflowStepConfig) {
 	*out = *in
diff --git a/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go b/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go
index 12cebf47d2..61a24c86be 100644
--- a/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go
+++ b/test/e2e/thv-operator/virtualmcp/virtualmcp_circuit_breaker_test.go
@@ -10,6 +10,7 @@ import (
 
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
+	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
@@ -460,9 +461,30 @@ var _ = Describe("VirtualMCPServer Circuit Breaker Lifecycle", Ordered, func() {
 		backend.Spec.Image = images.YardstickServerImage
 		Expect(k8sClient.Update(ctx, backend)).To(Succeed())
 
+		By("Waiting for backend StatefulSet template to use the fixed image")
+		Eventually(func() error {
+			sts := &appsv1.StatefulSet{}
+			if err := k8sClient.Get(ctx, types.NamespacedName{
+				Name:      backend2Name,
+				Namespace: testNamespace,
+			}, sts); err != nil {
+				return err
+			}
+			for _, container := range sts.Spec.Template.Spec.Containers {
+				if container.Name == "mcp" {
+					if container.Image != images.YardstickServerImage {
+						return fmt.Errorf("statefulset still has image %q", container.Image)
+					}
+					return nil
+				}
+			}
+			return fmt.Errorf("mcp container not found in statefulset template")
+		}, timeout, pollingInterval).Should(Succeed())
+
 		By("Deleting stuck pods to force recreation with fixed image")
 		// Pods in ImagePullBackOff don't automatically recreate when image is fixed
-		// Delete them to force the statefulset to create new pods with the correct image
+		// Delete them after the statefulset template is updated, otherwise the old template
+		// can immediately recreate the pod with the broken image again.
 		podList := &corev1.PodList{}
 		Expect(k8sClient.List(ctx, podList,
 			client.InNamespace(testNamespace),