diff --git a/CLAUDE.md b/CLAUDE.md
index 83dcefa055..0be7ab06c3 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -313,6 +313,40 @@ For the complete documentation structure and navigation, see `docs/arch/README.m
- Do not use "Conventional Commits", e.g. starting with `feat`, `fix`, `chore`, etc.
- Use mockgen for creating mocks instead of generating mocks by hand.
+### Go Coding Style
+
+- **Prefer immutable variable assignment with anonymous functions**:
+ When you need to assign a variable based on complex conditional logic, prefer using an immediately-invoked anonymous function instead of mutating the variable across multiple branches:
+
+ ```go
+ // ✅ Good: Immutable assignment with anonymous function
+ phase := func() PhaseType {
+ if someCondition {
+ return PhaseA
+ }
+ if anotherCondition {
+ return PhaseB
+ }
+ return PhaseDefault
+ }()
+
+ // ❌ Avoid: Mutable variable across branches
+ var phase PhaseType
+ if someCondition {
+ phase = PhaseA
+ } else if anotherCondition {
+ phase = PhaseB
+ } else {
+ phase = PhaseDefault
+ }
+ ```
+
+ **Benefits**:
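+ - The variable is assigned exactly once, at its declaration, so no later branch has to modify it (Go does not enforce immutability of local variables; this is a convention that makes accidental reassignment easier to spot)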
+ - All decision logic is in one place with explicit returns
+ - Clearer logic flow and easier to understand
+ - Reduces cognitive load from tracking which branch sets which value
+
## Error Handling Guidelines
See `docs/error-handling.md` for comprehensive documentation.
diff --git a/cmd/thv-operator/api/v1alpha1/embeddingserver_types.go b/cmd/thv-operator/api/v1alpha1/embeddingserver_types.go
new file mode 100644
index 0000000000..c7909cb3f5
--- /dev/null
+++ b/cmd/thv-operator/api/v1alpha1/embeddingserver_types.go
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: Apache-2.0
+
+package v1alpha1
+
+import (
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+)
+
+// Condition types for EmbeddingServer (reuses common conditions from MCPServer)
+// ConditionImageValidated and ConditionPodTemplateValid are shared with MCPServer
+
+const (
+ // ConditionModelReady indicates whether the embedding model is downloaded and ready
+ ConditionModelReady = "ModelReady"
+
+ // ConditionVolumeReady indicates whether the PVC for model caching is ready
+ ConditionVolumeReady = "VolumeReady"
+)
+
+// Condition reasons for EmbeddingServer
+// Image validation and PodTemplate reasons are shared with MCPServer
+
+const (
+ // ConditionReasonModelDownloading indicates the model is being downloaded
+ ConditionReasonModelDownloading = "ModelDownloading"
+ // ConditionReasonModelReady indicates the model is downloaded and ready
+ ConditionReasonModelReady = "ModelReady"
+ // ConditionReasonModelFailed indicates the model download or initialization failed
+ ConditionReasonModelFailed = "ModelFailed"
+
+ // ConditionReasonVolumeCreating indicates the PVC is being created
+ ConditionReasonVolumeCreating = "VolumeCreating"
+ // ConditionReasonVolumeReady indicates the PVC is ready
+ ConditionReasonVolumeReady = "VolumeReady"
+ // ConditionReasonVolumeFailed indicates the PVC creation failed
+ ConditionReasonVolumeFailed = "VolumeFailed"
+)
+
+// EmbeddingServerSpec defines the desired state of EmbeddingServer
+type EmbeddingServerSpec struct {
+ // Model is the HuggingFace embedding model to use (e.g., "sentence-transformers/all-MiniLM-L6-v2")
+ // +kubebuilder:validation:Required
+ Model string `json:"model"`
+
+ // HFTokenSecretRef is a reference to a Kubernetes Secret containing the huggingface token.
+ // If provided, the secret value will be provided to the embedding server for authentication with huggingface.
+ // +optional
+ HFTokenSecretRef *SecretKeyRef `json:"hfTokenSecretRef,omitempty"`
+
+ // NOTE(review): Image below is marked Required but also carries a default and an
+ // `omitempty` JSON tag; confirm whether it is meant to be required or defaulted.
+
+ // Image is the container image for huggingface-embedding-inference
+ // +kubebuilder:validation:Required
+ // +kubebuilder:default="ghcr.io/huggingface/text-embeddings-inference:latest"
+ Image string `json:"image,omitempty"`
+
+ // ImagePullPolicy defines the pull policy for the container image
+ // +kubebuilder:validation:Enum=Always;Never;IfNotPresent
+ // +kubebuilder:default="IfNotPresent"
+ // +optional
+ ImagePullPolicy string `json:"imagePullPolicy,omitempty"`
+
+ // Port is the port to expose the embedding service on
+ // +kubebuilder:validation:Minimum=1
+ // +kubebuilder:validation:Maximum=65535
+ // +kubebuilder:default=8080
+ Port int32 `json:"port,omitempty"`
+
+ // Args are additional arguments to pass to the embedding inference server
+ // +optional
+ Args []string `json:"args,omitempty"`
+
+ // Env are environment variables to set in the container
+ // +optional
+ Env []EnvVar `json:"env,omitempty"`
+
+ // NOTE(review): Resources below is a non-pointer value; if ResourceRequirements is a
+ // struct type, encoding/json's `omitempty` never omits it — confirm this is acceptable.
+
+ // Resources defines compute resources for the embedding server
+ // +optional
+ Resources ResourceRequirements `json:"resources,omitempty"`
+
+ // ModelCache configures persistent storage for downloaded models
+ // When enabled, models are cached in a PVC and reused across pod restarts
+ // +optional
+ ModelCache *ModelCacheConfig `json:"modelCache,omitempty"`
+
+ // PodTemplateSpec allows customizing the pod (node selection, tolerations, etc.)
+ // This field accepts a PodTemplateSpec object as JSON/YAML.
+ // Note that to modify the specific container the embedding server runs in, you must specify
+ // the 'embedding' container name in the PodTemplateSpec.
+ // +optional
+ // +kubebuilder:pruning:PreserveUnknownFields
+ // +kubebuilder:validation:Type=object
+ PodTemplateSpec *runtime.RawExtension `json:"podTemplateSpec,omitempty"`
+
+ // ResourceOverrides allows overriding annotations and labels for resources created by the operator
+ // +optional
+ ResourceOverrides *EmbeddingResourceOverrides `json:"resourceOverrides,omitempty"`
+
+ // Replicas is the number of embedding server replicas to run
+ // +kubebuilder:validation:Minimum=1
+ // +kubebuilder:default=1
+ // +optional
+ Replicas *int32 `json:"replicas,omitempty"`
+}
+
+// ModelCacheConfig configures persistent storage for model caching
+type ModelCacheConfig struct {
+ // NOTE(review): Enabled below is a plain bool with `omitempty` and a default of true.
+ // encoding/json drops a false value under `omitempty`, so an explicit `enabled: false`
+ // written by a Go client is presumably re-defaulted to true by the API server.
+ // A *bool would distinguish "unset" from "false" — confirm and consider changing.
+
+ // Enabled controls whether model caching is enabled
+ // +kubebuilder:default=true
+ // +optional
+ Enabled bool `json:"enabled,omitempty"`
+
+ // StorageClassName is the storage class to use for the PVC
+ // If not specified, uses the cluster's default storage class
+ // +optional
+ StorageClassName *string `json:"storageClassName,omitempty"`
+
+ // Size is the size of the PVC for model caching (e.g., "10Gi")
+ // +kubebuilder:default="10Gi"
+ // +optional
+ Size string `json:"size,omitempty"`
+
+ // AccessMode is the access mode for the PVC
+ // +kubebuilder:default="ReadWriteOnce"
+ // +kubebuilder:validation:Enum=ReadWriteOnce;ReadWriteMany;ReadOnlyMany
+ // +optional
+ AccessMode string `json:"accessMode,omitempty"`
+}
+
+// EmbeddingResourceOverrides defines overrides for annotations and labels on created resources
+type EmbeddingResourceOverrides struct {
+ // StatefulSet defines overrides for the StatefulSet resource
+ // +optional
+ StatefulSet *EmbeddingStatefulSetOverrides `json:"statefulSet,omitempty"`
+
+ // Service defines overrides for the Service resource
+ // +optional
+ Service *ResourceMetadataOverrides `json:"service,omitempty"`
+
+ // PersistentVolumeClaim defines overrides for the PVC resource
+ // +optional
+ PersistentVolumeClaim *ResourceMetadataOverrides `json:"persistentVolumeClaim,omitempty"`
+}
+
+// EmbeddingStatefulSetOverrides defines overrides specific to the embedding statefulset
+type EmbeddingStatefulSetOverrides struct {
+ // ResourceMetadataOverrides is embedded to inherit annotations and labels fields
+ ResourceMetadataOverrides `json:",inline"` // nolint:revive
+
+ // PodTemplateMetadataOverrides defines metadata overrides for the pod template
+ // +optional
+ PodTemplateMetadataOverrides *ResourceMetadataOverrides `json:"podTemplateMetadataOverrides,omitempty"`
+}
+
+// EmbeddingServerStatus defines the observed state of EmbeddingServer
+type EmbeddingServerStatus struct {
+ // Conditions represent the latest available observations of the EmbeddingServer's state
+ // +optional
+ Conditions []metav1.Condition `json:"conditions,omitempty"`
+
+ // Phase is the current phase of the EmbeddingServer
+ // +optional
+ Phase EmbeddingServerPhase `json:"phase,omitempty"`
+
+ // Message provides additional information about the current phase
+ // +optional
+ Message string `json:"message,omitempty"`
+
+ // URL is the URL where the embedding service can be accessed
+ // +optional
+ URL string `json:"url,omitempty"`
+
+ // ReadyReplicas is the number of ready replicas
+ // +optional
+ ReadyReplicas int32 `json:"readyReplicas,omitempty"`
+
+ // ObservedGeneration reflects the generation most recently observed by the controller
+ // +optional
+ ObservedGeneration int64 `json:"observedGeneration,omitempty"`
+}
+
+// EmbeddingServerPhase is the phase of the EmbeddingServer
+// +kubebuilder:validation:Enum=Pending;Downloading;Running;Failed;Terminating
+type EmbeddingServerPhase string
+
+const (
+ // EmbeddingServerPhasePending means the EmbeddingServer is being created
+ EmbeddingServerPhasePending EmbeddingServerPhase = "Pending"
+
+ // EmbeddingServerPhaseDownloading means the model is being downloaded
+ EmbeddingServerPhaseDownloading EmbeddingServerPhase = "Downloading"
+
+ // EmbeddingServerPhaseRunning means the EmbeddingServer is running and ready
+ EmbeddingServerPhaseRunning EmbeddingServerPhase = "Running"
+
+ // EmbeddingServerPhaseFailed means the EmbeddingServer failed to start
+ EmbeddingServerPhaseFailed EmbeddingServerPhase = "Failed"
+
+ // EmbeddingServerPhaseTerminating means the EmbeddingServer is being deleted
+ EmbeddingServerPhaseTerminating EmbeddingServerPhase = "Terminating"
+)
+
+//+kubebuilder:object:root=true
+//+kubebuilder:subresource:status
+//+kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.phase"
+//+kubebuilder:printcolumn:name="Model",type="string",JSONPath=".spec.model"
+//+kubebuilder:printcolumn:name="Ready",type="integer",JSONPath=".status.readyReplicas"
+//+kubebuilder:printcolumn:name="URL",type="string",JSONPath=".status.url"
+//+kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
+
+// EmbeddingServer is the Schema for the embeddingservers API
+type EmbeddingServer struct {
+ metav1.TypeMeta `json:",inline"` // nolint:revive
+ metav1.ObjectMeta `json:"metadata,omitempty"`
+
+ Spec EmbeddingServerSpec `json:"spec,omitempty"`
+ Status EmbeddingServerStatus `json:"status,omitempty"`
+}
+
+//+kubebuilder:object:root=true
+
+// EmbeddingServerList contains a list of EmbeddingServer
+type EmbeddingServerList struct {
+ metav1.TypeMeta `json:",inline"` // nolint:revive
+ metav1.ListMeta `json:"metadata,omitempty"`
+ Items []EmbeddingServer `json:"items"`
+}
+
+// GetName reports the Name stored in the object's embedded ObjectMeta.
+func (e *EmbeddingServer) GetName() string {
+	return e.ObjectMeta.Name
+}
+
+// GetNamespace reports the Namespace stored in the object's embedded ObjectMeta.
+func (e *EmbeddingServer) GetNamespace() string {
+	return e.ObjectMeta.Namespace
+}
+
+// GetPort reports the port the embedding service listens on: Spec.Port when it
+// is positive, otherwise the fallback 8080.
+func (e *EmbeddingServer) GetPort() int32 {
+	if port := e.Spec.Port; port > 0 {
+		return port
+	}
+	return 8080
+}
+
+// GetReplicas reports the desired replica count: the value Spec.Replicas points
+// to when it is set, otherwise 1.
+func (e *EmbeddingServer) GetReplicas() int32 {
+	replicas := e.Spec.Replicas
+	if replicas == nil {
+		return 1
+	}
+	return *replicas
+}
+
+// IsModelCacheEnabled reports whether model caching is turned on. A missing
+// ModelCache block counts as disabled.
+func (e *EmbeddingServer) IsModelCacheEnabled() bool {
+	cache := e.Spec.ModelCache
+	return cache != nil && cache.Enabled
+}
+
+// GetImagePullPolicy reports the configured image pull policy, falling back to
+// "IfNotPresent" when Spec.ImagePullPolicy is empty.
+func (e *EmbeddingServer) GetImagePullPolicy() string {
+	policy := e.Spec.ImagePullPolicy
+	if policy == "" {
+		policy = "IfNotPresent"
+	}
+	return policy
+}
+
+// init registers the EmbeddingServer and EmbeddingServerList types with the package's SchemeBuilder.
+func init() {
+ SchemeBuilder.Register(&EmbeddingServer{}, &EmbeddingServerList{})
+}
diff --git a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go
index b9342d79db..f3da8d75a6 100644
--- a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go
+++ b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go
@@ -22,7 +22,7 @@ package v1alpha1
import (
corev1 "k8s.io/api/core/v1"
- v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
)
@@ -120,6 +120,7 @@ func (in *BackendAuthConfig) DeepCopy() *BackendAuthConfig {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *BearerTokenConfig) DeepCopyInto(out *BearerTokenConfig) {
*out = *in
if in.TokenSecretRef != nil {
@@ -194,6 +195,189 @@ func (in *ConfigMapOIDCRef) DeepCopy() *ConfigMapOIDCRef {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *EmbeddingResourceOverrides) DeepCopyInto(out *EmbeddingResourceOverrides) {
+ *out = *in
+ if in.StatefulSet != nil {
+ in, out := &in.StatefulSet, &out.StatefulSet
+ *out = new(EmbeddingStatefulSetOverrides)
+ (*in).DeepCopyInto(*out)
+ }
+ if in.Service != nil {
+ in, out := &in.Service, &out.Service
+ *out = new(ResourceMetadataOverrides)
+ (*in).DeepCopyInto(*out)
+ }
+ if in.PersistentVolumeClaim != nil {
+ in, out := &in.PersistentVolumeClaim, &out.PersistentVolumeClaim
+ *out = new(ResourceMetadataOverrides)
+ (*in).DeepCopyInto(*out)
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingResourceOverrides.
+func (in *EmbeddingResourceOverrides) DeepCopy() *EmbeddingResourceOverrides {
+ if in == nil {
+ return nil
+ }
+ out := new(EmbeddingResourceOverrides)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *EmbeddingServer) DeepCopyInto(out *EmbeddingServer) {
+ *out = *in
+ out.TypeMeta = in.TypeMeta
+ in.ObjectMeta.DeepCopyInto(&out.ObjectMeta)
+ in.Spec.DeepCopyInto(&out.Spec)
+ in.Status.DeepCopyInto(&out.Status)
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingServer.
+func (in *EmbeddingServer) DeepCopy() *EmbeddingServer {
+ if in == nil {
+ return nil
+ }
+ out := new(EmbeddingServer)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *EmbeddingServer) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *EmbeddingServerList) DeepCopyInto(out *EmbeddingServerList) {
+ *out = *in
+ out.TypeMeta = in.TypeMeta
+ in.ListMeta.DeepCopyInto(&out.ListMeta)
+ if in.Items != nil {
+ in, out := &in.Items, &out.Items
+ *out = make([]EmbeddingServer, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingServerList.
+func (in *EmbeddingServerList) DeepCopy() *EmbeddingServerList {
+ if in == nil {
+ return nil
+ }
+ out := new(EmbeddingServerList)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *EmbeddingServerList) DeepCopyObject() runtime.Object {
+ if c := in.DeepCopy(); c != nil {
+ return c
+ }
+ return nil
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *EmbeddingServerSpec) DeepCopyInto(out *EmbeddingServerSpec) {
+ *out = *in
+ if in.HFTokenSecretRef != nil {
+ in, out := &in.HFTokenSecretRef, &out.HFTokenSecretRef
+ *out = new(SecretKeyRef)
+ **out = **in
+ }
+ if in.Args != nil {
+ in, out := &in.Args, &out.Args
+ *out = make([]string, len(*in))
+ copy(*out, *in)
+ }
+ if in.Env != nil {
+ in, out := &in.Env, &out.Env
+ *out = make([]EnvVar, len(*in))
+ copy(*out, *in)
+ }
+ out.Resources = in.Resources
+ if in.ModelCache != nil {
+ in, out := &in.ModelCache, &out.ModelCache
+ *out = new(ModelCacheConfig)
+ (*in).DeepCopyInto(*out)
+ }
+ if in.PodTemplateSpec != nil {
+ in, out := &in.PodTemplateSpec, &out.PodTemplateSpec
+ *out = new(runtime.RawExtension)
+ (*in).DeepCopyInto(*out)
+ }
+ if in.ResourceOverrides != nil {
+ in, out := &in.ResourceOverrides, &out.ResourceOverrides
+ *out = new(EmbeddingResourceOverrides)
+ (*in).DeepCopyInto(*out)
+ }
+ if in.Replicas != nil {
+ in, out := &in.Replicas, &out.Replicas
+ *out = new(int32)
+ **out = **in
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingServerSpec.
+func (in *EmbeddingServerSpec) DeepCopy() *EmbeddingServerSpec {
+ if in == nil {
+ return nil
+ }
+ out := new(EmbeddingServerSpec)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *EmbeddingServerStatus) DeepCopyInto(out *EmbeddingServerStatus) {
+ *out = *in
+ if in.Conditions != nil {
+ in, out := &in.Conditions, &out.Conditions
+ *out = make([]v1.Condition, len(*in))
+ for i := range *in {
+ (*in)[i].DeepCopyInto(&(*out)[i])
+ }
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingServerStatus.
+func (in *EmbeddingServerStatus) DeepCopy() *EmbeddingServerStatus {
+ if in == nil {
+ return nil
+ }
+ out := new(EmbeddingServerStatus)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *EmbeddingStatefulSetOverrides) DeepCopyInto(out *EmbeddingStatefulSetOverrides) {
+ *out = *in
+ in.ResourceMetadataOverrides.DeepCopyInto(&out.ResourceMetadataOverrides)
+ if in.PodTemplateMetadataOverrides != nil {
+ in, out := &in.PodTemplateMetadataOverrides, &out.PodTemplateMetadataOverrides
+ *out = new(ResourceMetadataOverrides)
+ (*in).DeepCopyInto(*out)
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingStatefulSetOverrides.
+func (in *EmbeddingStatefulSetOverrides) DeepCopy() *EmbeddingStatefulSetOverrides {
+ if in == nil {
+ return nil
+ }
+ out := new(EmbeddingStatefulSetOverrides)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *EnvVar) DeepCopyInto(out *EnvVar) {
*out = *in
@@ -1252,6 +1436,26 @@ func (in *MCPToolConfigStatus) DeepCopy() *MCPToolConfigStatus {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *ModelCacheConfig) DeepCopyInto(out *ModelCacheConfig) {
+ *out = *in
+ if in.StorageClassName != nil {
+ in, out := &in.StorageClassName, &out.StorageClassName
+ *out = new(string)
+ **out = **in
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelCacheConfig.
+func (in *ModelCacheConfig) DeepCopy() *ModelCacheConfig {
+ if in == nil {
+ return nil
+ }
+ out := new(ModelCacheConfig)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NameFilter) DeepCopyInto(out *NameFilter) {
*out = *in
diff --git a/cmd/thv-operator/controllers/embeddingserver_controller.go b/cmd/thv-operator/controllers/embeddingserver_controller.go
new file mode 100644
index 0000000000..a17f79197c
--- /dev/null
+++ b/cmd/thv-operator/controllers/embeddingserver_controller.go
@@ -0,0 +1,1069 @@
+// SPDX-License-Identifier: Apache-2.0
+
+// Package controllers contains the reconciliation logic for the EmbeddingServer custom resource.
+// It handles the creation, update, and deletion of HuggingFace embedding inference servers in Kubernetes.
+package controllers
+
+import (
+ "context"
+ "fmt"
+ "maps"
+ "reflect"
+ "time"
+
+ appsv1 "k8s.io/api/apps/v1"
+ corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/errors"
+ "k8s.io/apimachinery/pkg/api/meta"
+ "k8s.io/apimachinery/pkg/api/resource"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/types"
+ "k8s.io/apimachinery/pkg/util/intstr"
+ "k8s.io/client-go/tools/record"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+ "sigs.k8s.io/controller-runtime/pkg/log"
+
+ mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1"
+ ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil"
+ "github.com/stacklok/toolhive/cmd/thv-operator/pkg/validation"
+)
+
+// EmbeddingServerReconciler reconciles a EmbeddingServer object
+type EmbeddingServerReconciler struct {
+ // Client is embedded, so calls such as r.Get, r.Create, r.Update and r.Status()
+ // in this controller resolve to the client's methods.
+ client.Client
+ Scheme *runtime.Scheme
+ Recorder record.EventRecorder
+ PlatformDetector *ctrlutil.SharedPlatformDetector
+ ImageValidation validation.ImageValidation
+}
+
+const (
+ // embeddingContainerName is the name of the embedding container used in pod templates
+ embeddingContainerName = "embedding"
+
+ // embeddingFinalizerName is the finalizer name for EmbeddingServer resources
+ embeddingFinalizerName = "embeddingserver.toolhive.stacklok.dev/finalizer"
+
+ // modelCacheMountPath is the mount path for the model cache volume
+ modelCacheMountPath = "/data"
+)
+
+//+kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=embeddingservers,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=embeddingservers/status,verbs=get;update;patch
+//+kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=embeddingservers/finalizers,verbs=update
+//+kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch
+//+kubebuilder:rbac:groups="",resources=events,verbs=create;patch
+
+// Reconcile is part of the main kubernetes reconciliation loop which aims to
+// move the current state of the cluster closer to the desired state.
+//
+// Steps, in order:
+//  1. Fetch the EmbeddingServer named by req; NotFound ends the reconcile without error.
+//  2. Handle deletion. This runs before validation so that an object whose image
+//     fails validation can still have its finalizer removed, instead of being
+//     requeued every five minutes while it is terminating.
+//  3. Run the early validations (PodTemplateSpec, image).
+//  4. Ensure the finalizer, the StatefulSet and the Service.
+//  5. Update the status and return the shortest requeue interval requested in step 4.
+//
+//nolint:gocyclo // Reconciliation logic complexity is acceptable
+func (r *EmbeddingServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	ctxLogger := log.FromContext(ctx)
+
+	// Fetch the EmbeddingServer instance
+	embedding := &mcpv1alpha1.EmbeddingServer{}
+	if err := r.Get(ctx, req.NamespacedName, embedding); err != nil {
+		if errors.IsNotFound(err) {
+			ctxLogger.Info("EmbeddingServer resource not found. Ignoring since object must be deleted")
+			return ctrl.Result{}, nil
+		}
+		ctxLogger.Error(err, "Failed to get EmbeddingServer")
+		return ctrl.Result{}, err
+	}
+
+	// Handle deletion before anything else: cleanup must not depend on the spec
+	// being valid, otherwise the finalizer would block deletion indefinitely.
+	if result, done, err := r.handleDeletion(ctx, embedding); done {
+		return result, err
+	}
+
+	// Perform early validations.
+	// NOTE(review): performValidations returns an empty Result and a nil error when
+	// the PodTemplateSpec is invalid, so that case falls through to the steps
+	// below — confirm whether reconciliation should stop there instead.
+	if result, err := r.performValidations(ctx, embedding); err != nil || result.RequeueAfter > 0 {
+		return result, err
+	}
+
+	// Add finalizer if needed
+	if result, done, err := r.ensureFinalizer(ctx, embedding); done {
+		return result, err
+	}
+
+	// Track if we need to requeue after status update
+	var requeueResult ctrl.Result
+
+	// Ensure statefulset exists and is up to date
+	stsResult, err := r.ensureStatefulSet(ctx, embedding)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+	if stsResult.RequeueAfter > 0 {
+		requeueResult = stsResult
+	}
+
+	// Ensure service exists
+	svcResult, err := r.ensureService(ctx, embedding)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+	// If both steps asked for a requeue, keep the shorter duration.
+	if svcResult.RequeueAfter > 0 &&
+		(requeueResult.RequeueAfter == 0 || svcResult.RequeueAfter < requeueResult.RequeueAfter) {
+		requeueResult = svcResult
+	}
+
+	// Always update the EmbeddingServer status before returning
+	if err := r.updateEmbeddingServerStatus(ctx, embedding); err != nil {
+		ctxLogger.Error(err, "Failed to update EmbeddingServer status")
+		return ctrl.Result{}, err
+	}
+
+	return requeueResult, nil
+}
+
+// performValidations performs all early validations for the EmbeddingServer
+//
+//nolint:unparam // error return kept for consistency with reconciler pattern
+func (r *EmbeddingServerReconciler) performValidations(
+ ctx context.Context,
+ embedding *mcpv1alpha1.EmbeddingServer,
+) (ctrl.Result, error) {
+ ctxLogger := log.FromContext(ctx)
+
+ // Validate PodTemplateSpec early
+ if !r.validateAndUpdatePodTemplateStatus(ctx, embedding) {
+ // Status fields were set by validateAndUpdatePodTemplateStatus, now update
+ if err := r.Status().Update(ctx, embedding); err != nil {
+ ctxLogger.Error(err, "Failed to update EmbeddingServer status after PodTemplateSpec validation failure")
+ return ctrl.Result{}, err
+ }
+ return ctrl.Result{}, nil
+ }
+
+ // Validate image
+ if err := r.validateImage(ctx, embedding); err != nil {
+ // Status fields were set by validateImage, now update
+ if statusErr := r.Status().Update(ctx, embedding); statusErr != nil {
+ ctxLogger.Error(statusErr, "Failed to update EmbeddingServer status after image validation failure")
+ return ctrl.Result{}, statusErr
+ }
+ // We requeue to retry validation after image issues are resolved
+ ctxLogger.Error(err, "Image validation failed, will retry",
+ "image", embedding.Spec.Image,
+ "requeueAfter", 5*time.Minute)
+ return ctrl.Result{RequeueAfter: 5 * time.Minute}, nil
+ }
+
+ return ctrl.Result{}, nil
+}
+
+// handleDeletion runs the finalizer logic when the EmbeddingServer is being deleted.
+//
+// The boolean result is false when the object has no deletion timestamp (the
+// caller should continue reconciling) and true otherwise (the caller should
+// return the accompanying Result and error).
+//
+//nolint:unparam // ctrl.Result return kept for consistency with reconciler pattern
+func (r *EmbeddingServerReconciler) handleDeletion(
+	ctx context.Context,
+	embedding *mcpv1alpha1.EmbeddingServer,
+) (ctrl.Result, bool, error) {
+	// Not being deleted: nothing to do here.
+	if embedding.GetDeletionTimestamp() == nil {
+		return ctrl.Result{}, false, nil
+	}
+
+	// Finalizer already removed: deletion needs no further work from us.
+	if !controllerutil.ContainsFinalizer(embedding, embeddingFinalizerName) {
+		return ctrl.Result{}, true, nil
+	}
+
+	// Clean up, then drop the finalizer and persist the change.
+	r.finalizeEmbeddingServer(ctx, embedding)
+	controllerutil.RemoveFinalizer(embedding, embeddingFinalizerName)
+	return ctrl.Result{}, true, r.Update(ctx, embedding)
+}
+
+// ensureFinalizer adds the EmbeddingServer finalizer when it is missing and
+// persists the change. The boolean result is true only when that update fails,
+// in which case the caller should return the accompanying error.
+//
+//nolint:unparam // ctrl.Result return kept for consistency with reconciler pattern
+func (r *EmbeddingServerReconciler) ensureFinalizer(
+	ctx context.Context,
+	embedding *mcpv1alpha1.EmbeddingServer,
+) (ctrl.Result, bool, error) {
+	if !controllerutil.ContainsFinalizer(embedding, embeddingFinalizerName) {
+		controllerutil.AddFinalizer(embedding, embeddingFinalizerName)
+		if updateErr := r.Update(ctx, embedding); updateErr != nil {
+			return ctrl.Result{}, true, updateErr
+		}
+	}
+	return ctrl.Result{}, false, nil
+}
+
+// ensureStatefulSet ensures the statefulset exists and is up to date.
+//
+// It creates the StatefulSet when none exists under the EmbeddingServer's name
+// and namespace. Otherwise it reconciles the replica count and then, if
+// statefulSetNeedsUpdate reports drift, the spec, annotations and labels.
+// Either update returns a one-second requeue; creation and the no-op case
+// return an empty Result.
+func (r *EmbeddingServerReconciler) ensureStatefulSet(
+	ctx context.Context,
+	embedding *mcpv1alpha1.EmbeddingServer,
+) (ctrl.Result, error) {
+	ctxLogger := log.FromContext(ctx)
+
+	statefulSet := &appsv1.StatefulSet{}
+	err := r.Get(ctx, types.NamespacedName{Name: embedding.Name, Namespace: embedding.Namespace}, statefulSet)
+	if errors.IsNotFound(err) {
+		sts := r.statefulSetForEmbedding(ctx, embedding)
+		if sts == nil {
+			ctxLogger.Error(nil, "Failed to create StatefulSet object")
+			return ctrl.Result{}, fmt.Errorf("failed to create StatefulSet object")
+		}
+		ctxLogger.Info("Creating a new StatefulSet", "StatefulSet.Namespace", sts.Namespace, "StatefulSet.Name", sts.Name)
+		if err := r.Create(ctx, sts); err != nil {
+			ctxLogger.Error(err, "Failed to create new StatefulSet", "StatefulSet.Namespace", sts.Namespace, "StatefulSet.Name", sts.Name)
+			return ctrl.Result{}, err
+		}
+		// StatefulSet created successfully, continue to ensure service
+		return ctrl.Result{}, nil
+	}
+	if err != nil {
+		ctxLogger.Error(err, "Failed to get StatefulSet")
+		return ctrl.Result{}, err
+	}
+
+	// Ensure the statefulset size matches the spec. Spec.Replicas is a pointer and
+	// may be nil, so treat nil as "differs" instead of dereferencing it.
+	desiredReplicas := embedding.GetReplicas()
+	if statefulSet.Spec.Replicas == nil || *statefulSet.Spec.Replicas != desiredReplicas {
+		statefulSet.Spec.Replicas = &desiredReplicas
+		if err := r.Update(ctx, statefulSet); err != nil {
+			ctxLogger.Error(err, "Failed to update StatefulSet replicas",
+				"StatefulSet.Namespace", statefulSet.Namespace,
+				"StatefulSet.Name", statefulSet.Name)
+			return ctrl.Result{}, err
+		}
+		return ctrl.Result{RequeueAfter: time.Second}, nil
+	}
+
+	// Check if the statefulset spec changed
+	if r.statefulSetNeedsUpdate(ctx, statefulSet, embedding) {
+		newStatefulSet := r.statefulSetForEmbedding(ctx, embedding)
+		// The builder can return nil (the create path above guards against it too).
+		if newStatefulSet == nil {
+			ctxLogger.Error(nil, "Failed to create StatefulSet object")
+			return ctrl.Result{}, fmt.Errorf("failed to create StatefulSet object")
+		}
+		statefulSet.Spec = newStatefulSet.Spec
+		statefulSet.Annotations = newStatefulSet.Annotations
+		statefulSet.Labels = newStatefulSet.Labels
+		if err := r.Update(ctx, statefulSet); err != nil {
+			ctxLogger.Error(err, "Failed to update StatefulSet",
+				"StatefulSet.Namespace", statefulSet.Namespace,
+				"StatefulSet.Name", statefulSet.Name)
+			return ctrl.Result{}, err
+		}
+		return ctrl.Result{RequeueAfter: time.Second}, nil
+	}
+
+	return ctrl.Result{}, nil
+}
+
+// ensureService ensures the service exists and is up to date.
+//
+// It creates the Service when none exists under the EmbeddingServer's name and
+// namespace. Otherwise, if serviceNeedsUpdate reports drift, it copies the
+// desired ports, labels and annotations onto the existing Service and updates
+// it, returning a one-second requeue. Creation and the no-op case return an
+// empty Result.
+//
+//nolint:unparam // ctrl.Result return kept for consistency with reconciler pattern
+func (r *EmbeddingServerReconciler) ensureService(
+	ctx context.Context,
+	embedding *mcpv1alpha1.EmbeddingServer,
+) (ctrl.Result, error) {
+	ctxLogger := log.FromContext(ctx)
+
+	service := &corev1.Service{}
+	err := r.Get(ctx, types.NamespacedName{Name: embedding.Name, Namespace: embedding.Namespace}, service)
+	if errors.IsNotFound(err) {
+		svc := r.serviceForEmbedding(ctx, embedding)
+		if svc == nil {
+			ctxLogger.Error(nil, "Failed to create Service object")
+			return ctrl.Result{}, fmt.Errorf("failed to create Service object")
+		}
+		ctxLogger.Info("Creating a new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name)
+		if err := r.Create(ctx, svc); err != nil {
+			ctxLogger.Error(err, "Failed to create new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name)
+			return ctrl.Result{}, err
+		}
+		// Service created successfully, continue to update status
+		return ctrl.Result{}, nil
+	}
+	if err != nil {
+		ctxLogger.Error(err, "Failed to get Service")
+		return ctrl.Result{}, err
+	}
+
+	// Nothing to do when the live Service already matches the spec.
+	if !r.serviceNeedsUpdate(service, embedding) {
+		return ctrl.Result{}, nil
+	}
+
+	desiredService := r.serviceForEmbedding(ctx, embedding)
+	// The builder can return nil (the create path above guards against it too).
+	if desiredService == nil {
+		ctxLogger.Error(nil, "Failed to create Service object")
+		return ctrl.Result{}, fmt.Errorf("failed to create Service object")
+	}
+
+	// Only ports, labels and annotations are copied; the rest of the live spec
+	// (including the immutable ClusterIP) is left as is.
+	service.Spec.Ports = desiredService.Spec.Ports
+	service.Labels = desiredService.Labels
+	service.Annotations = desiredService.Annotations
+	if err := r.Update(ctx, service); err != nil {
+		ctxLogger.Error(err, "Failed to update Service",
+			"Service.Namespace", service.Namespace,
+			"Service.Name", service.Name)
+		return ctrl.Result{}, err
+	}
+	ctxLogger.Info("Updated Service", "Service.Namespace", service.Namespace, "Service.Name", service.Name)
+	return ctrl.Result{RequeueAfter: time.Second}, nil
+}
+
+// serviceNeedsUpdate reports whether the live Service has drifted from the
+// EmbeddingServer spec. Drift means either that a port named "http" carries a
+// different port number than GetPort, or that an annotation or label requested
+// through Spec.ResourceOverrides.Service is missing or has a different value.
+func (*EmbeddingServerReconciler) serviceNeedsUpdate(
+	service *corev1.Service,
+	embedding *mcpv1alpha1.EmbeddingServer,
+) bool {
+	wantPort := embedding.GetPort()
+	for i := range service.Spec.Ports {
+		svcPort := &service.Spec.Ports[i]
+		if svcPort.Name == "http" && svcPort.Port != wantPort {
+			return true
+		}
+	}
+
+	// Collect the metadata the overrides ask for; copying from a nil map is a no-op.
+	wantAnnotations := map[string]string{}
+	wantLabels := map[string]string{}
+	if overrides := embedding.Spec.ResourceOverrides; overrides != nil && overrides.Service != nil {
+		maps.Copy(wantAnnotations, overrides.Service.Annotations)
+		maps.Copy(wantLabels, overrides.Service.Labels)
+	}
+
+	// lacks reports whether any wanted key is absent from have or maps to another value.
+	lacks := func(have, want map[string]string) bool {
+		for k, v := range want {
+			if have[k] != v {
+				return true
+			}
+		}
+		return false
+	}
+
+	return lacks(service.Annotations, wantAnnotations) || lacks(service.Labels, wantLabels)
+}
+
+ // validateAndUpdatePodTemplateStatus checks that Spec.PodTemplateSpec (when
+ // present) can be parsed by the PodTemplateSpec builder and records the outcome
+ // in the PodTemplateValid condition on the in-memory object. It returns true
+ // when the template is absent or valid and false when parsing fails; in the
+ // failure case it also sets the Failed phase, a status message and emits a
+ // warning event. The status is only mutated in memory here; no update call is
+ // issued by this method.
+ func (r *EmbeddingServerReconciler) validateAndUpdatePodTemplateStatus(
+ 	ctx context.Context,
+ 	embedding *mcpv1alpha1.EmbeddingServer,
+ ) bool {
+ 	// recordCondition writes the PodTemplateValid condition for the current generation.
+ 	recordCondition := func(status metav1.ConditionStatus, reason, message string) {
+ 		meta.SetStatusCondition(&embedding.Status.Conditions, metav1.Condition{
+ 			Type:               mcpv1alpha1.ConditionPodTemplateValid,
+ 			Status:             status,
+ 			Reason:             reason,
+ 			Message:            message,
+ 			ObservedGeneration: embedding.Generation,
+ 		})
+ 	}
+
+ 	rawTemplate := embedding.Spec.PodTemplateSpec
+ 	if rawTemplate == nil {
+ 		recordCondition(metav1.ConditionTrue, mcpv1alpha1.ConditionReasonPodTemplateValid, "No PodTemplateSpec provided")
+ 		return true
+ 	}
+
+ 	// Only the parse result matters here; the builder itself is discarded.
+ 	if _, err := ctrlutil.NewPodTemplateSpecBuilder(rawTemplate, embeddingContainerName); err != nil {
+ 		log.FromContext(ctx).Error(err, "Invalid PodTemplateSpec")
+ 		embedding.Status.Phase = mcpv1alpha1.EmbeddingServerPhaseFailed
+ 		embedding.Status.Message = fmt.Sprintf("Invalid PodTemplateSpec: %v", err)
+ 		recordCondition(
+ 			metav1.ConditionFalse,
+ 			mcpv1alpha1.ConditionReasonPodTemplateInvalid,
+ 			fmt.Sprintf("Invalid PodTemplateSpec: %v", err),
+ 		)
+ 		r.Recorder.Event(embedding, corev1.EventTypeWarning, "ValidationFailed", fmt.Sprintf("Invalid PodTemplateSpec: %v", err))
+ 		return false
+ 	}
+
+ 	recordCondition(metav1.ConditionTrue, mcpv1alpha1.ConditionReasonPodTemplateValid, "PodTemplateSpec is valid")
+ 	return true
+ }
+
+ // validateImage runs the configured image validator against Spec.Image and
+ // records the outcome in the ImageValidated condition on the in-memory object.
+ // The status is only mutated in memory here; no update call is issued by this
+ // method.
+ //
+ // Outcomes:
+ //   - validation.ErrImageNotChecked: condition True (skipped), returns nil.
+ //   - validation.ErrImageInvalid: phase Failed, condition False, returns the error.
+ //   - any other error: condition False (validation error), returns the error.
+ //   - nil: condition True (success), returns nil.
+ //
+ // Every condition carries ObservedGeneration so it is attributable to the spec
+ // generation it was computed for, matching the PodTemplateValid condition set
+ // by validateAndUpdatePodTemplateStatus.
+ func (r *EmbeddingServerReconciler) validateImage(ctx context.Context, embedding *mcpv1alpha1.EmbeddingServer) error {
+ 	ctxLogger := log.FromContext(ctx)
+
+ 	// setCondition writes the ImageValidated condition for the current generation.
+ 	setCondition := func(status metav1.ConditionStatus, reason, message string) {
+ 		meta.SetStatusCondition(&embedding.Status.Conditions, metav1.Condition{
+ 			Type:               mcpv1alpha1.ConditionImageValidated,
+ 			Status:             status,
+ 			Reason:             reason,
+ 			Message:            message,
+ 			ObservedGeneration: embedding.Generation,
+ 		})
+ 	}
+
+ 	imageValidator := validation.NewImageValidator(r.Client, embedding.Namespace, r.ImageValidation)
+ 	err := imageValidator.ValidateImage(ctx, embedding.Spec.Image, embedding.ObjectMeta)
+
+ 	switch {
+ 	case err == nil:
+ 		ctxLogger.Info("Image validation passed", "image", embedding.Spec.Image)
+ 		setCondition(metav1.ConditionTrue, mcpv1alpha1.ConditionReasonImageValidationSuccess, "Image validation passed")
+ 		return nil
+
+ 	case err == validation.ErrImageNotChecked:
+ 		// Not an error for the reconciler: nothing enforces validation.
+ 		ctxLogger.Info("Image validation skipped - no enforcement configured")
+ 		setCondition(
+ 			metav1.ConditionTrue,
+ 			mcpv1alpha1.ConditionReasonImageValidationSkipped,
+ 			"Image validation was not performed (no enforcement configured)",
+ 		)
+ 		return nil
+
+ 	case err == validation.ErrImageInvalid:
+ 		ctxLogger.Error(err, "EmbeddingServer image validation failed", "image", embedding.Spec.Image)
+ 		embedding.Status.Phase = mcpv1alpha1.EmbeddingServerPhaseFailed
+ 		embedding.Status.Message = err.Error()
+ 		setCondition(metav1.ConditionFalse, mcpv1alpha1.ConditionReasonImageValidationFailed, err.Error())
+ 		return err
+
+ 	default:
+ 		// The validator itself failed; the phase is left untouched in this case.
+ 		ctxLogger.Error(err, "EmbeddingServer image validation system error", "image", embedding.Spec.Image)
+ 		setCondition(
+ 			metav1.ConditionFalse,
+ 			mcpv1alpha1.ConditionReasonImageValidationError,
+ 			fmt.Sprintf("Error checking image validity: %v", err),
+ 		)
+ 		return err
+ 	}
+ }
+
+ // statefulSetForEmbedding builds the desired StatefulSet for the embedding
+ // server: container, pod template, StatefulSet-level overrides, optional
+ // volumeClaimTemplates for the model cache, and a controller owner reference.
+ // It returns nil when the owner reference cannot be set.
+ //
+ // The pod template receives its own copy of the base labels. The pod-template
+ // metadata overrides are copied into the template's label map by
+ // applyStatefulSetOverrides; if that map were shared with the selector, those
+ // override labels would also end up in Spec.Selector.MatchLabels and in the
+ // StatefulSet's own labels.
+ func (r *EmbeddingServerReconciler) statefulSetForEmbedding(
+ 	_ context.Context,
+ 	embedding *mcpv1alpha1.EmbeddingServer,
+ ) *appsv1.StatefulSet {
+ 	replicas := embedding.GetReplicas()
+ 	labels := r.labelsForEmbedding(embedding)
+
+ 	// Build container
+ 	container := r.buildEmbeddingContainer(embedding)
+
+ 	// Build pod template from a private copy of the labels so later mutation of
+ 	// the template metadata cannot alter the selector labels.
+ 	podLabels := make(map[string]string, len(labels))
+ 	maps.Copy(podLabels, labels)
+ 	podTemplate := r.buildPodTemplate(embedding, podLabels, container)
+
+ 	// Apply statefulset overrides (may add annotations/labels to podTemplate)
+ 	stsAnnotations, stsLabels := r.applyStatefulSetOverrides(embedding, &podTemplate)
+
+ 	// Merge ResourceOverrides labels into base labels
+ 	finalLabels := make(map[string]string, len(labels)+len(stsLabels))
+ 	maps.Copy(finalLabels, labels)
+ 	maps.Copy(finalLabels, stsLabels)
+
+ 	statefulSet := &appsv1.StatefulSet{
+ 		ObjectMeta: metav1.ObjectMeta{
+ 			Name:        embedding.Name,
+ 			Namespace:   embedding.Namespace,
+ 			Labels:      finalLabels,
+ 			Annotations: stsAnnotations,
+ 		},
+ 		Spec: appsv1.StatefulSetSpec{
+ 			Replicas:    &replicas,
+ 			ServiceName: embedding.Name, // Required for StatefulSet
+ 			Selector: &metav1.LabelSelector{
+ 				MatchLabels: labels,
+ 			},
+ 			Template: podTemplate,
+ 		},
+ 	}
+
+ 	// Add volumeClaimTemplates if model caching is enabled
+ 	if embedding.IsModelCacheEnabled() {
+ 		statefulSet.Spec.VolumeClaimTemplates = r.buildVolumeClaimTemplates(embedding)
+ 	}
+
+ 	if err := ctrl.SetControllerReference(embedding, statefulSet, r.Scheme); err != nil {
+ 		return nil
+ 	}
+ 	return statefulSet
+ }
+
+ // buildVolumeClaimTemplates returns the single "model-cache" claim template
+ // used by the StatefulSet. Size defaults to "10Gi" and the access mode to
+ // ReadWriteOnce when Spec.ModelCache leaves them empty; the storage class is
+ // taken from Spec.ModelCache as-is. Annotations and labels from
+ // Spec.ResourceOverrides.PersistentVolumeClaim are layered on top.
+ //
+ // Spec.ModelCache is dereferenced without a nil check, and the size goes
+ // through resource.MustParse, which panics on an unparsable quantity.
+ func (r *EmbeddingServerReconciler) buildVolumeClaimTemplates(
+ 	embedding *mcpv1alpha1.EmbeddingServer,
+ ) []corev1.PersistentVolumeClaim {
+ 	cache := embedding.Spec.ModelCache
+
+ 	storageSize := cache.Size
+ 	if storageSize == "" {
+ 		storageSize = "10Gi"
+ 	}
+
+ 	mode := corev1.PersistentVolumeAccessMode(cache.AccessMode)
+ 	if mode == "" {
+ 		mode = corev1.ReadWriteOnce
+ 	}
+
+ 	claim := corev1.PersistentVolumeClaim{
+ 		ObjectMeta: metav1.ObjectMeta{
+ 			Name:   "model-cache",
+ 			Labels: r.labelsForEmbedding(embedding),
+ 		},
+ 		Spec: corev1.PersistentVolumeClaimSpec{
+ 			AccessModes:      []corev1.PersistentVolumeAccessMode{mode},
+ 			StorageClassName: cache.StorageClassName,
+ 			Resources: corev1.VolumeResourceRequirements{
+ 				Requests: corev1.ResourceList{
+ 					corev1.ResourceStorage: resource.MustParse(storageSize),
+ 				},
+ 			},
+ 		},
+ 	}
+
+ 	// Apply resource overrides if specified
+ 	overrides := embedding.Spec.ResourceOverrides
+ 	if overrides == nil || overrides.PersistentVolumeClaim == nil {
+ 		return []corev1.PersistentVolumeClaim{claim}
+ 	}
+
+ 	if extra := overrides.PersistentVolumeClaim.Annotations; extra != nil {
+ 		// The claim starts without annotations, so the map is created on demand.
+ 		claim.Annotations = make(map[string]string)
+ 		maps.Copy(claim.Annotations, extra)
+ 	}
+ 	if extra := overrides.PersistentVolumeClaim.Labels; extra != nil {
+ 		maps.Copy(claim.Labels, extra)
+ 	}
+
+ 	return []corev1.PersistentVolumeClaim{claim}
+ }
+
+// buildEmbeddingContainer builds the container spec for the embedding server
+func (r *EmbeddingServerReconciler) buildEmbeddingContainer(embedding *mcpv1alpha1.EmbeddingServer) corev1.Container {
+ // Build container args
+ args := []string{
+ "--model-id", embedding.Spec.Model,
+ "--port", fmt.Sprintf("%d", embedding.GetPort()),
+ }
+ args = append(args, embedding.Spec.Args...)
+
+ // Build environment variables
+ envVars := r.buildEnvVars(embedding)
+
+ // Build container
+ container := corev1.Container{
+ Name: embeddingContainerName,
+ Image: embedding.Spec.Image,
+ Args: args,
+ Env: envVars,
+ ImagePullPolicy: corev1.PullPolicy(embedding.GetImagePullPolicy()),
+ Ports: []corev1.ContainerPort{
+ {
+ Name: "http",
+ ContainerPort: embedding.GetPort(),
+ Protocol: corev1.ProtocolTCP,
+ },
+ },
+ LivenessProbe: r.buildLivenessProbe(embedding),
+ ReadinessProbe: r.buildReadinessProbe(embedding),
+ }
+
+ // Add volume mount and HF_HOME for model cache if enabled
+ if embedding.IsModelCacheEnabled() {
+ container.VolumeMounts = []corev1.VolumeMount{
+ {
+ Name: "model-cache",
+ MountPath: modelCacheMountPath,
+ },
+ }
+ container.Env = append(container.Env, corev1.EnvVar{
+ Name: "HF_HOME",
+ Value: modelCacheMountPath,
+ })
+ }
+
+ // Add resources if specified
+ r.applyResourceRequirements(embedding, &container)
+
+ return container
+}
+
+ // buildEnvVars returns the container environment in a fixed order: MODEL_ID,
+ // then HF_TOKEN sourced from the referenced secret key (only when
+ // Spec.HFTokenSecretRef is set), then every entry of Spec.Env as a plain
+ // name/value pair.
+ func (*EmbeddingServerReconciler) buildEnvVars(embedding *mcpv1alpha1.EmbeddingServer) []corev1.EnvVar {
+ 	spec := &embedding.Spec
+
+ 	// Room for MODEL_ID, an optional HF_TOKEN and all user-provided variables.
+ 	result := make([]corev1.EnvVar, 0, 2+len(spec.Env))
+ 	result = append(result, corev1.EnvVar{Name: "MODEL_ID", Value: spec.Model})
+
+ 	// Add HuggingFace token from secret if provided
+ 	if ref := spec.HFTokenSecretRef; ref != nil {
+ 		selector := &corev1.SecretKeySelector{
+ 			LocalObjectReference: corev1.LocalObjectReference{Name: ref.Name},
+ 			Key:                  ref.Key,
+ 		}
+ 		result = append(result, corev1.EnvVar{
+ 			Name:      "HF_TOKEN",
+ 			ValueFrom: &corev1.EnvVarSource{SecretKeyRef: selector},
+ 		})
+ 	}
+
+ 	for i := range spec.Env {
+ 		result = append(result, corev1.EnvVar{
+ 			Name:  spec.Env[i].Name,
+ 			Value: spec.Env[i].Value,
+ 		})
+ 	}
+ 	return result
+ }
+
+ // buildLivenessProbe returns an HTTP GET probe against "/health" on the
+ // embedding port: 60s initial delay, 30s period, 10s timeout, 3 failures.
+ func (*EmbeddingServerReconciler) buildLivenessProbe(embedding *mcpv1alpha1.EmbeddingServer) *corev1.Probe {
+ 	healthCheck := &corev1.HTTPGetAction{
+ 		Path: "/health",
+ 		Port: intstr.FromInt(int(embedding.GetPort())),
+ 	}
+
+ 	probe := &corev1.Probe{
+ 		InitialDelaySeconds: 60,
+ 		PeriodSeconds:       30,
+ 		TimeoutSeconds:      10,
+ 		FailureThreshold:    3,
+ 	}
+ 	probe.HTTPGet = healthCheck
+ 	return probe
+ }
+
+ // buildReadinessProbe returns an HTTP GET probe against "/health" on the
+ // embedding port: 30s initial delay, 10s period, 5s timeout, 3 failures.
+ func (*EmbeddingServerReconciler) buildReadinessProbe(embedding *mcpv1alpha1.EmbeddingServer) *corev1.Probe {
+ 	healthCheck := &corev1.HTTPGetAction{
+ 		Path: "/health",
+ 		Port: intstr.FromInt(int(embedding.GetPort())),
+ 	}
+
+ 	probe := &corev1.Probe{
+ 		InitialDelaySeconds: 30,
+ 		PeriodSeconds:       10,
+ 		TimeoutSeconds:      5,
+ 		FailureThreshold:    3,
+ 	}
+ 	probe.HTTPGet = healthCheck
+ 	return probe
+ }
+
+ // applyResourceRequirements copies the CPU/memory limits and requests from
+ // Spec.Resources onto the container. When none of the four values is set the
+ // container is left untouched; otherwise both Limits and Requests are
+ // initialised (possibly empty) and each non-empty value is parsed with
+ // resource.MustParse, which panics on an unparsable quantity.
+ func (*EmbeddingServerReconciler) applyResourceRequirements(embedding *mcpv1alpha1.EmbeddingServer, container *corev1.Container) {
+ 	limits := embedding.Spec.Resources.Limits
+ 	requests := embedding.Spec.Resources.Requests
+
+ 	anySet := limits.CPU != "" || limits.Memory != "" || requests.CPU != "" || requests.Memory != ""
+ 	if !anySet {
+ 		return
+ 	}
+
+ 	// put stores the parsed quantity under name, skipping unset values.
+ 	put := func(target corev1.ResourceList, name corev1.ResourceName, raw string) {
+ 		if raw == "" {
+ 			return
+ 		}
+ 		target[name] = resource.MustParse(raw)
+ 	}
+
+ 	container.Resources = corev1.ResourceRequirements{
+ 		Limits:   corev1.ResourceList{},
+ 		Requests: corev1.ResourceList{},
+ 	}
+ 	put(container.Resources.Limits, corev1.ResourceCPU, limits.CPU)
+ 	put(container.Resources.Limits, corev1.ResourceMemory, limits.Memory)
+ 	put(container.Resources.Requests, corev1.ResourceCPU, requests.CPU)
+ 	put(container.Resources.Requests, corev1.ResourceMemory, requests.Memory)
+ }
+
+// buildPodTemplate builds the pod template for the statefulset
+func (r *EmbeddingServerReconciler) buildPodTemplate(
+ embedding *mcpv1alpha1.EmbeddingServer,
+ labels map[string]string,
+ container corev1.Container,
+) corev1.PodTemplateSpec {
+ podTemplate := corev1.PodTemplateSpec{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: labels,
+ },
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{container},
+ },
+ }
+
+ // Note: Volumes for model cache are managed by StatefulSet volumeClaimTemplates
+ // and will be automatically mounted with the name "model-cache"
+
+ // Merge with user-provided PodTemplateSpec if specified
+ r.mergePodTemplateSpec(embedding, &podTemplate)
+
+ return podTemplate
+}
+
+ // mergePodTemplateSpec overlays selected fields from the user-provided
+ // Spec.PodTemplateSpec onto podTemplate: node selector, affinity, tolerations,
+ // pod security context, service account name, and the embedding container's
+ // security context. A field is only overwritten when the user set it. The
+ // method is a no-op when no template is provided, when it cannot be parsed, or
+ // when the builder yields nil; parse errors are not reported from here.
+ func (r *EmbeddingServerReconciler) mergePodTemplateSpec(
+ 	embedding *mcpv1alpha1.EmbeddingServer,
+ 	podTemplate *corev1.PodTemplateSpec,
+ ) {
+ 	raw := embedding.Spec.PodTemplateSpec
+ 	if raw == nil {
+ 		return
+ 	}
+
+ 	tplBuilder, err := ctrlutil.NewPodTemplateSpecBuilder(raw, embeddingContainerName)
+ 	if err != nil {
+ 		return
+ 	}
+
+ 	custom := tplBuilder.Build()
+ 	if custom == nil {
+ 		return
+ 	}
+
+ 	// Merge user customizations into base pod template
+ 	from, into := &custom.Spec, &podTemplate.Spec
+ 	if from.NodeSelector != nil {
+ 		into.NodeSelector = from.NodeSelector
+ 	}
+ 	if from.Affinity != nil {
+ 		into.Affinity = from.Affinity
+ 	}
+ 	if len(from.Tolerations) > 0 {
+ 		into.Tolerations = from.Tolerations
+ 	}
+ 	if from.SecurityContext != nil {
+ 		into.SecurityContext = from.SecurityContext
+ 	}
+ 	if from.ServiceAccountName != "" {
+ 		into.ServiceAccountName = from.ServiceAccountName
+ 	}
+
+ 	// Merge container-level customizations
+ 	r.mergeContainerSecurityContext(podTemplate, custom)
+ }
+
+// mergeContainerSecurityContext merges container-level security context
+func (*EmbeddingServerReconciler) mergeContainerSecurityContext(
+ podTemplate *corev1.PodTemplateSpec,
+ userTemplate *corev1.PodTemplateSpec,
+) {
+ for i := range podTemplate.Spec.Containers {
+ if podTemplate.Spec.Containers[i].Name != embeddingContainerName {
+ continue
+ }
+ for _, userContainer := range userTemplate.Spec.Containers {
+ if userContainer.Name == embeddingContainerName && userContainer.SecurityContext != nil {
+ podTemplate.Spec.Containers[i].SecurityContext = userContainer.SecurityContext
+ break
+ }
+ }
+ break
+ }
+}
+
+ // applyStatefulSetOverrides reads Spec.ResourceOverrides.StatefulSet and
+ //   - returns copies of its annotations and labels (always non-nil maps, empty
+ //     when there are no overrides), intended for the StatefulSet's own metadata;
+ //   - copies PodTemplateMetadataOverrides annotations and labels directly into
+ //     podTemplate's metadata, mutating the maps podTemplate already holds.
+ //
+ // Both pod-template maps are created on demand before being written to, so a
+ // template without labels no longer causes a write to a nil map.
+ func (*EmbeddingServerReconciler) applyStatefulSetOverrides(
+ 	embedding *mcpv1alpha1.EmbeddingServer,
+ 	podTemplate *corev1.PodTemplateSpec,
+ ) (map[string]string, map[string]string) {
+ 	annotations := make(map[string]string)
+ 	labels := make(map[string]string)
+
+ 	if embedding.Spec.ResourceOverrides == nil || embedding.Spec.ResourceOverrides.StatefulSet == nil {
+ 		return annotations, labels
+ 	}
+ 	stsOverrides := embedding.Spec.ResourceOverrides.StatefulSet
+
+ 	if stsOverrides.Annotations != nil {
+ 		maps.Copy(annotations, stsOverrides.Annotations)
+ 	}
+ 	if stsOverrides.Labels != nil {
+ 		maps.Copy(labels, stsOverrides.Labels)
+ 	}
+
+ 	if podMeta := stsOverrides.PodTemplateMetadataOverrides; podMeta != nil {
+ 		// Kept from the original behaviour: the annotations map is initialised
+ 		// whenever pod-template overrides are present, even if none are annotations.
+ 		if podTemplate.Annotations == nil {
+ 			podTemplate.Annotations = make(map[string]string)
+ 		}
+ 		if podMeta.Annotations != nil {
+ 			maps.Copy(podTemplate.Annotations, podMeta.Annotations)
+ 		}
+ 		if podMeta.Labels != nil {
+ 			// Guard against a template that carries no labels yet.
+ 			if podTemplate.Labels == nil {
+ 				podTemplate.Labels = make(map[string]string)
+ 			}
+ 			maps.Copy(podTemplate.Labels, podMeta.Labels)
+ 		}
+ 	}
+
+ 	return annotations, labels
+ }
+
+ // serviceForEmbedding builds the desired Service for the embedding server. The
+ // selector is the base label set from labelsForEmbedding; the Service's own
+ // labels are that set plus Spec.ResourceOverrides.Service labels, and its
+ // annotations come solely from the overrides. A single TCP port named "http"
+ // maps the embedding port to the same target port. Returns nil when the
+ // controller owner reference cannot be set.
+ func (r *EmbeddingServerReconciler) serviceForEmbedding(
+ 	_ context.Context,
+ 	embedding *mcpv1alpha1.EmbeddingServer,
+ ) *corev1.Service {
+ 	selector := r.labelsForEmbedding(embedding)
+
+ 	// Start the metadata maps from the base labels / an empty annotation set.
+ 	svcLabels := make(map[string]string)
+ 	maps.Copy(svcLabels, selector)
+ 	svcAnnotations := make(map[string]string)
+
+ 	// Apply service overrides if specified; copying from a nil map is a no-op.
+ 	if overrides := embedding.Spec.ResourceOverrides; overrides != nil && overrides.Service != nil {
+ 		maps.Copy(svcAnnotations, overrides.Service.Annotations)
+ 		maps.Copy(svcLabels, overrides.Service.Labels)
+ 	}
+
+ 	port := embedding.GetPort()
+ 	httpPort := corev1.ServicePort{
+ 		Name:       "http",
+ 		Port:       port,
+ 		TargetPort: intstr.FromInt(int(port)),
+ 		Protocol:   corev1.ProtocolTCP,
+ 	}
+
+ 	svc := &corev1.Service{
+ 		ObjectMeta: metav1.ObjectMeta{
+ 			Name:        embedding.Name,
+ 			Namespace:   embedding.Namespace,
+ 			Labels:      svcLabels,
+ 			Annotations: svcAnnotations,
+ 		},
+ 		Spec: corev1.ServiceSpec{
+ 			Selector: selector,
+ 			Ports:    []corev1.ServicePort{httpPort},
+ 		},
+ 	}
+
+ 	if err := ctrl.SetControllerReference(embedding, svc, r.Scheme); err != nil {
+ 		return nil
+ 	}
+ 	return svc
+ }
+
+ // labelsForEmbedding returns a freshly allocated map with the four
+ // app.kubernetes.io labels shared by the resources built for the given
+ // EmbeddingServer; only the instance label varies (it is the resource name).
+ func (*EmbeddingServerReconciler) labelsForEmbedding(embedding *mcpv1alpha1.EmbeddingServer) map[string]string {
+ 	result := make(map[string]string, 4)
+ 	result["app.kubernetes.io/name"] = "embeddingserver"
+ 	result["app.kubernetes.io/instance"] = embedding.Name
+ 	result["app.kubernetes.io/component"] = "embedding-server"
+ 	result["app.kubernetes.io/managed-by"] = "toolhive-operator"
+ 	return result
+ }
+
+ // statefulSetNeedsUpdate reports whether currentSts differs from the
+ // StatefulSet that would be generated from the current spec. The comparison
+ // covers StatefulSet-level metadata and replicas, the embedding container, and
+ // the pod template metadata. It answers true whenever the comparison cannot be
+ // carried out (the desired object cannot be built, or the embedding container
+ // is missing on either side).
+ func (r *EmbeddingServerReconciler) statefulSetNeedsUpdate(
+ 	ctx context.Context,
+ 	currentSts *appsv1.StatefulSet,
+ 	embedding *mcpv1alpha1.EmbeddingServer,
+ ) bool {
+ 	desired := r.statefulSetForEmbedding(ctx, embedding)
+ 	if desired == nil {
+ 		// Without a desired object there is nothing to compare against.
+ 		return true
+ 	}
+
+ 	if r.statefulSetMetadataChanged(currentSts, desired) {
+ 		return true
+ 	}
+
+ 	live, want := r.findEmbeddingContainers(currentSts, desired)
+ 	switch {
+ 	case live == nil, want == nil:
+ 		return true
+ 	case r.containerNeedsUpdate(live, want):
+ 		return true
+ 	default:
+ 		return r.podTemplateMetadataChanged(currentSts, desired)
+ 	}
+ }
+
+ // statefulSetMetadataChanged reports whether the replica count, annotations or
+ // labels of currentSts differ from those of newSts.
+ //
+ // Maps are compared with maps.Equal so that a nil map and an empty map count
+ // as equal. reflect.DeepEqual distinguishes the two, which would report a
+ // change when one side holds a non-nil empty map and the other holds nil.
+ // A nil Replicas pointer is handled without dereferencing it: two nil pointers
+ // are equal, nil versus non-nil is a change.
+ func (*EmbeddingServerReconciler) statefulSetMetadataChanged(currentSts, newSts *appsv1.StatefulSet) bool {
+ 	have, want := currentSts.Spec.Replicas, newSts.Spec.Replicas
+ 	switch {
+ 	case have == nil || want == nil:
+ 		if have != want {
+ 			return true
+ 		}
+ 	case *have != *want:
+ 		return true
+ 	}
+
+ 	if !maps.Equal(newSts.Annotations, currentSts.Annotations) {
+ 		return true
+ 	}
+ 	return !maps.Equal(newSts.Labels, currentSts.Labels)
+ }
+
+// findEmbeddingContainers finds the embedding container in both StatefulSets
+func (*EmbeddingServerReconciler) findEmbeddingContainers(
+ currentSts, newSts *appsv1.StatefulSet,
+) (*corev1.Container, *corev1.Container) {
+ var existingContainer *corev1.Container
+ for i := range currentSts.Spec.Template.Spec.Containers {
+ if currentSts.Spec.Template.Spec.Containers[i].Name == embeddingContainerName {
+ existingContainer = ¤tSts.Spec.Template.Spec.Containers[i]
+ break
+ }
+ }
+
+ var newContainer *corev1.Container
+ for i := range newSts.Spec.Template.Spec.Containers {
+ if newSts.Spec.Template.Spec.Containers[i].Name == embeddingContainerName {
+ newContainer = &newSts.Spec.Template.Spec.Containers[i]
+ break
+ }
+ }
+
+ return existingContainer, newContainer
+}
+
+ // containerNeedsUpdate reports whether the existing embedding container
+ // differs from the desired one in image, pull policy, args, env, ports or
+ // resource requirements.
+ //
+ // Resource requirements are compared by quantity value (Quantity.Cmp) rather
+ // than with reflect.DeepEqual: DeepEqual compares the internal representation
+ // of resource.Quantity and distinguishes nil from empty resource lists, so two
+ // requirement sets holding the same values can still be reported as different.
+ func (*EmbeddingServerReconciler) containerNeedsUpdate(existingContainer, newContainer *corev1.Container) bool {
+ 	if existingContainer.Image != newContainer.Image {
+ 		return true
+ 	}
+ 	if existingContainer.ImagePullPolicy != newContainer.ImagePullPolicy {
+ 		return true
+ 	}
+ 	if !reflect.DeepEqual(existingContainer.Args, newContainer.Args) {
+ 		return true
+ 	}
+ 	if !reflect.DeepEqual(existingContainer.Env, newContainer.Env) {
+ 		return true
+ 	}
+ 	if !reflect.DeepEqual(existingContainer.Ports, newContainer.Ports) {
+ 		return true
+ 	}
+ 	return !embeddingResourceRequirementsEqual(existingContainer.Resources, newContainer.Resources)
+ }
+
+ // embeddingResourceRequirementsEqual compares limits and requests by value and
+ // resource claims structurally (absent and empty claim lists count as equal).
+ func embeddingResourceRequirementsEqual(a, b corev1.ResourceRequirements) bool {
+ 	if !embeddingResourceListEqual(a.Limits, b.Limits) ||
+ 		!embeddingResourceListEqual(a.Requests, b.Requests) {
+ 		return false
+ 	}
+ 	if len(a.Claims) == 0 && len(b.Claims) == 0 {
+ 		return true
+ 	}
+ 	return reflect.DeepEqual(a.Claims, b.Claims)
+ }
+
+ // embeddingResourceListEqual reports whether both lists hold the same resource
+ // names with numerically equal quantities; nil and empty lists are equal.
+ func embeddingResourceListEqual(a, b corev1.ResourceList) bool {
+ 	if len(a) != len(b) {
+ 		return false
+ 	}
+ 	for name, qa := range a {
+ 		qb, ok := b[name]
+ 		if !ok || qa.Cmp(qb) != 0 {
+ 			return false
+ 		}
+ 	}
+ 	return true
+ }
+
+ // podTemplateMetadataChanged reports whether the pod template annotations or
+ // labels of currentSts differ from those of newSts.
+ //
+ // maps.Equal is used so that nil and empty maps compare as equal;
+ // reflect.DeepEqual would flag a change when one side holds a non-nil empty
+ // map (as applyStatefulSetOverrides can create for annotations) and the other
+ // holds nil.
+ func (*EmbeddingServerReconciler) podTemplateMetadataChanged(currentSts, newSts *appsv1.StatefulSet) bool {
+ 	currentMeta, desiredMeta := &currentSts.Spec.Template.ObjectMeta, &newSts.Spec.Template.ObjectMeta
+ 	if !maps.Equal(currentMeta.Annotations, desiredMeta.Annotations) {
+ 		return true
+ 	}
+ 	return !maps.Equal(currentMeta.Labels, desiredMeta.Labels)
+ }
+
+ // updateEmbeddingServerStatus derives the EmbeddingServer status from the
+ // StatefulSet of the same name and persists it through the status
+ // subresource client.
+ //
+ //   - Status.URL is recomputed on every call from name, namespace and port, so
+ //     it follows a changed port instead of keeping the first value ever written.
+ //   - StatefulSet not found: phase Pending, zero ready replicas.
+ //   - StatefulSet found: ready replicas and observed generation are copied, and
+ //     the phase is Running (at least one ready replica), Downloading (replicas
+ //     exist but none is ready) or Pending (no replicas yet).
+ //
+ // Any other lookup error, and any error from the status update, is returned.
+ func (r *EmbeddingServerReconciler) updateEmbeddingServerStatus(
+ 	ctx context.Context,
+ 	embedding *mcpv1alpha1.EmbeddingServer,
+ ) error {
+ 	ctxLogger := log.FromContext(ctx)
+
+ 	// Always refresh the URL; it is a pure function of the current spec.
+ 	embedding.Status.URL = fmt.Sprintf("http://%s.%s.svc.cluster.local:%d",
+ 		embedding.Name, embedding.Namespace, embedding.GetPort())
+
+ 	statefulSet := &appsv1.StatefulSet{}
+ 	err := r.Get(ctx, types.NamespacedName{Name: embedding.Name, Namespace: embedding.Namespace}, statefulSet)
+ 	switch {
+ 	case err == nil:
+ 		embedding.Status.ReadyReplicas = statefulSet.Status.ReadyReplicas
+ 		embedding.Status.ObservedGeneration = embedding.Generation
+
+ 		// Determine phase and message based on statefulset status using immutable assignment
+ 		type phaseInfo struct {
+ 			phase   mcpv1alpha1.EmbeddingServerPhase
+ 			message string
+ 		}
+
+ 		info := func() phaseInfo {
+ 			if statefulSet.Status.ReadyReplicas > 0 {
+ 				return phaseInfo{
+ 					phase:   mcpv1alpha1.EmbeddingServerPhaseRunning,
+ 					message: "Embedding server is running",
+ 				}
+ 			}
+ 			if statefulSet.Status.Replicas > 0 {
+ 				// Pods exist but none is ready yet; this is reported as the
+ 				// model download phase.
+ 				return phaseInfo{
+ 					phase:   mcpv1alpha1.EmbeddingServerPhaseDownloading,
+ 					message: "Downloading embedding model",
+ 				}
+ 			}
+ 			return phaseInfo{
+ 				phase:   mcpv1alpha1.EmbeddingServerPhasePending,
+ 				message: "Waiting for statefulset",
+ 			}
+ 		}()
+
+ 		embedding.Status.Phase = info.phase
+ 		embedding.Status.Message = info.message
+
+ 	case errors.IsNotFound(err):
+ 		embedding.Status.Phase = mcpv1alpha1.EmbeddingServerPhasePending
+ 		embedding.Status.ReadyReplicas = 0
+
+ 	default:
+ 		return err
+ 	}
+
+ 	if err := r.Status().Update(ctx, embedding); err != nil {
+ 		ctxLogger.Error(err, "Failed to update EmbeddingServer status")
+ 		return err
+ 	}
+
+ 	return nil
+ }
+
+ // finalizeEmbeddingServer runs the pre-deletion steps for an EmbeddingServer:
+ // it marks the phase as Terminating (a failed status update is logged, not
+ // returned) and emits a "Deleted" event. No owned resources are removed here.
+ func (r *EmbeddingServerReconciler) finalizeEmbeddingServer(ctx context.Context, embedding *mcpv1alpha1.EmbeddingServer) {
+ 	logger := log.FromContext(ctx)
+ 	logger.Info("Finalizing EmbeddingServer", "name", embedding.Name)
+
+ 	// Best effort: deletion proceeds even if the phase cannot be persisted.
+ 	embedding.Status.Phase = mcpv1alpha1.EmbeddingServerPhaseTerminating
+ 	updateErr := r.Status().Update(ctx, embedding)
+ 	if updateErr != nil {
+ 		logger.Error(updateErr, "Failed to update EmbeddingServer status to Terminating")
+ 	}
+
+ 	// No explicit cleanup is performed here.
+ 	// NOTE(review): presumably owned resources are removed through owner
+ 	// references set at creation time - confirm.
+
+ 	r.Recorder.Event(embedding, corev1.EventTypeNormal, "Deleted", "EmbeddingServer has been finalized")
+ }
+
+ // SetupWithManager registers the reconciler with the manager. It reconciles
+ // EmbeddingServer objects and also watches the StatefulSets, Services and
+ // PersistentVolumeClaims they own.
+ func (r *EmbeddingServerReconciler) SetupWithManager(mgr ctrl.Manager) error {
+ 	controllerBuilder := ctrl.NewControllerManagedBy(mgr).
+ 		For(&mcpv1alpha1.EmbeddingServer{})
+
+ 	// Owned resource kinds whose changes should trigger a reconcile.
+ 	controllerBuilder = controllerBuilder.
+ 		Owns(&appsv1.StatefulSet{}).
+ 		Owns(&corev1.Service{}).
+ 		Owns(&corev1.PersistentVolumeClaim{})
+
+ 	return controllerBuilder.Complete(r)
+ }
diff --git a/cmd/thv-operator/controllers/embeddingserver_controller_test.go b/cmd/thv-operator/controllers/embeddingserver_controller_test.go
new file mode 100644
index 0000000000..d783be5e43
--- /dev/null
+++ b/cmd/thv-operator/controllers/embeddingserver_controller_test.go
@@ -0,0 +1,883 @@
+// SPDX-License-Identifier: Apache-2.0
+
+package controllers
+
+import (
+ "context"
+ "fmt"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ appsv1 "k8s.io/api/apps/v1"
+ corev1 "k8s.io/api/core/v1"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/types"
+ "k8s.io/client-go/tools/record"
+ "k8s.io/utils/ptr"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client/fake"
+
+ mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1"
+ ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil"
+ "github.com/stacklok/toolhive/cmd/thv-operator/pkg/validation"
+)
+
+func TestEmbeddingServer_GetPort(t *testing.T) {
+ t.Parallel()
+
+ tests := []struct {
+ name string
+ port int32
+ expected int32
+ }{
+ {
+ name: "default port",
+ port: 0,
+ expected: 8080,
+ },
+ {
+ name: "custom port",
+ port: 9000,
+ expected: 9000,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ embedding := &mcpv1alpha1.EmbeddingServer{
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Port: tt.port,
+ },
+ }
+
+ assert.Equal(t, tt.expected, embedding.GetPort())
+ })
+ }
+}
+
+func TestEmbeddingServer_GetReplicas(t *testing.T) {
+ t.Parallel()
+
+ replicas2 := int32(2)
+ tests := []struct {
+ name string
+ replicas *int32
+ expected int32
+ }{
+ {
+ name: "default replicas",
+ replicas: nil,
+ expected: 1,
+ },
+ {
+ name: "custom replicas",
+ replicas: &replicas2,
+ expected: 2,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ embedding := &mcpv1alpha1.EmbeddingServer{
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Replicas: tt.replicas,
+ },
+ }
+
+ assert.Equal(t, tt.expected, embedding.GetReplicas())
+ })
+ }
+}
+
+func TestEmbeddingServer_IsModelCacheEnabled(t *testing.T) {
+ t.Parallel()
+
+ tests := []struct {
+ name string
+ modelCache *mcpv1alpha1.ModelCacheConfig
+ expected bool
+ }{
+ {
+ name: "nil model cache",
+ modelCache: nil,
+ expected: false,
+ },
+ {
+ name: "model cache disabled",
+ modelCache: &mcpv1alpha1.ModelCacheConfig{
+ Enabled: false,
+ },
+ expected: false,
+ },
+ {
+ name: "model cache enabled",
+ modelCache: &mcpv1alpha1.ModelCacheConfig{
+ Enabled: true,
+ },
+ expected: true,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ embedding := &mcpv1alpha1.EmbeddingServer{
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ ModelCache: tt.modelCache,
+ },
+ }
+
+ assert.Equal(t, tt.expected, embedding.IsModelCacheEnabled())
+ })
+ }
+}
+
+func TestEmbeddingServer_GetImagePullPolicy(t *testing.T) {
+ t.Parallel()
+
+ tests := []struct {
+ name string
+ imagePullPolicy string
+ expected string
+ }{
+ {
+ name: "default pull policy",
+ imagePullPolicy: "",
+ expected: "IfNotPresent",
+ },
+ {
+ name: "Never pull policy",
+ imagePullPolicy: "Never",
+ expected: "Never",
+ },
+ {
+ name: "Always pull policy",
+ imagePullPolicy: "Always",
+ expected: "Always",
+ },
+ {
+ name: "IfNotPresent pull policy",
+ imagePullPolicy: "IfNotPresent",
+ expected: "IfNotPresent",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ embedding := &mcpv1alpha1.EmbeddingServer{
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ ImagePullPolicy: tt.imagePullPolicy,
+ },
+ }
+
+ assert.Equal(t, tt.expected, embedding.GetImagePullPolicy())
+ })
+ }
+}
+
+func TestEmbeddingServerPodTemplateSpecValidation(t *testing.T) {
+ t.Parallel()
+
+ tests := []struct {
+ name string
+ podTemplateSpec *runtime.RawExtension
+ expectValid bool
+ }{
+ {
+ name: "no PodTemplateSpec provided",
+ podTemplateSpec: nil,
+ expectValid: true,
+ },
+ {
+ name: "valid PodTemplateSpec",
+ podTemplateSpec: &runtime.RawExtension{
+ Raw: []byte(`{"spec":{"nodeSelector":{"disktype":"ssd"}}}`),
+ },
+ expectValid: true,
+ },
+ {
+ name: "invalid PodTemplateSpec",
+ podTemplateSpec: &runtime.RawExtension{
+ Raw: []byte(`{invalid json`),
+ },
+ expectValid: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ if tt.podTemplateSpec == nil {
+ // nil is always valid
+ assert.True(t, tt.expectValid)
+ return
+ }
+
+ _, err := ctrlutil.NewPodTemplateSpecBuilder(tt.podTemplateSpec, embeddingContainerName)
+
+ if tt.expectValid {
+ assert.NoError(t, err)
+ } else {
+ assert.Error(t, err)
+ }
+ })
+ }
+}
+
+ // TestEmbeddingServer_Labels verifies the four app.kubernetes.io labels that
+ // labelsForEmbedding produces for a named EmbeddingServer.
+ func TestEmbeddingServer_Labels(t *testing.T) {
+ 	t.Parallel()
+
+ 	embedding := &mcpv1alpha1.EmbeddingServer{
+ 		ObjectMeta: metav1.ObjectMeta{Name: "test-embedding"},
+ 		Spec:       mcpv1alpha1.EmbeddingServerSpec{Model: "test-model"},
+ 	}
+
+ 	got := (&EmbeddingServerReconciler{}).labelsForEmbedding(embedding)
+
+ 	// Check required labels
+ 	required := []struct{ key, want string }{
+ 		{"app.kubernetes.io/name", "embeddingserver"},
+ 		{"app.kubernetes.io/instance", "test-embedding"},
+ 		{"app.kubernetes.io/component", "embedding-server"},
+ 		{"app.kubernetes.io/managed-by", "toolhive-operator"},
+ 	}
+ 	for _, label := range required {
+ 		assert.Equal(t, label.want, got[label.key])
+ 	}
+ }
+
+ // TestEmbeddingServer_ModelCacheConfig exercises buildVolumeClaimTemplates and
+ // verifies the generated claim template: its name, labels, access mode,
+ // storage request and storage class, both with defaults and with explicit
+ // values. The method under test sets no owner reference, so a zero-value
+ // reconciler (no Scheme, no client) is sufficient.
+ func TestEmbeddingServer_ModelCacheConfig(t *testing.T) {
+ 	t.Parallel()
+
+ 	storageClassName := "fast-ssd"
+ 	tests := []struct {
+ 		name           string
+ 		modelCache     *mcpv1alpha1.ModelCacheConfig
+ 		expectedSize   string
+ 		expectedAccess string
+ 	}{
+ 		{
+ 			name: "default values",
+ 			modelCache: &mcpv1alpha1.ModelCacheConfig{
+ 				Enabled: true,
+ 			},
+ 			expectedSize:   "10Gi",
+ 			expectedAccess: "ReadWriteOnce",
+ 		},
+ 		{
+ 			name: "custom values",
+ 			modelCache: &mcpv1alpha1.ModelCacheConfig{
+ 				Enabled:          true,
+ 				Size:             "20Gi",
+ 				AccessMode:       "ReadWriteMany",
+ 				StorageClassName: &storageClassName,
+ 			},
+ 			expectedSize:   "20Gi",
+ 			expectedAccess: "ReadWriteMany",
+ 		},
+ 	}
+
+ 	for _, tt := range tests {
+ 		t.Run(tt.name, func(t *testing.T) {
+ 			t.Parallel()
+
+ 			embedding := &mcpv1alpha1.EmbeddingServer{
+ 				Spec: mcpv1alpha1.EmbeddingServerSpec{
+ 					Model:      "test-model",
+ 					ModelCache: tt.modelCache,
+ 				},
+ 			}
+ 			embedding.Name = "test-embedding"
+ 			embedding.Namespace = "default"
+
+ 			reconciler := &EmbeddingServerReconciler{}
+ 			pvcs := reconciler.buildVolumeClaimTemplates(embedding)
+ 			require.Len(t, pvcs, 1)
+ 			pvc := pvcs[0]
+
+ 			// The template name must match the volume mount name used by the container.
+ 			assert.Equal(t, "model-cache", pvc.Name)
+ 			assert.Equal(t, reconciler.labelsForEmbedding(embedding), pvc.Labels)
+
+ 			require.Len(t, pvc.Spec.AccessModes, 1)
+ 			assert.Equal(t, tt.expectedAccess, string(pvc.Spec.AccessModes[0]))
+
+ 			// Copy the quantity out of the map so String() can be called on it.
+ 			storage, ok := pvc.Spec.Resources.Requests[corev1.ResourceStorage]
+ 			require.True(t, ok, "storage request must be set")
+ 			assert.Equal(t, tt.expectedSize, storage.String(),
+ 				fmt.Sprintf("unexpected storage request for case %q", tt.name))
+
+ 			// The storage class is passed through unchanged (nil when unset).
+ 			assert.Equal(t, tt.modelCache.StorageClassName, pvc.Spec.StorageClassName)
+ 		})
+ 	}
+ }
+
+// Test helpers
+
+// createEmbeddingServerTestScheme returns a new runtime.Scheme with the corev1,
+// appsv1 and mcpv1alpha1 types registered.
+//
+// It panics if any registration fails: a partially populated scheme would only
+// surface later as confusing fake-client errors inside individual tests.
+func createEmbeddingServerTestScheme() *runtime.Scheme {
+	testScheme := runtime.NewScheme()
+	for _, addToScheme := range []func(*runtime.Scheme) error{
+		corev1.AddToScheme,
+		appsv1.AddToScheme,
+		mcpv1alpha1.AddToScheme,
+	} {
+		if err := addToScheme(testScheme); err != nil {
+			panic(fmt.Sprintf("registering types in test scheme: %v", err))
+		}
+	}
+	return testScheme
+}
+
+// createTestEmbeddingServer builds an EmbeddingServer with the given name,
+// namespace, image and model. Generation is set to 1; every other field is
+// left at its zero value.
+func createTestEmbeddingServer(name, namespace, image, model string) *mcpv1alpha1.EmbeddingServer {
+	server := &mcpv1alpha1.EmbeddingServer{}
+	server.ObjectMeta = metav1.ObjectMeta{Name: name, Namespace: namespace, Generation: 1}
+	server.Spec = mcpv1alpha1.EmbeddingServerSpec{Image: image, Model: model}
+	return server
+}
+
+// TestReconcile_NotFound tests that reconciling a request for an
+// EmbeddingServer that does not exist returns an empty result and no error.
+func TestReconcile_NotFound(t *testing.T) {
+	t.Parallel()
+
+	// The fake client is built without any objects, so the lookup finds nothing.
+	scheme := createEmbeddingServerTestScheme()
+	reconciler := &EmbeddingServerReconciler{
+		Client:          fake.NewClientBuilder().WithScheme(scheme).Build(),
+		Scheme:          scheme,
+		Recorder:        record.NewFakeRecorder(10),
+		ImageValidation: validation.ImageValidationAlwaysAllow,
+	}
+
+	missing := types.NamespacedName{Namespace: "default", Name: "non-existent"}
+	result, err := reconciler.Reconcile(context.TODO(), ctrl.Request{NamespacedName: missing})
+
+	assert.NoError(t, err)
+	assert.Equal(t, ctrl.Result{}, result)
+}
+
+// TestReconcile_CreateResources tests that the first reconcile of a new
+// EmbeddingServer adds the finalizer and creates its StatefulSet and Service.
+func TestReconcile_CreateResources(t *testing.T) {
+	t.Parallel()
+
+	embedding := createTestEmbeddingServer("test-embedding", "test-ns", "test-image:latest", "test-model")
+
+	scheme := createEmbeddingServerTestScheme()
+	fakeClient := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithRuntimeObjects(embedding).
+		WithStatusSubresource(embedding).
+		Build()
+
+	reconciler := &EmbeddingServerReconciler{
+		Client:           fakeClient,
+		Scheme:           scheme,
+		Recorder:         record.NewFakeRecorder(10),
+		PlatformDetector: ctrlutil.NewSharedPlatformDetector(),
+		ImageValidation:  validation.ImageValidationAlwaysAllow,
+	}
+
+	ctx := context.TODO()
+	// The StatefulSet and Service are looked up under the EmbeddingServer's own
+	// name and namespace, so one key serves the request and all lookups.
+	key := types.NamespacedName{Name: embedding.Name, Namespace: embedding.Namespace}
+
+	// First reconcile should create resources
+	result, err := reconciler.Reconcile(ctx, ctrl.Request{NamespacedName: key})
+	require.NoError(t, err)
+	assert.Equal(t, ctrl.Result{}, result)
+
+	// Verify finalizer was added
+	updatedEmbedding := &mcpv1alpha1.EmbeddingServer{}
+	require.NoError(t, fakeClient.Get(ctx, key, updatedEmbedding))
+	assert.Contains(t, updatedEmbedding.Finalizers, embeddingFinalizerName)
+
+	// Verify StatefulSet was created. require (not assert) is used so that a
+	// failed lookup or an unset replica count stops the test here instead of
+	// panicking on the nil pointer dereference below.
+	sts := &appsv1.StatefulSet{}
+	require.NoError(t, fakeClient.Get(ctx, key, sts), "StatefulSet should be created")
+	assert.Equal(t, embedding.Name, sts.Name)
+	require.NotNil(t, sts.Spec.Replicas, "StatefulSet replicas should be set")
+	assert.Equal(t, int32(1), *sts.Spec.Replicas)
+
+	// Verify Service was created
+	svc := &corev1.Service{}
+	err = fakeClient.Get(ctx, key, svc)
+	assert.NoError(t, err, "Service should be created")
+	assert.Equal(t, embedding.Name, svc.Name)
+}
+
+// TestValidateImage tests validateImage under different image validation modes
+// and inspects the ImageValidated condition on the object read back from the
+// fake client.
+func TestValidateImage(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name              string
+		embedding         *mcpv1alpha1.EmbeddingServer
+		imageValidation   validation.ImageValidation
+		registries        []runtime.Object
+		expectError       bool
+		expectedCondition metav1.ConditionStatus
+		expectedReason    string
+	}{
+		{
+			name:              "always allow - no validation",
+			embedding:         createTestEmbeddingServer("test", "default", "any-image:latest", "model"),
+			imageValidation:   validation.ImageValidationAlwaysAllow,
+			expectError:       false,
+			expectedCondition: metav1.ConditionTrue,
+			expectedReason:    mcpv1alpha1.ConditionReasonImageValidationSkipped,
+		},
+		{
+			name:              "registry enforcing - no registries",
+			embedding:         createTestEmbeddingServer("test", "default", "test-image:latest", "model"),
+			imageValidation:   validation.ImageValidationRegistryEnforcing,
+			registries:        []runtime.Object{},
+			expectError:       false,
+			expectedCondition: metav1.ConditionTrue,
+			expectedReason:    mcpv1alpha1.ConditionReasonImageValidationSkipped,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+
+			// Seed the fake client with the EmbeddingServer plus any registries.
+			scheme := createEmbeddingServerTestScheme()
+			seed := append([]runtime.Object{tt.embedding}, tt.registries...)
+			fakeClient := fake.NewClientBuilder().
+				WithScheme(scheme).
+				WithRuntimeObjects(seed...).
+				WithStatusSubresource(tt.embedding).
+				Build()
+
+			reconciler := &EmbeddingServerReconciler{
+				Client:          fakeClient,
+				Scheme:          scheme,
+				ImageValidation: tt.imageValidation,
+			}
+
+			ctx := context.TODO()
+			err := reconciler.validateImage(ctx, tt.embedding)
+			if !tt.expectError {
+				assert.NoError(t, err)
+			} else {
+				assert.Error(t, err)
+			}
+
+			// Read the object back to inspect the conditions held by the client.
+			key := types.NamespacedName{Name: tt.embedding.Name, Namespace: tt.embedding.Namespace}
+			stored := &mcpv1alpha1.EmbeddingServer{}
+			require.NoError(t, fakeClient.Get(ctx, key, stored))
+
+			// NOTE(review): if the fetched object carries no ImageValidated
+			// condition, this loop ends without asserting anything and the
+			// subtest still passes — confirm that is intended.
+			for _, cond := range stored.Status.Conditions {
+				if cond.Type != mcpv1alpha1.ConditionImageValidated {
+					continue
+				}
+				assert.Equal(t, tt.expectedCondition, cond.Status)
+				assert.Equal(t, tt.expectedReason, cond.Reason)
+				return
+			}
+		})
+	}
+}
+
+// TestStatefulSetNeedsUpdate tests drift detection: statefulSetNeedsUpdate is
+// given a StatefulSet generated from a baseline spec together with a possibly
+// modified EmbeddingServer, and its verdict is compared to the expectation.
+func TestStatefulSetNeedsUpdate(t *testing.T) {
+	t.Parallel()
+
+	reconciler := &EmbeddingServerReconciler{
+		Scheme:           createEmbeddingServerTestScheme(),
+		PlatformDetector: ctrlutil.NewSharedPlatformDetector(),
+	}
+
+	// baselineSts generates a StatefulSet from the image:v1 / model1 spec using
+	// the reconciler. Each call produces a separate object.
+	baselineSts := func() *appsv1.StatefulSet {
+		baseline := createTestEmbeddingServer("test", "default", "image:v1", "model1")
+		return reconciler.statefulSetForEmbedding(context.TODO(), baseline)
+	}
+
+	tests := []struct {
+		name           string
+		embedding      *mcpv1alpha1.EmbeddingServer
+		existingSts    *appsv1.StatefulSet
+		expectedUpdate bool
+		updateReason   string
+	}{
+		{
+			name:           "no update needed - identical",
+			embedding:      createTestEmbeddingServer("test", "default", "image:v1", "model1"),
+			existingSts:    baselineSts(),
+			expectedUpdate: false,
+		},
+		{
+			name:           "update needed - image changed",
+			embedding:      createTestEmbeddingServer("test", "default", "image:v2", "model1"),
+			existingSts:    baselineSts(),
+			expectedUpdate: true,
+			updateReason:   "image changed",
+		},
+		{
+			name:           "update needed - model changed",
+			embedding:      createTestEmbeddingServer("test", "default", "image:v1", "model2"),
+			existingSts:    baselineSts(),
+			expectedUpdate: true,
+			updateReason:   "model changed",
+		},
+		{
+			name: "update needed - port changed",
+			// Same as the baseline spec except for an explicit port.
+			embedding: func() *mcpv1alpha1.EmbeddingServer {
+				e := createTestEmbeddingServer("test", "default", "image:v1", "model1")
+				e.Spec.Port = 9090
+				return e
+			}(),
+			existingSts:    baselineSts(),
+			expectedUpdate: true,
+			updateReason:   "port changed",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+
+			got := reconciler.statefulSetNeedsUpdate(context.TODO(), tt.existingSts, tt.embedding)
+
+			// updateReason is passed through as the failure message.
+			assert.Equal(t, tt.expectedUpdate, got, tt.updateReason)
+		})
+	}
+}
+
+// TestHandleDeletion tests finalizer cleanup: handleDeletion is called for an
+// EmbeddingServer that is and is not marked for deletion, and its done flag,
+// error and result are checked.
+func TestHandleDeletion(t *testing.T) {
+	t.Parallel()
+
+	// withFinalizer builds an EmbeddingServer that carries the controller's
+	// finalizer and the given deletion timestamp (nil when not being deleted).
+	withFinalizer := func(deletionTimestamp *metav1.Time) *mcpv1alpha1.EmbeddingServer {
+		return &mcpv1alpha1.EmbeddingServer{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:              "test",
+				Namespace:         "default",
+				Finalizers:        []string{embeddingFinalizerName},
+				DeletionTimestamp: deletionTimestamp,
+			},
+			Spec: mcpv1alpha1.EmbeddingServerSpec{
+				Image: "test:latest",
+				Model: "test-model",
+			},
+		}
+	}
+
+	tests := []struct {
+		name            string
+		embedding       *mcpv1alpha1.EmbeddingServer
+		expectDone      bool
+		expectError     bool
+		expectFinalizer bool
+	}{
+		{
+			name:            "not being deleted",
+			embedding:       withFinalizer(nil),
+			expectDone:      false,
+			expectError:     false,
+			expectFinalizer: true,
+		},
+		{
+			name:            "being deleted with finalizer",
+			embedding:       withFinalizer(&metav1.Time{Time: time.Now()}),
+			expectDone:      true,
+			expectError:     false,
+			expectFinalizer: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+
+			scheme := createEmbeddingServerTestScheme()
+			fakeClient := fake.NewClientBuilder().
+				WithScheme(scheme).
+				WithRuntimeObjects(tt.embedding).
+				WithStatusSubresource(tt.embedding).
+				Build()
+
+			reconciler := &EmbeddingServerReconciler{
+				Client:   fakeClient,
+				Scheme:   scheme,
+				Recorder: record.NewFakeRecorder(10),
+			}
+
+			ctx := context.TODO()
+			result, done, err := reconciler.handleDeletion(ctx, tt.embedding)
+
+			assert.Equal(t, tt.expectDone, done)
+			if !tt.expectError {
+				assert.NoError(t, err)
+			} else {
+				assert.Error(t, err)
+			}
+
+			// The result is only constrained when handleDeletion reports done.
+			if done {
+				assert.Equal(t, ctrl.Result{}, result)
+			}
+
+			// The stored finalizer is only inspected for objects that are not
+			// being deleted; expectFinalizer is not checked for the deletion case.
+			if tt.embedding.DeletionTimestamp != nil {
+				return
+			}
+
+			stored := &mcpv1alpha1.EmbeddingServer{}
+			key := types.NamespacedName{Name: tt.embedding.Name, Namespace: tt.embedding.Namespace}
+			require.NoError(t, fakeClient.Get(ctx, key, stored))
+
+			hasFinalizer := func() bool {
+				for _, f := range stored.Finalizers {
+					if f == embeddingFinalizerName {
+						return true
+					}
+				}
+				return false
+			}()
+			assert.Equal(t, tt.expectFinalizer, hasFinalizer)
+		})
+	}
+}
+
+// TestEnsureStatefulSet tests statefulset creation and updates through
+// ensureStatefulSet, starting either with no StatefulSet or with one whose
+// replica count differs from the EmbeddingServer spec.
+func TestEnsureStatefulSet(t *testing.T) {
+	t.Parallel()
+
+	// singleReplicaSts is a pre-existing StatefulSet with one replica, used by
+	// the update case together with an EmbeddingServer that asks for three.
+	singleReplicaSts := &appsv1.StatefulSet{
+		ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"},
+		Spec: appsv1.StatefulSetSpec{
+			Replicas: ptr.To(int32(1)),
+			Template: corev1.PodTemplateSpec{
+				Spec: corev1.PodSpec{
+					Containers: []corev1.Container{{
+						Name:  embeddingContainerName,
+						Image: "image:v1",
+						Args:  []string{"--model-id", "model1", "--port", "8080"},
+						Env:   []corev1.EnvVar{{Name: "MODEL_ID", Value: "model1"}},
+						Ports: []corev1.ContainerPort{{ContainerPort: 8080}},
+					}},
+				},
+			},
+		},
+	}
+
+	// NOTE(review): expectCreate is set by the cases but never read below, and
+	// expectDone and expectUpdate both assert a positive RequeueAfter.
+	tests := []struct {
+		name         string
+		embedding    *mcpv1alpha1.EmbeddingServer
+		existingSts  *appsv1.StatefulSet
+		expectCreate bool
+		expectUpdate bool
+		expectDone   bool
+	}{
+		{
+			name:         "create new statefulset",
+			embedding:    createTestEmbeddingServer("test", "default", "image:v1", "model1"),
+			existingSts:  nil,
+			expectCreate: true,
+			expectDone:   false,
+		},
+		{
+			name: "update replicas",
+			embedding: func() *mcpv1alpha1.EmbeddingServer {
+				e := createTestEmbeddingServer("test", "default", "image:v1", "model1")
+				e.Spec.Replicas = ptr.To(int32(3))
+				return e
+			}(),
+			existingSts:  singleReplicaSts,
+			expectUpdate: true,
+			expectDone:   true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+
+			// Only add the StatefulSet when the case has one: a typed nil pointer
+			// stored in a runtime.Object would not compare equal to nil.
+			seed := make([]runtime.Object, 0, 2)
+			seed = append(seed, tt.embedding)
+			if tt.existingSts != nil {
+				seed = append(seed, tt.existingSts)
+			}
+
+			scheme := createEmbeddingServerTestScheme()
+			fakeClient := fake.NewClientBuilder().
+				WithScheme(scheme).
+				WithRuntimeObjects(seed...).
+				Build()
+
+			reconciler := &EmbeddingServerReconciler{
+				Client:           fakeClient,
+				Scheme:           scheme,
+				PlatformDetector: ctrlutil.NewSharedPlatformDetector(),
+			}
+
+			ctx := context.TODO()
+			result, err := reconciler.ensureStatefulSet(ctx, tt.embedding)
+			require.NoError(t, err)
+			// expectDone is now represented by whether we need to requeue
+			if tt.expectDone {
+				assert.True(t, result.RequeueAfter > 0)
+			}
+
+			// Whether created or updated, the StatefulSet must exist afterwards.
+			key := types.NamespacedName{Name: tt.embedding.Name, Namespace: tt.embedding.Namespace}
+			err = fakeClient.Get(ctx, key, &appsv1.StatefulSet{})
+			assert.NoError(t, err)
+
+			if tt.expectUpdate {
+				assert.Greater(t, result.RequeueAfter, time.Duration(0))
+			}
+		})
+	}
+}
+
+// TestUpdateEmbeddingServerStatus tests status updates
+func TestUpdateEmbeddingServerStatus(t *testing.T) {
+ t.Parallel()
+
+ tests := []struct {
+ name string
+ embedding *mcpv1alpha1.EmbeddingServer
+ statefulSet *appsv1.StatefulSet
+ expectedPhase mcpv1alpha1.EmbeddingServerPhase
+ expectedURL string
+ }{
+ {
+ name: "no statefulset - pending",
+ embedding: createTestEmbeddingServer("test", "default", "image:v1", "model1"),
+ statefulSet: nil,
+ expectedPhase: mcpv1alpha1.EmbeddingServerPhasePending,
+ expectedURL: "http://test.default.svc.cluster.local:8080",
+ },
+ {
+ name: "statefulset ready",
+ embedding: createTestEmbeddingServer("test", "default", "image:v1", "model1"),
+ statefulSet: &appsv1.StatefulSet{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test",
+ Namespace: "default",
+ },
+ Status: appsv1.StatefulSetStatus{
+ Replicas: 1,
+ ReadyReplicas: 1,
+ },
+ },
+ expectedPhase: mcpv1alpha1.EmbeddingServerPhaseRunning,
+ expectedURL: "http://test.default.svc.cluster.local:8080",
+ },
+ {
+ name: "statefulset downloading",
+ embedding: createTestEmbeddingServer("test", "default", "image:v1", "model1"),
+ statefulSet: &appsv1.StatefulSet{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test",
+ Namespace: "default",
+ },
+ Status: appsv1.StatefulSetStatus{
+ Replicas: 1,
+ ReadyReplicas: 0,
+ },
+ },
+ expectedPhase: mcpv1alpha1.EmbeddingServerPhaseDownloading,
+ expectedURL: "http://test.default.svc.cluster.local:8080",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ t.Parallel()
+
+ scheme := createEmbeddingServerTestScheme()
+ objects := []runtime.Object{tt.embedding}
+ if tt.statefulSet != nil {
+ objects = append(objects, tt.statefulSet)
+ }
+
+ fakeClient := fake.NewClientBuilder().
+ WithScheme(scheme).
+ WithRuntimeObjects(objects...).
+ WithStatusSubresource(tt.embedding).
+ Build()
+
+ reconciler := &EmbeddingServerReconciler{
+ Client: fakeClient,
+ Scheme: scheme,
+ }
+
+ err := reconciler.updateEmbeddingServerStatus(context.TODO(), tt.embedding)
+ assert.NoError(t, err)
+
+ // Verify status was updated
+ updatedEmbedding := &mcpv1alpha1.EmbeddingServer{}
+ err = fakeClient.Get(context.TODO(), types.NamespacedName{
+ Name: tt.embedding.Name,
+ Namespace: tt.embedding.Namespace,
+ }, updatedEmbedding)
+ require.NoError(t, err)
+
+ assert.Equal(t, tt.expectedPhase, updatedEmbedding.Status.Phase)
+ assert.Equal(t, tt.expectedURL, updatedEmbedding.Status.URL)
+ })
+ }
+}
diff --git a/cmd/thv-operator/main.go b/cmd/thv-operator/main.go
index cf3316d0aa..ffba4f70ee 100644
--- a/cmd/thv-operator/main.go
+++ b/cmd/thv-operator/main.go
@@ -267,6 +267,17 @@ func setupServerControllers(mgr ctrl.Manager, enableRegistry bool) error {
return fmt.Errorf("unable to create controller MCPRemoteProxy: %w", err)
}
+ // Set up EmbeddingServer controller
+ if err := (&controllers.EmbeddingServerReconciler{
+ Client: mgr.GetClient(),
+ Scheme: mgr.GetScheme(),
+ Recorder: mgr.GetEventRecorderFor("embeddingserver-controller"),
+ PlatformDetector: ctrlutil.NewSharedPlatformDetector(),
+ ImageValidation: imageValidation,
+ }).SetupWithManager(mgr); err != nil {
+ return fmt.Errorf("unable to create controller EmbeddingServer: %w", err)
+ }
+
return nil
}
diff --git a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_creation_test.go b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_creation_test.go
new file mode 100644
index 0000000000..efb3841a54
--- /dev/null
+++ b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_creation_test.go
@@ -0,0 +1,1177 @@
+// SPDX-License-Identifier: Apache-2.0
+
+// Package controllers contains integration tests for the EmbeddingServer controller.
+package controllers
+
+import (
+ "time"
+
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+ appsv1 "k8s.io/api/apps/v1"
+ corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/resource"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/runtime"
+ "k8s.io/apimachinery/pkg/types"
+ "k8s.io/utils/ptr"
+
+ mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1"
+)
+
+// TestCase defines a table-driven test case for the EmbeddingServer controller.
+// Each case is executed by runTestCase as its own ordered Ginkgo Context.
+type TestCase struct {
+ // Name is used as the description of the Ginkgo Context for this case.
+ Name string
+ // InitialState contains the objects to create before running assertions.
+ InitialState InitialState
+ // FinalState defines the expected Kubernetes state after reconciliation.
+ FinalState FinalState
+}
+
+// InitialState represents the initial Kubernetes objects to create for a test case.
+type InitialState struct {
+ // EmbeddingServer is the resource under test. It must be non-nil: its
+ // Namespace is read during setup before anything else is created.
+ EmbeddingServer *mcpv1alpha1.EmbeddingServer
+ // Secrets are created before the EmbeddingServer and deleted again on cleanup.
+ Secrets []*corev1.Secret
+}
+
+// FinalState represents the expected Kubernetes state after reconciliation.
+// Uses actual K8s objects for comparison - only non-nil/non-zero fields are checked
+// (NOTE(review): this depends on the verify* helpers, which are defined elsewhere).
+type FinalState struct {
+ // StatefulSet is the expected state. When nil, the field comparison is
+ // skipped; existence and the owner reference are still verified.
+ StatefulSet *appsv1.StatefulSet
+ // Service is the expected state. When nil, the field comparison is
+ // skipped; existence and the owner reference are still verified.
+ Service *corev1.Service
+ // Status holds the EmbeddingServer status expectations. It is always passed
+ // to verifyStatusEquals, including when nil.
+ Status *mcpv1alpha1.EmbeddingServerStatus
+}
+
+var _ = Describe("EmbeddingServer Controller Integration Tests", func() {
+ const (
+ timeout = time.Second * 30
+ interval = time.Millisecond * 250
+ defaultNamespace = "default"
+ )
+
+ // Helper function to create test namespace
+ createNamespace := func(namespace string) {
+ ns := &corev1.Namespace{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: namespace,
+ },
+ }
+ _ = k8sClient.Create(ctx, ns)
+ }
+
+ // Helper to run a single test case
+ runTestCase := func(tc TestCase) {
+ Context(tc.Name, Ordered, func() {
+ var createdEmbeddingServer *mcpv1alpha1.EmbeddingServer
+
+ BeforeAll(func() {
+ namespace := tc.InitialState.EmbeddingServer.Namespace
+ createNamespace(namespace)
+
+ // Create secrets first
+ for _, secret := range tc.InitialState.Secrets {
+ Expect(k8sClient.Create(ctx, secret)).Should(Succeed())
+ }
+
+ // Create the EmbeddingServer
+ Expect(k8sClient.Create(ctx, tc.InitialState.EmbeddingServer)).Should(Succeed())
+
+ // Fetch the created resource to get UID etc.
+ createdEmbeddingServer = &mcpv1alpha1.EmbeddingServer{}
+ Eventually(func() error {
+ return k8sClient.Get(ctx, types.NamespacedName{
+ Name: tc.InitialState.EmbeddingServer.Name,
+ Namespace: tc.InitialState.EmbeddingServer.Namespace,
+ }, createdEmbeddingServer)
+ }, timeout, interval).Should(Succeed())
+ })
+
+ AfterAll(func() {
+ // Clean up EmbeddingServer
+ if tc.InitialState.EmbeddingServer != nil {
+ _ = k8sClient.Delete(ctx, tc.InitialState.EmbeddingServer)
+ }
+ // Clean up secrets
+ for _, secret := range tc.InitialState.Secrets {
+ _ = k8sClient.Delete(ctx, secret)
+ }
+ })
+
+ // StatefulSet assertions
+ It("Should create StatefulSet with expected configuration", func() {
+ actual := &appsv1.StatefulSet{}
+ Eventually(func() error {
+ return k8sClient.Get(ctx, types.NamespacedName{
+ Name: tc.InitialState.EmbeddingServer.Name,
+ Namespace: tc.InitialState.EmbeddingServer.Namespace,
+ }, actual)
+ }, timeout, interval).Should(Succeed())
+
+ if tc.FinalState.StatefulSet != nil {
+ verifyStatefulSetEquals(actual, tc.FinalState.StatefulSet)
+ }
+ verifyOwnerReference(actual.OwnerReferences, createdEmbeddingServer, "StatefulSet")
+ })
+
+ // Service assertions
+ It("Should create Service with expected configuration", func() {
+ actual := &corev1.Service{}
+ Eventually(func() error {
+ return k8sClient.Get(ctx, types.NamespacedName{
+ Name: tc.InitialState.EmbeddingServer.Name,
+ Namespace: tc.InitialState.EmbeddingServer.Namespace,
+ }, actual)
+ }, timeout, interval).Should(Succeed())
+
+ if tc.FinalState.Service != nil {
+ verifyServiceEquals(actual, tc.FinalState.Service)
+ }
+ verifyOwnerReference(actual.OwnerReferences, createdEmbeddingServer, "Service")
+ })
+
+ // Status assertions
+ It("Should have expected status and finalizer", func() {
+ Eventually(func() bool {
+ actual := &mcpv1alpha1.EmbeddingServer{}
+ err := k8sClient.Get(ctx, types.NamespacedName{
+ Name: tc.InitialState.EmbeddingServer.Name,
+ Namespace: tc.InitialState.EmbeddingServer.Namespace,
+ }, actual)
+ if err != nil {
+ return false
+ }
+ return verifyStatusEquals(actual, tc.FinalState.Status)
+ }, timeout, interval).Should(BeTrue())
+ })
+ })
+ }
+
+ // Define test cases as a table using actual K8s objects
+ testCases := []TestCase{
+ {
+ Name: "When creating an EmbeddingServer with minimal config (verifies defaults)",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-defaults",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ // Only required fields - model and image
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{
+ "app.kubernetes.io/name": "embeddingserver",
+ "app.kubernetes.io/instance": "test-defaults",
+ "app.kubernetes.io/component": "embedding-server",
+ "app.kubernetes.io/managed-by": "toolhive-operator",
+ },
+ },
+ Spec: appsv1.StatefulSetSpec{
+ // Default: 1 replica
+ Replicas: ptr.To(int32(1)),
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Name: "embedding",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ // Default port: 8080
+ Args: []string{"--model-id", "sentence-transformers/all-MiniLM-L6-v2", "--port", "8080"},
+ Env: []corev1.EnvVar{{Name: "MODEL_ID", Value: "sentence-transformers/all-MiniLM-L6-v2"}},
+ // Default: IfNotPresent
+ ImagePullPolicy: corev1.PullIfNotPresent,
+ // Default: no resource limits or requests
+ Resources: corev1.ResourceRequirements{},
+ LivenessProbe: &corev1.Probe{
+ ProbeHandler: corev1.ProbeHandler{HTTPGet: &corev1.HTTPGetAction{Path: "/health"}},
+ },
+ ReadinessProbe: &corev1.Probe{
+ ProbeHandler: corev1.ProbeHandler{HTTPGet: &corev1.HTTPGetAction{Path: "/health"}},
+ },
+ }},
+ },
+ },
+ },
+ },
+ // Default port: 8080
+ Service: &corev1.Service{
+ Spec: corev1.ServiceSpec{
+ Ports: []corev1.ServicePort{{Port: 8080}},
+ },
+ },
+ Status: &mcpv1alpha1.EmbeddingServerStatus{
+ // URL uses default port
+ URL: "http://test-defaults.default.svc.cluster.local:8080",
+ },
+ },
+ },
+ {
+ Name: "When creating a basic EmbeddingServer",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-basic",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{
+ "app.kubernetes.io/name": "embeddingserver",
+ "app.kubernetes.io/instance": "test-basic",
+ "app.kubernetes.io/component": "embedding-server",
+ "app.kubernetes.io/managed-by": "toolhive-operator",
+ },
+ },
+ Spec: appsv1.StatefulSetSpec{
+ Replicas: ptr.To(int32(1)),
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Name: "embedding",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Args: []string{"--model-id", "sentence-transformers/all-MiniLM-L6-v2", "--port", "8080"},
+ Env: []corev1.EnvVar{{Name: "MODEL_ID", Value: "sentence-transformers/all-MiniLM-L6-v2"}},
+ LivenessProbe: &corev1.Probe{
+ ProbeHandler: corev1.ProbeHandler{HTTPGet: &corev1.HTTPGetAction{Path: "/health"}},
+ },
+ ReadinessProbe: &corev1.Probe{
+ ProbeHandler: corev1.ProbeHandler{HTTPGet: &corev1.HTTPGetAction{Path: "/health"}},
+ },
+ }},
+ },
+ },
+ },
+ },
+ Service: &corev1.Service{
+ Spec: corev1.ServiceSpec{
+ Ports: []corev1.ServicePort{{Port: 8080}},
+ },
+ },
+ Status: &mcpv1alpha1.EmbeddingServerStatus{
+ URL: "http://test-basic.default.svc.cluster.local:8080",
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with model cache enabled",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-with-cache",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ ModelCache: &mcpv1alpha1.ModelCacheConfig{
+ Enabled: true,
+ Size: "20Gi",
+ },
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Replicas: ptr.To(int32(1)),
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Name: "embedding",
+ Env: []corev1.EnvVar{{Name: "HF_HOME", Value: "/data"}},
+ VolumeMounts: []corev1.VolumeMount{{Name: "model-cache", MountPath: "/data"}},
+ }},
+ },
+ },
+ VolumeClaimTemplates: []corev1.PersistentVolumeClaim{{
+ ObjectMeta: metav1.ObjectMeta{Name: "model-cache"},
+ Spec: corev1.PersistentVolumeClaimSpec{
+ AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce},
+ Resources: corev1.VolumeResourceRequirements{
+ Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("20Gi")},
+ },
+ },
+ }},
+ },
+ },
+ Service: &corev1.Service{Spec: corev1.ServiceSpec{Ports: []corev1.ServicePort{{Port: 8080}}}},
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with resource requirements",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-resources",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ Resources: mcpv1alpha1.ResourceRequirements{
+ Limits: mcpv1alpha1.ResourceList{CPU: "2", Memory: "4Gi"},
+ Requests: mcpv1alpha1.ResourceList{CPU: "500m", Memory: "1Gi"},
+ },
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Name: "embedding",
+ Resources: corev1.ResourceRequirements{
+ Limits: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("2"), corev1.ResourceMemory: resource.MustParse("4Gi")},
+ Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("500m"), corev1.ResourceMemory: resource.MustParse("1Gi")},
+ },
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with custom replicas",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-replicas",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ Replicas: ptr.To(int32(3)),
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Replicas: ptr.To(int32(3)),
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with invalid PodTemplateSpec",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-invalid-podtemplate",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ PodTemplateSpec: &runtime.RawExtension{
+ Raw: []byte(`{"spec": {"containers": "invalid-not-an-array"}}`),
+ },
+ },
+ },
+ },
+ FinalState: FinalState{
+ Status: &mcpv1alpha1.EmbeddingServerStatus{
+ Phase: mcpv1alpha1.EmbeddingServerPhaseFailed,
+ Conditions: []metav1.Condition{{
+ Type: mcpv1alpha1.ConditionPodTemplateValid,
+ Status: metav1.ConditionFalse,
+ Reason: mcpv1alpha1.ConditionReasonPodTemplateInvalid,
+ }},
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with valid PodTemplateSpec (nodeSelector)",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-valid-podtemplate",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ PodTemplateSpec: &runtime.RawExtension{
+ Raw: []byte(`{"spec":{"nodeSelector":{"disktype":"ssd"}}}`),
+ },
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ NodeSelector: map[string]string{"disktype": "ssd"},
+ },
+ },
+ },
+ },
+ Status: &mcpv1alpha1.EmbeddingServerStatus{
+ Conditions: []metav1.Condition{{
+ Type: mcpv1alpha1.ConditionPodTemplateValid,
+ Status: metav1.ConditionTrue,
+ }},
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with HuggingFace token secret",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-hf-token",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ HFTokenSecretRef: &mcpv1alpha1.SecretKeyRef{
+ Name: "hf-token-secret",
+ Key: "token",
+ },
+ },
+ },
+ Secrets: []*corev1.Secret{{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "hf-token-secret",
+ Namespace: defaultNamespace,
+ },
+ Data: map[string][]byte{"token": []byte("hf_test_token_value")},
+ }},
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Name: "embedding",
+ Env: []corev1.EnvVar{{
+ Name: "HF_TOKEN",
+ ValueFrom: &corev1.EnvVarSource{
+ SecretKeyRef: &corev1.SecretKeySelector{
+ LocalObjectReference: corev1.LocalObjectReference{Name: "hf-token-secret"},
+ Key: "token",
+ },
+ },
+ }},
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with custom environment variables",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-custom-env",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ Env: []mcpv1alpha1.EnvVar{
+ {Name: "CUSTOM_VAR_1", Value: "value1"},
+ {Name: "CUSTOM_VAR_2", Value: "value2"},
+ },
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Name: "embedding",
+ Env: []corev1.EnvVar{
+ {Name: "CUSTOM_VAR_1", Value: "value1"},
+ {Name: "CUSTOM_VAR_2", Value: "value2"},
+ },
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with custom args",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-custom-args",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ Args: []string{"--max-concurrent-requests", "512", "--tokenization-workers", "4"},
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Name: "embedding",
+ Args: []string{"--model-id", "sentence-transformers/all-MiniLM-L6-v2", "--max-concurrent-requests", "512", "--tokenization-workers", "4"},
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with custom port",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-custom-port",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 9090,
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Name: "embedding",
+ Args: []string{"--port", "9090"},
+ }},
+ },
+ },
+ },
+ },
+ Service: &corev1.Service{Spec: corev1.ServiceSpec{Ports: []corev1.ServicePort{{Port: 9090}}}},
+ Status: &mcpv1alpha1.EmbeddingServerStatus{URL: "http://test-custom-port.default.svc.cluster.local:9090"},
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with ImagePullPolicy Always",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-imagepullpolicy-always",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ ImagePullPolicy: "Always",
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Name: "embedding",
+ ImagePullPolicy: corev1.PullAlways,
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with ImagePullPolicy Never",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-imagepullpolicy-never",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ ImagePullPolicy: "Never",
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Name: "embedding",
+ ImagePullPolicy: corev1.PullNever,
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with model cache and custom storage class",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-cache-storageclass",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ ModelCache: &mcpv1alpha1.ModelCacheConfig{
+ Enabled: true,
+ Size: "50Gi",
+ StorageClassName: ptr.To("fast-ssd"),
+ },
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ VolumeClaimTemplates: []corev1.PersistentVolumeClaim{{
+ ObjectMeta: metav1.ObjectMeta{Name: "model-cache"},
+ Spec: corev1.PersistentVolumeClaimSpec{
+ StorageClassName: ptr.To("fast-ssd"),
+ AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce},
+ Resources: corev1.VolumeResourceRequirements{
+ Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("50Gi")},
+ },
+ },
+ }},
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with model cache ReadWriteMany access mode",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-cache-rwx",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ ModelCache: &mcpv1alpha1.ModelCacheConfig{
+ Enabled: true,
+ Size: "10Gi",
+ AccessMode: "ReadWriteMany",
+ },
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ VolumeClaimTemplates: []corev1.PersistentVolumeClaim{{
+ ObjectMeta: metav1.ObjectMeta{Name: "model-cache"},
+ Spec: corev1.PersistentVolumeClaimSpec{
+ AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany},
+ },
+ }},
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with PodTemplateSpec tolerations",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-tolerations",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ PodTemplateSpec: &runtime.RawExtension{
+ Raw: []byte(`{"spec":{"tolerations":[{"key":"gpu","operator":"Exists","effect":"NoSchedule"}]}}`),
+ },
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Tolerations: []corev1.Toleration{{
+ Key: "gpu",
+ Operator: corev1.TolerationOpExists,
+ Effect: corev1.TaintEffectNoSchedule,
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with PodTemplateSpec serviceAccountName",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-serviceaccount",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ PodTemplateSpec: &runtime.RawExtension{
+ Raw: []byte(`{"spec":{"serviceAccountName":"custom-sa"}}`),
+ },
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Replicas: ptr.To(int32(1)),
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ ServiceAccountName: "custom-sa",
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with ResourceOverrides on StatefulSet",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-resource-overrides-sts",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ ResourceOverrides: &mcpv1alpha1.EmbeddingResourceOverrides{
+ StatefulSet: &mcpv1alpha1.EmbeddingStatefulSetOverrides{
+ ResourceMetadataOverrides: mcpv1alpha1.ResourceMetadataOverrides{
+ Annotations: map[string]string{"custom-annotation": "sts-value"},
+ Labels: map[string]string{"custom-label": "sts-value"},
+ },
+ },
+ },
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{
+ "app.kubernetes.io/name": "embeddingserver",
+ "app.kubernetes.io/instance": "test-resource-overrides-sts",
+ "app.kubernetes.io/component": "embedding-server",
+ "app.kubernetes.io/managed-by": "toolhive-operator",
+ "custom-label": "sts-value",
+ },
+ Annotations: map[string]string{
+ "custom-annotation": "sts-value",
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with ResourceOverrides on Service",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-resource-overrides-svc",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ ResourceOverrides: &mcpv1alpha1.EmbeddingResourceOverrides{
+ Service: &mcpv1alpha1.ResourceMetadataOverrides{
+ Annotations: map[string]string{"service-annotation": "svc-value"},
+ Labels: map[string]string{"service-label": "svc-value"},
+ },
+ },
+ },
+ },
+ },
+ FinalState: FinalState{
+ Service: &corev1.Service{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{
+ "app.kubernetes.io/name": "embeddingserver",
+ "app.kubernetes.io/instance": "test-resource-overrides-svc",
+ "app.kubernetes.io/component": "embedding-server",
+ "app.kubernetes.io/managed-by": "toolhive-operator",
+ "service-label": "svc-value",
+ },
+ Annotations: map[string]string{
+ "service-annotation": "svc-value",
+ },
+ },
+ Spec: corev1.ServiceSpec{
+ Ports: []corev1.ServicePort{{Port: 8080}},
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer with ResourceOverrides on pod template",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-resource-overrides-pod",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ ResourceOverrides: &mcpv1alpha1.EmbeddingResourceOverrides{
+ StatefulSet: &mcpv1alpha1.EmbeddingStatefulSetOverrides{
+ PodTemplateMetadataOverrides: &mcpv1alpha1.ResourceMetadataOverrides{
+ Annotations: map[string]string{"pod-annotation": "pod-value"},
+ Labels: map[string]string{"pod-label": "pod-value"},
+ },
+ },
+ },
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Replicas: ptr.To(int32(1)),
+ Template: corev1.PodTemplateSpec{
+ ObjectMeta: metav1.ObjectMeta{
+ Labels: map[string]string{
+ "app.kubernetes.io/name": "embeddingserver",
+ "app.kubernetes.io/instance": "test-resource-overrides-pod",
+ "pod-label": "pod-value",
+ },
+ Annotations: map[string]string{
+ "pod-annotation": "pod-value",
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer verifies container port",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-container-port",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ },
+ },
+ },
+ FinalState: FinalState{
+ StatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Name: "embedding",
+ Ports: []corev1.ContainerPort{{
+ Name: "http",
+ ContainerPort: 8080,
+ Protocol: corev1.ProtocolTCP,
+ }},
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "When creating an EmbeddingServer verifies Service selector and type",
+ InitialState: InitialState{
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-service-selector",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ },
+ },
+ },
+ FinalState: FinalState{
+ Service: &corev1.Service{
+ Spec: corev1.ServiceSpec{
+ Type: corev1.ServiceTypeClusterIP,
+ Selector: map[string]string{
+ "app.kubernetes.io/name": "embeddingserver",
+ "app.kubernetes.io/instance": "test-service-selector",
+ },
+ Ports: []corev1.ServicePort{{Port: 8080}},
+ },
+ },
+ },
+ },
+ }
+
+ // Run all test cases
+ for _, tc := range testCases {
+ runTestCase(tc)
+ }
+})
+
+// --- Equality helper functions for K8s objects ---
+// The *G variants take an explicit Gomega so they can be used inside Eventually/Consistently blocks.
+// The variants without the G suffix simply call the *G variant with Default.
+
+// verifyStatefulSetEquals checks that actual StatefulSet contains expected fields.
+// It is a convenience wrapper that forwards to verifyStatefulSetEqualsG with Default,
+// for call sites that are not running inside an Eventually/Consistently block.
+func verifyStatefulSetEquals(actual, expected *appsv1.StatefulSet) {
+ verifyStatefulSetEqualsG(Default, actual, expected)
+}
+
+// verifyStatefulSetEqualsG is the Gomega-aware version for use in Eventually blocks.
+//
+// It performs a partial ("contains") comparison: only fields that are set on
+// expected are asserted against actual, and all assertions go through g.
+//   - Replicas and ServiceAccountName are compared only when set on expected.
+//   - Labels, annotations, node selector entries and tolerations must each be
+//     present in actual; extra entries in actual are ignored.
+//   - Containers and VolumeClaimTemplates are compared positionally (expected[i]
+//     against actual[i]) using verifyContainerEqualsG and verifyPVCEqualsG.
+func verifyStatefulSetEqualsG(g Gomega, actual, expected *appsv1.StatefulSet) {
+	// Replicas
+	if expected.Spec.Replicas != nil {
+		g.Expect(actual.Spec.Replicas).To(Equal(expected.Spec.Replicas), "replicas mismatch")
+	}
+
+	// Labels
+	for k, v := range expected.Labels {
+		g.Expect(actual.Labels).To(HaveKeyWithValue(k, v))
+	}
+
+	// Annotations
+	for k, v := range expected.Annotations {
+		g.Expect(actual.Annotations).To(HaveKeyWithValue(k, v))
+	}
+
+	// NodeSelector
+	for k, v := range expected.Spec.Template.Spec.NodeSelector {
+		g.Expect(actual.Spec.Template.Spec.NodeSelector).To(HaveKeyWithValue(k, v))
+	}
+
+	// Tolerations
+	for _, exp := range expected.Spec.Template.Spec.Tolerations {
+		g.Expect(actual.Spec.Template.Spec.Tolerations).To(ContainElement(exp))
+	}
+
+	// ServiceAccountName
+	if expected.Spec.Template.Spec.ServiceAccountName != "" {
+		g.Expect(actual.Spec.Template.Spec.ServiceAccountName).To(Equal(expected.Spec.Template.Spec.ServiceAccountName))
+	}
+
+	// Pod template labels
+	for k, v := range expected.Spec.Template.Labels {
+		g.Expect(actual.Spec.Template.Labels).To(HaveKeyWithValue(k, v))
+	}
+
+	// Pod template annotations
+	for k, v := range expected.Spec.Template.Annotations {
+		g.Expect(actual.Spec.Template.Annotations).To(HaveKeyWithValue(k, v))
+	}
+
+	// Containers are compared by position. Assert the length first so that a
+	// missing container surfaces as a regular (retryable) assertion failure
+	// rather than an index-out-of-range panic.
+	expectedContainers := expected.Spec.Template.Spec.Containers
+	actualContainers := actual.Spec.Template.Spec.Containers
+	g.Expect(len(actualContainers)).To(BeNumerically(">=", len(expectedContainers)),
+		"StatefulSet has fewer containers than expected")
+	for i, exp := range expectedContainers {
+		verifyContainerEqualsG(g, actualContainers[i], exp)
+	}
+
+	// VolumeClaimTemplates are compared by position as well; same length guard.
+	expectedPVCs := expected.Spec.VolumeClaimTemplates
+	actualPVCs := actual.Spec.VolumeClaimTemplates
+	g.Expect(len(actualPVCs)).To(BeNumerically(">=", len(expectedPVCs)),
+		"StatefulSet has fewer volumeClaimTemplates than expected")
+	for i, exp := range expectedPVCs {
+		verifyPVCEqualsG(g, actualPVCs[i], exp)
+	}
+}
+
+// verifyContainerEqualsG is the Gomega-aware version for use in Eventually blocks.
+//
+// It performs a partial comparison of a single container: only fields that are
+// set on expected are asserted against actual.
+//   - Name, Image and ImagePullPolicy are compared when non-empty.
+//   - Every expected arg must be an element of actual.Args (order and adjacency
+//     are not checked).
+//   - Env vars are matched by Name only; Value/ValueFrom are not compared.
+//   - Volume mounts are matched by Name and MountPath.
+//   - Resource limits/requests are compared semantically with Quantity.Cmp, the
+//     same way verifyPVCEqualsG compares storage sizes.
+//   - Probes are only checked for presence.
+//   - Container ports are matched by Name, ContainerPort and Protocol.
+func verifyContainerEqualsG(g Gomega, actual, expected corev1.Container) {
+	if expected.Name != "" {
+		g.Expect(actual.Name).To(Equal(expected.Name))
+	}
+	if expected.Image != "" {
+		g.Expect(actual.Image).To(Equal(expected.Image))
+	}
+	if expected.ImagePullPolicy != "" {
+		g.Expect(actual.ImagePullPolicy).To(Equal(expected.ImagePullPolicy))
+	}
+
+	for _, arg := range expected.Args {
+		g.Expect(actual.Args).To(ContainElement(arg))
+	}
+
+	for _, env := range expected.Env {
+		g.Expect(actual.Env).To(ContainElement(HaveField("Name", env.Name)))
+	}
+
+	for _, vm := range expected.VolumeMounts {
+		g.Expect(actual.VolumeMounts).To(ContainElement(And(
+			HaveField("Name", vm.Name),
+			HaveField("MountPath", vm.MountPath),
+		)))
+	}
+
+	// Check resource limits - only verify if expected has values.
+	// Quantities are compared with Cmp instead of Equal (reflect.DeepEqual) so
+	// that semantically equal values match regardless of internal representation.
+	for name, want := range expected.Resources.Limits {
+		g.Expect(actual.Resources.Limits).To(HaveKey(name), "missing resource limit %s", name)
+		got := actual.Resources.Limits[name]
+		g.Expect(got.Cmp(want)).To(Equal(0), "resource limit %s mismatch", name)
+	}
+
+	// Check resource requests - only verify if expected has values.
+	for name, want := range expected.Resources.Requests {
+		g.Expect(actual.Resources.Requests).To(HaveKey(name), "missing resource request %s", name)
+		got := actual.Resources.Requests[name]
+		g.Expect(got.Cmp(want)).To(Equal(0), "resource request %s mismatch", name)
+	}
+
+	if expected.LivenessProbe != nil {
+		g.Expect(actual.LivenessProbe).NotTo(BeNil())
+	}
+	if expected.ReadinessProbe != nil {
+		g.Expect(actual.ReadinessProbe).NotTo(BeNil())
+	}
+
+	// Container ports
+	for _, exp := range expected.Ports {
+		g.Expect(actual.Ports).To(ContainElement(And(
+			HaveField("Name", exp.Name),
+			HaveField("ContainerPort", exp.ContainerPort),
+			HaveField("Protocol", exp.Protocol),
+		)))
+	}
+}
+
+// verifyPVCEqualsG is the Gomega-aware version for use in Eventually blocks.
+//
+// It asserts through g that actual carries every PVC field set on expected:
+// the name (when non-empty), each expected access mode, the storage class
+// (when non-nil) and the requested storage size (when requests are non-nil).
+func verifyPVCEqualsG(g Gomega, actual, expected corev1.PersistentVolumeClaim) {
+	if wantName := expected.Name; wantName != "" {
+		g.Expect(actual.Name).To(Equal(wantName))
+	}
+
+	wantModes := expected.Spec.AccessModes
+	for idx := range wantModes {
+		g.Expect(actual.Spec.AccessModes).To(ContainElement(wantModes[idx]))
+	}
+
+	// StorageClassName
+	if wantClass := expected.Spec.StorageClassName; wantClass != nil {
+		g.Expect(actual.Spec.StorageClassName).To(Equal(wantClass))
+	}
+
+	// Storage size, compared semantically via Quantity.Cmp.
+	if wantRequests := expected.Spec.Resources.Requests; wantRequests != nil {
+		wantSize := wantRequests[corev1.ResourceStorage]
+		gotSize := actual.Spec.Resources.Requests[corev1.ResourceStorage]
+		g.Expect(gotSize.Cmp(wantSize)).To(Equal(0), "storage size mismatch")
+	}
+}
+
+// verifyServiceEquals checks that actual Service contains expected ports.
+// It is a convenience wrapper that forwards to verifyServiceEqualsG with Default,
+// so the type, selector, label and annotation checks performed there apply as well.
+func verifyServiceEquals(actual, expected *corev1.Service) {
+ verifyServiceEqualsG(Default, actual, expected)
+}
+
+// verifyServiceEqualsG is the Gomega-aware version for use in Eventually blocks.
+//
+// It performs a partial comparison: only fields set on expected are asserted
+// against actual, and all assertions go through g. Ports are compared by
+// position (port number only); the Service type is compared when non-empty;
+// selector entries, labels and annotations must each be present in actual.
+func verifyServiceEqualsG(g Gomega, actual, expected *corev1.Service) {
+	// Ports are compared by position. Assert the length first so that a
+	// missing port surfaces as a regular (retryable) assertion failure rather
+	// than an index-out-of-range panic.
+	g.Expect(len(actual.Spec.Ports)).To(BeNumerically(">=", len(expected.Spec.Ports)),
+		"Service has fewer ports than expected")
+	for i, exp := range expected.Spec.Ports {
+		g.Expect(actual.Spec.Ports[i].Port).To(Equal(exp.Port))
+	}
+
+	// Service type
+	if expected.Spec.Type != "" {
+		g.Expect(actual.Spec.Type).To(Equal(expected.Spec.Type))
+	}
+
+	// Selector
+	for k, v := range expected.Spec.Selector {
+		g.Expect(actual.Spec.Selector).To(HaveKeyWithValue(k, v))
+	}
+
+	// Labels
+	for k, v := range expected.Labels {
+		g.Expect(actual.Labels).To(HaveKeyWithValue(k, v))
+	}
+
+	// Annotations
+	for k, v := range expected.Annotations {
+		g.Expect(actual.Annotations).To(HaveKeyWithValue(k, v))
+	}
+}
+
+// verifyStatusEquals reports whether actual satisfies the expected status and
+// carries the EmbeddingServer finalizer.
+//
+// Phase and URL are compared only when expected is non-nil and the respective
+// field is non-empty. The finalizer check is always performed, even when
+// expected is nil.
+func verifyStatusEquals(actual *mcpv1alpha1.EmbeddingServer, expected *mcpv1alpha1.EmbeddingServerStatus) bool {
+	if expected != nil {
+		phaseDiffers := expected.Phase != "" && actual.Status.Phase != expected.Phase
+		urlDiffers := expected.URL != "" && actual.Status.URL != expected.URL
+		if phaseDiffers || urlDiffers {
+			return false
+		}
+	}
+	// Always verify finalizer is present
+	return containsString(actual.Finalizers, "embeddingserver.toolhive.stacklok.dev/finalizer")
+}
+
+// containsString reports whether s is an element of slice.
+// A nil or empty slice never contains anything.
+func containsString(slice []string, s string) bool {
+	found := false
+	for idx := 0; idx < len(slice) && !found; idx++ {
+		found = slice[idx] == s
+	}
+	return found
+}
+
+// verifyOwnerReference asserts that ownerRefs holds exactly one reference and
+// that it points at the given EmbeddingServer (API version, kind, name, UID)
+// with both Controller and BlockOwnerDeletion set to true.
+// The trailing string parameter is unused.
+func verifyOwnerReference(ownerRefs []metav1.OwnerReference, embedding *mcpv1alpha1.EmbeddingServer, _ string) {
+	Expect(ownerRefs).To(HaveLen(1))
+
+	owner := ownerRefs[0]
+	Expect(owner.APIVersion).To(Equal("toolhive.stacklok.dev/v1alpha1"))
+	Expect(owner.Kind).To(Equal("EmbeddingServer"))
+	Expect(owner.Name).To(Equal(embedding.Name))
+	Expect(owner.UID).To(Equal(embedding.UID))
+	Expect(owner.Controller).To(HaveValue(BeTrue()))
+	Expect(owner.BlockOwnerDeletion).To(HaveValue(BeTrue()))
+}
diff --git a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go
new file mode 100644
index 0000000000..12aecdffa3
--- /dev/null
+++ b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go
@@ -0,0 +1,532 @@
+// SPDX-License-Identifier: Apache-2.0
+
+// Package controllers contains integration tests for the EmbeddingServer controller.
+package controllers
+
+import (
+ "time"
+
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+ appsv1 "k8s.io/api/apps/v1"
+ corev1 "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/api/resource"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/utils/ptr"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+
+ mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1"
+)
+
+// UpdateTestCase defines a test case for EmbeddingServer update scenarios.
+// Each case creates one EmbeddingServer and then applies its update steps in order.
+type UpdateTestCase struct {
+ // Name is used as the description of the Ginkgo Context for this case.
+ Name string
+ // InitialState is the EmbeddingServer to create before any update is applied.
+ // The runner creates a deep copy, so the value in the table is not mutated.
+ InitialState *mcpv1alpha1.EmbeddingServer
+ // Updates are the update steps; each one becomes its own spec.
+ Updates []UpdateStep
+}
+
+// UpdateStep defines a single update operation and its expected result.
+type UpdateStep struct {
+ // Name is used as the description of the spec for this step.
+ Name string
+ // ApplyUpdate mutates the freshly fetched EmbeddingServer before it is written back.
+ // The runner calls it inside a retry loop, so it may run more than once per step.
+ ApplyUpdate func(es *mcpv1alpha1.EmbeddingServer)
+ // Expected StatefulSet state after the update (nil means expect no changes)
+ // When nil, the runner asserts that the StatefulSet spec stays equal to its pre-update value.
+ ExpectedStatefulSet *appsv1.StatefulSet
+ // Expected Service state after the update (nil means expect no changes)
+ // When nil, the runner asserts that the Service spec stays equal to its pre-update value.
+ ExpectedService *corev1.Service
+}
+
+var _ = Describe("EmbeddingServer Controller Update Tests", func() {
+ const (
+ // timeout bounds each Eventually wait in this file.
+ timeout = time.Second * 30
+ // interval is the polling period for Eventually and Consistently.
+ interval = time.Millisecond * 250
+ // defaultNamespace is the namespace every test EmbeddingServer is created in.
+ defaultNamespace = "default"
+ )
+
+ // Define update test cases
+ // Each entry creates one EmbeddingServer (InitialState) and applies its Updates in order.
+ // Expectations are partial: the verify*G helpers only assert fields that are set here.
+ // A step that leaves ExpectedService unset makes the runner assert that the Service spec
+ // does not change for a short period after the update.
+ updateTestCases := []UpdateTestCase{
+ {
+ // Two consecutive image bumps; only the container image is asserted.
+ Name: "When updating EmbeddingServer image",
+ InitialState: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-update-image",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:v1.0",
+ Port: 8080,
+ },
+ },
+ Updates: []UpdateStep{
+ {
+ Name: "Should update StatefulSet when image changes to v2.0",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.Image = "ghcr.io/huggingface/text-embeddings-inference:v2.0"
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Image: "ghcr.io/huggingface/text-embeddings-inference:v2.0",
+ }},
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "Should update StatefulSet when image changes to v3.0",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.Image = "ghcr.io/huggingface/text-embeddings-inference:v3.0"
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Image: "ghcr.io/huggingface/text-embeddings-inference:v3.0",
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ // Scale up, then down; only spec.replicas is asserted.
+ Name: "When updating EmbeddingServer replicas",
+ InitialState: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-update-replicas",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ Replicas: ptr.To(int32(1)),
+ },
+ },
+ Updates: []UpdateStep{
+ {
+ Name: "Should scale up to 3 replicas",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.Replicas = ptr.To(int32(3))
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Replicas: ptr.To(int32(3)),
+ },
+ },
+ },
+ {
+ Name: "Should scale down to 2 replicas",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.Replicas = ptr.To(int32(2))
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Replicas: ptr.To(int32(2)),
+ },
+ },
+ },
+ },
+ },
+ {
+ // The new model id must show up in the container args.
+ Name: "When updating EmbeddingServer model",
+ InitialState: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-update-model",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ },
+ },
+ Updates: []UpdateStep{
+ {
+ Name: "Should update StatefulSet args when model changes",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.Model = "sentence-transformers/all-mpnet-base-v2"
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Args: []string{"--model-id", "sentence-transformers/all-mpnet-base-v2"},
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ // NOTE(review): verifyContainerEqualsG matches env vars by name only, so the
+ // first step's expectation (LOG_LEVEL present) already holds before the update
+ // and does not prove that the value changed.
+ Name: "When updating EmbeddingServer environment variables",
+ InitialState: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-update-env",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ Env: []mcpv1alpha1.EnvVar{
+ {Name: "LOG_LEVEL", Value: "info"},
+ },
+ },
+ },
+ Updates: []UpdateStep{
+ {
+ Name: "Should update StatefulSet when env var value changes",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.Env = []mcpv1alpha1.EnvVar{
+ {Name: "LOG_LEVEL", Value: "debug"},
+ }
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Env: []corev1.EnvVar{{Name: "LOG_LEVEL"}},
+ }},
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "Should update StatefulSet when new env var is added",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.Env = []mcpv1alpha1.EnvVar{
+ {Name: "LOG_LEVEL", Value: "debug"},
+ {Name: "NEW_VAR", Value: "new_value"},
+ }
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Env: []corev1.EnvVar{
+ {Name: "LOG_LEVEL"},
+ {Name: "NEW_VAR"},
+ },
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ // The only case here whose step asserts both the StatefulSet and the Service port.
+ Name: "When updating EmbeddingServer port",
+ InitialState: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-update-port",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Port: 8080,
+ },
+ },
+ Updates: []UpdateStep{
+ {
+ Name: "Should update StatefulSet and Service when port changes",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.Port = 9090
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Args: []string{"--port", "9090"},
+ }},
+ },
+ },
+ },
+ },
+ ExpectedService: &corev1.Service{
+ Spec: corev1.ServiceSpec{
+ Ports: []corev1.ServicePort{{Port: 9090}},
+ },
+ },
+ },
+ },
+ },
+ {
+ // CPU/memory limits and requests are both raised in a single step.
+ Name: "When updating EmbeddingServer resources",
+ InitialState: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-update-resources",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Resources: mcpv1alpha1.ResourceRequirements{
+ Limits: mcpv1alpha1.ResourceList{CPU: "1", Memory: "2Gi"},
+ Requests: mcpv1alpha1.ResourceList{CPU: "500m", Memory: "1Gi"},
+ },
+ },
+ },
+ Updates: []UpdateStep{
+ {
+ Name: "Should update StatefulSet when resource limits change",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.Resources = mcpv1alpha1.ResourceRequirements{
+ Limits: mcpv1alpha1.ResourceList{CPU: "2", Memory: "4Gi"},
+ Requests: mcpv1alpha1.ResourceList{CPU: "1", Memory: "2Gi"},
+ }
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Resources: corev1.ResourceRequirements{
+ Limits: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("2"),
+ corev1.ResourceMemory: resource.MustParse("4Gi"),
+ },
+ Requests: corev1.ResourceList{
+ corev1.ResourceCPU: resource.MustParse("1"),
+ corev1.ResourceMemory: resource.MustParse("2Gi"),
+ },
+ },
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ // NOTE(review): args are asserted with ContainElement per expected arg, so the
+ // "args are removed" step does not verify that the old custom args are gone;
+ // its expected args do not depend on the removed custom args.
+ Name: "When updating EmbeddingServer args",
+ InitialState: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-update-args",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ Args: []string{"--max-concurrent-requests", "256"},
+ },
+ },
+ Updates: []UpdateStep{
+ {
+ Name: "Should update StatefulSet when args change",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.Args = []string{"--max-concurrent-requests", "512", "--tokenization-workers", "4"}
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Args: []string{"--max-concurrent-requests", "512", "--tokenization-workers", "4"},
+ }},
+ },
+ },
+ },
+ },
+ },
+ {
+ Name: "Should update StatefulSet when args are removed",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.Args = nil
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ Args: []string{"--model-id", "sentence-transformers/all-MiniLM-L6-v2"},
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ // IfNotPresent -> Always; only the container's pull policy is asserted.
+ Name: "When updating EmbeddingServer ImagePullPolicy",
+ InitialState: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-update-imagepullpolicy",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ ImagePullPolicy: "IfNotPresent",
+ },
+ },
+ Updates: []UpdateStep{
+ {
+ Name: "Should update StatefulSet when ImagePullPolicy changes",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.ImagePullPolicy = "Always"
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ Spec: appsv1.StatefulSetSpec{
+ Template: corev1.PodTemplateSpec{
+ Spec: corev1.PodSpec{
+ Containers: []corev1.Container{{
+ ImagePullPolicy: corev1.PullAlways,
+ }},
+ },
+ },
+ },
+ },
+ },
+ },
+ },
+ {
+ // Starts without overrides; annotations are added to the StatefulSet first,
+ // then to the Service as well. The second step's StatefulSet expectation is
+ // the same annotation the first step already added.
+ Name: "When updating EmbeddingServer ResourceOverrides",
+ InitialState: &mcpv1alpha1.EmbeddingServer{
+ ObjectMeta: metav1.ObjectMeta{
+ Name: "test-update-resourceoverrides",
+ Namespace: defaultNamespace,
+ },
+ Spec: mcpv1alpha1.EmbeddingServerSpec{
+ Model: "sentence-transformers/all-MiniLM-L6-v2",
+ Image: "ghcr.io/huggingface/text-embeddings-inference:latest",
+ },
+ },
+ Updates: []UpdateStep{
+ {
+ Name: "Should update StatefulSet when adding annotations",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.ResourceOverrides = &mcpv1alpha1.EmbeddingResourceOverrides{
+ StatefulSet: &mcpv1alpha1.EmbeddingStatefulSetOverrides{
+ ResourceMetadataOverrides: mcpv1alpha1.ResourceMetadataOverrides{
+ Annotations: map[string]string{"new-annotation": "new-value"},
+ },
+ },
+ }
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ ObjectMeta: metav1.ObjectMeta{
+ Annotations: map[string]string{"new-annotation": "new-value"},
+ },
+ },
+ },
+ {
+ Name: "Should update StatefulSet and Service when adding annotations to both",
+ ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) {
+ es.Spec.ResourceOverrides = &mcpv1alpha1.EmbeddingResourceOverrides{
+ StatefulSet: &mcpv1alpha1.EmbeddingStatefulSetOverrides{
+ ResourceMetadataOverrides: mcpv1alpha1.ResourceMetadataOverrides{
+ Annotations: map[string]string{"new-annotation": "new-value"},
+ },
+ },
+ Service: &mcpv1alpha1.ResourceMetadataOverrides{
+ Annotations: map[string]string{"service-annotation": "service-value"},
+ },
+ }
+ },
+ ExpectedStatefulSet: &appsv1.StatefulSet{
+ ObjectMeta: metav1.ObjectMeta{
+ Annotations: map[string]string{"new-annotation": "new-value"},
+ },
+ },
+ ExpectedService: &corev1.Service{
+ ObjectMeta: metav1.ObjectMeta{
+ Annotations: map[string]string{"service-annotation": "service-value"},
+ },
+ },
+ },
+ },
+ },
+ }
+
+	// runUpdateTestCase registers one Ordered Context for tc: the EmbeddingServer
+	// is created once in BeforeAll, each update step becomes its own spec (run in
+	// table order against the same object), and the object is deleted in AfterAll.
+	runUpdateTestCase := func(tc UpdateTestCase) {
+		Context(tc.Name, Ordered, func() {
+			var embeddingServer *mcpv1alpha1.EmbeddingServer
+
+			BeforeAll(func() {
+				// Best effort: the error is deliberately ignored (e.g. the namespace
+				// already exists); a real problem surfaces in the Create below.
+				_ = k8sClient.Create(ctx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: tc.InitialState.Namespace}})
+				// Work on a copy so the table entry itself is never mutated.
+				embeddingServer = tc.InitialState.DeepCopy()
+				Expect(k8sClient.Create(ctx, embeddingServer)).To(Succeed())
+				// Wait for both child objects: every step reads the StatefulSet and
+				// the Service up front, so waiting for the StatefulSet alone could
+				// race with Service creation.
+				Eventually(func(g Gomega) {
+					key := client.ObjectKeyFromObject(embeddingServer)
+					g.Expect(k8sClient.Get(ctx, key, &appsv1.StatefulSet{})).To(Succeed())
+					g.Expect(k8sClient.Get(ctx, key, &corev1.Service{})).To(Succeed())
+				}, timeout, interval).Should(Succeed())
+			})
+
+			AfterAll(func() {
+				// Best-effort cleanup; a failed delete must not fail the suite.
+				_ = k8sClient.Delete(ctx, embeddingServer)
+			})
+
+			for _, update := range tc.Updates {
+				update := update
+				It(update.Name, func() {
+					// Capture original state before update
+					originalSts := &appsv1.StatefulSet{}
+					Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), originalSts)).To(Succeed())
+					originalSvc := &corev1.Service{}
+					Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), originalSvc)).To(Succeed())
+
+					// Apply the update. Re-fetching inside the retry loop means a failed
+					// Update (e.g. a write conflict) is retried against the latest version.
+					Eventually(func(g Gomega) {
+						g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), embeddingServer)).To(Succeed())
+						update.ApplyUpdate(embeddingServer)
+						g.Expect(k8sClient.Update(ctx, embeddingServer)).To(Succeed())
+					}, timeout, interval).Should(Succeed())
+
+					// Verify the StatefulSet matches expected state (nil means expect no changes)
+					if update.ExpectedStatefulSet != nil {
+						Eventually(func(g Gomega) {
+							sts := &appsv1.StatefulSet{}
+							g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), sts)).To(Succeed())
+							verifyStatefulSetEqualsG(g, sts, update.ExpectedStatefulSet)
+						}, timeout, interval).Should(Succeed())
+					} else {
+						// Verify StatefulSet hasn't changed
+						Consistently(func(g Gomega) {
+							sts := &appsv1.StatefulSet{}
+							g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), sts)).To(Succeed())
+							g.Expect(sts.Spec).To(Equal(originalSts.Spec))
+						}, time.Second*2, interval).Should(Succeed())
+					}
+
+					// Verify the Service matches expected state (nil means expect no changes)
+					if update.ExpectedService != nil {
+						Eventually(func(g Gomega) {
+							svc := &corev1.Service{}
+							g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), svc)).To(Succeed())
+							verifyServiceEqualsG(g, svc, update.ExpectedService)
+						}, timeout, interval).Should(Succeed())
+					} else {
+						// Verify Service hasn't changed
+						Consistently(func(g Gomega) {
+							svc := &corev1.Service{}
+							g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), svc)).To(Succeed())
+							g.Expect(svc.Spec).To(Equal(originalSvc.Spec))
+						}, time.Second*2, interval).Should(Succeed())
+					}
+				})
+			}
+		})
+	}
+
+ // Run all update test cases
+ // Registration happens while the spec tree is built: one Context per table entry.
+ for _, tc := range updateTestCases {
+ runUpdateTestCase(tc)
+ }
+})
diff --git a/cmd/thv-operator/test-integration/embedding-server/suite_test.go b/cmd/thv-operator/test-integration/embedding-server/suite_test.go
new file mode 100644
index 0000000000..d8e7376933
--- /dev/null
+++ b/cmd/thv-operator/test-integration/embedding-server/suite_test.go
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: Apache-2.0
+
+// Package controllers contains integration tests for the EmbeddingServer controller.
+package controllers
+
+import (
+ "context"
+ "path/filepath"
+ "testing"
+ "time"
+
+ . "github.com/onsi/ginkgo/v2"
+ . "github.com/onsi/gomega"
+ "go.uber.org/zap/zapcore"
+ appsv1 "k8s.io/api/apps/v1"
+ corev1 "k8s.io/api/core/v1"
+ "k8s.io/client-go/kubernetes/scheme"
+ "k8s.io/client-go/rest"
+ ctrl "sigs.k8s.io/controller-runtime"
+ "sigs.k8s.io/controller-runtime/pkg/client"
+ "sigs.k8s.io/controller-runtime/pkg/envtest"
+ logf "sigs.k8s.io/controller-runtime/pkg/log"
+ "sigs.k8s.io/controller-runtime/pkg/log/zap"
+ metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
+
+ mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1"
+ "github.com/stacklok/toolhive/cmd/thv-operator/controllers"
+ ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil"
+ "github.com/stacklok/toolhive/cmd/thv-operator/pkg/validation"
+)
+
+// Suite-wide handles. BeforeSuite assigns every one of them, the specs read
+// them, and AfterSuite uses cancel and testEnv for teardown.
+var (
+	// ctx is passed to the controller manager and to client calls; cancel ends it.
+	ctx    context.Context
+	cancel context.CancelFunc
+
+	// testEnv is the envtest environment; cfg is the REST config returned by starting it.
+	testEnv *envtest.Environment
+	cfg     *rest.Config
+
+	// k8sClient is the client built from cfg that the specs use to read and write objects.
+	k8sClient client.Client
+)
+
+// TestControllers is the `go test` entry point for this package: it wires
+// Gomega failures into Ginkgo and runs the registered specs as a single suite.
+func TestControllers(t *testing.T) {
+	t.Parallel()
+	RegisterFailHandler(Fail)
+
+	// Start from the configuration Ginkgo derived from the command line, then
+	// switch off every verbose reporting mode so only failures are shown in detail.
+	suiteCfg, reporterCfg := GinkgoConfiguration()
+	reporterCfg.Verbose, reporterCfg.VeryVerbose, reporterCfg.FullTrace = false, false, false
+
+	RunSpecs(t, "EmbeddingServer Controller Integration Test Suite", suiteCfg, reporterCfg)
+}
+
+// BeforeSuite prepares everything the specs in this package depend on:
+//
+//  1. routes controller-runtime logging to GinkgoWriter at error level,
+//  2. creates the suite context (ctx/cancel),
+//  3. starts an envtest environment loaded with the operator CRDs from the Helm chart,
+//  4. registers the operator, apps/v1 and core/v1 types on the shared client-go scheme,
+//  5. builds k8sClient from the envtest REST config, and
+//  6. registers the EmbeddingServerReconciler on a controller manager that is
+//     started in the background with ctx.
+//
+// Any failure in these steps fails the suite through the Expect assertions.
+var _ = BeforeSuite(func() {
+	// Only log errors unless a test fails
+	logLevel := zapcore.ErrorLevel
+
+	logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true), zap.Level(logLevel)))
+
+	ctx, cancel = context.WithCancel(context.Background())
+
+	By("bootstrapping test environment")
+	testEnv = &envtest.Environment{
+		// Path is relative to this test package and points at the CRDs shipped in the chart.
+		CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "..", "deploy", "charts", "operator-crds", "files", "crds")},
+		// Fail fast instead of starting an API server without the CRDs installed.
+		ErrorIfCRDPathMissing: true,
+	}
+
+	var err error
+	// cfg is defined in this file globally.
+	cfg, err = testEnv.Start()
+	Expect(err).NotTo(HaveOccurred())
+	Expect(cfg).NotTo(BeNil())
+
+	err = mcpv1alpha1.AddToScheme(scheme.Scheme)
+	Expect(err).NotTo(HaveOccurred())
+
+	// Add other schemes that the controllers use
+	err = appsv1.AddToScheme(scheme.Scheme)
+	Expect(err).NotTo(HaveOccurred())
+
+	err = corev1.AddToScheme(scheme.Scheme)
+	Expect(err).NotTo(HaveOccurred())
+
+	//+kubebuilder:scaffold:scheme
+
+	k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme})
+	Expect(err).NotTo(HaveOccurred())
+	Expect(k8sClient).NotTo(BeNil())
+
+	// Start the controller manager
+	k8sManager, err := ctrl.NewManager(cfg, ctrl.Options{
+		Scheme: scheme.Scheme,
+		Metrics: metricsserver.Options{
+			BindAddress: "0", // Disable metrics server for tests to avoid port conflicts
+		},
+		HealthProbeBindAddress: "0", // Disable health probe for tests
+	})
+	Expect(err).ToNot(HaveOccurred())
+
+	// Register the EmbeddingServer controller
+	err = (&controllers.EmbeddingServerReconciler{
+		Client:           k8sManager.GetClient(),
+		Scheme:           k8sManager.GetScheme(),
+		Recorder:         k8sManager.GetEventRecorderFor("embeddingserver-controller"),
+		PlatformDetector: ctrlutil.NewSharedPlatformDetector(),
+		ImageValidation:  validation.ImageValidationAlwaysAllow,
+	}).SetupWithManager(k8sManager)
+	Expect(err).ToNot(HaveOccurred())
+
+	// Start the manager in a goroutine
+	go func() {
+		// Assertions made off the spec goroutine must be recovered by Ginkgo.
+		defer GinkgoRecover()
+		// Keep the result in a goroutine-local variable: this goroutine outlives
+		// BeforeSuite, so it must not write to the enclosing function's err.
+		startErr := k8sManager.Start(ctx)
+		Expect(startErr).ToNot(HaveOccurred(), "failed to run manager")
+	}()
+})
+
+// AfterSuite cancels the suite context, waits briefly, and then stops the
+// envtest environment, failing the suite if the stop reports an error.
+var _ = AfterSuite(func() {
+	By("tearing down the test environment")
+
+	// Cancel first so anything running on ctx is asked to stop, and give it
+	// a short grace period before the test environment is stopped.
+	cancel()
+	time.Sleep(100 * time.Millisecond)
+
+	Expect(testEnv.Stop()).NotTo(HaveOccurred())
+})
diff --git a/deploy/charts/operator-crds/Chart.yaml b/deploy/charts/operator-crds/Chart.yaml
index 1b14897d71..e336674530 100644
--- a/deploy/charts/operator-crds/Chart.yaml
+++ b/deploy/charts/operator-crds/Chart.yaml
@@ -2,5 +2,5 @@ apiVersion: v2
name: toolhive-operator-crds
description: A Helm chart for installing the ToolHive Operator CRDs into Kubernetes.
type: application
-version: 0.0.102
+version: 0.0.103
appVersion: "0.0.1"
diff --git a/deploy/charts/operator-crds/README.md b/deploy/charts/operator-crds/README.md
index 2c68563bc6..93948d1568 100644
--- a/deploy/charts/operator-crds/README.md
+++ b/deploy/charts/operator-crds/README.md
@@ -1,6 +1,6 @@
# ToolHive Operator CRDs Helm Chart
-
+

A Helm chart for installing the ToolHive Operator CRDs into Kubernetes.
diff --git a/deploy/charts/operator-crds/crd-helm-wrapper/main.go b/deploy/charts/operator-crds/crd-helm-wrapper/main.go
index 0e9f49161e..a1cc05f109 100644
--- a/deploy/charts/operator-crds/crd-helm-wrapper/main.go
+++ b/deploy/charts/operator-crds/crd-helm-wrapper/main.go
@@ -39,6 +39,7 @@ var crdFeatureFlags = map[string][]string{
"mcpremoteproxies": {"server"},
"mcptoolconfigs": {"server"},
"mcpgroups": {"server"},
+ "embeddingservers": {"server"},
"mcpregistries": {"registry"},
"virtualmcpservers": {"virtualMcp"},
"virtualmcpcompositetooldefinitions": {"virtualMcp"},
diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_embeddingservers.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_embeddingservers.yaml
new file mode 100644
index 0000000000..d213326771
--- /dev/null
+++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_embeddingservers.yaml
@@ -0,0 +1,352 @@
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ controller-gen.kubebuilder.io/version: v0.17.3
+ name: embeddingservers.toolhive.stacklok.dev
+spec:
+ group: toolhive.stacklok.dev
+ names:
+ kind: EmbeddingServer
+ listKind: EmbeddingServerList
+ plural: embeddingservers
+ singular: embeddingserver
+ scope: Namespaced
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.phase
+ name: Status
+ type: string
+ - jsonPath: .spec.model
+ name: Model
+ type: string
+ - jsonPath: .status.readyReplicas
+ name: Ready
+ type: integer
+ - jsonPath: .status.url
+ name: URL
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: EmbeddingServer is the Schema for the embeddingservers API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: EmbeddingServerSpec defines the desired state of EmbeddingServer
+ properties:
+ args:
+ description: Args are additional arguments to pass to the embedding
+ inference server
+ items:
+ type: string
+ type: array
+ env:
+ description: Env are environment variables to set in the container
+ items:
+ description: EnvVar represents an environment variable in a container
+ properties:
+ name:
+ description: Name of the environment variable
+ type: string
+ value:
+ description: Value of the environment variable
+ type: string
+ required:
+ - name
+ - value
+ type: object
+ type: array
+ hfTokenSecretRef:
+ description: |-
+ HFTokenSecretRef is a reference to a Kubernetes Secret containing the huggingface token.
+ If provided, the secret value will be provided to the embedding server for authentication with huggingface.
+ properties:
+ key:
+ description: Key is the key within the secret
+ type: string
+ name:
+ description: Name is the name of the secret
+ type: string
+ required:
+ - key
+ - name
+ type: object
+ image:
+ default: ghcr.io/huggingface/text-embeddings-inference:latest
+ description: Image is the container image for huggingface-embedding-inference
+ type: string
+ imagePullPolicy:
+ default: IfNotPresent
+ description: ImagePullPolicy defines the pull policy for the container
+ image
+ enum:
+ - Always
+ - Never
+ - IfNotPresent
+ type: string
+ model:
+ description: Model is the HuggingFace embedding model to use (e.g.,
+ "sentence-transformers/all-MiniLM-L6-v2")
+ type: string
+ modelCache:
+ description: |-
+ ModelCache configures persistent storage for downloaded models
+ When enabled, models are cached in a PVC and reused across pod restarts
+ properties:
+ accessMode:
+ default: ReadWriteOnce
+ description: AccessMode is the access mode for the PVC
+ enum:
+ - ReadWriteOnce
+ - ReadWriteMany
+ - ReadOnlyMany
+ type: string
+ enabled:
+ default: true
+ description: Enabled controls whether model caching is enabled
+ type: boolean
+ size:
+ default: 10Gi
+ description: Size is the size of the PVC for model caching (e.g.,
+ "10Gi")
+ type: string
+ storageClassName:
+ description: |-
+ StorageClassName is the storage class to use for the PVC
+ If not specified, uses the cluster's default storage class
+ type: string
+ type: object
+ podTemplateSpec:
+ description: |-
+ PodTemplateSpec allows customizing the pod (node selection, tolerations, etc.)
+ This field accepts a PodTemplateSpec object as JSON/YAML.
+ Note that to modify the specific container the embedding server runs in, you must specify
+ the 'embedding' container name in the PodTemplateSpec.
+ type: object
+ x-kubernetes-preserve-unknown-fields: true
+ port:
+ default: 8080
+ description: Port is the port to expose the embedding service on
+ format: int32
+ maximum: 65535
+ minimum: 1
+ type: integer
+ replicas:
+ default: 1
+ description: Replicas is the number of embedding server replicas to
+ run
+ format: int32
+ minimum: 1
+ type: integer
+ resourceOverrides:
+ description: ResourceOverrides allows overriding annotations and labels
+ for resources created by the operator
+ properties:
+ persistentVolumeClaim:
+ description: PersistentVolumeClaim defines overrides for the PVC
+ resource
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: Annotations to add or override on the resource
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: Labels to add or override on the resource
+ type: object
+ type: object
+ service:
+ description: Service defines overrides for the Service resource
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: Annotations to add or override on the resource
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: Labels to add or override on the resource
+ type: object
+ type: object
+ statefulSet:
+ description: StatefulSet defines overrides for the StatefulSet
+ resource
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: Annotations to add or override on the resource
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: Labels to add or override on the resource
+ type: object
+ podTemplateMetadataOverrides:
+ description: PodTemplateMetadataOverrides defines metadata
+ overrides for the pod template
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: Annotations to add or override on the resource
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: Labels to add or override on the resource
+ type: object
+ type: object
+ type: object
+ type: object
+ resources:
+ description: Resources defines compute resources for the embedding
+ server
+ properties:
+ limits:
+ description: Limits describes the maximum amount of compute resources
+ allowed
+ properties:
+ cpu:
+ description: CPU is the CPU limit in cores (e.g., "500m" for
+ 0.5 cores)
+ type: string
+ memory:
+ description: Memory is the memory limit in bytes (e.g., "64Mi"
+ for 64 megabytes)
+ type: string
+ type: object
+ requests:
+ description: Requests describes the minimum amount of compute
+ resources required
+ properties:
+ cpu:
+ description: CPU is the CPU limit in cores (e.g., "500m" for
+ 0.5 cores)
+ type: string
+ memory:
+ description: Memory is the memory limit in bytes (e.g., "64Mi"
+ for 64 megabytes)
+ type: string
+ type: object
+ type: object
+ required:
+ - image
+ - model
+ type: object
+ status:
+ description: EmbeddingServerStatus defines the observed state of EmbeddingServer
+ properties:
+ conditions:
+ description: Conditions represent the latest available observations
+ of the EmbeddingServer's state
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ message:
+ description: Message provides additional information about the current
+ phase
+ type: string
+ observedGeneration:
+ description: ObservedGeneration reflects the generation most recently
+ observed by the controller
+ format: int64
+ type: integer
+ phase:
+ description: Phase is the current phase of the EmbeddingServer
+ enum:
+ - Pending
+ - Downloading
+ - Running
+ - Failed
+ - Terminating
+ type: string
+ readyReplicas:
+ description: ReadyReplicas is the number of ready replicas
+ format: int32
+ type: integer
+ url:
+ description: URL is the URL where the embedding service can be accessed
+ type: string
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_embeddingservers.yaml b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_embeddingservers.yaml
new file mode 100644
index 0000000000..2bf3138fe5
--- /dev/null
+++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_embeddingservers.yaml
@@ -0,0 +1,356 @@
+{{- if .Values.crds.install.server }}
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ annotations:
+ {{- if .Values.crds.keep }}
+ helm.sh/resource-policy: keep
+ {{- end }}
+ controller-gen.kubebuilder.io/version: v0.17.3
+ name: embeddingservers.toolhive.stacklok.dev
+spec:
+ group: toolhive.stacklok.dev
+ names:
+ kind: EmbeddingServer
+ listKind: EmbeddingServerList
+ plural: embeddingservers
+ singular: embeddingserver
+ scope: Namespaced
+ versions:
+ - additionalPrinterColumns:
+ - jsonPath: .status.phase
+ name: Status
+ type: string
+ - jsonPath: .spec.model
+ name: Model
+ type: string
+ - jsonPath: .status.readyReplicas
+ name: Ready
+ type: integer
+ - jsonPath: .status.url
+ name: URL
+ type: string
+ - jsonPath: .metadata.creationTimestamp
+ name: Age
+ type: date
+ name: v1alpha1
+ schema:
+ openAPIV3Schema:
+ description: EmbeddingServer is the Schema for the embeddingservers API
+ properties:
+ apiVersion:
+ description: |-
+ APIVersion defines the versioned schema of this representation of an object.
+ Servers should convert recognized schemas to the latest internal value, and
+ may reject unrecognized values.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+ type: string
+ kind:
+ description: |-
+ Kind is a string value representing the REST resource this object represents.
+ Servers may infer this from the endpoint the client submits requests to.
+ Cannot be updated.
+ In CamelCase.
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+ type: string
+ metadata:
+ type: object
+ spec:
+ description: EmbeddingServerSpec defines the desired state of EmbeddingServer
+ properties:
+ args:
+ description: Args are additional arguments to pass to the embedding
+ inference server
+ items:
+ type: string
+ type: array
+ env:
+ description: Env are environment variables to set in the container
+ items:
+ description: EnvVar represents an environment variable in a container
+ properties:
+ name:
+ description: Name of the environment variable
+ type: string
+ value:
+ description: Value of the environment variable
+ type: string
+ required:
+ - name
+ - value
+ type: object
+ type: array
+ hfTokenSecretRef:
+ description: |-
+ HFTokenSecretRef is a reference to a Kubernetes Secret containing the huggingface token.
+ If provided, the secret value will be provided to the embedding server for authentication with huggingface.
+ properties:
+ key:
+ description: Key is the key within the secret
+ type: string
+ name:
+ description: Name is the name of the secret
+ type: string
+ required:
+ - key
+ - name
+ type: object
+ image:
+ default: ghcr.io/huggingface/text-embeddings-inference:latest
+ description: Image is the container image for huggingface-embedding-inference
+ type: string
+ imagePullPolicy:
+ default: IfNotPresent
+ description: ImagePullPolicy defines the pull policy for the container
+ image
+ enum:
+ - Always
+ - Never
+ - IfNotPresent
+ type: string
+ model:
+ description: Model is the HuggingFace embedding model to use (e.g.,
+ "sentence-transformers/all-MiniLM-L6-v2")
+ type: string
+ modelCache:
+ description: |-
+ ModelCache configures persistent storage for downloaded models
+ When enabled, models are cached in a PVC and reused across pod restarts
+ properties:
+ accessMode:
+ default: ReadWriteOnce
+ description: AccessMode is the access mode for the PVC
+ enum:
+ - ReadWriteOnce
+ - ReadWriteMany
+ - ReadOnlyMany
+ type: string
+ enabled:
+ default: true
+ description: Enabled controls whether model caching is enabled
+ type: boolean
+ size:
+ default: 10Gi
+ description: Size is the size of the PVC for model caching (e.g.,
+ "10Gi")
+ type: string
+ storageClassName:
+ description: |-
+ StorageClassName is the storage class to use for the PVC
+ If not specified, uses the cluster's default storage class
+ type: string
+ type: object
+ podTemplateSpec:
+ description: |-
+ PodTemplateSpec allows customizing the pod (node selection, tolerations, etc.)
+ This field accepts a PodTemplateSpec object as JSON/YAML.
+ Note that to modify the specific container the embedding server runs in, you must specify
+ the 'embedding' container name in the PodTemplateSpec.
+ type: object
+ x-kubernetes-preserve-unknown-fields: true
+ port:
+ default: 8080
+ description: Port is the port to expose the embedding service on
+ format: int32
+ maximum: 65535
+ minimum: 1
+ type: integer
+ replicas:
+ default: 1
+ description: Replicas is the number of embedding server replicas to
+ run
+ format: int32
+ minimum: 1
+ type: integer
+ resourceOverrides:
+ description: ResourceOverrides allows overriding annotations and labels
+ for resources created by the operator
+ properties:
+ persistentVolumeClaim:
+ description: PersistentVolumeClaim defines overrides for the PVC
+ resource
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: Annotations to add or override on the resource
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: Labels to add or override on the resource
+ type: object
+ type: object
+ service:
+ description: Service defines overrides for the Service resource
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: Annotations to add or override on the resource
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: Labels to add or override on the resource
+ type: object
+ type: object
+ statefulSet:
+ description: StatefulSet defines overrides for the StatefulSet
+ resource
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: Annotations to add or override on the resource
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: Labels to add or override on the resource
+ type: object
+ podTemplateMetadataOverrides:
+ description: PodTemplateMetadataOverrides defines metadata
+ overrides for the pod template
+ properties:
+ annotations:
+ additionalProperties:
+ type: string
+ description: Annotations to add or override on the resource
+ type: object
+ labels:
+ additionalProperties:
+ type: string
+ description: Labels to add or override on the resource
+ type: object
+ type: object
+ type: object
+ type: object
+ resources:
+ description: Resources defines compute resources for the embedding
+ server
+ properties:
+ limits:
+ description: Limits describes the maximum amount of compute resources
+ allowed
+ properties:
+ cpu:
+ description: CPU is the CPU limit in cores (e.g., "500m" for
+ 0.5 cores)
+ type: string
+ memory:
+ description: Memory is the memory limit in bytes (e.g., "64Mi"
+ for 64 megabytes)
+ type: string
+ type: object
+ requests:
+ description: Requests describes the minimum amount of compute
+ resources required
+ properties:
+ cpu:
+ description: CPU is the CPU limit in cores (e.g., "500m" for
+ 0.5 cores)
+ type: string
+ memory:
+ description: Memory is the memory limit in bytes (e.g., "64Mi"
+ for 64 megabytes)
+ type: string
+ type: object
+ type: object
+ required:
+ - image
+ - model
+ type: object
+ status:
+ description: EmbeddingServerStatus defines the observed state of EmbeddingServer
+ properties:
+ conditions:
+ description: Conditions represent the latest available observations
+ of the EmbeddingServer's state
+ items:
+ description: Condition contains details for one aspect of the current
+ state of this API Resource.
+ properties:
+ lastTransitionTime:
+ description: |-
+ lastTransitionTime is the last time the condition transitioned from one status to another.
+ This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable.
+ format: date-time
+ type: string
+ message:
+ description: |-
+ message is a human readable message indicating details about the transition.
+ This may be an empty string.
+ maxLength: 32768
+ type: string
+ observedGeneration:
+ description: |-
+ observedGeneration represents the .metadata.generation that the condition was set based upon.
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+ with respect to the current state of the instance.
+ format: int64
+ minimum: 0
+ type: integer
+ reason:
+ description: |-
+ reason contains a programmatic identifier indicating the reason for the condition's last transition.
+ Producers of specific condition types may define expected values and meanings for this field,
+ and whether the values are considered a guaranteed API.
+ The value should be a CamelCase string.
+ This field may not be empty.
+ maxLength: 1024
+ minLength: 1
+ pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+ type: string
+ status:
+ description: status of the condition, one of True, False, Unknown.
+ enum:
+ - "True"
+ - "False"
+ - Unknown
+ type: string
+ type:
+ description: type of condition in CamelCase or in foo.example.com/CamelCase.
+ maxLength: 316
+ pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+ type: string
+ required:
+ - lastTransitionTime
+ - message
+ - reason
+ - status
+ - type
+ type: object
+ type: array
+ message:
+ description: Message provides additional information about the current
+ phase
+ type: string
+ observedGeneration:
+ description: ObservedGeneration reflects the generation most recently
+ observed by the controller
+ format: int64
+ type: integer
+ phase:
+ description: Phase is the current phase of the EmbeddingServer
+ enum:
+ - Pending
+ - Downloading
+ - Running
+ - Failed
+ - Terminating
+ type: string
+ readyReplicas:
+ description: ReadyReplicas is the number of ready replicas
+ format: int32
+ type: integer
+ url:
+ description: URL is the URL where the embedding service can be accessed
+ type: string
+ type: object
+ type: object
+ served: true
+ storage: true
+ subresources:
+ status: {}
+{{- end }}
diff --git a/deploy/charts/operator/Chart.yaml b/deploy/charts/operator/Chart.yaml
index d0af785815..8be2129a20 100644
--- a/deploy/charts/operator/Chart.yaml
+++ b/deploy/charts/operator/Chart.yaml
@@ -2,5 +2,5 @@ apiVersion: v2
name: toolhive-operator
description: A Helm chart for deploying the ToolHive Operator into Kubernetes.
type: application
-version: 0.5.26
-appVersion: "v0.8.0"
+version: 0.5.27
+appVersion: "v0.8.1"
diff --git a/deploy/charts/operator/README.md b/deploy/charts/operator/README.md
index d8f6294a56..ac18156091 100644
--- a/deploy/charts/operator/README.md
+++ b/deploy/charts/operator/README.md
@@ -1,6 +1,6 @@
# ToolHive Operator Helm Chart
-
+

A Helm chart for deploying the ToolHive Operator into Kubernetes.
diff --git a/deploy/charts/operator/templates/clusterrole/role.yaml b/deploy/charts/operator/templates/clusterrole/role.yaml
index feccbeb749..97f45f2407 100644
--- a/deploy/charts/operator/templates/clusterrole/role.yaml
+++ b/deploy/charts/operator/templates/clusterrole/role.yaml
@@ -8,6 +8,7 @@ rules:
- ""
resources:
- configmaps
+ - persistentvolumeclaims
- secrets
- serviceaccounts
verbs:
@@ -121,6 +122,7 @@ rules:
- apiGroups:
- toolhive.stacklok.dev
resources:
+ - embeddingservers
- mcpexternalauthconfigs
- mcpgroups
- mcpregistries
@@ -139,6 +141,7 @@ rules:
- apiGroups:
- toolhive.stacklok.dev
resources:
+ - embeddingservers/finalizers
- mcpexternalauthconfigs/finalizers
- mcpgroups/finalizers
- mcpregistries/finalizers
@@ -149,6 +152,7 @@ rules:
- apiGroups:
- toolhive.stacklok.dev
resources:
+ - embeddingservers/status
- mcpexternalauthconfigs/status
- mcpgroups/status
- mcpregistries/status
diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md
index 80e1ee9808..329d1fcbd6 100644
--- a/docs/operator/crd-api.md
+++ b/docs/operator/crd-api.md
@@ -645,6 +645,8 @@ _Appears in:_
## toolhive.stacklok.dev/v1alpha1
### Resource Types
+- [api.v1alpha1.EmbeddingServer](#apiv1alpha1embeddingserver)
+- [api.v1alpha1.EmbeddingServerList](#apiv1alpha1embeddingserverlist)
- [api.v1alpha1.MCPExternalAuthConfig](#apiv1alpha1mcpexternalauthconfig)
- [api.v1alpha1.MCPExternalAuthConfigList](#apiv1alpha1mcpexternalauthconfiglist)
- [api.v1alpha1.MCPGroup](#apiv1alpha1mcpgroup)
@@ -848,6 +850,153 @@ _Appears in:_
+#### api.v1alpha1.EmbeddingResourceOverrides
+
+
+
+EmbeddingResourceOverrides defines overrides for annotations and labels on created resources
+
+
+
+_Appears in:_
+- [api.v1alpha1.EmbeddingServerSpec](#apiv1alpha1embeddingserverspec)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `statefulSet` _[api.v1alpha1.EmbeddingStatefulSetOverrides](#apiv1alpha1embeddingstatefulsetoverrides)_ | StatefulSet defines overrides for the StatefulSet resource | | |
+| `service` _[api.v1alpha1.ResourceMetadataOverrides](#apiv1alpha1resourcemetadataoverrides)_ | Service defines overrides for the Service resource | | |
+| `persistentVolumeClaim` _[api.v1alpha1.ResourceMetadataOverrides](#apiv1alpha1resourcemetadataoverrides)_ | PersistentVolumeClaim defines overrides for the PVC resource | | |
+
+
+#### api.v1alpha1.EmbeddingServer
+
+
+
+EmbeddingServer is the Schema for the embeddingservers API
+
+
+
+_Appears in:_
+- [api.v1alpha1.EmbeddingServerList](#apiv1alpha1embeddingserverlist)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `apiVersion` _string_ | `toolhive.stacklok.dev/v1alpha1` | | |
+| `kind` _string_ | `EmbeddingServer` | | |
+| `kind` _string_ | Kind is a string value representing the REST resource this object represents.
Servers may infer this from the endpoint the client submits requests to.
Cannot be updated.
In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | | |
+| `apiVersion` _string_ | APIVersion defines the versioned schema of this representation of an object.
Servers should convert recognized schemas to the latest internal value, and
may reject unrecognized values.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | | |
+| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | |
+| `spec` _[api.v1alpha1.EmbeddingServerSpec](#apiv1alpha1embeddingserverspec)_ | | | |
+| `status` _[api.v1alpha1.EmbeddingServerStatus](#apiv1alpha1embeddingserverstatus)_ | | | |
+
+
+#### api.v1alpha1.EmbeddingServerList
+
+
+
+EmbeddingServerList contains a list of EmbeddingServer
+
+
+
+
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `apiVersion` _string_ | `toolhive.stacklok.dev/v1alpha1` | | |
+| `kind` _string_ | `EmbeddingServerList` | | |
+| `kind` _string_ | Kind is a string value representing the REST resource this object represents.
Servers may infer this from the endpoint the client submits requests to.
Cannot be updated.
In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | | |
+| `apiVersion` _string_ | APIVersion defines the versioned schema of this representation of an object.
Servers should convert recognized schemas to the latest internal value, and
may reject unrecognized values.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | | |
+| `metadata` _[ListMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#listmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | |
+| `items` _[api.v1alpha1.EmbeddingServer](#apiv1alpha1embeddingserver) array_ | | | |
+
+
+#### api.v1alpha1.EmbeddingServerPhase
+
+_Underlying type:_ _string_
+
+EmbeddingServerPhase is the phase of the EmbeddingServer
+
+_Validation:_
+- Enum: [Pending Downloading Running Failed Terminating]
+
+_Appears in:_
+- [api.v1alpha1.EmbeddingServerStatus](#apiv1alpha1embeddingserverstatus)
+
+| Field | Description |
+| --- | --- |
+| `Pending` | EmbeddingServerPhasePending means the EmbeddingServer is being created
|
+| `Downloading` | EmbeddingServerPhaseDownloading means the model is being downloaded
|
+| `Running` | EmbeddingServerPhaseRunning means the EmbeddingServer is running and ready
|
+| `Failed` | EmbeddingServerPhaseFailed means the EmbeddingServer failed to start
|
+| `Terminating` | EmbeddingServerPhaseTerminating means the EmbeddingServer is being deleted
|
+
+
+#### api.v1alpha1.EmbeddingServerSpec
+
+
+
+EmbeddingServerSpec defines the desired state of EmbeddingServer
+
+
+
+_Appears in:_
+- [api.v1alpha1.EmbeddingServer](#apiv1alpha1embeddingserver)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `model` _string_ | Model is the HuggingFace embedding model to use (e.g., "sentence-transformers/all-MiniLM-L6-v2") | | Required: \{\}
|
+| `hfTokenSecretRef` _[api.v1alpha1.SecretKeyRef](#apiv1alpha1secretkeyref)_ | HFTokenSecretRef is a reference to a Kubernetes Secret containing the huggingface token.
If provided, the secret value will be provided to the embedding server for authentication with huggingface. | | |
+| `image` _string_ | Image is the container image for huggingface-embedding-inference | ghcr.io/huggingface/text-embeddings-inference:latest | Required: \{\}
|
+| `imagePullPolicy` _string_ | ImagePullPolicy defines the pull policy for the container image | IfNotPresent | Enum: [Always Never IfNotPresent]
|
+| `port` _integer_ | Port is the port to expose the embedding service on | 8080 | Maximum: 65535
Minimum: 1
|
+| `args` _string array_ | Args are additional arguments to pass to the embedding inference server | | |
+| `env` _[api.v1alpha1.EnvVar](#apiv1alpha1envvar) array_ | Env are environment variables to set in the container | | |
+| `resources` _[api.v1alpha1.ResourceRequirements](#apiv1alpha1resourcerequirements)_ | Resources defines compute resources for the embedding server | | |
+| `modelCache` _[api.v1alpha1.ModelCacheConfig](#apiv1alpha1modelcacheconfig)_ | ModelCache configures persistent storage for downloaded models
When enabled, models are cached in a PVC and reused across pod restarts | | |
+| `podTemplateSpec` _[RawExtension](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#rawextension-runtime-pkg)_ | PodTemplateSpec allows customizing the pod (node selection, tolerations, etc.)
This field accepts a PodTemplateSpec object as JSON/YAML.
Note that to modify the specific container the embedding server runs in, you must specify
the 'embedding' container name in the PodTemplateSpec. | | Type: object
|
+| `resourceOverrides` _[api.v1alpha1.EmbeddingResourceOverrides](#apiv1alpha1embeddingresourceoverrides)_ | ResourceOverrides allows overriding annotations and labels for resources created by the operator | | |
+| `replicas` _integer_ | Replicas is the number of embedding server replicas to run | 1 | Minimum: 1
|
+
+
+#### api.v1alpha1.EmbeddingServerStatus
+
+
+
+EmbeddingServerStatus defines the observed state of EmbeddingServer
+
+
+
+_Appears in:_
+- [api.v1alpha1.EmbeddingServer](#apiv1alpha1embeddingserver)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#condition-v1-meta) array_ | Conditions represent the latest available observations of the EmbeddingServer's state | | |
+| `phase` _[api.v1alpha1.EmbeddingServerPhase](#apiv1alpha1embeddingserverphase)_ | Phase is the current phase of the EmbeddingServer | | Enum: [Pending Downloading Running Failed Terminating]
|
+| `message` _string_ | Message provides additional information about the current phase | | |
+| `url` _string_ | URL is the URL where the embedding service can be accessed | | |
+| `readyReplicas` _integer_ | ReadyReplicas is the number of ready replicas | | |
+| `observedGeneration` _integer_ | ObservedGeneration reflects the generation most recently observed by the controller | | |
+
+
+#### api.v1alpha1.EmbeddingStatefulSetOverrides
+
+
+
+EmbeddingStatefulSetOverrides defines overrides specific to the embedding statefulset
+
+
+
+_Appears in:_
+- [api.v1alpha1.EmbeddingResourceOverrides](#apiv1alpha1embeddingresourceoverrides)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `annotations` _object (keys:string, values:string)_ | Annotations to add or override on the resource | | |
+| `labels` _object (keys:string, values:string)_ | Labels to add or override on the resource | | |
+| `podTemplateMetadataOverrides` _[api.v1alpha1.ResourceMetadataOverrides](#apiv1alpha1resourcemetadataoverrides)_ | PodTemplateMetadataOverrides defines metadata overrides for the pod template | | |
+
+
#### api.v1alpha1.EnvVar
@@ -857,6 +1006,7 @@ EnvVar represents an environment variable in a container
_Appears in:_
+- [api.v1alpha1.EmbeddingServerSpec](#apiv1alpha1embeddingserverspec)
- [api.v1alpha1.MCPServerSpec](#apiv1alpha1mcpserverspec)
- [api.v1alpha1.ProxyDeploymentOverrides](#apiv1alpha1proxydeploymentoverrides)
@@ -1770,6 +1920,25 @@ _Appears in:_
| `referencingServers` _string array_ | ReferencingServers is a list of MCPServer resources that reference this MCPToolConfig
This helps track which servers need to be reconciled when this config changes | | |
+#### api.v1alpha1.ModelCacheConfig
+
+
+
+ModelCacheConfig configures persistent storage for model caching
+
+
+
+_Appears in:_
+- [api.v1alpha1.EmbeddingServerSpec](#apiv1alpha1embeddingserverspec)
+
+| Field | Description | Default | Validation |
+| --- | --- | --- | --- |
+| `enabled` _boolean_ | Enabled controls whether model caching is enabled | true | |
+| `storageClassName` _string_ | StorageClassName is the storage class to use for the PVC
If not specified, uses the cluster's default storage class | | |
+| `size` _string_ | Size is the size of the PVC for model caching (e.g., "10Gi") | 10Gi | |
+| `accessMode` _string_ | AccessMode is the access mode for the PVC | ReadWriteOnce | Enum: [ReadWriteOnce ReadWriteMany ReadOnlyMany]
|
+
+
#### api.v1alpha1.NameFilter
@@ -2032,6 +2201,8 @@ ResourceMetadataOverrides defines metadata overrides for a resource
_Appears in:_
+- [api.v1alpha1.EmbeddingResourceOverrides](#apiv1alpha1embeddingresourceoverrides)
+- [api.v1alpha1.EmbeddingStatefulSetOverrides](#apiv1alpha1embeddingstatefulsetoverrides)
- [api.v1alpha1.ProxyDeploymentOverrides](#apiv1alpha1proxydeploymentoverrides)
- [api.v1alpha1.ResourceOverrides](#apiv1alpha1resourceoverrides)
@@ -2068,6 +2239,7 @@ ResourceRequirements describes the compute resource requirements
_Appears in:_
+- [api.v1alpha1.EmbeddingServerSpec](#apiv1alpha1embeddingserverspec)
- [api.v1alpha1.MCPRemoteProxySpec](#apiv1alpha1mcpremoteproxyspec)
- [api.v1alpha1.MCPServerSpec](#apiv1alpha1mcpserverspec)
@@ -2087,6 +2259,7 @@ SecretKeyRef is a reference to a key within a Secret
_Appears in:_
- [api.v1alpha1.BearerTokenConfig](#apiv1alpha1bearertokenconfig)
+- [api.v1alpha1.EmbeddingServerSpec](#apiv1alpha1embeddingserverspec)
- [api.v1alpha1.HeaderInjectionConfig](#apiv1alpha1headerinjectionconfig)
- [api.v1alpha1.InlineOIDCConfig](#apiv1alpha1inlineoidcconfig)
- [api.v1alpha1.TokenExchangeConfig](#apiv1alpha1tokenexchangeconfig)
diff --git a/examples/operator/embedding-servers/README.md b/examples/operator/embedding-servers/README.md
new file mode 100644
index 0000000000..ffa22bde23
--- /dev/null
+++ b/examples/operator/embedding-servers/README.md
@@ -0,0 +1,234 @@
+# EmbeddingServer Examples
+
+This directory contains example configurations for deploying HuggingFace embedding inference servers using the EmbeddingServer custom resource.
+
+## Overview
+
+The EmbeddingServer CRD allows you to deploy and manage HuggingFace Text Embeddings Inference (TEI) servers in Kubernetes. These servers provide high-performance embedding generation for various NLP tasks.
+
+## Examples
+
+### 1. Basic Embedding Server
+
+File: `basic-embedding.yaml`
+
+A minimal configuration that deploys an embedding server with default settings:
+- Uses `sentence-transformers/all-MiniLM-L6-v2` model
+- Single replica
+- Default port (8080)
+- No persistent storage
+
+```bash
+kubectl apply -f basic-embedding.yaml
+```
+
+### 2. Embedding with Model Cache
+
+File: `embedding-with-cache.yaml`
+
+Configures persistent storage for downloaded models:
+- Model cache enabled with 10Gi PVC
+- Resource limits specified
+- Environment variables configured
+- Faster restarts after initial model download
+
+```bash
+kubectl apply -f embedding-with-cache.yaml
+```
+
+### 3. Embedding with Group Association
+
+File: `embedding-with-group.yaml`
+
+Shows how to organize embeddings using MCPGroup:
+- Creates an MCPGroup named `ml-services`
+- Associates the embedding server with the group
+- Enables tracking and organization of related resources
+
+```bash
+kubectl apply -f embedding-with-group.yaml
+```
+
+### 4. Advanced Configuration
+
+File: `embedding-advanced.yaml`
+
+Demonstrates all available features:
+- High availability with 2 replicas
+- Custom arguments and environment variables
+- Persistent model caching with custom storage class
+- PodTemplateSpec for advanced pod customization:
+ - Node selection
+ - Tolerations
+ - Affinity rules
+ - Security contexts
+- Resource overrides for metadata
+
+```bash
+kubectl apply -f embedding-advanced.yaml
+```
+
+## Supported Models
+
+EmbeddingServer supports any HuggingFace model compatible with Text Embeddings Inference. Popular choices include:
+
+- `sentence-transformers/all-MiniLM-L6-v2` - Fast, lightweight (384 dimensions)
+- `sentence-transformers/all-mpnet-base-v2` - Good balance (768 dimensions)
+- `BAAI/bge-large-en-v1.5` - High quality (1024 dimensions)
+- `intfloat/e5-large-v2` - Instruction-based embeddings
+- `thenlper/gte-large` - General text embeddings
+
+## Accessing the Embedding Service
+
+After deployment, the embedding service is accessible at:
+
+```
+http://<name>.<namespace>.svc.cluster.local:<port>
+```
+
+For example, with `basic-embedding` in the `toolhive-system` namespace:
+
+```
+http://basic-embedding.toolhive-system.svc.cluster.local:8080
+```
+
+### Using the Embedding Service
+
+Generate embeddings using the REST API:
+
+```bash
+curl -X POST \
+ http://basic-embedding.toolhive-system.svc.cluster.local:8080/embed \
+ -H 'Content-Type: application/json' \
+ -d '{"inputs": "Hello, world!"}'
+```
+
+## Configuration Options
+
+### Required Fields
+
+- `spec.model`: HuggingFace model identifier
+
+### Optional Fields
+
+- `spec.image`: Container image (default: `ghcr.io/huggingface/text-embeddings-inference:latest`)
+- `spec.port`: Service port (default: 8080)
+- `spec.replicas`: Number of replicas (default: 1)
+- `spec.args`: Additional arguments for the embedding server
+- `spec.env`: Environment variables
+- `spec.resources`: CPU and memory limits/requests
+- `spec.modelCache`: Persistent volume configuration for model caching
+- `spec.podTemplateSpec`: Advanced pod customization
+- `spec.resourceOverrides`: Metadata overrides for created resources
+- `spec.groupRef`: Reference to an MCPGroup
+
+## Model Caching
+
+Enabling model caching provides several benefits:
+
+1. **Faster Restarts**: Models are downloaded once and cached
+2. **Reduced Network Usage**: No repeated downloads
+3. **Improved Reliability**: Not dependent on external network for restarts
+
+Configuration:
+
+```yaml
+spec:
+ modelCache:
+ enabled: true
+ size: "10Gi" # Adjust based on model size
+ accessMode: "ReadWriteOnce"
+ storageClassName: "fast-ssd" # Optional
+```
+
+## Resource Planning
+
+### CPU and Memory
+
+Recommended resources based on model size:
+
+| Model Type | CPU Request | CPU Limit | Memory Request | Memory Limit |
+|------------|-------------|-----------|----------------|--------------|
+| Small (< 500MB) | 500m | 2000m | 1Gi | 4Gi |
+| Medium (500MB-2GB) | 1000m | 4000m | 2Gi | 8Gi |
+| Large (> 2GB) | 2000m | 8000m | 4Gi | 16Gi |
+
+### Storage
+
+Model sizes vary significantly. Check the HuggingFace model page for size information:
+
+- `all-MiniLM-L6-v2`: ~90MB
+- `all-mpnet-base-v2`: ~420MB
+- `bge-large-en-v1.5`: ~1.3GB
+
+Recommended PVC sizes:
+- Small models: 5Gi
+- Medium models: 10Gi
+- Large models: 20Gi+
+
+## Monitoring
+
+The embedding server exposes health endpoints:
+
+- `/health`: Health check endpoint (used by Kubernetes probes)
+- `/metrics`: Prometheus metrics (if enabled)
+
+## Troubleshooting
+
+### Model Download Issues
+
+If pods are stuck in `Downloading` phase:
+
+1. Check pod logs:
+ ```bash
+ kubectl logs -n toolhive-system <pod-name>
+ ```
+
+2. Verify network connectivity to HuggingFace Hub
+
+3. Check if model exists and is accessible
+
+### PVC Binding Issues
+
+If PVC is not binding:
+
+1. Check storage class availability:
+ ```bash
+ kubectl get storageclass
+ ```
+
+2. Verify PVC status:
+ ```bash
+ kubectl get pvc -n toolhive-system
+ ```
+
+3. Check PV availability or dynamic provisioning
+
+### Resource Constraints
+
+If pods are pending due to insufficient resources:
+
+1. Check node resources:
+ ```bash
+ kubectl top nodes
+ ```
+
+2. Adjust resource requests in the EmbeddingServer spec
+
+3. Consider node scaling or resource optimization
+
+## Best Practices
+
+1. **Enable Model Caching**: Always enable caching for production deployments
+2. **Set Resource Limits**: Prevent resource contention with appropriate limits
+3. **Use Groups**: Organize related embeddings with MCPGroup
+4. **Monitor Performance**: Use Prometheus metrics for monitoring
+5. **Plan Storage**: Allocate sufficient PVC size for your models
+6. **Test Before Production**: Validate configuration in non-production first
+7. **Version Pins**: Use specific image tags rather than `:latest` for production
+
+## Additional Resources
+
+- [HuggingFace Text Embeddings Inference](https://github.com/huggingface/text-embeddings-inference)
+- [ToolHive Documentation](https://docs.toolhive.dev)
+- [MCPGroup Documentation](../virtual-mcps/README.md)
diff --git a/examples/operator/embedding-servers/basic-embedding.yaml b/examples/operator/embedding-servers/basic-embedding.yaml
new file mode 100644
index 0000000000..c4c2f01093
--- /dev/null
+++ b/examples/operator/embedding-servers/basic-embedding.yaml
@@ -0,0 +1,20 @@
+# Basic EmbeddingServer example with minimal configuration
+# This creates an embedding server using the default text-embeddings-inference image
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: basic-embedding
+ namespace: toolhive-system
+spec:
+ # Required: HuggingFace model to use
+ model: "sentence-transformers/all-MiniLM-L6-v2"
+
+ # Optional: Container image (defaults to ghcr.io/huggingface/text-embeddings-inference:latest)
+ image: "text-embeddings-inference:latest"
+ imagePullPolicy: IfNotPresent
+
+ # Optional: Port to expose (defaults to 8080)
+ port: 8080
+
+ # Optional: Number of replicas (defaults to 1)
+ replicas: 1
diff --git a/examples/operator/embedding-servers/embedding-advanced.yaml b/examples/operator/embedding-servers/embedding-advanced.yaml
new file mode 100644
index 0000000000..8c01b5858d
--- /dev/null
+++ b/examples/operator/embedding-servers/embedding-advanced.yaml
@@ -0,0 +1,108 @@
+# Advanced EmbeddingServer configuration with all features
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: advanced-embedding
+ namespace: toolhive-system
+spec:
+ # Model configuration
+ model: "sentence-transformers/all-MiniLM-L6-v2"
+ image: "text-embeddings-inference:latest"
+ port: 8080
+ replicas: 2
+
+ # HuggingFace authentication token (optional)
+ # Reference a Kubernetes Secret containing the HuggingFace token for accessing private models
+ # Create the secret with: kubectl create secret generic hf-token --from-literal=token=hf_xxxxx
+ hfTokenSecretRef:
+ name: hf-token
+ key: token
+
+ # Additional arguments to pass to the embedding server
+ args:
+ - "--max-concurrent-requests"
+ - "512"
+ - "--max-batch-tokens"
+ - "32768"
+
+ # Environment variables
+ env:
+ - name: RUST_LOG
+ value: "info"
+ - name: MAX_CLIENT_BATCH_SIZE
+ value: "32"
+
+ # Model caching
+ modelCache:
+ enabled: true
+ size: "20Gi"
+ accessMode: "ReadWriteOnce"
+ storageClassName: "fast-ssd"
+
+ # Resource requirements
+ resources:
+ limits:
+ cpu: "4000m"
+ memory: "8Gi"
+ requests:
+ cpu: "2000m"
+ memory: "4Gi"
+
+ # PodTemplateSpec for advanced pod customization
+ podTemplateSpec:
+ metadata:
+ annotations:
+ prometheus.io/scrape: "true"
+ prometheus.io/port: "8080"
+ spec:
+ # Node selection
+ nodeSelector:
+ workload: ml-inference
+ # Tolerations for dedicated nodes
+ tolerations:
+ - key: "ml-workload"
+ operator: "Equal"
+ value: "true"
+ effect: "NoSchedule"
+ # Affinity rules
+ affinity:
+ podAntiAffinity:
+ preferredDuringSchedulingIgnoredDuringExecution:
+ - weight: 100
+ podAffinityTerm:
+ labelSelector:
+ matchExpressions:
+ - key: app.kubernetes.io/name
+ operator: In
+ values:
+ - mcpembedding
+ topologyKey: kubernetes.io/hostname
+ # Security context
+ securityContext:
+ runAsNonRoot: true
+ runAsUser: 1000
+ fsGroup: 1000
+ # Container-specific overrides
+ containers:
+ - name: embedding
+ securityContext:
+ allowPrivilegeEscalation: false
+ capabilities:
+ drop:
+ - ALL
+
+ # Resource overrides for metadata
+ resourceOverrides:
+ statefulSet:
+ annotations:
+ description: "Advanced embedding server with HA configuration"
+ podTemplateMetadataOverrides:
+ labels:
+ app.custom: "ml-embedding"
+ version: "v1"
+ service:
+ annotations:
+ service.beta.kubernetes.io/aws-load-balancer-type: "nlb"
+ persistentVolumeClaim:
+ annotations:
+ volume.beta.kubernetes.io/storage-class: "fast-ssd"
diff --git a/examples/operator/embedding-servers/embedding-with-cache.yaml b/examples/operator/embedding-servers/embedding-with-cache.yaml
new file mode 100644
index 0000000000..6595f69f01
--- /dev/null
+++ b/examples/operator/embedding-servers/embedding-with-cache.yaml
@@ -0,0 +1,42 @@
+# EmbeddingServer with persistent model caching
+# This configuration caches downloaded models in a PVC for faster restarts
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: embedding-with-cache
+ namespace: toolhive-system
+spec:
+ # Model to use
+ model: "sentence-transformers/all-MiniLM-L6-v2"
+
+ # Container image
+ image: "text-embeddings-inference:latest"
+
+ # Port configuration
+ port: 8080
+
+ # Enable model caching with PVC
+ modelCache:
+ enabled: true
+ # Size of the PVC for model storage
+ size: "10Gi"
+ # Access mode for the PVC
+ accessMode: "ReadWriteOnce"
+ # Optional: Specify storage class name
+ # storageClassName: "fast-ssd"
+
+ # Resource requirements
+ resources:
+ limits:
+ cpu: "2000m"
+ memory: "4Gi"
+ requests:
+ cpu: "1000m"
+ memory: "2Gi"
+
+ # Environment variables
+ env:
+ - name: RUST_LOG
+ value: "info"
+ - name: MAX_BATCH_TOKENS
+ value: "16384"
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/setup/assert-rbac-clusterrole.yaml b/test/e2e/chainsaw/operator/multi-tenancy/setup/assert-rbac-clusterrole.yaml
index feccbeb749..97f45f2407 100644
--- a/test/e2e/chainsaw/operator/multi-tenancy/setup/assert-rbac-clusterrole.yaml
+++ b/test/e2e/chainsaw/operator/multi-tenancy/setup/assert-rbac-clusterrole.yaml
@@ -8,6 +8,7 @@ rules:
- ""
resources:
- configmaps
+ - persistentvolumeclaims
- secrets
- serviceaccounts
verbs:
@@ -121,6 +122,7 @@ rules:
- apiGroups:
- toolhive.stacklok.dev
resources:
+ - embeddingservers
- mcpexternalauthconfigs
- mcpgroups
- mcpregistries
@@ -139,6 +141,7 @@ rules:
- apiGroups:
- toolhive.stacklok.dev
resources:
+ - embeddingservers/finalizers
- mcpexternalauthconfigs/finalizers
- mcpgroups/finalizers
- mcpregistries/finalizers
@@ -149,6 +152,7 @@ rules:
- apiGroups:
- toolhive.stacklok.dev
resources:
+ - embeddingservers/status
- mcpexternalauthconfigs/status
- mcpgroups/status
- mcpregistries/status
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/setup/chainsaw-test.yaml b/test/e2e/chainsaw/operator/multi-tenancy/setup/chainsaw-test.yaml
index ecad301c38..4aabcf830a 100644
--- a/test/e2e/chainsaw/operator/multi-tenancy/setup/chainsaw-test.yaml
+++ b/test/e2e/chainsaw/operator/multi-tenancy/setup/chainsaw-test.yaml
@@ -41,7 +41,7 @@ spec:
- --set
- operator.rbac.scope=namespace
- --set
- - operator.rbac.allowedNamespaces={toolhive-system,test-namespace}
+ - operator.rbac.allowedNamespaces={toolhive-system,test-namespace,toolhive-test-ns-1,toolhive-test-ns-2}
- assert:
file: assert-operator-ready.yaml
- assert:
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/setup/namespace.yaml b/test/e2e/chainsaw/operator/multi-tenancy/setup/namespace.yaml
index 10dfe35520..1dad25487e 100644
--- a/test/e2e/chainsaw/operator/multi-tenancy/setup/namespace.yaml
+++ b/test/e2e/chainsaw/operator/multi-tenancy/setup/namespace.yaml
@@ -1,4 +1,14 @@
apiVersion: v1
kind: Namespace
metadata:
- name: test-namespace
\ No newline at end of file
+ name: test-namespace
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+ name: toolhive-test-ns-1
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+ name: toolhive-test-ns-2
\ No newline at end of file
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-deployment-ns1-running.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-deployment-ns1-running.yaml
new file mode 100644
index 0000000000..a555c28e15
--- /dev/null
+++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-deployment-ns1-running.yaml
@@ -0,0 +1,7 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: mt-embedding
+ namespace: toolhive-test-ns-1
+status:
+ replicas: 1
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-deployment-ns2-running.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-deployment-ns2-running.yaml
new file mode 100644
index 0000000000..4cf320a779
--- /dev/null
+++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-deployment-ns2-running.yaml
@@ -0,0 +1,7 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: mt-embedding
+ namespace: toolhive-test-ns-2
+status:
+ replicas: 1
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-embeddingserver-ns1-running.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-embeddingserver-ns1-running.yaml
new file mode 100644
index 0000000000..ca17b4bb09
--- /dev/null
+++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-embeddingserver-ns1-running.yaml
@@ -0,0 +1,7 @@
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: mt-embedding
+ namespace: toolhive-test-ns-1
+status:
+ (contains(['Downloading', 'Running'], phase)): true
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-embeddingserver-ns2-running.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-embeddingserver-ns2-running.yaml
new file mode 100644
index 0000000000..a35c2374c1
--- /dev/null
+++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-embeddingserver-ns2-running.yaml
@@ -0,0 +1,7 @@
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: mt-embedding
+ namespace: toolhive-test-ns-2
+status:
+ (contains(['Downloading', 'Running'], phase)): true
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-service-ns1-created.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-service-ns1-created.yaml
new file mode 100644
index 0000000000..3f5f25ab88
--- /dev/null
+++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-service-ns1-created.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Service
+metadata:
+ name: mt-embedding
+ namespace: toolhive-test-ns-1
+spec:
+ type: ClusterIP
+ ports:
+ - port: 8080
+ targetPort: 8080
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-service-ns2-created.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-service-ns2-created.yaml
new file mode 100644
index 0000000000..3a74de38e3
--- /dev/null
+++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-service-ns2-created.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Service
+metadata:
+ name: mt-embedding
+ namespace: toolhive-test-ns-2
+spec:
+ type: ClusterIP
+ ports:
+ - port: 8080
+ targetPort: 8080
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/chainsaw-test.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/chainsaw-test.yaml
new file mode 100644
index 0000000000..2815d0c14d
--- /dev/null
+++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/chainsaw-test.yaml
@@ -0,0 +1,182 @@
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Test
+metadata:
+ name: mt-embeddingserver
+spec:
+ description: Tests EmbeddingServer in multi-tenancy mode across namespaces
+ timeouts:
+ apply: 30s
+ assert: 120s
+ cleanup: 30s
+ exec: 300s
+ template: true
+ bindings:
+ - name: testPrefix
+ value: "mt-embedding"
+ - name: namespace1
+ value: "toolhive-test-ns-1"
+ - name: namespace2
+ value: "toolhive-test-ns-2"
+ steps:
+ - name: verify-operator
+ description: Ensure operator is ready before testing
+ try:
+ - assert:
+ file: ../../setup/assert-operator-ready.yaml
+
+ - name: create-namespaces
+ description: Create test namespaces for multi-tenancy testing
+ try:
+ - apply:
+ file: namespace-1.yaml
+ - apply:
+ file: namespace-2.yaml
+ - assert:
+ file: namespace-1.yaml
+ - assert:
+ file: namespace-2.yaml
+
+ - name: deploy-embeddingserver-ns1
+ description: Deploy EmbeddingServer in namespace 1
+ try:
+ - apply:
+ file: embeddingserver-ns1.yaml
+ - assert:
+ file: embeddingserver-ns1.yaml
+ - assert:
+ file: assert-embeddingserver-ns1-running.yaml
+ - assert:
+ file: assert-deployment-ns1-running.yaml
+ - assert:
+ file: assert-service-ns1-created.yaml
+
+ - name: deploy-embeddingserver-ns2
+ description: Deploy EmbeddingServer in namespace 2
+ try:
+ - apply:
+ file: embeddingserver-ns2.yaml
+ - assert:
+ file: embeddingserver-ns2.yaml
+ - assert:
+ file: assert-embeddingserver-ns2-running.yaml
+ - assert:
+ file: assert-deployment-ns2-running.yaml
+ - assert:
+ file: assert-service-ns2-created.yaml
+
+ - name: verify-isolation
+ description: Verify that EmbeddingServers in different namespaces are isolated
+ try:
+ - script:
+ env:
+ - name: embeddingServerName
+ value: ($testPrefix)
+ - name: ns1
+ value: ($namespace1)
+ - name: ns2
+ value: ($namespace2)
+ content: |
+ echo "Verifying multi-tenancy isolation..."
+
+ # Verify EmbeddingServer exists in namespace 1
+ if ! kubectl get embeddingserver $embeddingServerName -n $ns1 >/dev/null 2>&1; then
+ echo "EmbeddingServer not found in namespace 1"
+ exit 1
+ fi
+ echo "✓ EmbeddingServer found in namespace 1"
+
+ # Verify EmbeddingServer exists in namespace 2
+ if ! kubectl get embeddingserver $embeddingServerName -n $ns2 >/dev/null 2>&1; then
+ echo "EmbeddingServer not found in namespace 2"
+ exit 1
+ fi
+ echo "✓ EmbeddingServer found in namespace 2"
+
+ # Verify statefulsets are in separate namespaces
+ STATEFULSET_NAME="$embeddingServerName"
+
+ NS1_STATEFULSET=$(kubectl get statefulset $STATEFULSET_NAME -n $ns1 -o name 2>/dev/null || echo "")
+ NS2_STATEFULSET=$(kubectl get statefulset $STATEFULSET_NAME -n $ns2 -o name 2>/dev/null || echo "")
+
+ if [ -z "$NS1_STATEFULSET" ]; then
+ echo "StatefulSet not found in namespace 1"
+ exit 1
+ fi
+ echo "✓ StatefulSet found in namespace 1"
+
+ if [ -z "$NS2_STATEFULSET" ]; then
+ echo "StatefulSet not found in namespace 2"
+ exit 1
+ fi
+ echo "✓ StatefulSet found in namespace 2"
+
+ # Verify services are in separate namespaces
+ SERVICE_NAME="$embeddingServerName"
+
+ NS1_SERVICE=$(kubectl get svc $SERVICE_NAME -n $ns1 -o name 2>/dev/null || echo "")
+ NS2_SERVICE=$(kubectl get svc $SERVICE_NAME -n $ns2 -o name 2>/dev/null || echo "")
+
+ if [ -z "$NS1_SERVICE" ]; then
+ echo "Service not found in namespace 1"
+ exit 1
+ fi
+ echo "✓ Service found in namespace 1"
+
+ if [ -z "$NS2_SERVICE" ]; then
+ echo "Service not found in namespace 2"
+ exit 1
+ fi
+ echo "✓ Service found in namespace 2"
+
+ # Get ClusterIPs to verify they are different
+ NS1_CLUSTERIP=$(kubectl get svc $SERVICE_NAME -n $ns1 -o jsonpath='{.spec.clusterIP}')
+ NS2_CLUSTERIP=$(kubectl get svc $SERVICE_NAME -n $ns2 -o jsonpath='{.spec.clusterIP}')
+
+ echo "Namespace 1 ClusterIP: $NS1_CLUSTERIP"
+ echo "Namespace 2 ClusterIP: $NS2_CLUSTERIP"
+
+ if [ "$NS1_CLUSTERIP" = "$NS2_CLUSTERIP" ]; then
+ echo "Services have the same ClusterIP - isolation may be compromised"
+ exit 1
+ fi
+ echo "✓ Services have different ClusterIPs"
+
+ echo "✅ Multi-tenancy isolation verified!"
+ exit 0
+
+ - name: test-embedding-endpoints
+ description: Test both embedding server endpoints
+ try:
+ - script:
+ env:
+ - name: embeddingServerName
+ value: ($testPrefix)
+ - name: ns1
+ value: ($namespace1)
+ - name: ns2
+ value: ($namespace2)
+ content: |
+ echo "Testing embedding server endpoints in both namespaces..."
+
+ SERVICE_NAME="$embeddingServerName"
+
+ # Test namespace 1
+ echo "Testing namespace 1..."
+ NS1_CLUSTERIP=$(kubectl get svc $SERVICE_NAME -n $ns1 -o jsonpath='{.spec.clusterIP}')
+
+ kubectl run test-curl-ns1-$RANDOM --image=curlimages/curl:latest --rm -i --restart=Never -n $ns1 -- \
+ curl -s -o /dev/null -w "%{http_code}" http://$NS1_CLUSTERIP:8080/health || true
+
+ echo "✓ Namespace 1 endpoint test completed"
+
+ # Test namespace 2
+ echo "Testing namespace 2..."
+ NS2_CLUSTERIP=$(kubectl get svc $SERVICE_NAME -n $ns2 -o jsonpath='{.spec.clusterIP}')
+
+ kubectl run test-curl-ns2-$RANDOM --image=curlimages/curl:latest --rm -i --restart=Never -n $ns2 -- \
+ curl -s -o /dev/null -w "%{http_code}" http://$NS2_CLUSTERIP:8080/health || true
+
+ echo "✓ Namespace 2 endpoint test completed"
+
+ echo "✅ Multi-tenancy embedding server tests passed!"
+ exit 0
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/embeddingserver-ns1.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/embeddingserver-ns1.yaml
new file mode 100644
index 0000000000..12e23de197
--- /dev/null
+++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/embeddingserver-ns1.yaml
@@ -0,0 +1,23 @@
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: ($testPrefix)
+ namespace: ($namespace1)
+spec:
+ model: "sentence-transformers/paraphrase-MiniLM-L3-v2"
+ image: "text-embeddings-inference"
+ imagePullPolicy: IfNotPresent
+ port: 8080
+ replicas: 1
+ resources:
+ limits:
+ cpu: "500m"
+ memory: "512Mi"
+ requests:
+ cpu: "250m"
+ memory: "256Mi"
+ env:
+ - name: RUST_LOG
+ value: "info"
+ - name: NAMESPACE_IDENTIFIER
+ value: "namespace-1"
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/embeddingserver-ns2.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/embeddingserver-ns2.yaml
new file mode 100644
index 0000000000..260e9532a4
--- /dev/null
+++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/embeddingserver-ns2.yaml
@@ -0,0 +1,23 @@
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: ($testPrefix)
+ namespace: ($namespace2)
+spec:
+ model: "sentence-transformers/paraphrase-MiniLM-L3-v2"
+ image: "text-embeddings-inference"
+ imagePullPolicy: IfNotPresent
+ port: 8080
+ replicas: 1
+ resources:
+ limits:
+ cpu: "500m"
+ memory: "512Mi"
+ requests:
+ cpu: "250m"
+ memory: "256Mi"
+ env:
+ - name: RUST_LOG
+ value: "info"
+ - name: NAMESPACE_IDENTIFIER
+ value: "namespace-2"
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/namespace-1.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/namespace-1.yaml
new file mode 100644
index 0000000000..b170d307d1
--- /dev/null
+++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/namespace-1.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+ name: ($namespace1)
diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/namespace-2.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/namespace-2.yaml
new file mode 100644
index 0000000000..68cf711b48
--- /dev/null
+++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/namespace-2.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+ name: ($namespace2)
diff --git a/test/e2e/chainsaw/operator/single-tenancy/setup/assert-rbac-clusterrole.yaml b/test/e2e/chainsaw/operator/single-tenancy/setup/assert-rbac-clusterrole.yaml
index feccbeb749..97f45f2407 100644
--- a/test/e2e/chainsaw/operator/single-tenancy/setup/assert-rbac-clusterrole.yaml
+++ b/test/e2e/chainsaw/operator/single-tenancy/setup/assert-rbac-clusterrole.yaml
@@ -8,6 +8,7 @@ rules:
- ""
resources:
- configmaps
+ - persistentvolumeclaims
- secrets
- serviceaccounts
verbs:
@@ -121,6 +122,7 @@ rules:
- apiGroups:
- toolhive.stacklok.dev
resources:
+ - embeddingservers
- mcpexternalauthconfigs
- mcpgroups
- mcpregistries
@@ -139,6 +141,7 @@ rules:
- apiGroups:
- toolhive.stacklok.dev
resources:
+ - embeddingservers/finalizers
- mcpexternalauthconfigs/finalizers
- mcpgroups/finalizers
- mcpregistries/finalizers
@@ -149,6 +152,7 @@ rules:
- apiGroups:
- toolhive.stacklok.dev
resources:
+ - embeddingservers/status
- mcpexternalauthconfigs/status
- mcpgroups/status
- mcpregistries/status
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-deployment-running.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-deployment-running.yaml
new file mode 100644
index 0000000000..016a5dad86
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-deployment-running.yaml
@@ -0,0 +1,7 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: st-embedding-basic
+ namespace: toolhive-system
+status:
+ replicas: 1
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-embeddingserver-running.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-embeddingserver-running.yaml
new file mode 100644
index 0000000000..ff4cf53e37
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-embeddingserver-running.yaml
@@ -0,0 +1,7 @@
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: st-embedding-basic
+ namespace: toolhive-system
+status:
+ (contains(['Downloading', 'Running'], phase)): true
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-service-created.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-service-created.yaml
new file mode 100644
index 0000000000..bd590bb88e
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-service-created.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Service
+metadata:
+ name: st-embedding-basic
+ namespace: toolhive-system
+spec:
+ type: ClusterIP
+ ports:
+ - port: 8080
+ targetPort: 8080
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/chainsaw-test.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/chainsaw-test.yaml
new file mode 100644
index 0000000000..aeba429463
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/chainsaw-test.yaml
@@ -0,0 +1,69 @@
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Test
+metadata:
+ name: st-embeddingserver-basic
+spec:
+ description: Deploys basic EmbeddingServer and verifies it's running
+ timeouts:
+ apply: 30s
+ assert: 120s
+ cleanup: 30s
+ exec: 300s
+ template: true
+ bindings:
+ - name: testPrefix
+ value: "st-embedding-basic"
+ steps:
+ - name: verify-operator
+ description: Ensure operator is ready before testing
+ try:
+ - assert:
+ file: ../../../setup/assert-operator-ready.yaml
+ - name: deploy-embeddingserver
+ description: Deploy a basic EmbeddingServer instance and verify it's ready
+ try:
+ - apply:
+ file: embeddingserver.yaml
+ - assert:
+ file: embeddingserver.yaml
+ - assert:
+ file: assert-embeddingserver-running.yaml
+ - assert:
+ file: assert-deployment-running.yaml
+ - assert:
+ file: assert-service-created.yaml
+
+ - name: test-embedding-endpoint
+ description: Test the embedding server endpoint
+ try:
+ - script:
+ env:
+ - name: embeddingServerName
+ value: ($testPrefix)
+ content: |
+ # Log which embedding server is under test
+ echo "Testing embedding server: $embeddingServerName"
+
+ # Get the service ClusterIP (the Service shares the EmbeddingServer's name; a failed lookup yields an empty string)
+ SERVICE_NAME="$embeddingServerName"
+ CLUSTER_IP=$(kubectl get svc $SERVICE_NAME -n toolhive-system -o jsonpath='{.spec.clusterIP}' 2>/dev/null || echo "")
+
+ if [ -z "$CLUSTER_IP" ]; then
+ echo "Service not found or does not have ClusterIP"
+ kubectl describe svc $SERVICE_NAME -n toolhive-system
+ exit 1
+ fi
+
+ echo "Service ClusterIP: $CLUSTER_IP"
+
+ # Wait (up to 120s) until the statefulset reports .status.replicas=1; this does not check readyReplicas
+ echo "Waiting for statefulset to be ready..."
+ kubectl wait --for=jsonpath='{.status.replicas}'=1 --timeout=120s statefulset/$embeddingServerName -n toolhive-system
+
+ # Request /health from a throwaway curl pod; the probe's exit status is discarded by "|| true"
+ echo "Testing health endpoint..."
+ kubectl run test-curl-$RANDOM --image=curlimages/curl:latest --rm -i --restart=Never -n toolhive-system -- \
+ curl -s -o /dev/null -w "%{http_code}" http://$CLUSTER_IP:8080/health || true
+
+ echo "✅ Basic embedding server test passed!"
+ exit 0
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/embeddingserver.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/embeddingserver.yaml
new file mode 100644
index 0000000000..97eb1eada1
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/embeddingserver.yaml
@@ -0,0 +1,22 @@
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: ($testPrefix)
+ namespace: toolhive-system
+spec:
+ # Use a very lightweight model for testing (17.4M params)
+ model: "sentence-transformers/paraphrase-MiniLM-L3-v2"
+ image: "ghcr.io/huggingface/text-embeddings-inference:cpu-latest"
+ imagePullPolicy: IfNotPresent
+ port: 8080
+ replicas: 1
+ resources:
+ limits:
+ cpu: "500m"
+ memory: "512Mi"
+ requests:
+ cpu: "250m"
+ memory: "256Mi"
+ env:
+ - name: RUST_LOG
+ value: "info"
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-deployment-running.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-deployment-running.yaml
new file mode 100644
index 0000000000..addf6ca69a
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-deployment-running.yaml
@@ -0,0 +1,7 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: st-embedding-lifecycle
+ namespace: toolhive-system
+status:
+ replicas: 1
\ No newline at end of file
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-deployment-scaled.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-deployment-scaled.yaml
new file mode 100644
index 0000000000..f20167d663
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-deployment-scaled.yaml
@@ -0,0 +1,7 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: st-embedding-lifecycle
+ namespace: toolhive-system
+status:
+ replicas: 2
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-embeddingserver-running.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-embeddingserver-running.yaml
new file mode 100644
index 0000000000..0e47d1c7a9
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-embeddingserver-running.yaml
@@ -0,0 +1,7 @@
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: st-embedding-lifecycle
+ namespace: toolhive-system
+status:
+ (contains(['Downloading', 'Running'], phase)): true
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-embeddingserver-scaled.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-embeddingserver-scaled.yaml
new file mode 100644
index 0000000000..6e3da079c4
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-embeddingserver-scaled.yaml
@@ -0,0 +1,7 @@
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: st-embedding-lifecycle
+ namespace: toolhive-system
+spec:
+ replicas: 2
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-service-created.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-service-created.yaml
new file mode 100644
index 0000000000..610e94a7ab
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-service-created.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Service
+metadata:
+ name: st-embedding-lifecycle
+ namespace: toolhive-system
+spec:
+ type: ClusterIP
+ ports:
+ - port: 8080
+ targetPort: 8080
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/chainsaw-test.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/chainsaw-test.yaml
new file mode 100644
index 0000000000..4dc652183c
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/chainsaw-test.yaml
@@ -0,0 +1,121 @@
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Test
+metadata:
+ name: st-embeddingserver-lifecycle
+spec:
+ description: Tests EmbeddingServer lifecycle operations (create, update, delete)
+ timeouts:
+ apply: 30s
+ assert: 120s
+ cleanup: 30s
+ delete: 60s
+ exec: 300s
+ template: true
+ bindings:
+ - name: testPrefix
+ value: "st-embedding-lifecycle"
+ steps:
+ - name: verify-operator
+ description: Ensure operator is ready before testing
+ try:
+ - assert:
+ file: ../../../setup/assert-operator-ready.yaml
+
+ - name: create-embeddingserver
+ description: Create initial EmbeddingServer
+ try:
+ - apply:
+ file: embeddingserver-initial.yaml
+ - assert:
+ file: embeddingserver-initial.yaml
+ - assert:
+ file: assert-embeddingserver-running.yaml
+ - assert:
+ file: assert-deployment-running.yaml
+ - assert:
+ file: assert-service-created.yaml
+
+ - name: update-embeddingserver-env
+ description: Update EmbeddingServer environment variables
+ try:
+ - apply:
+ file: embeddingserver-updated-env.yaml
+ - assert:
+ file: embeddingserver-updated-env.yaml
+ - script:
+ env:
+ - name: embeddingServerName
+ value: ($testPrefix)
+ content: |
+ # Verify environment variable update propagated to statefulset (which shares the EmbeddingServer's name)
+ STATEFULSET_NAME="$embeddingServerName"
+
+ # Wait (up to 120s) until the statefulset reports .status.replicas=1; this does not check readyReplicas
+ kubectl wait --for=jsonpath='{.status.replicas}'=1 --timeout=120s statefulset/$STATEFULSET_NAME -n toolhive-system
+
+ # Read MAX_BATCH_TOKENS from the first container's env in the pod template (a failed lookup yields an empty string)
+ ENV_VALUE=$(kubectl get statefulset $STATEFULSET_NAME -n toolhive-system -o jsonpath='{.spec.template.spec.containers[0].env[?(@.name=="MAX_BATCH_TOKENS")].value}' 2>/dev/null || echo "")
+
+ if [ "$ENV_VALUE" != "16384" ]; then
+ echo "Environment variable not updated correctly. Expected: 16384, Got: $ENV_VALUE"
+ kubectl describe statefulset $STATEFULSET_NAME -n toolhive-system
+ exit 1
+ fi
+
+ echo "✓ Environment variable updated successfully"
+ exit 0
+
+ - name: delete-embeddingserver
+ description: Delete EmbeddingServer and verify cleanup
+ try:
+ - delete:
+ ref:
+ apiVersion: toolhive.stacklok.dev/v1alpha1
+ kind: EmbeddingServer
+ name: ($testPrefix)
+ namespace: toolhive-system
+ - script:
+ env:
+ - name: embeddingServerName
+ value: ($testPrefix)
+ content: |
+ # Resources that must be gone after the delete above; both share the EmbeddingServer's name
+ STATEFULSET_NAME="$embeddingServerName"
+ SERVICE_NAME="$embeddingServerName"
+
+ echo "Verifying resource cleanup..."
+
+ # Poll once per second, for at most 30 attempts, until "kubectl get" on the statefulset fails
+ timeout=30
+ while [ $timeout -gt 0 ]; do
+ if ! kubectl get statefulset $STATEFULSET_NAME -n toolhive-system 2>/dev/null; then
+ echo "✓ StatefulSet deleted"
+ break
+ fi
+ sleep 1
+ timeout=$((timeout - 1))
+ done
+
+ if [ $timeout -eq 0 ]; then # counter reached 0 only if the loop never hit "break"
+ echo "StatefulSet was not deleted within timeout"
+ exit 1
+ fi
+
+ # Same 30-attempt poll for the service
+ timeout=30
+ while [ $timeout -gt 0 ]; do
+ if ! kubectl get svc $SERVICE_NAME -n toolhive-system 2>/dev/null; then
+ echo "✓ Service deleted"
+ break
+ fi
+ sleep 1
+ timeout=$((timeout - 1))
+ done
+
+ if [ $timeout -eq 0 ]; then # counter reached 0 only if the loop never hit "break"
+ echo "Service was not deleted within timeout"
+ exit 1
+ fi
+
+ echo "✅ EmbeddingServer lifecycle test passed!"
+ exit 0
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-initial.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-initial.yaml
new file mode 100644
index 0000000000..da72c25b90
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-initial.yaml
@@ -0,0 +1,21 @@
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: ($testPrefix)
+ namespace: toolhive-system
+spec:
+ model: "sentence-transformers/paraphrase-MiniLM-L3-v2"
+ image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
+ imagePullPolicy: IfNotPresent
+ port: 8080
+ replicas: 1
+ resources:
+ limits:
+ cpu: "500m"
+ memory: "512Mi"
+ requests:
+ cpu: "250m"
+ memory: "256Mi"
+ env:
+ - name: RUST_LOG
+ value: "info"
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-scaled.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-scaled.yaml
new file mode 100644
index 0000000000..48e19545b9
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-scaled.yaml
@@ -0,0 +1,21 @@
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: ($testPrefix)
+ namespace: toolhive-system
+spec:
+ model: "sentence-transformers/paraphrase-MiniLM-L3-v2"
+ image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
+ imagePullPolicy: IfNotPresent
+ port: 8080
+ replicas: 2
+ resources:
+ limits:
+ cpu: "500m"
+ memory: "512Mi"
+ requests:
+ cpu: "250m"
+ memory: "256Mi"
+ env:
+ - name: RUST_LOG
+ value: "info"
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-updated-env.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-updated-env.yaml
new file mode 100644
index 0000000000..4efd73ec44
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-updated-env.yaml
@@ -0,0 +1,23 @@
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: ($testPrefix)
+ namespace: toolhive-system
+spec:
+ model: "sentence-transformers/paraphrase-MiniLM-L3-v2"
+ image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
+ imagePullPolicy: IfNotPresent
+ port: 8080
+ replicas: 1
+ resources:
+ limits:
+ cpu: "500m"
+ memory: "512Mi"
+ requests:
+ cpu: "250m"
+ memory: "256Mi"
+ env:
+ - name: RUST_LOG
+ value: "debug"
+ - name: MAX_BATCH_TOKENS
+ value: "16384"
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-deployment-running.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-deployment-running.yaml
new file mode 100644
index 0000000000..1d9ed74799
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-deployment-running.yaml
@@ -0,0 +1,7 @@
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+ name: st-embedding-cache
+ namespace: toolhive-system
+status:
+ replicas: 1
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-embeddingserver-running.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-embeddingserver-running.yaml
new file mode 100644
index 0000000000..1bc08dec0a
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-embeddingserver-running.yaml
@@ -0,0 +1,7 @@
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: st-embedding-cache
+ namespace: toolhive-system
+status:
+ (contains(['Downloading', 'Running'], phase)): true
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-pvc-created.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-pvc-created.yaml
new file mode 100644
index 0000000000..929e91e5f1
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-pvc-created.yaml
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+ name: model-cache-st-embedding-cache-0 # <volumeClaimTemplate>-<statefulset>-<ordinal>, the same name the scenario's chainsaw-test.yaml script checks
+ namespace: toolhive-system
+spec:
+ accessModes:
+ - ReadWriteOnce
+ resources:
+ requests:
+ storage: 5Gi
+status:
+ phase: Bound
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-service-created.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-service-created.yaml
new file mode 100644
index 0000000000..2d46b96cfa
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-service-created.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Service
+metadata:
+ name: st-embedding-cache
+ namespace: toolhive-system
+spec:
+ type: ClusterIP
+ ports:
+ - port: 8080
+ targetPort: 8080
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/chainsaw-test.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/chainsaw-test.yaml
new file mode 100644
index 0000000000..e77487a032
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/chainsaw-test.yaml
@@ -0,0 +1,160 @@
+apiVersion: chainsaw.kyverno.io/v1alpha1
+kind: Test
+metadata:
+ name: st-embeddingserver-cache
+spec:
+ description: Deploys EmbeddingServer with model caching and verifies PVC is created
+ timeouts:
+ apply: 30s
+ assert: 120s
+ cleanup: 30s
+ exec: 300s
+ template: true
+ bindings:
+ - name: testPrefix
+ value: "st-embedding-cache"
+ steps:
+ - name: verify-operator
+ description: Ensure operator is ready before testing
+ try:
+ - assert:
+ file: ../../../setup/assert-operator-ready.yaml
+ - name: deploy-embeddingserver-with-cache
+ description: Deploy EmbeddingServer with model caching enabled
+ try:
+ - apply:
+ file: embeddingserver.yaml
+ - assert:
+ file: embeddingserver.yaml
+ - assert:
+ file: assert-embeddingserver-running.yaml
+ - assert:
+ file: assert-deployment-running.yaml
+ - assert:
+ file: assert-service-created.yaml
+
+ - name: verify-model-cache-volume
+ description: Verify that the PVC is mounted in the statefulset
+ try:
+ - script:
+ env:
+ - name: embeddingServerName
+ value: ($testPrefix)
+ content: |
+ # Log which embedding server is under test
+ echo "Verifying model cache for embedding server: $embeddingServerName"
+
+ # Fixed 60-second pause to give the PVC time to provision before it is inspected
+ echo "Waiting 60 seconds for PVC to provision..."
+ sleep 60
+
+ STATEFULSET_NAME="$embeddingServerName"
+ # StatefulSet PVCs follow the pattern: volumeClaimTemplate-statefulsetName-ordinal
+ PVC_NAME="model-cache-$embeddingServerName-0"
+
+ # Check if PVC exists and is bound (a failed lookup is reported as "NotFound")
+ PVC_STATUS=$(kubectl get pvc $PVC_NAME -n toolhive-system -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound")
+
+ if [ "$PVC_STATUS" != "Bound" ]; then
+ echo "PVC is not bound. Current status: $PVC_STATUS"
+ echo "Available PVCs:"
+ kubectl get pvc -n toolhive-system
+ exit 1
+ fi
+
+ echo "✓ PVC is bound"
+
+ # Wait (up to 120s) for readyReplicas=1; on failure dump statefulset, pod and event diagnostics, then fail the step
+ if ! kubectl wait --for=jsonpath='{.status.readyReplicas}'=1 --timeout=120s statefulset/$STATEFULSET_NAME -n toolhive-system; then
+ echo "StatefulSet failed to become ready. Gathering diagnostics..."
+ echo "StatefulSet status:"
+ kubectl get statefulset/$STATEFULSET_NAME -n toolhive-system -o yaml
+ echo "Pod status:"
+ kubectl get pods -n toolhive-system -l app.kubernetes.io/instance=$STATEFULSET_NAME
+ echo "Pod describe:"
+ kubectl describe pods -n toolhive-system -l app.kubernetes.io/instance=$STATEFULSET_NAME
+ echo "Pod events:"
+ kubectl get events -n toolhive-system --sort-by='.lastTimestamp' | tail -20
+ exit 1
+ fi
+
+ echo "✓ StatefulSet is ready"
+
+ # Pick the first Running pod with the instance label; its /data directory is inspected below
+ echo "Checking for model files in cache volume..."
+ POD_NAME=$(kubectl get pods -n toolhive-system -l app.kubernetes.io/instance=$STATEFULSET_NAME --field-selector=status.phase=Running -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "")
+
+ if [ -z "$POD_NAME" ]; then
+ echo "No running pod found for statefulset"
+ echo "All pods in namespace:"
+ kubectl get pods -n toolhive-system -l app.kubernetes.io/instance=$STATEFULSET_NAME
+ exit 1
+ fi
+
+ echo "Checking cache contents in pod: $POD_NAME"
+
+ # Wait for the model download by polling /data for files every 2 seconds, up to MAX_WAIT seconds
+ echo "Waiting for model to be downloaded..."
+ MAX_WAIT=60
+ COUNTER=0
+ MODEL_LOADED=false
+
+ while [ $COUNTER -lt $MAX_WAIT ]; do
+ # Count regular files under /data inside the pod (a failed exec counts as 0)
+ CACHE_CONTENTS=$(kubectl exec -n toolhive-system $POD_NAME -- sh -c 'find /data -type f 2>/dev/null | wc -l' || echo "0")
+
+ if [ "$CACHE_CONTENTS" -gt 0 ]; then
+ MODEL_LOADED=true
+ break
+ fi
+
+ echo "Waiting for model files to appear... ($COUNTER/$MAX_WAIT seconds)"
+ sleep 2
+ COUNTER=$((COUNTER + 2))
+ done
+
+ if [ "$MODEL_LOADED" = false ]; then
+ echo "No model files found in /data after $MAX_WAIT seconds. Cache appears empty."
+ echo "Listing /data contents:"
+ kubectl exec -n toolhive-system $POD_NAME -- ls -laR /data || true
+ echo "Pod logs:"
+ kubectl logs -n toolhive-system $POD_NAME --tail=50 || true
+ exit 1
+ fi
+
+ echo "✓ Model files found in cache volume"
+ echo "Cache directory contents:"
+ kubectl exec -n toolhive-system $POD_NAME -- sh -c 'du -sh /data/* 2>/dev/null' || true
+
+ echo "✅ Model cache verification passed!"
+ exit 0
+
+ - name: test-embedding-endpoint
+ description: Test the embedding server endpoint with cache
+ try:
+ - script:
+ env:
+ - name: embeddingServerName
+ value: ($testPrefix)
+ content: |
+ # Log which embedding server is under test
+ echo "Testing embedding server with cache: $embeddingServerName"
+
+ SERVICE_NAME="$embeddingServerName"
+ CLUSTER_IP=$(kubectl get svc $SERVICE_NAME -n toolhive-system -o jsonpath='{.spec.clusterIP}' 2>/dev/null || echo "")
+
+ if [ -z "$CLUSTER_IP" ]; then
+ echo "Service not found or does not have ClusterIP"
+ kubectl describe svc $SERVICE_NAME -n toolhive-system
+ exit 1
+ fi
+
+ echo "Service ClusterIP: $CLUSTER_IP"
+
+ # Request /health from a throwaway curl pod; the probe's exit status is discarded by "|| true"
+ echo "Testing health endpoint..."
+ kubectl run test-curl-$RANDOM --image=curlimages/curl:latest --rm -i --restart=Never -n toolhive-system -- \
+ curl -s -o /dev/null -w "%{http_code}" http://$CLUSTER_IP:8080/health || true
+
+ echo "✅ Embedding server with cache test passed!"
+ exit 0
diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/embeddingserver.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/embeddingserver.yaml
new file mode 100644
index 0000000000..28cef57bae
--- /dev/null
+++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/embeddingserver.yaml
@@ -0,0 +1,27 @@
+apiVersion: toolhive.stacklok.dev/v1alpha1
+kind: EmbeddingServer
+metadata:
+ name: ($testPrefix)
+ namespace: toolhive-system
+spec:
+ # Use a very lightweight model for testing (17.4M params)
+ model: "sentence-transformers/paraphrase-MiniLM-L3-v2"
+ image: "ghcr.io/huggingface/text-embeddings-inference:cpu-latest"
+ imagePullPolicy: IfNotPresent
+ port: 8080
+ replicas: 1
+ # Enable model caching
+ modelCache:
+ enabled: true
+ size: "5Gi"
+ accessMode: "ReadWriteOnce"
+ resources:
+ limits:
+ cpu: "500m"
+ memory: "512Mi"
+ requests:
+ cpu: "250m"
+ memory: "256Mi"
+ env:
+ - name: RUST_LOG
+ value: "info"