diff --git a/CLAUDE.md b/CLAUDE.md index 83dcefa055..0be7ab06c3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -313,6 +313,40 @@ For the complete documentation structure and navigation, see `docs/arch/README.m - Do not use "Conventional Commits", e.g. starting with `feat`, `fix`, `chore`, etc. - Use mockgen for creating mocks instead of generating mocks by hand. +### Go Coding Style + +- **Prefer immutable variable assignment with anonymous functions**: + When you need to assign a variable based on complex conditional logic, prefer using an immediately-invoked anonymous function instead of mutating the variable across multiple branches: + + ```go + // ✅ Good: Immutable assignment with anonymous function + phase := func() PhaseType { + if someCondition { + return PhaseA + } + if anotherCondition { + return PhaseB + } + return PhaseDefault + }() + + // ❌ Avoid: Mutable variable across branches + var phase PhaseType + if someCondition { + phase = PhaseA + } else if anotherCondition { + phase = PhaseB + } else { + phase = PhaseDefault + } + ``` + + **Benefits**: + - The variable is assigned exactly once, at its declaration (Go cannot enforce immutability, but no later branch needs to write to it), reducing bugs from accidental modification + - All decision logic is in one place with explicit returns + - Clearer logic flow and easier to understand + - Reduces cognitive load from tracking which branch sets which value + ## Error Handling Guidelines See `docs/error-handling.md` for comprehensive documentation. 
diff --git a/cmd/thv-operator/api/v1alpha1/embeddingserver_types.go b/cmd/thv-operator/api/v1alpha1/embeddingserver_types.go new file mode 100644 index 0000000000..c7909cb3f5 --- /dev/null +++ b/cmd/thv-operator/api/v1alpha1/embeddingserver_types.go @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: Apache-2.0 + +package v1alpha1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" +) + +// Condition types for EmbeddingServer (reuses common conditions from MCPServer) +// ConditionImageValidated and ConditionPodTemplateValid are shared with MCPServer + +const ( + // ConditionModelReady indicates whether the embedding model is downloaded and ready + ConditionModelReady = "ModelReady" + + // ConditionVolumeReady indicates whether the PVC for model caching is ready + ConditionVolumeReady = "VolumeReady" +) + +// Condition reasons for EmbeddingServer +// Image validation and PodTemplate reasons are shared with MCPServer + +const ( + // ConditionReasonModelDownloading indicates the model is being downloaded + ConditionReasonModelDownloading = "ModelDownloading" + // ConditionReasonModelReady indicates the model is downloaded and ready + ConditionReasonModelReady = "ModelReady" + // ConditionReasonModelFailed indicates the model download or initialization failed + ConditionReasonModelFailed = "ModelFailed" + + // ConditionReasonVolumeCreating indicates the PVC is being created + ConditionReasonVolumeCreating = "VolumeCreating" + // ConditionReasonVolumeReady indicates the PVC is ready + ConditionReasonVolumeReady = "VolumeReady" + // ConditionReasonVolumeFailed indicates the PVC creation failed + ConditionReasonVolumeFailed = "VolumeFailed" +) + +// EmbeddingServerSpec defines the desired state of EmbeddingServer +type EmbeddingServerSpec struct { + // Model is the HuggingFace embedding model to use (e.g., "sentence-transformers/all-MiniLM-L6-v2") + // +kubebuilder:validation:Required + Model string `json:"model"` + + // 
HFTokenSecretRef is a reference to a Kubernetes Secret containing the huggingface token. + // If provided, the secret value will be provided to the embedding server for authentication with huggingface. + // +optional + HFTokenSecretRef *SecretKeyRef `json:"hfTokenSecretRef,omitempty"` + + // Image is the container image for huggingface-embedding-inference + // +kubebuilder:validation:Required + // +kubebuilder:default="ghcr.io/huggingface/text-embeddings-inference:latest" + Image string `json:"image,omitempty"` + + // ImagePullPolicy defines the pull policy for the container image + // +kubebuilder:validation:Enum=Always;Never;IfNotPresent + // +kubebuilder:default="IfNotPresent" + // +optional + ImagePullPolicy string `json:"imagePullPolicy,omitempty"` + + // Port is the port to expose the embedding service on + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:validation:Maximum=65535 + // +kubebuilder:default=8080 + Port int32 `json:"port,omitempty"` + + // Args are additional arguments to pass to the embedding inference server + // +optional + Args []string `json:"args,omitempty"` + + // Env are environment variables to set in the container + // +optional + Env []EnvVar `json:"env,omitempty"` + + // Resources defines compute resources for the embedding server + // +optional + Resources ResourceRequirements `json:"resources,omitempty"` + + // ModelCache configures persistent storage for downloaded models + // When enabled, models are cached in a PVC and reused across pod restarts + // +optional + ModelCache *ModelCacheConfig `json:"modelCache,omitempty"` + + // PodTemplateSpec allows customizing the pod (node selection, tolerations, etc.) + // This field accepts a PodTemplateSpec object as JSON/YAML. + // Note that to modify the specific container the embedding server runs in, you must specify + // the 'embedding' container name in the PodTemplateSpec. 
+ // +optional + // +kubebuilder:pruning:PreserveUnknownFields + // +kubebuilder:validation:Type=object + PodTemplateSpec *runtime.RawExtension `json:"podTemplateSpec,omitempty"` + + // ResourceOverrides allows overriding annotations and labels for resources created by the operator + // +optional + ResourceOverrides *EmbeddingResourceOverrides `json:"resourceOverrides,omitempty"` + + // Replicas is the number of embedding server replicas to run + // +kubebuilder:validation:Minimum=1 + // +kubebuilder:default=1 + // +optional + Replicas *int32 `json:"replicas,omitempty"` +} + +// ModelCacheConfig configures persistent storage for model caching +type ModelCacheConfig struct { + // Enabled controls whether model caching is enabled + // +kubebuilder:default=true + // +optional + Enabled bool `json:"enabled,omitempty"` + + // StorageClassName is the storage class to use for the PVC + // If not specified, uses the cluster's default storage class + // +optional + StorageClassName *string `json:"storageClassName,omitempty"` + + // Size is the size of the PVC for model caching (e.g., "10Gi") + // +kubebuilder:default="10Gi" + // +optional + Size string `json:"size,omitempty"` + + // AccessMode is the access mode for the PVC + // +kubebuilder:default="ReadWriteOnce" + // +kubebuilder:validation:Enum=ReadWriteOnce;ReadWriteMany;ReadOnlyMany + // +optional + AccessMode string `json:"accessMode,omitempty"` +} + +// EmbeddingResourceOverrides defines overrides for annotations and labels on created resources +type EmbeddingResourceOverrides struct { + // StatefulSet defines overrides for the StatefulSet resource + // +optional + StatefulSet *EmbeddingStatefulSetOverrides `json:"statefulSet,omitempty"` + + // Service defines overrides for the Service resource + // +optional + Service *ResourceMetadataOverrides `json:"service,omitempty"` + + // PersistentVolumeClaim defines overrides for the PVC resource + // +optional + PersistentVolumeClaim *ResourceMetadataOverrides 
`json:"persistentVolumeClaim,omitempty"` +} + +// EmbeddingStatefulSetOverrides defines overrides specific to the embedding statefulset +type EmbeddingStatefulSetOverrides struct { + // ResourceMetadataOverrides is embedded to inherit annotations and labels fields + ResourceMetadataOverrides `json:",inline"` // nolint:revive + + // PodTemplateMetadataOverrides defines metadata overrides for the pod template + // +optional + PodTemplateMetadataOverrides *ResourceMetadataOverrides `json:"podTemplateMetadataOverrides,omitempty"` +} + +// EmbeddingServerStatus defines the observed state of EmbeddingServer +type EmbeddingServerStatus struct { + // Conditions represent the latest available observations of the EmbeddingServer's state + // +optional + Conditions []metav1.Condition `json:"conditions,omitempty"` + + // Phase is the current phase of the EmbeddingServer + // +optional + Phase EmbeddingServerPhase `json:"phase,omitempty"` + + // Message provides additional information about the current phase + // +optional + Message string `json:"message,omitempty"` + + // URL is the URL where the embedding service can be accessed + // +optional + URL string `json:"url,omitempty"` + + // ReadyReplicas is the number of ready replicas + // +optional + ReadyReplicas int32 `json:"readyReplicas,omitempty"` + + // ObservedGeneration reflects the generation most recently observed by the controller + // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` +} + +// EmbeddingServerPhase is the phase of the EmbeddingServer +// +kubebuilder:validation:Enum=Pending;Downloading;Running;Failed;Terminating +type EmbeddingServerPhase string + +const ( + // EmbeddingServerPhasePending means the EmbeddingServer is being created + EmbeddingServerPhasePending EmbeddingServerPhase = "Pending" + + // EmbeddingServerPhaseDownloading means the model is being downloaded + EmbeddingServerPhaseDownloading EmbeddingServerPhase = "Downloading" + + // EmbeddingServerPhaseRunning means 
the EmbeddingServer is running and ready + EmbeddingServerPhaseRunning EmbeddingServerPhase = "Running" + + // EmbeddingServerPhaseFailed means the EmbeddingServer failed to start + EmbeddingServerPhaseFailed EmbeddingServerPhase = "Failed" + + // EmbeddingServerPhaseTerminating means the EmbeddingServer is being deleted + EmbeddingServerPhaseTerminating EmbeddingServerPhase = "Terminating" +) + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="Status",type="string",JSONPath=".status.phase" +//+kubebuilder:printcolumn:name="Model",type="string",JSONPath=".spec.model" +//+kubebuilder:printcolumn:name="Ready",type="integer",JSONPath=".status.readyReplicas" +//+kubebuilder:printcolumn:name="URL",type="string",JSONPath=".status.url" +//+kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp" + +// EmbeddingServer is the Schema for the embeddingservers API +type EmbeddingServer struct { + metav1.TypeMeta `json:",inline"` // nolint:revive + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec EmbeddingServerSpec `json:"spec,omitempty"` + Status EmbeddingServerStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// EmbeddingServerList contains a list of EmbeddingServer +type EmbeddingServerList struct { + metav1.TypeMeta `json:",inline"` // nolint:revive + metav1.ListMeta `json:"metadata,omitempty"` + Items []EmbeddingServer `json:"items"` +} + +// GetName returns the name of the EmbeddingServer +func (e *EmbeddingServer) GetName() string { + return e.Name +} + +// GetNamespace returns the namespace of the EmbeddingServer +func (e *EmbeddingServer) GetNamespace() string { + return e.Namespace +} + +// GetPort returns the port of the EmbeddingServer +func (e *EmbeddingServer) GetPort() int32 { + if e.Spec.Port > 0 { + return e.Spec.Port + } + return 8080 +} + +// GetReplicas returns the number of replicas for the EmbeddingServer +func (e *EmbeddingServer) GetReplicas() 
int32 { + if e.Spec.Replicas != nil { + return *e.Spec.Replicas + } + return 1 +} + +// IsModelCacheEnabled returns whether model caching is enabled +func (e *EmbeddingServer) IsModelCacheEnabled() bool { + if e.Spec.ModelCache == nil { + return false + } + return e.Spec.ModelCache.Enabled +} + +// GetImagePullPolicy returns the image pull policy for the EmbeddingServer +func (e *EmbeddingServer) GetImagePullPolicy() string { + if e.Spec.ImagePullPolicy != "" { + return e.Spec.ImagePullPolicy + } + return "IfNotPresent" +} + +func init() { + SchemeBuilder.Register(&EmbeddingServer{}, &EmbeddingServerList{}) +} diff --git a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go index b9342d79db..f3da8d75a6 100644 --- a/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go +++ b/cmd/thv-operator/api/v1alpha1/zz_generated.deepcopy.go @@ -22,7 +22,7 @@ package v1alpha1 import ( corev1 "k8s.io/api/core/v1" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -120,6 +120,7 @@ func (in *BackendAuthConfig) DeepCopy() *BackendAuthConfig { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *BearerTokenConfig) DeepCopyInto(out *BearerTokenConfig) { *out = *in if in.TokenSecretRef != nil { @@ -194,6 +195,189 @@ func (in *ConfigMapOIDCRef) DeepCopy() *ConfigMapOIDCRef { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *EmbeddingResourceOverrides) DeepCopyInto(out *EmbeddingResourceOverrides) { + *out = *in + if in.StatefulSet != nil { + in, out := &in.StatefulSet, &out.StatefulSet + *out = new(EmbeddingStatefulSetOverrides) + (*in).DeepCopyInto(*out) + } + if in.Service != nil { + in, out := &in.Service, &out.Service + *out = new(ResourceMetadataOverrides) + (*in).DeepCopyInto(*out) + } + if in.PersistentVolumeClaim != nil { + in, out := &in.PersistentVolumeClaim, &out.PersistentVolumeClaim + *out = new(ResourceMetadataOverrides) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingResourceOverrides. +func (in *EmbeddingResourceOverrides) DeepCopy() *EmbeddingResourceOverrides { + if in == nil { + return nil + } + out := new(EmbeddingResourceOverrides) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EmbeddingServer) DeepCopyInto(out *EmbeddingServer) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingServer. +func (in *EmbeddingServer) DeepCopy() *EmbeddingServer { + if in == nil { + return nil + } + out := new(EmbeddingServer) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *EmbeddingServer) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *EmbeddingServerList) DeepCopyInto(out *EmbeddingServerList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]EmbeddingServer, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingServerList. +func (in *EmbeddingServerList) DeepCopy() *EmbeddingServerList { + if in == nil { + return nil + } + out := new(EmbeddingServerList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *EmbeddingServerList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EmbeddingServerSpec) DeepCopyInto(out *EmbeddingServerSpec) { + *out = *in + if in.HFTokenSecretRef != nil { + in, out := &in.HFTokenSecretRef, &out.HFTokenSecretRef + *out = new(SecretKeyRef) + **out = **in + } + if in.Args != nil { + in, out := &in.Args, &out.Args + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Env != nil { + in, out := &in.Env, &out.Env + *out = make([]EnvVar, len(*in)) + copy(*out, *in) + } + out.Resources = in.Resources + if in.ModelCache != nil { + in, out := &in.ModelCache, &out.ModelCache + *out = new(ModelCacheConfig) + (*in).DeepCopyInto(*out) + } + if in.PodTemplateSpec != nil { + in, out := &in.PodTemplateSpec, &out.PodTemplateSpec + *out = new(runtime.RawExtension) + (*in).DeepCopyInto(*out) + } + if in.ResourceOverrides != nil { + in, out := &in.ResourceOverrides, &out.ResourceOverrides + *out = new(EmbeddingResourceOverrides) + (*in).DeepCopyInto(*out) + } + if in.Replicas != nil { + in, out := &in.Replicas, &out.Replicas + *out = new(int32) + **out 
= **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingServerSpec. +func (in *EmbeddingServerSpec) DeepCopy() *EmbeddingServerSpec { + if in == nil { + return nil + } + out := new(EmbeddingServerSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EmbeddingServerStatus) DeepCopyInto(out *EmbeddingServerStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingServerStatus. +func (in *EmbeddingServerStatus) DeepCopy() *EmbeddingServerStatus { + if in == nil { + return nil + } + out := new(EmbeddingServerStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EmbeddingStatefulSetOverrides) DeepCopyInto(out *EmbeddingStatefulSetOverrides) { + *out = *in + in.ResourceMetadataOverrides.DeepCopyInto(&out.ResourceMetadataOverrides) + if in.PodTemplateMetadataOverrides != nil { + in, out := &in.PodTemplateMetadataOverrides, &out.PodTemplateMetadataOverrides + *out = new(ResourceMetadataOverrides) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EmbeddingStatefulSetOverrides. +func (in *EmbeddingStatefulSetOverrides) DeepCopy() *EmbeddingStatefulSetOverrides { + if in == nil { + return nil + } + out := new(EmbeddingStatefulSetOverrides) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *EnvVar) DeepCopyInto(out *EnvVar) { *out = *in @@ -1252,6 +1436,26 @@ func (in *MCPToolConfigStatus) DeepCopy() *MCPToolConfigStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ModelCacheConfig) DeepCopyInto(out *ModelCacheConfig) { + *out = *in + if in.StorageClassName != nil { + in, out := &in.StorageClassName, &out.StorageClassName + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelCacheConfig. +func (in *ModelCacheConfig) DeepCopy() *ModelCacheConfig { + if in == nil { + return nil + } + out := new(ModelCacheConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NameFilter) DeepCopyInto(out *NameFilter) { *out = *in diff --git a/cmd/thv-operator/controllers/embeddingserver_controller.go b/cmd/thv-operator/controllers/embeddingserver_controller.go new file mode 100644 index 0000000000..a17f79197c --- /dev/null +++ b/cmd/thv-operator/controllers/embeddingserver_controller.go @@ -0,0 +1,1069 @@ +// SPDX-License-Identifier: Apache-2.0 + +// Package controllers contains the reconciliation logic for the EmbeddingServer custom resource. +// It handles the creation, update, and deletion of HuggingFace embedding inference servers in Kubernetes. 
+package controllers + +import ( + "context" + "fmt" + "maps" + "reflect" + "time" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/client-go/tools/record" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/log" + + mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" + ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/validation" +) + +// EmbeddingServerReconciler reconciles a EmbeddingServer object +type EmbeddingServerReconciler struct { + client.Client + Scheme *runtime.Scheme + Recorder record.EventRecorder + PlatformDetector *ctrlutil.SharedPlatformDetector + ImageValidation validation.ImageValidation +} + +const ( + // embeddingContainerName is the name of the embedding container used in pod templates + embeddingContainerName = "embedding" + + // embeddingFinalizerName is the finalizer name for EmbeddingServer resources + embeddingFinalizerName = "embeddingserver.toolhive.stacklok.dev/finalizer" + + // modelCacheMountPath is the mount path for the model cache volume + modelCacheMountPath = "/data" +) + +//+kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=embeddingservers,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=embeddingservers/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=toolhive.stacklok.dev,resources=embeddingservers/finalizers,verbs=update +//+kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;update;patch;delete 
+//+kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch +//+kubebuilder:rbac:groups="",resources=events,verbs=create;patch + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// +//nolint:gocyclo // Reconciliation logic complexity is acceptable +func (r *EmbeddingServerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + ctxLogger := log.FromContext(ctx) + + // Fetch the EmbeddingServer instance + embedding := &mcpv1alpha1.EmbeddingServer{} + err := r.Get(ctx, req.NamespacedName, embedding) + if err != nil { + if errors.IsNotFound(err) { + ctxLogger.Info("EmbeddingServer resource not found. Ignoring since object must be deleted") + return ctrl.Result{}, nil + } + ctxLogger.Error(err, "Failed to get EmbeddingServer") + return ctrl.Result{}, err + } + + // Perform early validations + if result, err := r.performValidations(ctx, embedding); err != nil || result.RequeueAfter > 0 { + return result, err + } + + // Handle deletion + if result, done, err := r.handleDeletion(ctx, embedding); done { + return result, err + } + + // Add finalizer if needed + if result, done, err := r.ensureFinalizer(ctx, embedding); done { + return result, err + } + + // Track if we need to requeue after status update + var requeueResult ctrl.Result + + // Ensure statefulset exists and is up to date + if result, err := r.ensureStatefulSet(ctx, embedding); err != nil { + return ctrl.Result{}, err + } else if result.RequeueAfter > 0 { + requeueResult = result + } + + // Ensure service exists + if result, err := r.ensureService(ctx, embedding); err != nil { + return ctrl.Result{}, err + } else if result.RequeueAfter > 0 { + // If we already have 
a requeue scheduled, keep the shorter duration + if requeueResult.RequeueAfter == 0 || (result.RequeueAfter > 0 && result.RequeueAfter < requeueResult.RequeueAfter) { + requeueResult = result + } + } + + // Always update the EmbeddingServer status before returning + if err := r.updateEmbeddingServerStatus(ctx, embedding); err != nil { + ctxLogger.Error(err, "Failed to update EmbeddingServer status") + return ctrl.Result{}, err + } + + return requeueResult, nil +} + +// performValidations performs all early validations for the EmbeddingServer +// +//nolint:unparam // error return kept for consistency with reconciler pattern +func (r *EmbeddingServerReconciler) performValidations( + ctx context.Context, + embedding *mcpv1alpha1.EmbeddingServer, +) (ctrl.Result, error) { + ctxLogger := log.FromContext(ctx) + + // Validate PodTemplateSpec early + if !r.validateAndUpdatePodTemplateStatus(ctx, embedding) { + // Status fields were set by validateAndUpdatePodTemplateStatus, now update + if err := r.Status().Update(ctx, embedding); err != nil { + ctxLogger.Error(err, "Failed to update EmbeddingServer status after PodTemplateSpec validation failure") + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + + // Validate image + if err := r.validateImage(ctx, embedding); err != nil { + // Status fields were set by validateImage, now update + if statusErr := r.Status().Update(ctx, embedding); statusErr != nil { + ctxLogger.Error(statusErr, "Failed to update EmbeddingServer status after image validation failure") + return ctrl.Result{}, statusErr + } + // We requeue to retry validation after image issues are resolved + ctxLogger.Error(err, "Image validation failed, will retry", + "image", embedding.Spec.Image, + "requeueAfter", 5*time.Minute) + return ctrl.Result{RequeueAfter: 5 * time.Minute}, nil + } + + return ctrl.Result{}, nil +} + +// handleDeletion handles the deletion of EmbeddingServer resources +// +//nolint:unparam // ctrl.Result return kept for consistency 
with reconciler pattern +func (r *EmbeddingServerReconciler) handleDeletion( + ctx context.Context, + embedding *mcpv1alpha1.EmbeddingServer, +) (ctrl.Result, bool, error) { + if embedding.GetDeletionTimestamp() == nil { + return ctrl.Result{}, false, nil + } + + if controllerutil.ContainsFinalizer(embedding, embeddingFinalizerName) { + r.finalizeEmbeddingServer(ctx, embedding) + + controllerutil.RemoveFinalizer(embedding, embeddingFinalizerName) + err := r.Update(ctx, embedding) + if err != nil { + return ctrl.Result{}, true, err + } + } + return ctrl.Result{}, true, nil +} + +// ensureFinalizer ensures the finalizer is added to the EmbeddingServer +// +//nolint:unparam // ctrl.Result return kept for consistency with reconciler pattern +func (r *EmbeddingServerReconciler) ensureFinalizer( + ctx context.Context, + embedding *mcpv1alpha1.EmbeddingServer, +) (ctrl.Result, bool, error) { + if controllerutil.ContainsFinalizer(embedding, embeddingFinalizerName) { + return ctrl.Result{}, false, nil + } + + controllerutil.AddFinalizer(embedding, embeddingFinalizerName) + err := r.Update(ctx, embedding) + if err != nil { + return ctrl.Result{}, true, err + } + return ctrl.Result{}, false, nil +} + +// ensureStatefulSet ensures the statefulset exists and is up to date +func (r *EmbeddingServerReconciler) ensureStatefulSet( + ctx context.Context, + embedding *mcpv1alpha1.EmbeddingServer, +) (ctrl.Result, error) { + ctxLogger := log.FromContext(ctx) + + statefulSet := &appsv1.StatefulSet{} + err := r.Get(ctx, types.NamespacedName{Name: embedding.Name, Namespace: embedding.Namespace}, statefulSet) + if err != nil && errors.IsNotFound(err) { + sts := r.statefulSetForEmbedding(ctx, embedding) + if sts == nil { + ctxLogger.Error(nil, "Failed to create StatefulSet object") + return ctrl.Result{}, fmt.Errorf("failed to create StatefulSet object") + } + ctxLogger.Info("Creating a new StatefulSet", "StatefulSet.Namespace", sts.Namespace, "StatefulSet.Name", sts.Name) + err = 
r.Create(ctx, sts) + if err != nil { + ctxLogger.Error(err, "Failed to create new StatefulSet", "StatefulSet.Namespace", sts.Namespace, "StatefulSet.Name", sts.Name) + return ctrl.Result{}, err + } + // StatefulSet created successfully, continue to ensure service + return ctrl.Result{}, nil + } else if err != nil { + ctxLogger.Error(err, "Failed to get StatefulSet") + return ctrl.Result{}, err + } + + // Ensure the statefulset size matches the spec + desiredReplicas := embedding.GetReplicas() + if *statefulSet.Spec.Replicas != desiredReplicas { + statefulSet.Spec.Replicas = &desiredReplicas + if err := r.Update(ctx, statefulSet); err != nil { + ctxLogger.Error(err, "Failed to update StatefulSet replicas", + "StatefulSet.Namespace", statefulSet.Namespace, + "StatefulSet.Name", statefulSet.Name) + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: time.Second}, nil + } + + // Check if the statefulset spec changed + if r.statefulSetNeedsUpdate(ctx, statefulSet, embedding) { + newStatefulSet := r.statefulSetForEmbedding(ctx, embedding) + statefulSet.Spec = newStatefulSet.Spec + statefulSet.Annotations = newStatefulSet.Annotations + statefulSet.Labels = newStatefulSet.Labels + if err := r.Update(ctx, statefulSet); err != nil { + ctxLogger.Error(err, "Failed to update StatefulSet", + "StatefulSet.Namespace", statefulSet.Namespace, + "StatefulSet.Name", statefulSet.Name) + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: time.Second}, nil + } + + return ctrl.Result{}, nil +} + +// ensureService ensures the service exists and is up to date +// +//nolint:unparam // ctrl.Result return kept for consistency with reconciler pattern +func (r *EmbeddingServerReconciler) ensureService( + ctx context.Context, + embedding *mcpv1alpha1.EmbeddingServer, +) (ctrl.Result, error) { + ctxLogger := log.FromContext(ctx) + + service := &corev1.Service{} + err := r.Get(ctx, types.NamespacedName{Name: embedding.Name, Namespace: embedding.Namespace}, service) + if 
err != nil && errors.IsNotFound(err) { + svc := r.serviceForEmbedding(ctx, embedding) + if svc == nil { + ctxLogger.Error(nil, "Failed to create Service object") + return ctrl.Result{}, fmt.Errorf("failed to create Service object") + } + ctxLogger.Info("Creating a new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name) + err = r.Create(ctx, svc) + if err != nil { + ctxLogger.Error(err, "Failed to create new Service", "Service.Namespace", svc.Namespace, "Service.Name", svc.Name) + return ctrl.Result{}, err + } + // Service created successfully, continue to update status + return ctrl.Result{}, nil + } else if err != nil { + ctxLogger.Error(err, "Failed to get Service") + return ctrl.Result{}, err + } + + // Check if the service needs to be updated + if r.serviceNeedsUpdate(service, embedding) { + desiredService := r.serviceForEmbedding(ctx, embedding) + service.Spec.Ports = desiredService.Spec.Ports + service.Labels = desiredService.Labels + service.Annotations = desiredService.Annotations + // Preserve ClusterIP as it's immutable + if err := r.Update(ctx, service); err != nil { + ctxLogger.Error(err, "Failed to update Service", + "Service.Namespace", service.Namespace, + "Service.Name", service.Name) + return ctrl.Result{}, err + } + ctxLogger.Info("Updated Service", "Service.Namespace", service.Namespace, "Service.Name", service.Name) + return ctrl.Result{RequeueAfter: time.Second}, nil + } + + return ctrl.Result{}, nil +} + +// serviceNeedsUpdate checks if the service needs to be updated based on the embedding spec +func (*EmbeddingServerReconciler) serviceNeedsUpdate( + service *corev1.Service, + embedding *mcpv1alpha1.EmbeddingServer, +) bool { + desiredPort := embedding.GetPort() + + // Check if any port has changed + for _, port := range service.Spec.Ports { + if port.Name == "http" && port.Port != desiredPort { + return true + } + } + + // Check ResourceOverrides (annotations and labels) + expectedAnnotations := make(map[string]string) + 
expectedLabels := make(map[string]string) + + if embedding.Spec.ResourceOverrides != nil && embedding.Spec.ResourceOverrides.Service != nil { + if embedding.Spec.ResourceOverrides.Service.Annotations != nil { + maps.Copy(expectedAnnotations, embedding.Spec.ResourceOverrides.Service.Annotations) + } + if embedding.Spec.ResourceOverrides.Service.Labels != nil { + maps.Copy(expectedLabels, embedding.Spec.ResourceOverrides.Service.Labels) + } + } + + // Check if expected annotations are present in service + for key, value := range expectedAnnotations { + if service.Annotations[key] != value { + return true + } + } + + // Check if expected labels are present in service + for key, value := range expectedLabels { + if service.Labels[key] != value { + return true + } + } + + return false +} + +// validateAndUpdatePodTemplateStatus validates the PodTemplateSpec and sets the status condition +// Status is not updated here - it will be updated at the end of reconciliation +func (r *EmbeddingServerReconciler) validateAndUpdatePodTemplateStatus( + ctx context.Context, + embedding *mcpv1alpha1.EmbeddingServer, +) bool { + ctxLogger := log.FromContext(ctx) + + if embedding.Spec.PodTemplateSpec == nil { + meta.SetStatusCondition(&embedding.Status.Conditions, metav1.Condition{ + Type: mcpv1alpha1.ConditionPodTemplateValid, + Status: metav1.ConditionTrue, + Reason: mcpv1alpha1.ConditionReasonPodTemplateValid, + Message: "No PodTemplateSpec provided", + ObservedGeneration: embedding.Generation, + }) + return true + } + + // Parse and validate PodTemplateSpec using builder + _, err := ctrlutil.NewPodTemplateSpecBuilder(embedding.Spec.PodTemplateSpec, embeddingContainerName) + if err != nil { + ctxLogger.Error(err, "Invalid PodTemplateSpec") + embedding.Status.Phase = mcpv1alpha1.EmbeddingServerPhaseFailed + embedding.Status.Message = fmt.Sprintf("Invalid PodTemplateSpec: %v", err) + meta.SetStatusCondition(&embedding.Status.Conditions, metav1.Condition{ + Type: 
mcpv1alpha1.ConditionPodTemplateValid, + Status: metav1.ConditionFalse, + Reason: mcpv1alpha1.ConditionReasonPodTemplateInvalid, + Message: fmt.Sprintf("Invalid PodTemplateSpec: %v", err), + ObservedGeneration: embedding.Generation, + }) + r.Recorder.Event(embedding, corev1.EventTypeWarning, "ValidationFailed", fmt.Sprintf("Invalid PodTemplateSpec: %v", err)) + return false + } + + meta.SetStatusCondition(&embedding.Status.Conditions, metav1.Condition{ + Type: mcpv1alpha1.ConditionPodTemplateValid, + Status: metav1.ConditionTrue, + Reason: mcpv1alpha1.ConditionReasonPodTemplateValid, + Message: "PodTemplateSpec is valid", + ObservedGeneration: embedding.Generation, + }) + + return true +} + +// validateImage validates the embedding image and sets the status condition +// Status is not updated here - it will be updated at the end of reconciliation +func (r *EmbeddingServerReconciler) validateImage(ctx context.Context, embedding *mcpv1alpha1.EmbeddingServer) error { + ctxLogger := log.FromContext(ctx) + + imageValidator := validation.NewImageValidator(r.Client, embedding.Namespace, r.ImageValidation) + err := imageValidator.ValidateImage(ctx, embedding.Spec.Image, embedding.ObjectMeta) + + if err == validation.ErrImageNotChecked { + ctxLogger.Info("Image validation skipped - no enforcement configured") + meta.SetStatusCondition(&embedding.Status.Conditions, metav1.Condition{ + Type: mcpv1alpha1.ConditionImageValidated, + Status: metav1.ConditionTrue, + Reason: mcpv1alpha1.ConditionReasonImageValidationSkipped, + Message: "Image validation was not performed (no enforcement configured)", + }) + return nil + } else if err == validation.ErrImageInvalid { + ctxLogger.Error(err, "EmbeddingServer image validation failed", "image", embedding.Spec.Image) + embedding.Status.Phase = mcpv1alpha1.EmbeddingServerPhaseFailed + embedding.Status.Message = err.Error() + meta.SetStatusCondition(&embedding.Status.Conditions, metav1.Condition{ + Type: mcpv1alpha1.ConditionImageValidated, 
+ Status: metav1.ConditionFalse, + Reason: mcpv1alpha1.ConditionReasonImageValidationFailed, + Message: err.Error(), + }) + return err + } else if err != nil { + ctxLogger.Error(err, "EmbeddingServer image validation system error", "image", embedding.Spec.Image) + meta.SetStatusCondition(&embedding.Status.Conditions, metav1.Condition{ + Type: mcpv1alpha1.ConditionImageValidated, + Status: metav1.ConditionFalse, + Reason: mcpv1alpha1.ConditionReasonImageValidationError, + Message: fmt.Sprintf("Error checking image validity: %v", err), + }) + return err + } + + ctxLogger.Info("Image validation passed", "image", embedding.Spec.Image) + meta.SetStatusCondition(&embedding.Status.Conditions, metav1.Condition{ + Type: mcpv1alpha1.ConditionImageValidated, + Status: metav1.ConditionTrue, + Reason: mcpv1alpha1.ConditionReasonImageValidationSuccess, + Message: "Image validation passed", + }) + + return nil +} + +// statefulSetForEmbedding creates a StatefulSet for the embedding server +func (r *EmbeddingServerReconciler) statefulSetForEmbedding( + _ context.Context, + embedding *mcpv1alpha1.EmbeddingServer, +) *appsv1.StatefulSet { + replicas := embedding.GetReplicas() + labels := r.labelsForEmbedding(embedding) + + // Build container + container := r.buildEmbeddingContainer(embedding) + + // Build pod template + podTemplate := r.buildPodTemplate(embedding, labels, container) + + // Apply statefulset overrides + stsAnnotations, stsLabels := r.applyStatefulSetOverrides(embedding, &podTemplate) + + // Merge ResourceOverrides labels into base labels + finalLabels := make(map[string]string) + maps.Copy(finalLabels, labels) + maps.Copy(finalLabels, stsLabels) + + statefulSet := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: embedding.Name, + Namespace: embedding.Namespace, + Labels: finalLabels, + Annotations: stsAnnotations, + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: &replicas, + ServiceName: embedding.Name, // Required for StatefulSet + Selector: 
&metav1.LabelSelector{ + MatchLabels: labels, + }, + Template: podTemplate, + }, + } + + // Add volumeClaimTemplates if model caching is enabled + if embedding.IsModelCacheEnabled() { + statefulSet.Spec.VolumeClaimTemplates = r.buildVolumeClaimTemplates(embedding) + } + + if err := ctrl.SetControllerReference(embedding, statefulSet, r.Scheme); err != nil { + return nil + } + return statefulSet +} + +// buildVolumeClaimTemplates builds the volumeClaimTemplates for the StatefulSet +func (r *EmbeddingServerReconciler) buildVolumeClaimTemplates( + embedding *mcpv1alpha1.EmbeddingServer, +) []corev1.PersistentVolumeClaim { + size := "10Gi" + if embedding.Spec.ModelCache.Size != "" { + size = embedding.Spec.ModelCache.Size + } + + accessMode := corev1.ReadWriteOnce + if embedding.Spec.ModelCache.AccessMode != "" { + accessMode = corev1.PersistentVolumeAccessMode(embedding.Spec.ModelCache.AccessMode) + } + + pvc := corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: "model-cache", + Labels: r.labelsForEmbedding(embedding), + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{accessMode}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse(size), + }, + }, + }, + } + + if embedding.Spec.ModelCache.StorageClassName != nil { + pvc.Spec.StorageClassName = embedding.Spec.ModelCache.StorageClassName + } + + // Apply resource overrides if specified + if embedding.Spec.ResourceOverrides != nil && embedding.Spec.ResourceOverrides.PersistentVolumeClaim != nil { + if pvc.Annotations == nil && embedding.Spec.ResourceOverrides.PersistentVolumeClaim.Annotations != nil { + pvc.Annotations = make(map[string]string) + } + if embedding.Spec.ResourceOverrides.PersistentVolumeClaim.Annotations != nil { + maps.Copy(pvc.Annotations, embedding.Spec.ResourceOverrides.PersistentVolumeClaim.Annotations) + } + if 
embedding.Spec.ResourceOverrides.PersistentVolumeClaim.Labels != nil { + maps.Copy(pvc.Labels, embedding.Spec.ResourceOverrides.PersistentVolumeClaim.Labels) + } + } + + return []corev1.PersistentVolumeClaim{pvc} +} + +// buildEmbeddingContainer builds the container spec for the embedding server +func (r *EmbeddingServerReconciler) buildEmbeddingContainer(embedding *mcpv1alpha1.EmbeddingServer) corev1.Container { + // Build container args + args := []string{ + "--model-id", embedding.Spec.Model, + "--port", fmt.Sprintf("%d", embedding.GetPort()), + } + args = append(args, embedding.Spec.Args...) + + // Build environment variables + envVars := r.buildEnvVars(embedding) + + // Build container + container := corev1.Container{ + Name: embeddingContainerName, + Image: embedding.Spec.Image, + Args: args, + Env: envVars, + ImagePullPolicy: corev1.PullPolicy(embedding.GetImagePullPolicy()), + Ports: []corev1.ContainerPort{ + { + Name: "http", + ContainerPort: embedding.GetPort(), + Protocol: corev1.ProtocolTCP, + }, + }, + LivenessProbe: r.buildLivenessProbe(embedding), + ReadinessProbe: r.buildReadinessProbe(embedding), + } + + // Add volume mount and HF_HOME for model cache if enabled + if embedding.IsModelCacheEnabled() { + container.VolumeMounts = []corev1.VolumeMount{ + { + Name: "model-cache", + MountPath: modelCacheMountPath, + }, + } + container.Env = append(container.Env, corev1.EnvVar{ + Name: "HF_HOME", + Value: modelCacheMountPath, + }) + } + + // Add resources if specified + r.applyResourceRequirements(embedding, &container) + + return container +} + +// buildEnvVars builds environment variables for the container +func (*EmbeddingServerReconciler) buildEnvVars(embedding *mcpv1alpha1.EmbeddingServer) []corev1.EnvVar { + envVars := []corev1.EnvVar{ + { + Name: "MODEL_ID", + Value: embedding.Spec.Model, + }, + } + + // Add HuggingFace token from secret if provided + if embedding.Spec.HFTokenSecretRef != nil { + envVars = append(envVars, corev1.EnvVar{ + Name: 
"HF_TOKEN", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: embedding.Spec.HFTokenSecretRef.Name, + }, + Key: embedding.Spec.HFTokenSecretRef.Key, + }, + }, + }) + } + + for _, env := range embedding.Spec.Env { + envVars = append(envVars, corev1.EnvVar{ + Name: env.Name, + Value: env.Value, + }) + } + return envVars +} + +// buildLivenessProbe builds the liveness probe for the container +func (*EmbeddingServerReconciler) buildLivenessProbe(embedding *mcpv1alpha1.EmbeddingServer) *corev1.Probe { + return &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/health", + Port: intstr.FromInt(int(embedding.GetPort())), + }, + }, + InitialDelaySeconds: 60, + PeriodSeconds: 30, + TimeoutSeconds: 10, + FailureThreshold: 3, + } +} + +// buildReadinessProbe builds the readiness probe for the container +func (*EmbeddingServerReconciler) buildReadinessProbe(embedding *mcpv1alpha1.EmbeddingServer) *corev1.Probe { + return &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/health", + Port: intstr.FromInt(int(embedding.GetPort())), + }, + }, + InitialDelaySeconds: 30, + PeriodSeconds: 10, + TimeoutSeconds: 5, + FailureThreshold: 3, + } +} + +// applyResourceRequirements applies resource requirements to the container +func (*EmbeddingServerReconciler) applyResourceRequirements(embedding *mcpv1alpha1.EmbeddingServer, container *corev1.Container) { + if embedding.Spec.Resources.Limits.CPU == "" && embedding.Spec.Resources.Limits.Memory == "" && + embedding.Spec.Resources.Requests.CPU == "" && embedding.Spec.Resources.Requests.Memory == "" { + return + } + + container.Resources = corev1.ResourceRequirements{ + Limits: corev1.ResourceList{}, + Requests: corev1.ResourceList{}, + } + + if embedding.Spec.Resources.Limits.CPU != "" { + container.Resources.Limits[corev1.ResourceCPU] = 
resource.MustParse(embedding.Spec.Resources.Limits.CPU) + } + if embedding.Spec.Resources.Limits.Memory != "" { + container.Resources.Limits[corev1.ResourceMemory] = resource.MustParse(embedding.Spec.Resources.Limits.Memory) + } + if embedding.Spec.Resources.Requests.CPU != "" { + container.Resources.Requests[corev1.ResourceCPU] = resource.MustParse(embedding.Spec.Resources.Requests.CPU) + } + if embedding.Spec.Resources.Requests.Memory != "" { + container.Resources.Requests[corev1.ResourceMemory] = resource.MustParse(embedding.Spec.Resources.Requests.Memory) + } +} + +// buildPodTemplate builds the pod template for the statefulset +func (r *EmbeddingServerReconciler) buildPodTemplate( + embedding *mcpv1alpha1.EmbeddingServer, + labels map[string]string, + container corev1.Container, +) corev1.PodTemplateSpec { + podTemplate := corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: labels, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{container}, + }, + } + + // Note: Volumes for model cache are managed by StatefulSet volumeClaimTemplates + // and will be automatically mounted with the name "model-cache" + + // Merge with user-provided PodTemplateSpec if specified + r.mergePodTemplateSpec(embedding, &podTemplate) + + return podTemplate +} + +// mergePodTemplateSpec merges user-provided PodTemplateSpec customizations +func (r *EmbeddingServerReconciler) mergePodTemplateSpec( + embedding *mcpv1alpha1.EmbeddingServer, + podTemplate *corev1.PodTemplateSpec, +) { + if embedding.Spec.PodTemplateSpec == nil { + return + } + + builder, err := ctrlutil.NewPodTemplateSpecBuilder(embedding.Spec.PodTemplateSpec, embeddingContainerName) + if err != nil { + return + } + + userTemplate := builder.Build() + if userTemplate == nil { + return + } + + // Merge user customizations into base pod template + if userTemplate.Spec.NodeSelector != nil { + podTemplate.Spec.NodeSelector = userTemplate.Spec.NodeSelector + } + if userTemplate.Spec.Affinity != nil { + 
podTemplate.Spec.Affinity = userTemplate.Spec.Affinity + } + if len(userTemplate.Spec.Tolerations) > 0 { + podTemplate.Spec.Tolerations = userTemplate.Spec.Tolerations + } + if userTemplate.Spec.SecurityContext != nil { + podTemplate.Spec.SecurityContext = userTemplate.Spec.SecurityContext + } + if userTemplate.Spec.ServiceAccountName != "" { + podTemplate.Spec.ServiceAccountName = userTemplate.Spec.ServiceAccountName + } + + // Merge container-level customizations + r.mergeContainerSecurityContext(podTemplate, userTemplate) +} + +// mergeContainerSecurityContext merges container-level security context +func (*EmbeddingServerReconciler) mergeContainerSecurityContext( + podTemplate *corev1.PodTemplateSpec, + userTemplate *corev1.PodTemplateSpec, +) { + for i := range podTemplate.Spec.Containers { + if podTemplate.Spec.Containers[i].Name != embeddingContainerName { + continue + } + for _, userContainer := range userTemplate.Spec.Containers { + if userContainer.Name == embeddingContainerName && userContainer.SecurityContext != nil { + podTemplate.Spec.Containers[i].SecurityContext = userContainer.SecurityContext + break + } + } + break + } +} + +// applyStatefulSetOverrides applies statefulset-level overrides and returns annotations and labels +func (*EmbeddingServerReconciler) applyStatefulSetOverrides( + embedding *mcpv1alpha1.EmbeddingServer, + podTemplate *corev1.PodTemplateSpec, +) (map[string]string, map[string]string) { + annotations := make(map[string]string) + labels := make(map[string]string) + + if embedding.Spec.ResourceOverrides == nil || embedding.Spec.ResourceOverrides.StatefulSet == nil { + return annotations, labels + } + + if embedding.Spec.ResourceOverrides.StatefulSet.Annotations != nil { + maps.Copy(annotations, embedding.Spec.ResourceOverrides.StatefulSet.Annotations) + } + + if embedding.Spec.ResourceOverrides.StatefulSet.Labels != nil { + maps.Copy(labels, embedding.Spec.ResourceOverrides.StatefulSet.Labels) + } + + if 
embedding.Spec.ResourceOverrides.StatefulSet.PodTemplateMetadataOverrides != nil { + if podTemplate.Annotations == nil { + podTemplate.Annotations = make(map[string]string) + } + if embedding.Spec.ResourceOverrides.StatefulSet.PodTemplateMetadataOverrides.Annotations != nil { + maps.Copy( + podTemplate.Annotations, + embedding.Spec.ResourceOverrides.StatefulSet.PodTemplateMetadataOverrides.Annotations, + ) + } + if embedding.Spec.ResourceOverrides.StatefulSet.PodTemplateMetadataOverrides.Labels != nil { + maps.Copy(podTemplate.Labels, embedding.Spec.ResourceOverrides.StatefulSet.PodTemplateMetadataOverrides.Labels) + } + } + + return annotations, labels +} + +// serviceForEmbedding creates a Service for the embedding server +func (r *EmbeddingServerReconciler) serviceForEmbedding( + _ context.Context, + embedding *mcpv1alpha1.EmbeddingServer, +) *corev1.Service { + labels := r.labelsForEmbedding(embedding) + annotations := make(map[string]string) + + // Apply service overrides if specified + finalLabels := make(map[string]string) + maps.Copy(finalLabels, labels) + + if embedding.Spec.ResourceOverrides != nil && embedding.Spec.ResourceOverrides.Service != nil { + if embedding.Spec.ResourceOverrides.Service.Annotations != nil { + maps.Copy(annotations, embedding.Spec.ResourceOverrides.Service.Annotations) + } + if embedding.Spec.ResourceOverrides.Service.Labels != nil { + maps.Copy(finalLabels, embedding.Spec.ResourceOverrides.Service.Labels) + } + } + + service := &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: embedding.Name, + Namespace: embedding.Namespace, + Labels: finalLabels, + Annotations: annotations, + }, + Spec: corev1.ServiceSpec{ + Selector: labels, + Ports: []corev1.ServicePort{ + { + Name: "http", + Port: embedding.GetPort(), + TargetPort: intstr.FromInt(int(embedding.GetPort())), + Protocol: corev1.ProtocolTCP, + }, + }, + }, + } + + if err := ctrl.SetControllerReference(embedding, service, r.Scheme); err != nil { + return nil + } + return 
service +} + +// labelsForEmbedding returns the labels for the embedding resources +func (*EmbeddingServerReconciler) labelsForEmbedding(embedding *mcpv1alpha1.EmbeddingServer) map[string]string { + return map[string]string{ + "app.kubernetes.io/name": "embeddingserver", + "app.kubernetes.io/instance": embedding.Name, + "app.kubernetes.io/component": "embedding-server", + "app.kubernetes.io/managed-by": "toolhive-operator", + } +} + +// statefulSetNeedsUpdate checks if the statefulset needs to be updated +func (r *EmbeddingServerReconciler) statefulSetNeedsUpdate( + ctx context.Context, + currentSts *appsv1.StatefulSet, + embedding *mcpv1alpha1.EmbeddingServer, +) bool { + // Generate the expected StatefulSet from the current spec + newSts := r.statefulSetForEmbedding(ctx, embedding) + if newSts == nil { + // If we can't generate a new StatefulSet, assume update is needed + return true + } + + // Check StatefulSet-level fields + if r.statefulSetMetadataChanged(currentSts, newSts) { + return true + } + + // Check container-level fields + existingContainer, newContainer := r.findEmbeddingContainers(currentSts, newSts) + if existingContainer == nil || newContainer == nil { + return true + } + + if r.containerNeedsUpdate(existingContainer, newContainer) { + return true + } + + // Check pod template metadata + if r.podTemplateMetadataChanged(currentSts, newSts) { + return true + } + + return false +} + +// statefulSetMetadataChanged checks if StatefulSet-level metadata has changed +func (*EmbeddingServerReconciler) statefulSetMetadataChanged(currentSts, newSts *appsv1.StatefulSet) bool { + if *currentSts.Spec.Replicas != *newSts.Spec.Replicas { + return true + } + if !reflect.DeepEqual(newSts.Annotations, currentSts.Annotations) { + return true + } + if !reflect.DeepEqual(newSts.Labels, currentSts.Labels) { + return true + } + return false +} + +// findEmbeddingContainers finds the embedding container in both StatefulSets +func (*EmbeddingServerReconciler) 
findEmbeddingContainers( + currentSts, newSts *appsv1.StatefulSet, +) (*corev1.Container, *corev1.Container) { + var existingContainer *corev1.Container + for i := range currentSts.Spec.Template.Spec.Containers { + if currentSts.Spec.Template.Spec.Containers[i].Name == embeddingContainerName { + existingContainer = ¤tSts.Spec.Template.Spec.Containers[i] + break + } + } + + var newContainer *corev1.Container + for i := range newSts.Spec.Template.Spec.Containers { + if newSts.Spec.Template.Spec.Containers[i].Name == embeddingContainerName { + newContainer = &newSts.Spec.Template.Spec.Containers[i] + break + } + } + + return existingContainer, newContainer +} + +// containerNeedsUpdate checks if the container spec has changed +func (*EmbeddingServerReconciler) containerNeedsUpdate(existingContainer, newContainer *corev1.Container) bool { + if existingContainer.Image != newContainer.Image { + return true + } + if !reflect.DeepEqual(existingContainer.Args, newContainer.Args) { + return true + } + if !reflect.DeepEqual(existingContainer.Env, newContainer.Env) { + return true + } + if !reflect.DeepEqual(existingContainer.Ports, newContainer.Ports) { + return true + } + if existingContainer.ImagePullPolicy != newContainer.ImagePullPolicy { + return true + } + if !reflect.DeepEqual(existingContainer.Resources, newContainer.Resources) { + return true + } + return false +} + +// podTemplateMetadataChanged checks if pod template metadata has changed +func (*EmbeddingServerReconciler) podTemplateMetadataChanged(currentSts, newSts *appsv1.StatefulSet) bool { + if !reflect.DeepEqual(currentSts.Spec.Template.Annotations, newSts.Spec.Template.Annotations) { + return true + } + if !reflect.DeepEqual(currentSts.Spec.Template.Labels, newSts.Spec.Template.Labels) { + return true + } + return false +} + +// updateEmbeddingServerStatus updates the status based on statefulset state +func (r *EmbeddingServerReconciler) updateEmbeddingServerStatus( + ctx context.Context, + embedding 
*mcpv1alpha1.EmbeddingServer, +) error { + ctxLogger := log.FromContext(ctx) + + // Set the service URL if not already set + if embedding.Status.URL == "" { + embedding.Status.URL = fmt.Sprintf("http://%s.%s.svc.cluster.local:%d", + embedding.Name, embedding.Namespace, embedding.GetPort()) + } + + statefulSet := &appsv1.StatefulSet{} + err := r.Get(ctx, types.NamespacedName{Name: embedding.Name, Namespace: embedding.Namespace}, statefulSet) + if err != nil { + if errors.IsNotFound(err) { + embedding.Status.Phase = mcpv1alpha1.EmbeddingServerPhasePending + embedding.Status.ReadyReplicas = 0 + } else { + return err + } + } else { + embedding.Status.ReadyReplicas = statefulSet.Status.ReadyReplicas + embedding.Status.ObservedGeneration = embedding.Generation + + // Determine phase and message based on statefulset status using immutable assignment + type phaseInfo struct { + phase mcpv1alpha1.EmbeddingServerPhase + message string + } + + info := func() phaseInfo { + if statefulSet.Status.ReadyReplicas > 0 { + return phaseInfo{ + phase: mcpv1alpha1.EmbeddingServerPhaseRunning, + message: "Embedding server is running", + } + } + if statefulSet.Status.Replicas > 0 && statefulSet.Status.ReadyReplicas == 0 { + // Check if pods are downloading the model + return phaseInfo{ + phase: mcpv1alpha1.EmbeddingServerPhaseDownloading, + message: "Downloading embedding model", + } + } + return phaseInfo{ + phase: mcpv1alpha1.EmbeddingServerPhasePending, + message: "Waiting for statefulset", + } + }() + + embedding.Status.Phase = info.phase + embedding.Status.Message = info.message + } + + err = r.Status().Update(ctx, embedding) + if err != nil { + ctxLogger.Error(err, "Failed to update EmbeddingServer status") + return err + } + + return nil +} + +// finalizeEmbeddingServer performs cleanup before the EmbeddingServer is deleted +func (r *EmbeddingServerReconciler) finalizeEmbeddingServer(ctx context.Context, embedding *mcpv1alpha1.EmbeddingServer) { + ctxLogger := log.FromContext(ctx) 
+ ctxLogger.Info("Finalizing EmbeddingServer", "name", embedding.Name) + + // Update status to Terminating + embedding.Status.Phase = mcpv1alpha1.EmbeddingServerPhaseTerminating + if err := r.Status().Update(ctx, embedding); err != nil { + ctxLogger.Error(err, "Failed to update EmbeddingServer status to Terminating") + } + + // Cleanup logic here if needed + // For now, Kubernetes will handle cascade deletion of owned resources + + r.Recorder.Event(embedding, corev1.EventTypeNormal, "Deleted", "EmbeddingServer has been finalized") +} + +// SetupWithManager sets up the controller with the Manager. +func (r *EmbeddingServerReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&mcpv1alpha1.EmbeddingServer{}). + Owns(&appsv1.StatefulSet{}). + Owns(&corev1.Service{}). + Owns(&corev1.PersistentVolumeClaim{}). + Complete(r) +} diff --git a/cmd/thv-operator/controllers/embeddingserver_controller_test.go b/cmd/thv-operator/controllers/embeddingserver_controller_test.go new file mode 100644 index 0000000000..d783be5e43 --- /dev/null +++ b/cmd/thv-operator/controllers/embeddingserver_controller_test.go @@ -0,0 +1,883 @@ +// SPDX-License-Identifier: Apache-2.0 + +package controllers + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "k8s.io/utils/ptr" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" + ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/validation" +) + +func TestEmbeddingServer_GetPort(t *testing.T) { + t.Parallel() + + tests 
:= []struct { + name string + port int32 + expected int32 + }{ + { + name: "default port", + port: 0, + expected: 8080, + }, + { + name: "custom port", + port: 9000, + expected: 9000, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + embedding := &mcpv1alpha1.EmbeddingServer{ + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Port: tt.port, + }, + } + + assert.Equal(t, tt.expected, embedding.GetPort()) + }) + } +} + +func TestEmbeddingServer_GetReplicas(t *testing.T) { + t.Parallel() + + replicas2 := int32(2) + tests := []struct { + name string + replicas *int32 + expected int32 + }{ + { + name: "default replicas", + replicas: nil, + expected: 1, + }, + { + name: "custom replicas", + replicas: &replicas2, + expected: 2, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + embedding := &mcpv1alpha1.EmbeddingServer{ + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Replicas: tt.replicas, + }, + } + + assert.Equal(t, tt.expected, embedding.GetReplicas()) + }) + } +} + +func TestEmbeddingServer_IsModelCacheEnabled(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + modelCache *mcpv1alpha1.ModelCacheConfig + expected bool + }{ + { + name: "nil model cache", + modelCache: nil, + expected: false, + }, + { + name: "model cache disabled", + modelCache: &mcpv1alpha1.ModelCacheConfig{ + Enabled: false, + }, + expected: false, + }, + { + name: "model cache enabled", + modelCache: &mcpv1alpha1.ModelCacheConfig{ + Enabled: true, + }, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + embedding := &mcpv1alpha1.EmbeddingServer{ + Spec: mcpv1alpha1.EmbeddingServerSpec{ + ModelCache: tt.modelCache, + }, + } + + assert.Equal(t, tt.expected, embedding.IsModelCacheEnabled()) + }) + } +} + +func TestEmbeddingServer_GetImagePullPolicy(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + imagePullPolicy string + 
expected string + }{ + { + name: "default pull policy", + imagePullPolicy: "", + expected: "IfNotPresent", + }, + { + name: "Never pull policy", + imagePullPolicy: "Never", + expected: "Never", + }, + { + name: "Always pull policy", + imagePullPolicy: "Always", + expected: "Always", + }, + { + name: "IfNotPresent pull policy", + imagePullPolicy: "IfNotPresent", + expected: "IfNotPresent", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + embedding := &mcpv1alpha1.EmbeddingServer{ + Spec: mcpv1alpha1.EmbeddingServerSpec{ + ImagePullPolicy: tt.imagePullPolicy, + }, + } + + assert.Equal(t, tt.expected, embedding.GetImagePullPolicy()) + }) + } +} + +func TestEmbeddingServerPodTemplateSpecValidation(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + podTemplateSpec *runtime.RawExtension + expectValid bool + }{ + { + name: "no PodTemplateSpec provided", + podTemplateSpec: nil, + expectValid: true, + }, + { + name: "valid PodTemplateSpec", + podTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{"spec":{"nodeSelector":{"disktype":"ssd"}}}`), + }, + expectValid: true, + }, + { + name: "invalid PodTemplateSpec", + podTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{invalid json`), + }, + expectValid: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + if tt.podTemplateSpec == nil { + // nil is always valid + assert.True(t, tt.expectValid) + return + } + + _, err := ctrlutil.NewPodTemplateSpecBuilder(tt.podTemplateSpec, embeddingContainerName) + + if tt.expectValid { + assert.NoError(t, err) + } else { + assert.Error(t, err) + } + }) + } +} + +func TestEmbeddingServer_Labels(t *testing.T) { + t.Parallel() + + embedding := &mcpv1alpha1.EmbeddingServer{ + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "test-model", + }, + } + embedding.Name = "test-embedding" + + reconciler := &EmbeddingServerReconciler{} + labels := 
reconciler.labelsForEmbedding(embedding) + + // Check required labels + assert.Equal(t, "embeddingserver", labels["app.kubernetes.io/name"]) + assert.Equal(t, "test-embedding", labels["app.kubernetes.io/instance"]) + assert.Equal(t, "embedding-server", labels["app.kubernetes.io/component"]) + assert.Equal(t, "toolhive-operator", labels["app.kubernetes.io/managed-by"]) + +} + +func TestEmbeddingServer_ModelCacheConfig(t *testing.T) { + t.Parallel() + + storageClassName := "fast-ssd" + tests := []struct { + name string + modelCache *mcpv1alpha1.ModelCacheConfig + expectedSize string + expectedAccess string + }{ + { + name: "default values", + modelCache: &mcpv1alpha1.ModelCacheConfig{ + Enabled: true, + }, + expectedSize: "10Gi", + expectedAccess: "ReadWriteOnce", + }, + { + name: "custom values", + modelCache: &mcpv1alpha1.ModelCacheConfig{ + Enabled: true, + Size: "20Gi", + AccessMode: "ReadWriteMany", + StorageClassName: &storageClassName, + }, + expectedSize: "20Gi", + expectedAccess: "ReadWriteMany", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + embedding := &mcpv1alpha1.EmbeddingServer{ + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "test-model", + ModelCache: tt.modelCache, + }, + } + embedding.Name = "test-embedding" + embedding.Namespace = "default" + + // Note: We're testing the PVC structure creation, not SetControllerReference + // which requires a Scheme. In actual reconciliation, the Scheme is set. + // For this unit test, we test just the PVC structure without owner references. 
+ pvcName := fmt.Sprintf("%s-model-cache", embedding.Name) + + size := tt.modelCache.Size + if size == "" { + size = "10Gi" + } + + accessMode := corev1.ReadWriteOnce + if tt.modelCache.AccessMode != "" { + accessMode = corev1.PersistentVolumeAccessMode(tt.modelCache.AccessMode) + } + + // Verify expected values + assert.Equal(t, "test-embedding-model-cache", pvcName) + assert.Equal(t, tt.expectedSize, size) + assert.Equal(t, tt.expectedAccess, string(accessMode)) + + // Verify storage class name if provided + if tt.modelCache.StorageClassName != nil { + assert.Equal(t, storageClassName, *tt.modelCache.StorageClassName) + } + }) + } +} + +// Test helpers + +func createEmbeddingServerTestScheme() *runtime.Scheme { + testScheme := runtime.NewScheme() + _ = corev1.AddToScheme(testScheme) + _ = appsv1.AddToScheme(testScheme) + _ = mcpv1alpha1.AddToScheme(testScheme) + return testScheme +} + +func createTestEmbeddingServer(name, namespace, image, model string) *mcpv1alpha1.EmbeddingServer { + return &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + Generation: 1, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Image: image, + Model: model, + }, + } +} + +// TestReconcile_NotFound tests reconciliation when resource is not found +func TestReconcile_NotFound(t *testing.T) { + t.Parallel() + + scheme := createEmbeddingServerTestScheme() + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). 
+ Build() + + reconciler := &EmbeddingServerReconciler{ + Client: fakeClient, + Scheme: scheme, + Recorder: record.NewFakeRecorder(10), + ImageValidation: validation.ImageValidationAlwaysAllow, + } + + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: "non-existent", + Namespace: "default", + }, + } + + result, err := reconciler.Reconcile(context.TODO(), req) + assert.NoError(t, err) + assert.Equal(t, ctrl.Result{}, result) +} + +// TestReconcile_CreateResources tests the reconciliation creates all necessary resources +func TestReconcile_CreateResources(t *testing.T) { + t.Parallel() + + embedding := createTestEmbeddingServer("test-embedding", "test-ns", "test-image:latest", "test-model") + + scheme := createEmbeddingServerTestScheme() + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(embedding). + WithStatusSubresource(embedding). + Build() + + reconciler := &EmbeddingServerReconciler{ + Client: fakeClient, + Scheme: scheme, + Recorder: record.NewFakeRecorder(10), + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + ImageValidation: validation.ImageValidationAlwaysAllow, + } + + ctx := context.TODO() + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: embedding.Name, + Namespace: embedding.Namespace, + }, + } + + // First reconcile should create resources + result, err := reconciler.Reconcile(ctx, req) + require.NoError(t, err) + assert.Equal(t, ctrl.Result{}, result) + + // Verify finalizer was added + updatedEmbedding := &mcpv1alpha1.EmbeddingServer{} + err = fakeClient.Get(ctx, types.NamespacedName{ + Name: embedding.Name, + Namespace: embedding.Namespace, + }, updatedEmbedding) + require.NoError(t, err) + assert.Contains(t, updatedEmbedding.Finalizers, embeddingFinalizerName) + + // Verify StatefulSet was created + sts := &appsv1.StatefulSet{} + err = fakeClient.Get(ctx, types.NamespacedName{ + Name: embedding.Name, + Namespace: embedding.Namespace, + }, sts) + assert.NoError(t, 
err, "StatefulSet should be created") + assert.Equal(t, embedding.Name, sts.Name) + assert.Equal(t, int32(1), *sts.Spec.Replicas) + + // Verify Service was created + svc := &corev1.Service{} + err = fakeClient.Get(ctx, types.NamespacedName{ + Name: embedding.Name, + Namespace: embedding.Namespace, + }, svc) + assert.NoError(t, err, "Service should be created") + assert.Equal(t, embedding.Name, svc.Name) +} + +// TestValidateImage tests image validation with different scenarios +func TestValidateImage(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + embedding *mcpv1alpha1.EmbeddingServer + imageValidation validation.ImageValidation + registries []runtime.Object + expectError bool + expectedCondition metav1.ConditionStatus + expectedReason string + }{ + { + name: "always allow - no validation", + embedding: createTestEmbeddingServer("test", "default", "any-image:latest", "model"), + imageValidation: validation.ImageValidationAlwaysAllow, + expectError: false, + expectedCondition: metav1.ConditionTrue, + expectedReason: mcpv1alpha1.ConditionReasonImageValidationSkipped, + }, + { + name: "registry enforcing - no registries", + embedding: createTestEmbeddingServer("test", "default", "test-image:latest", "model"), + imageValidation: validation.ImageValidationRegistryEnforcing, + registries: []runtime.Object{}, + expectError: false, + expectedCondition: metav1.ConditionTrue, + expectedReason: mcpv1alpha1.ConditionReasonImageValidationSkipped, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + scheme := createEmbeddingServerTestScheme() + objects := append([]runtime.Object{tt.embedding}, tt.registries...) + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(objects...). + WithStatusSubresource(tt.embedding). 
+ Build() + + reconciler := &EmbeddingServerReconciler{ + Client: fakeClient, + Scheme: scheme, + ImageValidation: tt.imageValidation, + } + + err := reconciler.validateImage(context.TODO(), tt.embedding) + + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + + // Verify condition was set + updatedEmbedding := &mcpv1alpha1.EmbeddingServer{} + err = fakeClient.Get(context.TODO(), types.NamespacedName{ + Name: tt.embedding.Name, + Namespace: tt.embedding.Namespace, + }, updatedEmbedding) + require.NoError(t, err) + + // Find the ImageValidated condition + for _, cond := range updatedEmbedding.Status.Conditions { + if cond.Type == mcpv1alpha1.ConditionImageValidated { + assert.Equal(t, tt.expectedCondition, cond.Status) + assert.Equal(t, tt.expectedReason, cond.Reason) + return + } + } + }) + } +} + +// TestStatefulSetNeedsUpdate tests drift detection logic +func TestStatefulSetNeedsUpdate(t *testing.T) { + t.Parallel() + + scheme := createEmbeddingServerTestScheme() + reconciler := &EmbeddingServerReconciler{ + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + // Helper to generate a StatefulSet from an embedding using the reconciler + generateSts := func(e *mcpv1alpha1.EmbeddingServer) *appsv1.StatefulSet { + return reconciler.statefulSetForEmbedding(context.TODO(), e) + } + + tests := []struct { + name string + embedding *mcpv1alpha1.EmbeddingServer + existingSts *appsv1.StatefulSet + expectedUpdate bool + updateReason string + }{ + { + name: "no update needed - identical", + embedding: createTestEmbeddingServer("test", "default", "image:v1", "model1"), + existingSts: generateSts(createTestEmbeddingServer("test", "default", "image:v1", "model1")), + expectedUpdate: false, + }, + { + name: "update needed - image changed", + embedding: createTestEmbeddingServer("test", "default", "image:v2", "model1"), + existingSts: generateSts(createTestEmbeddingServer("test", "default", "image:v1", "model1")), + 
expectedUpdate: true, + updateReason: "image changed", + }, + { + name: "update needed - model changed", + embedding: createTestEmbeddingServer("test", "default", "image:v1", "model2"), + existingSts: generateSts(createTestEmbeddingServer("test", "default", "image:v1", "model1")), + expectedUpdate: true, + updateReason: "model changed", + }, + { + name: "update needed - port changed", + embedding: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default", Generation: 1}, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Image: "image:v1", + Model: "model1", + Port: 9090, + }, + }, + existingSts: generateSts(createTestEmbeddingServer("test", "default", "image:v1", "model1")), + expectedUpdate: true, + updateReason: "port changed", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + needsUpdate := reconciler.statefulSetNeedsUpdate(context.TODO(), tt.existingSts, tt.embedding) + + assert.Equal(t, tt.expectedUpdate, needsUpdate, tt.updateReason) + }) + } +} + +// TestHandleDeletion tests finalizer cleanup +func TestHandleDeletion(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + embedding *mcpv1alpha1.EmbeddingServer + expectDone bool + expectError bool + expectFinalizer bool + }{ + { + name: "not being deleted", + embedding: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + Finalizers: []string{embeddingFinalizerName}, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Image: "test:latest", + Model: "test-model", + }, + }, + expectDone: false, + expectError: false, + expectFinalizer: true, + }, + { + name: "being deleted with finalizer", + embedding: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + Finalizers: []string{embeddingFinalizerName}, + DeletionTimestamp: &metav1.Time{Time: time.Now()}, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Image: "test:latest", + 
Model: "test-model", + }, + }, + expectDone: true, + expectError: false, + expectFinalizer: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + scheme := createEmbeddingServerTestScheme() + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(tt.embedding). + WithStatusSubresource(tt.embedding). + Build() + + reconciler := &EmbeddingServerReconciler{ + Client: fakeClient, + Scheme: scheme, + Recorder: record.NewFakeRecorder(10), + } + + result, done, err := reconciler.handleDeletion(context.TODO(), tt.embedding) + + assert.Equal(t, tt.expectDone, done) + if tt.expectError { + assert.Error(t, err) + } else { + assert.NoError(t, err) + } + + if done { + assert.Equal(t, ctrl.Result{}, result) + } + + // Verify finalizer state if not being deleted + if tt.embedding.DeletionTimestamp == nil { + updatedEmbedding := &mcpv1alpha1.EmbeddingServer{} + err := fakeClient.Get(context.TODO(), types.NamespacedName{ + Name: tt.embedding.Name, + Namespace: tt.embedding.Namespace, + }, updatedEmbedding) + require.NoError(t, err) + + hasFinalizer := false + for _, f := range updatedEmbedding.Finalizers { + if f == embeddingFinalizerName { + hasFinalizer = true + break + } + } + assert.Equal(t, tt.expectFinalizer, hasFinalizer) + } + }) + } +} + +// TestEnsureStatefulSet tests statefulset creation and updates +func TestEnsureStatefulSet(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + embedding *mcpv1alpha1.EmbeddingServer + existingSts *appsv1.StatefulSet + expectCreate bool + expectUpdate bool + expectDone bool + }{ + { + name: "create new statefulset", + embedding: createTestEmbeddingServer("test", "default", "image:v1", "model1"), + existingSts: nil, + expectCreate: true, + expectDone: false, + }, + { + name: "update replicas", + embedding: func() *mcpv1alpha1.EmbeddingServer { + e := createTestEmbeddingServer("test", "default", "image:v1", "model1") + replicas := int32(3) + 
e.Spec.Replicas = &replicas + return e + }(), + existingSts: &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: embeddingContainerName, + Image: "image:v1", + Args: []string{"--model-id", "model1", "--port", "8080"}, + Env: []corev1.EnvVar{ + {Name: "MODEL_ID", Value: "model1"}, + }, + Ports: []corev1.ContainerPort{ + {ContainerPort: 8080}, + }, + }, + }, + }, + }, + }, + }, + expectUpdate: true, + expectDone: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + scheme := createEmbeddingServerTestScheme() + objects := []runtime.Object{tt.embedding} + if tt.existingSts != nil { + objects = append(objects, tt.existingSts) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(objects...). + Build() + + reconciler := &EmbeddingServerReconciler{ + Client: fakeClient, + Scheme: scheme, + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + } + + result, err := reconciler.ensureStatefulSet(context.TODO(), tt.embedding) + require.NoError(t, err) + // expectDone is now represented by whether we need to requeue + if tt.expectDone { + assert.True(t, result.RequeueAfter > 0) + } + + // Verify statefulset exists + sts := &appsv1.StatefulSet{} + err = fakeClient.Get(context.TODO(), types.NamespacedName{ + Name: tt.embedding.Name, + Namespace: tt.embedding.Namespace, + }, sts) + assert.NoError(t, err) + + if tt.expectUpdate { + assert.Greater(t, result.RequeueAfter, time.Duration(0)) + } + }) + } +} + +// TestUpdateEmbeddingServerStatus tests status updates +func TestUpdateEmbeddingServerStatus(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + embedding *mcpv1alpha1.EmbeddingServer + statefulSet *appsv1.StatefulSet + expectedPhase 
mcpv1alpha1.EmbeddingServerPhase + expectedURL string + }{ + { + name: "no statefulset - pending", + embedding: createTestEmbeddingServer("test", "default", "image:v1", "model1"), + statefulSet: nil, + expectedPhase: mcpv1alpha1.EmbeddingServerPhasePending, + expectedURL: "http://test.default.svc.cluster.local:8080", + }, + { + name: "statefulset ready", + embedding: createTestEmbeddingServer("test", "default", "image:v1", "model1"), + statefulSet: &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + }, + Status: appsv1.StatefulSetStatus{ + Replicas: 1, + ReadyReplicas: 1, + }, + }, + expectedPhase: mcpv1alpha1.EmbeddingServerPhaseRunning, + expectedURL: "http://test.default.svc.cluster.local:8080", + }, + { + name: "statefulset downloading", + embedding: createTestEmbeddingServer("test", "default", "image:v1", "model1"), + statefulSet: &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "default", + }, + Status: appsv1.StatefulSetStatus{ + Replicas: 1, + ReadyReplicas: 0, + }, + }, + expectedPhase: mcpv1alpha1.EmbeddingServerPhaseDownloading, + expectedURL: "http://test.default.svc.cluster.local:8080", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + scheme := createEmbeddingServerTestScheme() + objects := []runtime.Object{tt.embedding} + if tt.statefulSet != nil { + objects = append(objects, tt.statefulSet) + } + + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithRuntimeObjects(objects...). + WithStatusSubresource(tt.embedding). 
+ Build() + + reconciler := &EmbeddingServerReconciler{ + Client: fakeClient, + Scheme: scheme, + } + + err := reconciler.updateEmbeddingServerStatus(context.TODO(), tt.embedding) + assert.NoError(t, err) + + // Verify status was updated + updatedEmbedding := &mcpv1alpha1.EmbeddingServer{} + err = fakeClient.Get(context.TODO(), types.NamespacedName{ + Name: tt.embedding.Name, + Namespace: tt.embedding.Namespace, + }, updatedEmbedding) + require.NoError(t, err) + + assert.Equal(t, tt.expectedPhase, updatedEmbedding.Status.Phase) + assert.Equal(t, tt.expectedURL, updatedEmbedding.Status.URL) + }) + } +} diff --git a/cmd/thv-operator/main.go b/cmd/thv-operator/main.go index cf3316d0aa..ffba4f70ee 100644 --- a/cmd/thv-operator/main.go +++ b/cmd/thv-operator/main.go @@ -267,6 +267,17 @@ func setupServerControllers(mgr ctrl.Manager, enableRegistry bool) error { return fmt.Errorf("unable to create controller MCPRemoteProxy: %w", err) } + // Set up EmbeddingServer controller + if err := (&controllers.EmbeddingServerReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("embeddingserver-controller"), + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + ImageValidation: imageValidation, + }).SetupWithManager(mgr); err != nil { + return fmt.Errorf("unable to create controller EmbeddingServer: %w", err) + } + return nil } diff --git a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_creation_test.go b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_creation_test.go new file mode 100644 index 0000000000..efb3841a54 --- /dev/null +++ b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_creation_test.go @@ -0,0 +1,1177 @@ +// SPDX-License-Identifier: Apache-2.0 + +// Package controllers contains integration tests for the EmbeddingServer controller. +package controllers + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/utils/ptr" + + mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" +) + +// TestCase defines a table-driven test case for EmbeddingServer controller +type TestCase struct { + Name string + // InitialState contains objects to create before running assertions + InitialState InitialState + // FinalState defines the expected Kubernetes state after reconciliation + FinalState FinalState +} + +// InitialState represents the initial Kubernetes objects to create +type InitialState struct { + EmbeddingServer *mcpv1alpha1.EmbeddingServer + Secrets []*corev1.Secret +} + +// FinalState represents the expected Kubernetes state after reconciliation +// Uses actual K8s objects for comparison - only non-nil/non-zero fields are checked +type FinalState struct { + // StatefulSet expected state (nil means don't check specific fields) + StatefulSet *appsv1.StatefulSet + // Service expected state (nil means don't check specific fields) + Service *corev1.Service + // EmbeddingServer status expectations + Status *mcpv1alpha1.EmbeddingServerStatus +} + +var _ = Describe("EmbeddingServer Controller Integration Tests", func() { + const ( + timeout = time.Second * 30 + interval = time.Millisecond * 250 + defaultNamespace = "default" + ) + + // Helper function to create test namespace + createNamespace := func(namespace string) { + ns := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: namespace, + }, + } + _ = k8sClient.Create(ctx, ns) + } + + // Helper to run a single test case + runTestCase := func(tc TestCase) { + Context(tc.Name, Ordered, func() { + var createdEmbeddingServer *mcpv1alpha1.EmbeddingServer + + BeforeAll(func() { + namespace := tc.InitialState.EmbeddingServer.Namespace + 
createNamespace(namespace) + + // Create secrets first + for _, secret := range tc.InitialState.Secrets { + Expect(k8sClient.Create(ctx, secret)).Should(Succeed()) + } + + // Create the EmbeddingServer + Expect(k8sClient.Create(ctx, tc.InitialState.EmbeddingServer)).Should(Succeed()) + + // Fetch the created resource to get UID etc. + createdEmbeddingServer = &mcpv1alpha1.EmbeddingServer{} + Eventually(func() error { + return k8sClient.Get(ctx, types.NamespacedName{ + Name: tc.InitialState.EmbeddingServer.Name, + Namespace: tc.InitialState.EmbeddingServer.Namespace, + }, createdEmbeddingServer) + }, timeout, interval).Should(Succeed()) + }) + + AfterAll(func() { + // Clean up EmbeddingServer + if tc.InitialState.EmbeddingServer != nil { + _ = k8sClient.Delete(ctx, tc.InitialState.EmbeddingServer) + } + // Clean up secrets + for _, secret := range tc.InitialState.Secrets { + _ = k8sClient.Delete(ctx, secret) + } + }) + + // StatefulSet assertions + It("Should create StatefulSet with expected configuration", func() { + actual := &appsv1.StatefulSet{} + Eventually(func() error { + return k8sClient.Get(ctx, types.NamespacedName{ + Name: tc.InitialState.EmbeddingServer.Name, + Namespace: tc.InitialState.EmbeddingServer.Namespace, + }, actual) + }, timeout, interval).Should(Succeed()) + + if tc.FinalState.StatefulSet != nil { + verifyStatefulSetEquals(actual, tc.FinalState.StatefulSet) + } + verifyOwnerReference(actual.OwnerReferences, createdEmbeddingServer, "StatefulSet") + }) + + // Service assertions + It("Should create Service with expected configuration", func() { + actual := &corev1.Service{} + Eventually(func() error { + return k8sClient.Get(ctx, types.NamespacedName{ + Name: tc.InitialState.EmbeddingServer.Name, + Namespace: tc.InitialState.EmbeddingServer.Namespace, + }, actual) + }, timeout, interval).Should(Succeed()) + + if tc.FinalState.Service != nil { + verifyServiceEquals(actual, tc.FinalState.Service) + } + verifyOwnerReference(actual.OwnerReferences, 
createdEmbeddingServer, "Service") + }) + + // Status assertions + It("Should have expected status and finalizer", func() { + Eventually(func() bool { + actual := &mcpv1alpha1.EmbeddingServer{} + err := k8sClient.Get(ctx, types.NamespacedName{ + Name: tc.InitialState.EmbeddingServer.Name, + Namespace: tc.InitialState.EmbeddingServer.Namespace, + }, actual) + if err != nil { + return false + } + return verifyStatusEquals(actual, tc.FinalState.Status) + }, timeout, interval).Should(BeTrue()) + }) + }) + } + + // Define test cases as a table using actual K8s objects + testCases := []TestCase{ + { + Name: "When creating an EmbeddingServer with minimal config (verifies defaults)", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-defaults", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + // Only required fields - model and image + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app.kubernetes.io/name": "embeddingserver", + "app.kubernetes.io/instance": "test-defaults", + "app.kubernetes.io/component": "embedding-server", + "app.kubernetes.io/managed-by": "toolhive-operator", + }, + }, + Spec: appsv1.StatefulSetSpec{ + // Default: 1 replica + Replicas: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "embedding", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + // Default port: 8080 + Args: []string{"--model-id", "sentence-transformers/all-MiniLM-L6-v2", "--port", "8080"}, + Env: []corev1.EnvVar{{Name: "MODEL_ID", Value: "sentence-transformers/all-MiniLM-L6-v2"}}, + // Default: IfNotPresent + ImagePullPolicy: corev1.PullIfNotPresent, + // Default: no resource limits or 
requests + Resources: corev1.ResourceRequirements{}, + LivenessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{HTTPGet: &corev1.HTTPGetAction{Path: "/health"}}, + }, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{HTTPGet: &corev1.HTTPGetAction{Path: "/health"}}, + }, + }}, + }, + }, + }, + }, + // Default port: 8080 + Service: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Ports: []corev1.ServicePort{{Port: 8080}}, + }, + }, + Status: &mcpv1alpha1.EmbeddingServerStatus{ + // URL uses default port + URL: "http://test-defaults.default.svc.cluster.local:8080", + }, + }, + }, + { + Name: "When creating a basic EmbeddingServer", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-basic", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app.kubernetes.io/name": "embeddingserver", + "app.kubernetes.io/instance": "test-basic", + "app.kubernetes.io/component": "embedding-server", + "app.kubernetes.io/managed-by": "toolhive-operator", + }, + }, + Spec: appsv1.StatefulSetSpec{ + Replicas: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "embedding", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Args: []string{"--model-id", "sentence-transformers/all-MiniLM-L6-v2", "--port", "8080"}, + Env: []corev1.EnvVar{{Name: "MODEL_ID", Value: "sentence-transformers/all-MiniLM-L6-v2"}}, + LivenessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{HTTPGet: &corev1.HTTPGetAction{Path: "/health"}}, + }, + ReadinessProbe: &corev1.Probe{ + ProbeHandler: corev1.ProbeHandler{HTTPGet: 
&corev1.HTTPGetAction{Path: "/health"}}, + }, + }}, + }, + }, + }, + }, + Service: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Ports: []corev1.ServicePort{{Port: 8080}}, + }, + }, + Status: &mcpv1alpha1.EmbeddingServerStatus{ + URL: "http://test-basic.default.svc.cluster.local:8080", + }, + }, + }, + { + Name: "When creating an EmbeddingServer with model cache enabled", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-with-cache", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + ModelCache: &mcpv1alpha1.ModelCacheConfig{ + Enabled: true, + Size: "20Gi", + }, + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Replicas: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "embedding", + Env: []corev1.EnvVar{{Name: "HF_HOME", Value: "/data"}}, + VolumeMounts: []corev1.VolumeMount{{Name: "model-cache", MountPath: "/data"}}, + }}, + }, + }, + VolumeClaimTemplates: []corev1.PersistentVolumeClaim{{ + ObjectMeta: metav1.ObjectMeta{Name: "model-cache"}, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("20Gi")}, + }, + }, + }}, + }, + }, + Service: &corev1.Service{Spec: corev1.ServiceSpec{Ports: []corev1.ServicePort{{Port: 8080}}}}, + }, + }, + { + Name: "When creating an EmbeddingServer with resource requirements", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resources", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ 
+ Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + Resources: mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{CPU: "2", Memory: "4Gi"}, + Requests: mcpv1alpha1.ResourceList{CPU: "500m", Memory: "1Gi"}, + }, + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "embedding", + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("2"), corev1.ResourceMemory: resource.MustParse("4Gi")}, + Requests: corev1.ResourceList{corev1.ResourceCPU: resource.MustParse("500m"), corev1.ResourceMemory: resource.MustParse("1Gi")}, + }, + }}, + }, + }, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with custom replicas", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-replicas", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + Replicas: ptr.To(int32(3)), + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Replicas: ptr.To(int32(3)), + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with invalid PodTemplateSpec", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-invalid-podtemplate", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{"spec": {"containers": 
"invalid-not-an-array"}}`), + }, + }, + }, + }, + FinalState: FinalState{ + Status: &mcpv1alpha1.EmbeddingServerStatus{ + Phase: mcpv1alpha1.EmbeddingServerPhaseFailed, + Conditions: []metav1.Condition{{ + Type: mcpv1alpha1.ConditionPodTemplateValid, + Status: metav1.ConditionFalse, + Reason: mcpv1alpha1.ConditionReasonPodTemplateInvalid, + }}, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with valid PodTemplateSpec (nodeSelector)", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-valid-podtemplate", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{"spec":{"nodeSelector":{"disktype":"ssd"}}}`), + }, + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + NodeSelector: map[string]string{"disktype": "ssd"}, + }, + }, + }, + }, + Status: &mcpv1alpha1.EmbeddingServerStatus{ + Conditions: []metav1.Condition{{ + Type: mcpv1alpha1.ConditionPodTemplateValid, + Status: metav1.ConditionTrue, + }}, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with HuggingFace token secret", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-hf-token", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + HFTokenSecretRef: &mcpv1alpha1.SecretKeyRef{ + Name: "hf-token-secret", + Key: "token", + }, + }, + }, + Secrets: []*corev1.Secret{{ + ObjectMeta: metav1.ObjectMeta{ + Name: "hf-token-secret", + Namespace: defaultNamespace, + }, + Data: 
map[string][]byte{"token": []byte("hf_test_token_value")}, + }}, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "embedding", + Env: []corev1.EnvVar{{ + Name: "HF_TOKEN", + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{Name: "hf-token-secret"}, + Key: "token", + }, + }, + }}, + }}, + }, + }, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with custom environment variables", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-custom-env", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + Env: []mcpv1alpha1.EnvVar{ + {Name: "CUSTOM_VAR_1", Value: "value1"}, + {Name: "CUSTOM_VAR_2", Value: "value2"}, + }, + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "embedding", + Env: []corev1.EnvVar{ + {Name: "CUSTOM_VAR_1", Value: "value1"}, + {Name: "CUSTOM_VAR_2", Value: "value2"}, + }, + }}, + }, + }, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with custom args", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-custom-args", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + Args: []string{"--max-concurrent-requests", "512", "--tokenization-workers", "4"}, + }, + }, + }, + FinalState: 
FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "embedding", + Args: []string{"--model-id", "sentence-transformers/all-MiniLM-L6-v2", "--max-concurrent-requests", "512", "--tokenization-workers", "4"}, + }}, + }, + }, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with custom port", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-custom-port", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 9090, + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "embedding", + Args: []string{"--port", "9090"}, + }}, + }, + }, + }, + }, + Service: &corev1.Service{Spec: corev1.ServiceSpec{Ports: []corev1.ServicePort{{Port: 9090}}}}, + Status: &mcpv1alpha1.EmbeddingServerStatus{URL: "http://test-custom-port.default.svc.cluster.local:9090"}, + }, + }, + { + Name: "When creating an EmbeddingServer with ImagePullPolicy Always", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-imagepullpolicy-always", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + ImagePullPolicy: "Always", + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "embedding", + ImagePullPolicy: corev1.PullAlways, 
+ }}, + }, + }, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with ImagePullPolicy Never", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-imagepullpolicy-never", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + ImagePullPolicy: "Never", + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "embedding", + ImagePullPolicy: corev1.PullNever, + }}, + }, + }, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with model cache and custom storage class", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cache-storageclass", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + ModelCache: &mcpv1alpha1.ModelCacheConfig{ + Enabled: true, + Size: "50Gi", + StorageClassName: ptr.To("fast-ssd"), + }, + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + VolumeClaimTemplates: []corev1.PersistentVolumeClaim{{ + ObjectMeta: metav1.ObjectMeta{Name: "model-cache"}, + Spec: corev1.PersistentVolumeClaimSpec{ + StorageClassName: ptr.To("fast-ssd"), + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{corev1.ResourceStorage: resource.MustParse("50Gi")}, + }, + }, + }}, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with model cache ReadWriteMany access mode", + InitialState: InitialState{ 
+ EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cache-rwx", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + ModelCache: &mcpv1alpha1.ModelCacheConfig{ + Enabled: true, + Size: "10Gi", + AccessMode: "ReadWriteMany", + }, + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + VolumeClaimTemplates: []corev1.PersistentVolumeClaim{{ + ObjectMeta: metav1.ObjectMeta{Name: "model-cache"}, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany}, + }, + }}, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with PodTemplateSpec tolerations", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-tolerations", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{"spec":{"tolerations":[{"key":"gpu","operator":"Exists","effect":"NoSchedule"}]}}`), + }, + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Tolerations: []corev1.Toleration{{ + Key: "gpu", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoSchedule, + }}, + }, + }, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with PodTemplateSpec serviceAccountName", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-serviceaccount", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: 
"sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + PodTemplateSpec: &runtime.RawExtension{ + Raw: []byte(`{"spec":{"serviceAccountName":"custom-sa"}}`), + }, + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Replicas: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + ServiceAccountName: "custom-sa", + }, + }, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with ResourceOverrides on StatefulSet", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-overrides-sts", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + ResourceOverrides: &mcpv1alpha1.EmbeddingResourceOverrides{ + StatefulSet: &mcpv1alpha1.EmbeddingStatefulSetOverrides{ + ResourceMetadataOverrides: mcpv1alpha1.ResourceMetadataOverrides{ + Annotations: map[string]string{"custom-annotation": "sts-value"}, + Labels: map[string]string{"custom-label": "sts-value"}, + }, + }, + }, + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app.kubernetes.io/name": "embeddingserver", + "app.kubernetes.io/instance": "test-resource-overrides-sts", + "app.kubernetes.io/component": "embedding-server", + "app.kubernetes.io/managed-by": "toolhive-operator", + "custom-label": "sts-value", + }, + Annotations: map[string]string{ + "custom-annotation": "sts-value", + }, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with ResourceOverrides on Service", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-overrides-svc", + Namespace: defaultNamespace, + }, + 
Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + ResourceOverrides: &mcpv1alpha1.EmbeddingResourceOverrides{ + Service: &mcpv1alpha1.ResourceMetadataOverrides{ + Annotations: map[string]string{"service-annotation": "svc-value"}, + Labels: map[string]string{"service-label": "svc-value"}, + }, + }, + }, + }, + }, + FinalState: FinalState{ + Service: &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app.kubernetes.io/name": "embeddingserver", + "app.kubernetes.io/instance": "test-resource-overrides-svc", + "app.kubernetes.io/component": "embedding-server", + "app.kubernetes.io/managed-by": "toolhive-operator", + "service-label": "svc-value", + }, + Annotations: map[string]string{ + "service-annotation": "svc-value", + }, + }, + Spec: corev1.ServiceSpec{ + Ports: []corev1.ServicePort{{Port: 8080}}, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer with ResourceOverrides on pod template", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-resource-overrides-pod", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + ResourceOverrides: &mcpv1alpha1.EmbeddingResourceOverrides{ + StatefulSet: &mcpv1alpha1.EmbeddingStatefulSetOverrides{ + PodTemplateMetadataOverrides: &mcpv1alpha1.ResourceMetadataOverrides{ + Annotations: map[string]string{"pod-annotation": "pod-value"}, + Labels: map[string]string{"pod-label": "pod-value"}, + }, + }, + }, + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Replicas: ptr.To(int32(1)), + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app.kubernetes.io/name": 
"embeddingserver", + "app.kubernetes.io/instance": "test-resource-overrides-pod", + "pod-label": "pod-value", + }, + Annotations: map[string]string{ + "pod-annotation": "pod-value", + }, + }, + }, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer verifies container port", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-container-port", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + }, + }, + }, + FinalState: FinalState{ + StatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Name: "embedding", + Ports: []corev1.ContainerPort{{ + Name: "http", + ContainerPort: 8080, + Protocol: corev1.ProtocolTCP, + }}, + }}, + }, + }, + }, + }, + }, + }, + { + Name: "When creating an EmbeddingServer verifies Service selector and type", + InitialState: InitialState{ + EmbeddingServer: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-service-selector", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + }, + }, + }, + FinalState: FinalState{ + Service: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + Selector: map[string]string{ + "app.kubernetes.io/name": "embeddingserver", + "app.kubernetes.io/instance": "test-service-selector", + }, + Ports: []corev1.ServicePort{{Port: 8080}}, + }, + }, + }, + }, + } + + // Run all test cases + for _, tc := range testCases { + runTestCase(tc) + } +}) + +// --- Equality helper functions for K8s objects --- +// These functions accept an optional Gomega parameter for use inside Eventually 
blocks. +// When g is nil, they use the global Expect. + +// verifyStatefulSetEquals checks that actual StatefulSet contains expected fields. +func verifyStatefulSetEquals(actual, expected *appsv1.StatefulSet) { + verifyStatefulSetEqualsG(Default, actual, expected) +} + +// verifyStatefulSetEqualsG is the Gomega-aware version for use in Eventually blocks. +func verifyStatefulSetEqualsG(g Gomega, actual, expected *appsv1.StatefulSet) { + // Replicas + if expected.Spec.Replicas != nil { + g.Expect(actual.Spec.Replicas).To(Equal(expected.Spec.Replicas), "replicas mismatch") + } + + // Labels + for k, v := range expected.Labels { + g.Expect(actual.Labels).To(HaveKeyWithValue(k, v)) + } + + // Annotations + for k, v := range expected.Annotations { + g.Expect(actual.Annotations).To(HaveKeyWithValue(k, v)) + } + + // NodeSelector + for k, v := range expected.Spec.Template.Spec.NodeSelector { + g.Expect(actual.Spec.Template.Spec.NodeSelector).To(HaveKeyWithValue(k, v)) + } + + // Tolerations + for _, exp := range expected.Spec.Template.Spec.Tolerations { + g.Expect(actual.Spec.Template.Spec.Tolerations).To(ContainElement(exp)) + } + + // ServiceAccountName + if expected.Spec.Template.Spec.ServiceAccountName != "" { + g.Expect(actual.Spec.Template.Spec.ServiceAccountName).To(Equal(expected.Spec.Template.Spec.ServiceAccountName)) + } + + // Pod template labels + for k, v := range expected.Spec.Template.Labels { + g.Expect(actual.Spec.Template.Labels).To(HaveKeyWithValue(k, v)) + } + + // Pod template annotations + for k, v := range expected.Spec.Template.Annotations { + g.Expect(actual.Spec.Template.Annotations).To(HaveKeyWithValue(k, v)) + } + + // Containers + for i, exp := range expected.Spec.Template.Spec.Containers { + verifyContainerEqualsG(g, actual.Spec.Template.Spec.Containers[i], exp) + } + + // VolumeClaimTemplates + for i, exp := range expected.Spec.VolumeClaimTemplates { + verifyPVCEqualsG(g, actual.Spec.VolumeClaimTemplates[i], exp) + } +} + +// 
verifyContainerEqualsG is the Gomega-aware version for use in Eventually blocks. +func verifyContainerEqualsG(g Gomega, actual, expected corev1.Container) { + if expected.Name != "" { + g.Expect(actual.Name).To(Equal(expected.Name)) + } + if expected.Image != "" { + g.Expect(actual.Image).To(Equal(expected.Image)) + } + if expected.ImagePullPolicy != "" { + g.Expect(actual.ImagePullPolicy).To(Equal(expected.ImagePullPolicy)) + } + + for _, arg := range expected.Args { + g.Expect(actual.Args).To(ContainElement(arg)) + } + + for _, env := range expected.Env { + g.Expect(actual.Env).To(ContainElement(HaveField("Name", env.Name))) + } + + for _, vm := range expected.VolumeMounts { + g.Expect(actual.VolumeMounts).To(ContainElement(And( + HaveField("Name", vm.Name), + HaveField("MountPath", vm.MountPath), + ))) + } + + // Check resource limits - only verify if expected has values + for k, v := range expected.Resources.Limits { + g.Expect(actual.Resources.Limits[k]).To(Equal(v)) + } + + // Check resource requests - only verify if expected has values + for k, v := range expected.Resources.Requests { + g.Expect(actual.Resources.Requests[k]).To(Equal(v)) + } + + if expected.LivenessProbe != nil { + g.Expect(actual.LivenessProbe).NotTo(BeNil()) + } + if expected.ReadinessProbe != nil { + g.Expect(actual.ReadinessProbe).NotTo(BeNil()) + } + + // Container ports + for _, exp := range expected.Ports { + g.Expect(actual.Ports).To(ContainElement(And( + HaveField("Name", exp.Name), + HaveField("ContainerPort", exp.ContainerPort), + HaveField("Protocol", exp.Protocol), + ))) + } +} + +// verifyPVCEqualsG is the Gomega-aware version for use in Eventually blocks. 
+func verifyPVCEqualsG(g Gomega, actual, expected corev1.PersistentVolumeClaim) { + if expected.Name != "" { + g.Expect(actual.Name).To(Equal(expected.Name)) + } + for _, mode := range expected.Spec.AccessModes { + g.Expect(actual.Spec.AccessModes).To(ContainElement(mode)) + } + // StorageClassName + if expected.Spec.StorageClassName != nil { + g.Expect(actual.Spec.StorageClassName).To(Equal(expected.Spec.StorageClassName)) + } + // Storage size + if expected.Spec.Resources.Requests != nil { + expectedSize := expected.Spec.Resources.Requests[corev1.ResourceStorage] + actualSize := actual.Spec.Resources.Requests[corev1.ResourceStorage] + g.Expect(actualSize.Cmp(expectedSize)).To(Equal(0), "storage size mismatch") + } +} + +// verifyServiceEquals checks that actual Service contains expected ports. +func verifyServiceEquals(actual, expected *corev1.Service) { + verifyServiceEqualsG(Default, actual, expected) +} + +// verifyServiceEqualsG is the Gomega-aware version for use in Eventually blocks. +func verifyServiceEqualsG(g Gomega, actual, expected *corev1.Service) { + // Ports + for i, exp := range expected.Spec.Ports { + g.Expect(actual.Spec.Ports[i].Port).To(Equal(exp.Port)) + } + + // Service type + if expected.Spec.Type != "" { + g.Expect(actual.Spec.Type).To(Equal(expected.Spec.Type)) + } + + // Selector + for k, v := range expected.Spec.Selector { + g.Expect(actual.Spec.Selector).To(HaveKeyWithValue(k, v)) + } + + // Labels + for k, v := range expected.Labels { + g.Expect(actual.Labels).To(HaveKeyWithValue(k, v)) + } + + // Annotations + for k, v := range expected.Annotations { + g.Expect(actual.Annotations).To(HaveKeyWithValue(k, v)) + } +} + +// verifyStatusEquals checks status fields match and finalizer is present. 
+func verifyStatusEquals(actual *mcpv1alpha1.EmbeddingServer, expected *mcpv1alpha1.EmbeddingServerStatus) bool { + if expected != nil && expected.Phase != "" && actual.Status.Phase != expected.Phase { + return false + } + if expected != nil && expected.URL != "" && actual.Status.URL != expected.URL { + return false + } + // Always verify finalizer is present + if !containsString(actual.Finalizers, "embeddingserver.toolhive.stacklok.dev/finalizer") { + return false + } + return true +} + +// containsString checks if a slice contains a string. +func containsString(slice []string, s string) bool { + for _, item := range slice { + if item == s { + return true + } + } + return false +} + +// verifyOwnerReference checks owner reference is set correctly. +func verifyOwnerReference(ownerRefs []metav1.OwnerReference, embedding *mcpv1alpha1.EmbeddingServer, _ string) { + Expect(ownerRefs).To(HaveLen(1)) + Expect(ownerRefs[0].APIVersion).To(Equal("toolhive.stacklok.dev/v1alpha1")) + Expect(ownerRefs[0].Kind).To(Equal("EmbeddingServer")) + Expect(ownerRefs[0].Name).To(Equal(embedding.Name)) + Expect(ownerRefs[0].UID).To(Equal(embedding.UID)) + Expect(ownerRefs[0].Controller).To(HaveValue(BeTrue())) + Expect(ownerRefs[0].BlockOwnerDeletion).To(HaveValue(BeTrue())) +} diff --git a/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go new file mode 100644 index 0000000000..12aecdffa3 --- /dev/null +++ b/cmd/thv-operator/test-integration/embedding-server/embeddingserver_update_test.go @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: Apache-2.0 + +// Package controllers contains integration tests for the EmbeddingServer controller. +package controllers + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" +) + +// UpdateTestCase defines a test case for EmbeddingServer update scenarios. +type UpdateTestCase struct { + Name string + InitialState *mcpv1alpha1.EmbeddingServer + Updates []UpdateStep +} + +// UpdateStep defines a single update operation and its expected result. +type UpdateStep struct { + Name string + ApplyUpdate func(es *mcpv1alpha1.EmbeddingServer) + // Expected StatefulSet state after the update (nil means expect no changes) + ExpectedStatefulSet *appsv1.StatefulSet + // Expected Service state after the update (nil means expect no changes) + ExpectedService *corev1.Service +} + +var _ = Describe("EmbeddingServer Controller Update Tests", func() { + const ( + timeout = time.Second * 30 + interval = time.Millisecond * 250 + defaultNamespace = "default" + ) + + // Define update test cases + updateTestCases := []UpdateTestCase{ + { + Name: "When updating EmbeddingServer image", + InitialState: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-image", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:v1.0", + Port: 8080, + }, + }, + Updates: []UpdateStep{ + { + Name: "Should update StatefulSet when image changes to v2.0", + ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) { + es.Spec.Image = "ghcr.io/huggingface/text-embeddings-inference:v2.0" + }, + ExpectedStatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Image: "ghcr.io/huggingface/text-embeddings-inference:v2.0", 
+ }}, + }, + }, + }, + }, + }, + { + Name: "Should update StatefulSet when image changes to v3.0", + ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) { + es.Spec.Image = "ghcr.io/huggingface/text-embeddings-inference:v3.0" + }, + ExpectedStatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Image: "ghcr.io/huggingface/text-embeddings-inference:v3.0", + }}, + }, + }, + }, + }, + }, + }, + }, + { + Name: "When updating EmbeddingServer replicas", + InitialState: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-replicas", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + Replicas: ptr.To(int32(1)), + }, + }, + Updates: []UpdateStep{ + { + Name: "Should scale up to 3 replicas", + ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) { + es.Spec.Replicas = ptr.To(int32(3)) + }, + ExpectedStatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Replicas: ptr.To(int32(3)), + }, + }, + }, + { + Name: "Should scale down to 2 replicas", + ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) { + es.Spec.Replicas = ptr.To(int32(2)) + }, + ExpectedStatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Replicas: ptr.To(int32(2)), + }, + }, + }, + }, + }, + { + Name: "When updating EmbeddingServer model", + InitialState: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-model", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + }, + }, + Updates: []UpdateStep{ + { + Name: "Should update StatefulSet args when model changes", + ApplyUpdate: func(es 
*mcpv1alpha1.EmbeddingServer) { + es.Spec.Model = "sentence-transformers/all-mpnet-base-v2" + }, + ExpectedStatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Args: []string{"--model-id", "sentence-transformers/all-mpnet-base-v2"}, + }}, + }, + }, + }, + }, + }, + }, + }, + { + Name: "When updating EmbeddingServer environment variables", + InitialState: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-env", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + Env: []mcpv1alpha1.EnvVar{ + {Name: "LOG_LEVEL", Value: "info"}, + }, + }, + }, + Updates: []UpdateStep{ + { + Name: "Should update StatefulSet when env var value changes", + ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) { + es.Spec.Env = []mcpv1alpha1.EnvVar{ + {Name: "LOG_LEVEL", Value: "debug"}, + } + }, + ExpectedStatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Env: []corev1.EnvVar{{Name: "LOG_LEVEL"}}, + }}, + }, + }, + }, + }, + }, + { + Name: "Should update StatefulSet when new env var is added", + ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) { + es.Spec.Env = []mcpv1alpha1.EnvVar{ + {Name: "LOG_LEVEL", Value: "debug"}, + {Name: "NEW_VAR", Value: "new_value"}, + } + }, + ExpectedStatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Env: []corev1.EnvVar{ + {Name: "LOG_LEVEL"}, + {Name: "NEW_VAR"}, + }, + }}, + }, + }, + }, + }, + }, + }, + }, + { + Name: "When updating EmbeddingServer port", + InitialState: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: 
"test-update-port", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Port: 8080, + }, + }, + Updates: []UpdateStep{ + { + Name: "Should update StatefulSet and Service when port changes", + ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) { + es.Spec.Port = 9090 + }, + ExpectedStatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Args: []string{"--port", "9090"}, + }}, + }, + }, + }, + }, + ExpectedService: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Ports: []corev1.ServicePort{{Port: 9090}}, + }, + }, + }, + }, + }, + { + Name: "When updating EmbeddingServer resources", + InitialState: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-resources", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Resources: mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{CPU: "1", Memory: "2Gi"}, + Requests: mcpv1alpha1.ResourceList{CPU: "500m", Memory: "1Gi"}, + }, + }, + }, + Updates: []UpdateStep{ + { + Name: "Should update StatefulSet when resource limits change", + ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) { + es.Spec.Resources = mcpv1alpha1.ResourceRequirements{ + Limits: mcpv1alpha1.ResourceList{CPU: "2", Memory: "4Gi"}, + Requests: mcpv1alpha1.ResourceList{CPU: "1", Memory: "2Gi"}, + } + }, + ExpectedStatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Resources: corev1.ResourceRequirements{ + Limits: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("2"), + corev1.ResourceMemory: 
resource.MustParse("4Gi"), + }, + Requests: corev1.ResourceList{ + corev1.ResourceCPU: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2Gi"), + }, + }, + }}, + }, + }, + }, + }, + }, + }, + }, + { + Name: "When updating EmbeddingServer args", + InitialState: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-args", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + Args: []string{"--max-concurrent-requests", "256"}, + }, + }, + Updates: []UpdateStep{ + { + Name: "Should update StatefulSet when args change", + ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) { + es.Spec.Args = []string{"--max-concurrent-requests", "512", "--tokenization-workers", "4"} + }, + ExpectedStatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Args: []string{"--max-concurrent-requests", "512", "--tokenization-workers", "4"}, + }}, + }, + }, + }, + }, + }, + { + Name: "Should update StatefulSet when args are removed", + ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) { + es.Spec.Args = nil + }, + ExpectedStatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + Args: []string{"--model-id", "sentence-transformers/all-MiniLM-L6-v2"}, + }}, + }, + }, + }, + }, + }, + }, + }, + { + Name: "When updating EmbeddingServer ImagePullPolicy", + InitialState: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-imagepullpolicy", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + ImagePullPolicy: "IfNotPresent", + }, + }, 
+ Updates: []UpdateStep{ + { + Name: "Should update StatefulSet when ImagePullPolicy changes", + ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) { + es.Spec.ImagePullPolicy = "Always" + }, + ExpectedStatefulSet: &appsv1.StatefulSet{ + Spec: appsv1.StatefulSetSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{{ + ImagePullPolicy: corev1.PullAlways, + }}, + }, + }, + }, + }, + }, + }, + }, + { + Name: "When updating EmbeddingServer ResourceOverrides", + InitialState: &mcpv1alpha1.EmbeddingServer{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-update-resourceoverrides", + Namespace: defaultNamespace, + }, + Spec: mcpv1alpha1.EmbeddingServerSpec{ + Model: "sentence-transformers/all-MiniLM-L6-v2", + Image: "ghcr.io/huggingface/text-embeddings-inference:latest", + }, + }, + Updates: []UpdateStep{ + { + Name: "Should update StatefulSet when adding annotations", + ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) { + es.Spec.ResourceOverrides = &mcpv1alpha1.EmbeddingResourceOverrides{ + StatefulSet: &mcpv1alpha1.EmbeddingStatefulSetOverrides{ + ResourceMetadataOverrides: mcpv1alpha1.ResourceMetadataOverrides{ + Annotations: map[string]string{"new-annotation": "new-value"}, + }, + }, + } + }, + ExpectedStatefulSet: &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{"new-annotation": "new-value"}, + }, + }, + }, + { + Name: "Should update StatefulSet and Service when adding annotations to both", + ApplyUpdate: func(es *mcpv1alpha1.EmbeddingServer) { + es.Spec.ResourceOverrides = &mcpv1alpha1.EmbeddingResourceOverrides{ + StatefulSet: &mcpv1alpha1.EmbeddingStatefulSetOverrides{ + ResourceMetadataOverrides: mcpv1alpha1.ResourceMetadataOverrides{ + Annotations: map[string]string{"new-annotation": "new-value"}, + }, + }, + Service: &mcpv1alpha1.ResourceMetadataOverrides{ + Annotations: map[string]string{"service-annotation": "service-value"}, + }, + } + }, + ExpectedStatefulSet: 
&appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{"new-annotation": "new-value"}, + }, + }, + ExpectedService: &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{"service-annotation": "service-value"}, + }, + }, + }, + }, + }, + } + + // Helper to run a single update test case + runUpdateTestCase := func(tc UpdateTestCase) { + Context(tc.Name, Ordered, func() { + var embeddingServer *mcpv1alpha1.EmbeddingServer + + BeforeAll(func() { + _ = k8sClient.Create(ctx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: tc.InitialState.Namespace}}) + embeddingServer = tc.InitialState.DeepCopy() + Expect(k8sClient.Create(ctx, embeddingServer)).To(Succeed()) + Eventually(func(g Gomega) { + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), &appsv1.StatefulSet{})).To(Succeed()) + }, timeout, interval).Should(Succeed()) + }) + + AfterAll(func() { + _ = k8sClient.Delete(ctx, embeddingServer) + }) + + for _, update := range tc.Updates { + update := update + It(update.Name, func() { + // Capture original state before update + originalSts := &appsv1.StatefulSet{} + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), originalSts)).To(Succeed()) + originalSvc := &corev1.Service{} + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), originalSvc)).To(Succeed()) + + // Apply the update + Eventually(func(g Gomega) { + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), embeddingServer)).To(Succeed()) + update.ApplyUpdate(embeddingServer) + g.Expect(k8sClient.Update(ctx, embeddingServer)).To(Succeed()) + }, timeout, interval).Should(Succeed()) + + // Verify the StatefulSet matches expected state (nil means expect no changes) + if update.ExpectedStatefulSet != nil { + Eventually(func(g Gomega) { + sts := &appsv1.StatefulSet{} + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), sts)).To(Succeed()) + 
verifyStatefulSetEqualsG(g, sts, update.ExpectedStatefulSet) + }, timeout, interval).Should(Succeed()) + } else { + // Verify StatefulSet hasn't changed + Consistently(func(g Gomega) { + sts := &appsv1.StatefulSet{} + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), sts)).To(Succeed()) + g.Expect(sts.Spec).To(Equal(originalSts.Spec)) + }, time.Second*2, interval).Should(Succeed()) + } + + // Verify the Service matches expected state (nil means expect no changes) + if update.ExpectedService != nil { + Eventually(func(g Gomega) { + svc := &corev1.Service{} + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), svc)).To(Succeed()) + verifyServiceEqualsG(g, svc, update.ExpectedService) + }, timeout, interval).Should(Succeed()) + } else { + // Verify Service hasn't changed + Consistently(func(g Gomega) { + svc := &corev1.Service{} + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(embeddingServer), svc)).To(Succeed()) + g.Expect(svc.Spec).To(Equal(originalSvc.Spec)) + }, time.Second*2, interval).Should(Succeed()) + } + }) + } + }) + } + + // Run all update test cases + for _, tc := range updateTestCases { + runUpdateTestCase(tc) + } +}) diff --git a/cmd/thv-operator/test-integration/embedding-server/suite_test.go b/cmd/thv-operator/test-integration/embedding-server/suite_test.go new file mode 100644 index 0000000000..d8e7376933 --- /dev/null +++ b/cmd/thv-operator/test-integration/embedding-server/suite_test.go @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: Apache-2.0 + +// Package controllers contains integration tests for the EmbeddingServer controller. +package controllers + +import ( + "context" + "path/filepath" + "testing" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "go.uber.org/zap/zapcore" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" + + mcpv1alpha1 "github.com/stacklok/toolhive/cmd/thv-operator/api/v1alpha1" + "github.com/stacklok/toolhive/cmd/thv-operator/controllers" + ctrlutil "github.com/stacklok/toolhive/cmd/thv-operator/pkg/controllerutil" + "github.com/stacklok/toolhive/cmd/thv-operator/pkg/validation" +) + +var ( + cfg *rest.Config + k8sClient client.Client + testEnv *envtest.Environment + ctx context.Context + cancel context.CancelFunc +) + +func TestControllers(t *testing.T) { + t.Parallel() + RegisterFailHandler(Fail) + + suiteConfig, reporterConfig := GinkgoConfiguration() + // Only show verbose output for failures + reporterConfig.Verbose = false + reporterConfig.VeryVerbose = false + reporterConfig.FullTrace = false + + RunSpecs(t, "EmbeddingServer Controller Integration Test Suite", suiteConfig, reporterConfig) +} + +var _ = BeforeSuite(func() { + // Only log errors unless a test fails + logLevel := zapcore.ErrorLevel + + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true), zap.Level(logLevel))) + + ctx, cancel = context.WithCancel(context.Background()) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "..", "deploy", "charts", "operator-crds", "files", "crds")}, + ErrorIfCRDPathMissing: true, + } + + var err error + // cfg is defined in this file globally. 
+ cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + err = mcpv1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + // Add other schemes that the controllers use + err = appsv1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + err = corev1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + //+kubebuilder:scaffold:scheme + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + + // Start the controller manager + k8sManager, err := ctrl.NewManager(cfg, ctrl.Options{ + Scheme: scheme.Scheme, + Metrics: metricsserver.Options{ + BindAddress: "0", // Disable metrics server for tests to avoid port conflicts + }, + HealthProbeBindAddress: "0", // Disable health probe for tests + }) + Expect(err).ToNot(HaveOccurred()) + + // Register the EmbeddingServer controller + err = (&controllers.EmbeddingServerReconciler{ + Client: k8sManager.GetClient(), + Scheme: k8sManager.GetScheme(), + Recorder: k8sManager.GetEventRecorderFor("embeddingserver-controller"), + PlatformDetector: ctrlutil.NewSharedPlatformDetector(), + ImageValidation: validation.ImageValidationAlwaysAllow, + }).SetupWithManager(k8sManager) + Expect(err).ToNot(HaveOccurred()) + + // Start the manager in a goroutine + go func() { + defer GinkgoRecover() + err = k8sManager.Start(ctx) + Expect(err).ToNot(HaveOccurred(), "failed to run manager") + }() +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + cancel() + // Give it some time to shut down gracefully + time.Sleep(100 * time.Millisecond) + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/deploy/charts/operator-crds/Chart.yaml b/deploy/charts/operator-crds/Chart.yaml index 1b14897d71..e336674530 100644 --- a/deploy/charts/operator-crds/Chart.yaml +++ b/deploy/charts/operator-crds/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 
name: toolhive-operator-crds description: A Helm chart for installing the ToolHive Operator CRDs into Kubernetes. type: application -version: 0.0.102 +version: 0.0.103 appVersion: "0.0.1" diff --git a/deploy/charts/operator-crds/README.md b/deploy/charts/operator-crds/README.md index 2c68563bc6..93948d1568 100644 --- a/deploy/charts/operator-crds/README.md +++ b/deploy/charts/operator-crds/README.md @@ -1,6 +1,6 @@ # ToolHive Operator CRDs Helm Chart -![Version: 0.0.102](https://img.shields.io/badge/Version-0.0.102-informational?style=flat-square) +![Version: 0.0.103](https://img.shields.io/badge/Version-0.0.103-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) A Helm chart for installing the ToolHive Operator CRDs into Kubernetes. diff --git a/deploy/charts/operator-crds/crd-helm-wrapper/main.go b/deploy/charts/operator-crds/crd-helm-wrapper/main.go index 0e9f49161e..a1cc05f109 100644 --- a/deploy/charts/operator-crds/crd-helm-wrapper/main.go +++ b/deploy/charts/operator-crds/crd-helm-wrapper/main.go @@ -39,6 +39,7 @@ var crdFeatureFlags = map[string][]string{ "mcpremoteproxies": {"server"}, "mcptoolconfigs": {"server"}, "mcpgroups": {"server"}, + "embeddingservers": {"server"}, "mcpregistries": {"registry"}, "virtualmcpservers": {"virtualMcp"}, "virtualmcpcompositetooldefinitions": {"virtualMcp"}, diff --git a/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_embeddingservers.yaml b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_embeddingservers.yaml new file mode 100644 index 0000000000..d213326771 --- /dev/null +++ b/deploy/charts/operator-crds/files/crds/toolhive.stacklok.dev_embeddingservers.yaml @@ -0,0 +1,352 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.3 + name: embeddingservers.toolhive.stacklok.dev +spec: + group: toolhive.stacklok.dev + names: 
+ kind: EmbeddingServer + listKind: EmbeddingServerList + plural: embeddingservers + singular: embeddingserver + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.phase + name: Status + type: string + - jsonPath: .spec.model + name: Model + type: string + - jsonPath: .status.readyReplicas + name: Ready + type: integer + - jsonPath: .status.url + name: URL + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: EmbeddingServer is the Schema for the embeddingservers API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: EmbeddingServerSpec defines the desired state of EmbeddingServer + properties: + args: + description: Args are additional arguments to pass to the embedding + inference server + items: + type: string + type: array + env: + description: Env are environment variables to set in the container + items: + description: EnvVar represents an environment variable in a container + properties: + name: + description: Name of the environment variable + type: string + value: + description: Value of the environment variable + type: string + required: + - name + - value + type: object + type: array + hfTokenSecretRef: + description: |- + HFTokenSecretRef is a reference to a Kubernetes Secret containing the huggingface token. + If provided, the secret value will be provided to the embedding server for authentication with huggingface. 
+ properties: + key: + description: Key is the key within the secret + type: string + name: + description: Name is the name of the secret + type: string + required: + - key + - name + type: object + image: + default: ghcr.io/huggingface/text-embeddings-inference:latest + description: Image is the container image for huggingface-embedding-inference + type: string + imagePullPolicy: + default: IfNotPresent + description: ImagePullPolicy defines the pull policy for the container + image + enum: + - Always + - Never + - IfNotPresent + type: string + model: + description: Model is the HuggingFace embedding model to use (e.g., + "sentence-transformers/all-MiniLM-L6-v2") + type: string + modelCache: + description: |- + ModelCache configures persistent storage for downloaded models + When enabled, models are cached in a PVC and reused across pod restarts + properties: + accessMode: + default: ReadWriteOnce + description: AccessMode is the access mode for the PVC + enum: + - ReadWriteOnce + - ReadWriteMany + - ReadOnlyMany + type: string + enabled: + default: true + description: Enabled controls whether model caching is enabled + type: boolean + size: + default: 10Gi + description: Size is the size of the PVC for model caching (e.g., + "10Gi") + type: string + storageClassName: + description: |- + StorageClassName is the storage class to use for the PVC + If not specified, uses the cluster's default storage class + type: string + type: object + podTemplateSpec: + description: |- + PodTemplateSpec allows customizing the pod (node selection, tolerations, etc.) + This field accepts a PodTemplateSpec object as JSON/YAML. + Note that to modify the specific container the embedding server runs in, you must specify + the 'embedding' container name in the PodTemplateSpec. 
+ type: object + x-kubernetes-preserve-unknown-fields: true + port: + default: 8080 + description: Port is the port to expose the embedding service on + format: int32 + maximum: 65535 + minimum: 1 + type: integer + replicas: + default: 1 + description: Replicas is the number of embedding server replicas to + run + format: int32 + minimum: 1 + type: integer + resourceOverrides: + description: ResourceOverrides allows overriding annotations and labels + for resources created by the operator + properties: + persistentVolumeClaim: + description: PersistentVolumeClaim defines overrides for the PVC + resource + properties: + annotations: + additionalProperties: + type: string + description: Annotations to add or override on the resource + type: object + labels: + additionalProperties: + type: string + description: Labels to add or override on the resource + type: object + type: object + service: + description: Service defines overrides for the Service resource + properties: + annotations: + additionalProperties: + type: string + description: Annotations to add or override on the resource + type: object + labels: + additionalProperties: + type: string + description: Labels to add or override on the resource + type: object + type: object + statefulSet: + description: StatefulSet defines overrides for the StatefulSet + resource + properties: + annotations: + additionalProperties: + type: string + description: Annotations to add or override on the resource + type: object + labels: + additionalProperties: + type: string + description: Labels to add or override on the resource + type: object + podTemplateMetadataOverrides: + description: PodTemplateMetadataOverrides defines metadata + overrides for the pod template + properties: + annotations: + additionalProperties: + type: string + description: Annotations to add or override on the resource + type: object + labels: + additionalProperties: + type: string + description: Labels to add or override on the resource + type: object 
+ type: object + type: object + type: object + resources: + description: Resources defines compute resources for the embedding + server + properties: + limits: + description: Limits describes the maximum amount of compute resources + allowed + properties: + cpu: + description: CPU is the CPU limit in cores (e.g., "500m" for + 0.5 cores) + type: string + memory: + description: Memory is the memory limit in bytes (e.g., "64Mi" + for 64 megabytes) + type: string + type: object + requests: + description: Requests describes the minimum amount of compute + resources required + properties: + cpu: + description: CPU is the CPU limit in cores (e.g., "500m" for + 0.5 cores) + type: string + memory: + description: Memory is the memory limit in bytes (e.g., "64Mi" + for 64 megabytes) + type: string + type: object + type: object + required: + - image + - model + type: object + status: + description: EmbeddingServerStatus defines the observed state of EmbeddingServer + properties: + conditions: + description: Conditions represent the latest available observations + of the EmbeddingServer's state + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. 
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + message: + description: Message provides additional information about the current + phase + type: string + observedGeneration: + description: ObservedGeneration reflects the generation most recently + observed by the controller + format: int64 + type: integer + phase: + description: Phase is the current phase of the EmbeddingServer + enum: + - Pending + - Downloading + - Running + - Failed + - Terminating + type: string + readyReplicas: + description: ReadyReplicas is the number of ready replicas + format: int32 + type: integer + url: + description: URL is the URL where the embedding service can be accessed + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_embeddingservers.yaml 
b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_embeddingservers.yaml new file mode 100644 index 0000000000..2bf3138fe5 --- /dev/null +++ b/deploy/charts/operator-crds/templates/toolhive.stacklok.dev_embeddingservers.yaml @@ -0,0 +1,356 @@ +{{- if .Values.crds.install.server }} +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + {{- if .Values.crds.keep }} + helm.sh/resource-policy: keep + {{- end }} + controller-gen.kubebuilder.io/version: v0.17.3 + name: embeddingservers.toolhive.stacklok.dev +spec: + group: toolhive.stacklok.dev + names: + kind: EmbeddingServer + listKind: EmbeddingServerList + plural: embeddingservers + singular: embeddingserver + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .status.phase + name: Status + type: string + - jsonPath: .spec.model + name: Model + type: string + - jsonPath: .status.readyReplicas + name: Ready + type: integer + - jsonPath: .status.url + name: URL + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: EmbeddingServer is the Schema for the embeddingservers API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: EmbeddingServerSpec defines the desired state of EmbeddingServer + properties: + args: + description: Args are additional arguments to pass to the embedding + inference server + items: + type: string + type: array + env: + description: Env are environment variables to set in the container + items: + description: EnvVar represents an environment variable in a container + properties: + name: + description: Name of the environment variable + type: string + value: + description: Value of the environment variable + type: string + required: + - name + - value + type: object + type: array + hfTokenSecretRef: + description: |- + HFTokenSecretRef is a reference to a Kubernetes Secret containing the huggingface token. + If provided, the secret value will be provided to the embedding server for authentication with huggingface. 
+ properties: + key: + description: Key is the key within the secret + type: string + name: + description: Name is the name of the secret + type: string + required: + - key + - name + type: object + image: + default: ghcr.io/huggingface/text-embeddings-inference:latest + description: Image is the container image for huggingface-embedding-inference + type: string + imagePullPolicy: + default: IfNotPresent + description: ImagePullPolicy defines the pull policy for the container + image + enum: + - Always + - Never + - IfNotPresent + type: string + model: + description: Model is the HuggingFace embedding model to use (e.g., + "sentence-transformers/all-MiniLM-L6-v2") + type: string + modelCache: + description: |- + ModelCache configures persistent storage for downloaded models + When enabled, models are cached in a PVC and reused across pod restarts + properties: + accessMode: + default: ReadWriteOnce + description: AccessMode is the access mode for the PVC + enum: + - ReadWriteOnce + - ReadWriteMany + - ReadOnlyMany + type: string + enabled: + default: true + description: Enabled controls whether model caching is enabled + type: boolean + size: + default: 10Gi + description: Size is the size of the PVC for model caching (e.g., + "10Gi") + type: string + storageClassName: + description: |- + StorageClassName is the storage class to use for the PVC + If not specified, uses the cluster's default storage class + type: string + type: object + podTemplateSpec: + description: |- + PodTemplateSpec allows customizing the pod (node selection, tolerations, etc.) + This field accepts a PodTemplateSpec object as JSON/YAML. + Note that to modify the specific container the embedding server runs in, you must specify + the 'embedding' container name in the PodTemplateSpec. 
+ type: object + x-kubernetes-preserve-unknown-fields: true + port: + default: 8080 + description: Port is the port to expose the embedding service on + format: int32 + maximum: 65535 + minimum: 1 + type: integer + replicas: + default: 1 + description: Replicas is the number of embedding server replicas to + run + format: int32 + minimum: 1 + type: integer + resourceOverrides: + description: ResourceOverrides allows overriding annotations and labels + for resources created by the operator + properties: + persistentVolumeClaim: + description: PersistentVolumeClaim defines overrides for the PVC + resource + properties: + annotations: + additionalProperties: + type: string + description: Annotations to add or override on the resource + type: object + labels: + additionalProperties: + type: string + description: Labels to add or override on the resource + type: object + type: object + service: + description: Service defines overrides for the Service resource + properties: + annotations: + additionalProperties: + type: string + description: Annotations to add or override on the resource + type: object + labels: + additionalProperties: + type: string + description: Labels to add or override on the resource + type: object + type: object + statefulSet: + description: StatefulSet defines overrides for the StatefulSet + resource + properties: + annotations: + additionalProperties: + type: string + description: Annotations to add or override on the resource + type: object + labels: + additionalProperties: + type: string + description: Labels to add or override on the resource + type: object + podTemplateMetadataOverrides: + description: PodTemplateMetadataOverrides defines metadata + overrides for the pod template + properties: + annotations: + additionalProperties: + type: string + description: Annotations to add or override on the resource + type: object + labels: + additionalProperties: + type: string + description: Labels to add or override on the resource + type: object 
+ type: object + type: object + type: object + resources: + description: Resources defines compute resources for the embedding + server + properties: + limits: + description: Limits describes the maximum amount of compute resources + allowed + properties: + cpu: + description: CPU is the CPU limit in cores (e.g., "500m" for + 0.5 cores) + type: string + memory: + description: Memory is the memory limit in bytes (e.g., "64Mi" + for 64 megabytes) + type: string + type: object + requests: + description: Requests describes the minimum amount of compute + resources required + properties: + cpu: + description: CPU is the CPU limit in cores (e.g., "500m" for + 0.5 cores) + type: string + memory: + description: Memory is the memory limit in bytes (e.g., "64Mi" + for 64 megabytes) + type: string + type: object + type: object + required: + - image + - model + type: object + status: + description: EmbeddingServerStatus defines the observed state of EmbeddingServer + properties: + conditions: + description: Conditions represent the latest available observations + of the EmbeddingServer's state + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. 
+ For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + message: + description: Message provides additional information about the current + phase + type: string + observedGeneration: + description: ObservedGeneration reflects the generation most recently + observed by the controller + format: int64 + type: integer + phase: + description: Phase is the current phase of the EmbeddingServer + enum: + - Pending + - Downloading + - Running + - Failed + - Terminating + type: string + readyReplicas: + description: ReadyReplicas is the number of ready replicas + format: int32 + type: integer + url: + description: URL is the URL where the embedding service can be accessed + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} +{{- end }} diff --git a/deploy/charts/operator/Chart.yaml b/deploy/charts/operator/Chart.yaml 
index d0af785815..8be2129a20 100644 --- a/deploy/charts/operator/Chart.yaml +++ b/deploy/charts/operator/Chart.yaml @@ -2,5 +2,5 @@ apiVersion: v2 name: toolhive-operator description: A Helm chart for deploying the ToolHive Operator into Kubernetes. type: application -version: 0.5.26 -appVersion: "v0.8.0" +version: 0.5.27 +appVersion: "v0.8.1" diff --git a/deploy/charts/operator/README.md b/deploy/charts/operator/README.md index d8f6294a56..ac18156091 100644 --- a/deploy/charts/operator/README.md +++ b/deploy/charts/operator/README.md @@ -1,6 +1,6 @@ # ToolHive Operator Helm Chart -![Version: 0.5.26](https://img.shields.io/badge/Version-0.5.26-informational?style=flat-square) +![Version: 0.5.27](https://img.shields.io/badge/Version-0.5.27-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) A Helm chart for deploying the ToolHive Operator into Kubernetes. diff --git a/deploy/charts/operator/templates/clusterrole/role.yaml b/deploy/charts/operator/templates/clusterrole/role.yaml index feccbeb749..97f45f2407 100644 --- a/deploy/charts/operator/templates/clusterrole/role.yaml +++ b/deploy/charts/operator/templates/clusterrole/role.yaml @@ -8,6 +8,7 @@ rules: - "" resources: - configmaps + - persistentvolumeclaims - secrets - serviceaccounts verbs: @@ -121,6 +122,7 @@ rules: - apiGroups: - toolhive.stacklok.dev resources: + - embeddingservers - mcpexternalauthconfigs - mcpgroups - mcpregistries @@ -139,6 +141,7 @@ rules: - apiGroups: - toolhive.stacklok.dev resources: + - embeddingservers/finalizers - mcpexternalauthconfigs/finalizers - mcpgroups/finalizers - mcpregistries/finalizers @@ -149,6 +152,7 @@ rules: - apiGroups: - toolhive.stacklok.dev resources: + - embeddingservers/status - mcpexternalauthconfigs/status - mcpgroups/status - mcpregistries/status diff --git a/docs/operator/crd-api.md b/docs/operator/crd-api.md index 80e1ee9808..329d1fcbd6 100644 --- a/docs/operator/crd-api.md +++ 
b/docs/operator/crd-api.md @@ -645,6 +645,8 @@ _Appears in:_ ## toolhive.stacklok.dev/v1alpha1 ### Resource Types +- [api.v1alpha1.EmbeddingServer](#apiv1alpha1embeddingserver) +- [api.v1alpha1.EmbeddingServerList](#apiv1alpha1embeddingserverlist) - [api.v1alpha1.MCPExternalAuthConfig](#apiv1alpha1mcpexternalauthconfig) - [api.v1alpha1.MCPExternalAuthConfigList](#apiv1alpha1mcpexternalauthconfiglist) - [api.v1alpha1.MCPGroup](#apiv1alpha1mcpgroup) @@ -848,6 +850,153 @@ _Appears in:_ +#### api.v1alpha1.EmbeddingResourceOverrides + + + +EmbeddingResourceOverrides defines overrides for annotations and labels on created resources + + + +_Appears in:_ +- [api.v1alpha1.EmbeddingServerSpec](#apiv1alpha1embeddingserverspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `statefulSet` _[api.v1alpha1.EmbeddingStatefulSetOverrides](#apiv1alpha1embeddingstatefulsetoverrides)_ | StatefulSet defines overrides for the StatefulSet resource | | | +| `service` _[api.v1alpha1.ResourceMetadataOverrides](#apiv1alpha1resourcemetadataoverrides)_ | Service defines overrides for the Service resource | | | +| `persistentVolumeClaim` _[api.v1alpha1.ResourceMetadataOverrides](#apiv1alpha1resourcemetadataoverrides)_ | PersistentVolumeClaim defines overrides for the PVC resource | | | + + +#### api.v1alpha1.EmbeddingServer + + + +EmbeddingServer is the Schema for the embeddingservers API + + + +_Appears in:_ +- [api.v1alpha1.EmbeddingServerList](#apiv1alpha1embeddingserverlist) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `apiVersion` _string_ | `toolhive.stacklok.dev/v1alpha1` | | | +| `kind` _string_ | `EmbeddingServer` | | | +| `kind` _string_ | Kind is a string value representing the REST resource this object represents.
Servers may infer this from the endpoint the client submits requests to.
Cannot be updated.
In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | | | +| `apiVersion` _string_ | APIVersion defines the versioned schema of this representation of an object.
Servers should convert recognized schemas to the latest internal value, and
may reject unrecognized values.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | | | +| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `spec` _[api.v1alpha1.EmbeddingServerSpec](#apiv1alpha1embeddingserverspec)_ | | | | +| `status` _[api.v1alpha1.EmbeddingServerStatus](#apiv1alpha1embeddingserverstatus)_ | | | | + + +#### api.v1alpha1.EmbeddingServerList + + + +EmbeddingServerList contains a list of EmbeddingServer + + + + + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `apiVersion` _string_ | `toolhive.stacklok.dev/v1alpha1` | | | +| `kind` _string_ | `EmbeddingServerList` | | | +| `kind` _string_ | Kind is a string value representing the REST resource this object represents.
Servers may infer this from the endpoint the client submits requests to.
Cannot be updated.
In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds | | | +| `apiVersion` _string_ | APIVersion defines the versioned schema of this representation of an object.
Servers should convert recognized schemas to the latest internal value, and
may reject unrecognized values.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources | | | +| `metadata` _[ListMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#listmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `items` _[api.v1alpha1.EmbeddingServer](#apiv1alpha1embeddingserver) array_ | | | | + + +#### api.v1alpha1.EmbeddingServerPhase + +_Underlying type:_ _string_ + +EmbeddingServerPhase is the phase of the EmbeddingServer + +_Validation:_ +- Enum: [Pending Downloading Running Failed Terminating] + +_Appears in:_ +- [api.v1alpha1.EmbeddingServerStatus](#apiv1alpha1embeddingserverstatus) + +| Field | Description | +| --- | --- | +| `Pending` | EmbeddingServerPhasePending means the EmbeddingServer is being created
| +| `Downloading` | EmbeddingServerPhaseDownloading means the model is being downloaded
| +| `Running` | EmbeddingServerPhaseRunning means the EmbeddingServer is running and ready
| +| `Failed` | EmbeddingServerPhaseFailed means the EmbeddingServer failed to start
| +| `Terminating` | EmbeddingServerPhaseTerminating means the EmbeddingServer is being deleted
| + + +#### api.v1alpha1.EmbeddingServerSpec + + + +EmbeddingServerSpec defines the desired state of EmbeddingServer + + + +_Appears in:_ +- [api.v1alpha1.EmbeddingServer](#apiv1alpha1embeddingserver) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `model` _string_ | Model is the HuggingFace embedding model to use (e.g., "sentence-transformers/all-MiniLM-L6-v2") | | Required: \{\}
| +| `hfTokenSecretRef` _[api.v1alpha1.SecretKeyRef](#apiv1alpha1secretkeyref)_ | HFTokenSecretRef is a reference to a Kubernetes Secret containing the huggingface token.
If provided, the secret value will be provided to the embedding server for authentication with huggingface. | | | +| `image` _string_ | Image is the container image for huggingface-embedding-inference | ghcr.io/huggingface/text-embeddings-inference:latest | Required: \{\}
| +| `imagePullPolicy` _string_ | ImagePullPolicy defines the pull policy for the container image | IfNotPresent | Enum: [Always Never IfNotPresent]
| +| `port` _integer_ | Port is the port to expose the embedding service on | 8080 | Maximum: 65535
Minimum: 1
| +| `args` _string array_ | Args are additional arguments to pass to the embedding inference server | | | +| `env` _[api.v1alpha1.EnvVar](#apiv1alpha1envvar) array_ | Env are environment variables to set in the container | | | +| `resources` _[api.v1alpha1.ResourceRequirements](#apiv1alpha1resourcerequirements)_ | Resources defines compute resources for the embedding server | | | +| `modelCache` _[api.v1alpha1.ModelCacheConfig](#apiv1alpha1modelcacheconfig)_ | ModelCache configures persistent storage for downloaded models
When enabled, models are cached in a PVC and reused across pod restarts | | | +| `podTemplateSpec` _[RawExtension](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#rawextension-runtime-pkg)_ | PodTemplateSpec allows customizing the pod (node selection, tolerations, etc.)
This field accepts a PodTemplateSpec object as JSON/YAML.
Note that to modify the specific container the embedding server runs in, you must specify
the 'embedding' container name in the PodTemplateSpec. | | Type: object
| +| `resourceOverrides` _[api.v1alpha1.EmbeddingResourceOverrides](#apiv1alpha1embeddingresourceoverrides)_ | ResourceOverrides allows overriding annotations and labels for resources created by the operator | | | +| `replicas` _integer_ | Replicas is the number of embedding server replicas to run | 1 | Minimum: 1
| + + +#### api.v1alpha1.EmbeddingServerStatus + + + +EmbeddingServerStatus defines the observed state of EmbeddingServer + + + +_Appears in:_ +- [api.v1alpha1.EmbeddingServer](#apiv1alpha1embeddingserver) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `conditions` _[Condition](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.27/#condition-v1-meta) array_ | Conditions represent the latest available observations of the EmbeddingServer's state | | | +| `phase` _[api.v1alpha1.EmbeddingServerPhase](#apiv1alpha1embeddingserverphase)_ | Phase is the current phase of the EmbeddingServer | | Enum: [Pending Downloading Running Failed Terminating]
| +| `message` _string_ | Message provides additional information about the current phase | | | +| `url` _string_ | URL is the URL where the embedding service can be accessed | | | +| `readyReplicas` _integer_ | ReadyReplicas is the number of ready replicas | | | +| `observedGeneration` _integer_ | ObservedGeneration reflects the generation most recently observed by the controller | | | + + +#### api.v1alpha1.EmbeddingStatefulSetOverrides + + + +EmbeddingStatefulSetOverrides defines overrides specific to the embedding statefulset + + + +_Appears in:_ +- [api.v1alpha1.EmbeddingResourceOverrides](#apiv1alpha1embeddingresourceoverrides) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `annotations` _object (keys:string, values:string)_ | Annotations to add or override on the resource | | | +| `labels` _object (keys:string, values:string)_ | Labels to add or override on the resource | | | +| `podTemplateMetadataOverrides` _[api.v1alpha1.ResourceMetadataOverrides](#apiv1alpha1resourcemetadataoverrides)_ | PodTemplateMetadataOverrides defines metadata overrides for the pod template | | | + + #### api.v1alpha1.EnvVar @@ -857,6 +1006,7 @@ EnvVar represents an environment variable in a container _Appears in:_ +- [api.v1alpha1.EmbeddingServerSpec](#apiv1alpha1embeddingserverspec) - [api.v1alpha1.MCPServerSpec](#apiv1alpha1mcpserverspec) - [api.v1alpha1.ProxyDeploymentOverrides](#apiv1alpha1proxydeploymentoverrides) @@ -1770,6 +1920,25 @@ _Appears in:_ | `referencingServers` _string array_ | ReferencingServers is a list of MCPServer resources that reference this MCPToolConfig
This helps track which servers need to be reconciled when this config changes | | | +#### api.v1alpha1.ModelCacheConfig + + + +ModelCacheConfig configures persistent storage for model caching + + + +_Appears in:_ +- [api.v1alpha1.EmbeddingServerSpec](#apiv1alpha1embeddingserverspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `enabled` _boolean_ | Enabled controls whether model caching is enabled | true | | +| `storageClassName` _string_ | StorageClassName is the storage class to use for the PVC
If not specified, uses the cluster's default storage class | | | +| `size` _string_ | Size is the size of the PVC for model caching (e.g., "10Gi") | 10Gi | | +| `accessMode` _string_ | AccessMode is the access mode for the PVC | ReadWriteOnce | Enum: [ReadWriteOnce ReadWriteMany ReadOnlyMany]
| + + #### api.v1alpha1.NameFilter @@ -2032,6 +2201,8 @@ ResourceMetadataOverrides defines metadata overrides for a resource _Appears in:_ +- [api.v1alpha1.EmbeddingResourceOverrides](#apiv1alpha1embeddingresourceoverrides) +- [api.v1alpha1.EmbeddingStatefulSetOverrides](#apiv1alpha1embeddingstatefulsetoverrides) - [api.v1alpha1.ProxyDeploymentOverrides](#apiv1alpha1proxydeploymentoverrides) - [api.v1alpha1.ResourceOverrides](#apiv1alpha1resourceoverrides) @@ -2068,6 +2239,7 @@ ResourceRequirements describes the compute resource requirements _Appears in:_ +- [api.v1alpha1.EmbeddingServerSpec](#apiv1alpha1embeddingserverspec) - [api.v1alpha1.MCPRemoteProxySpec](#apiv1alpha1mcpremoteproxyspec) - [api.v1alpha1.MCPServerSpec](#apiv1alpha1mcpserverspec) @@ -2087,6 +2259,7 @@ SecretKeyRef is a reference to a key within a Secret _Appears in:_ - [api.v1alpha1.BearerTokenConfig](#apiv1alpha1bearertokenconfig) +- [api.v1alpha1.EmbeddingServerSpec](#apiv1alpha1embeddingserverspec) - [api.v1alpha1.HeaderInjectionConfig](#apiv1alpha1headerinjectionconfig) - [api.v1alpha1.InlineOIDCConfig](#apiv1alpha1inlineoidcconfig) - [api.v1alpha1.TokenExchangeConfig](#apiv1alpha1tokenexchangeconfig) diff --git a/examples/operator/embedding-servers/README.md b/examples/operator/embedding-servers/README.md new file mode 100644 index 0000000000..ffa22bde23 --- /dev/null +++ b/examples/operator/embedding-servers/README.md @@ -0,0 +1,234 @@ +# EmbeddingServer Examples + +This directory contains example configurations for deploying HuggingFace embedding inference servers using the EmbeddingServer custom resource. + +## Overview + +The EmbeddingServer CRD allows you to deploy and manage HuggingFace Text Embeddings Inference (TEI) servers in Kubernetes. These servers provide high-performance embedding generation for various NLP tasks. + +## Examples + +### 1. 
Basic Embedding Server + +File: `basic-embedding.yaml` + +A minimal configuration that deploys an embedding server with default settings: +- Uses `sentence-transformers/all-MiniLM-L6-v2` model +- Single replica +- Default port (8080) +- No persistent storage + +```bash +kubectl apply -f basic-embedding.yaml +``` + +### 2. Embedding with Model Cache + +File: `embedding-with-cache.yaml` + +Configures persistent storage for downloaded models: +- Model cache enabled with 10Gi PVC +- Resource limits specified +- Environment variables configured +- Faster restarts after initial model download + +```bash +kubectl apply -f embedding-with-cache.yaml +``` + +### 3. Embedding with Group Association + +File: `embedding-with-group.yaml` + +Shows how to organize embeddings using MCPGroup: +- Creates an MCPGroup named `ml-services` +- Associates the embedding server with the group +- Enables tracking and organization of related resources + +```bash +kubectl apply -f embedding-with-group.yaml +``` + +### 4. Advanced Configuration + +File: `embedding-advanced.yaml` + +Demonstrates all available features: +- High availability with 2 replicas +- Custom arguments and environment variables +- Persistent model caching with custom storage class +- PodTemplateSpec for advanced pod customization: + - Node selection + - Tolerations + - Affinity rules + - Security contexts +- Resource overrides for metadata + +```bash +kubectl apply -f embedding-advanced.yaml +``` + +## Supported Models + +EmbeddingServer supports any HuggingFace model compatible with Text Embeddings Inference. 
Popular choices include: + +- `sentence-transformers/all-MiniLM-L6-v2` - Fast, lightweight (384 dimensions) +- `sentence-transformers/all-mpnet-base-v2` - Good balance (768 dimensions) +- `BAAI/bge-large-en-v1.5` - High quality (1024 dimensions) +- `intfloat/e5-large-v2` - Instruction-based embeddings +- `thenlper/gte-large` - General text embeddings + +## Accessing the Embedding Service + +After deployment, the embedding service is accessible at: + +``` +http://<name>.<namespace>.svc.cluster.local:<port> +``` + +For example, with `basic-embedding` in the `toolhive-system` namespace: + +``` +http://basic-embedding.toolhive-system.svc.cluster.local:8080 +``` + +### Using the Embedding Service + +Generate embeddings using the REST API: + +```bash +curl -X POST \ + http://basic-embedding.toolhive-system.svc.cluster.local:8080/embed \ + -H 'Content-Type: application/json' \ + -d '{"inputs": "Hello, world!"}' +``` + +## Configuration Options + +### Required Fields + +- `spec.model`: HuggingFace model identifier + +### Optional Fields + +- `spec.image`: Container image (default: `ghcr.io/huggingface/text-embeddings-inference:latest`) +- `spec.port`: Service port (default: 8080) +- `spec.replicas`: Number of replicas (default: 1) +- `spec.args`: Additional arguments for the embedding server +- `spec.env`: Environment variables +- `spec.resources`: CPU and memory limits/requests +- `spec.modelCache`: Persistent volume configuration for model caching +- `spec.podTemplateSpec`: Advanced pod customization +- `spec.resourceOverrides`: Metadata overrides for created resources +- `spec.groupRef`: Reference to an MCPGroup + +## Model Caching + +Enabling model caching provides several benefits: + +1. **Faster Restarts**: Models are downloaded once and cached +2. **Reduced Network Usage**: No repeated downloads +3. 
**Improved Reliability**: Not dependent on external network for restarts + +Configuration: + +```yaml +spec: + modelCache: + enabled: true + size: "10Gi" # Adjust based on model size + accessMode: "ReadWriteOnce" + storageClassName: "fast-ssd" # Optional +``` + +## Resource Planning + +### CPU and Memory + +Recommended resources based on model size: + +| Model Type | CPU Request | CPU Limit | Memory Request | Memory Limit | +|------------|-------------|-----------|----------------|--------------| +| Small (< 500MB) | 500m | 2000m | 1Gi | 4Gi | +| Medium (500MB-2GB) | 1000m | 4000m | 2Gi | 8Gi | +| Large (> 2GB) | 2000m | 8000m | 4Gi | 16Gi | + +### Storage + +Model sizes vary significantly. Check the HuggingFace model page for size information: + +- `all-MiniLM-L6-v2`: ~90MB +- `all-mpnet-base-v2`: ~420MB +- `bge-large-en-v1.5`: ~1.3GB + +Recommended PVC sizes: +- Small models: 5Gi +- Medium models: 10Gi +- Large models: 20Gi+ + +## Monitoring + +The embedding server exposes health endpoints: + +- `/health`: Health check endpoint (used by Kubernetes probes) +- `/metrics`: Prometheus metrics (if enabled) + +## Troubleshooting + +### Model Download Issues + +If the EmbeddingServer is stuck in the `Downloading` phase: + +1. Check pod logs: + ```bash + kubectl logs <pod-name> -n toolhive-system + ``` + +2. Verify network connectivity to HuggingFace Hub + +3. Check that the model exists and is accessible + +### PVC Binding Issues + +If the PVC is not binding: + +1. Check storage class availability: + ```bash + kubectl get storageclass + ``` + +2. Verify PVC status: + ```bash + kubectl get pvc -n toolhive-system + ``` + +3. Check PV availability or dynamic provisioning + +### Resource Constraints + +If pods are pending due to insufficient resources: + +1. Check node resources: + ```bash + kubectl top nodes + ``` + +2. Adjust resource requests in the EmbeddingServer spec + +3. Consider node scaling or resource optimization + +## Best Practices + +1. 
**Enable Model Caching**: Always enable caching for production deployments +2. **Set Resource Limits**: Prevent resource contention with appropriate limits +3. **Use Groups**: Organize related embeddings with MCPGroup +4. **Monitor Performance**: Use Prometheus metrics for monitoring +5. **Plan Storage**: Allocate sufficient PVC size for your models +6. **Test Before Production**: Validate configuration in non-production first +7. **Version Pins**: Use specific image tags rather than `:latest` for production + +## Additional Resources + +- [HuggingFace Text Embeddings Inference](https://github.com/huggingface/text-embeddings-inference) +- [ToolHive Documentation](https://docs.toolhive.dev) +- [MCPGroup Documentation](../virtual-mcps/README.md) diff --git a/examples/operator/embedding-servers/basic-embedding.yaml b/examples/operator/embedding-servers/basic-embedding.yaml new file mode 100644 index 0000000000..c4c2f01093 --- /dev/null +++ b/examples/operator/embedding-servers/basic-embedding.yaml @@ -0,0 +1,20 @@ +# Basic EmbeddingServer example with minimal configuration +# This creates an embedding server using the default text-embeddings-inference image +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: basic-embedding + namespace: toolhive-system +spec: + # Required: HuggingFace model to use + model: "sentence-transformers/all-MiniLM-L6-v2" + + # Optional: Container image (defaults to ghcr.io/huggingface/text-embeddings-inference:latest) + image: "text-embeddings-inference:latest" + imagePullPolicy: IfNotPresent + + # Optional: Port to expose (defaults to 8080) + port: 8080 + + # Optional: Number of replicas (defaults to 1) + replicas: 1 diff --git a/examples/operator/embedding-servers/embedding-advanced.yaml b/examples/operator/embedding-servers/embedding-advanced.yaml new file mode 100644 index 0000000000..8c01b5858d --- /dev/null +++ b/examples/operator/embedding-servers/embedding-advanced.yaml @@ -0,0 +1,108 @@ +# Advanced 
EmbeddingServer configuration with all features +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: advanced-embedding + namespace: toolhive-system +spec: + # Model configuration + model: "sentence-transformers/all-MiniLM-L6-v2" + image: "text-embeddings-inference:latest" + port: 8080 + replicas: 2 + + # HuggingFace authentication token (optional) + # Reference a Kubernetes Secret containing the HuggingFace token for accessing private models + # Create the secret with: kubectl create secret generic hf-token --from-literal=token=hf_xxxxx + hfTokenSecretRef: + name: hf-token + key: token + + # Additional arguments to pass to the embedding server + args: + - "--max-concurrent-requests" + - "512" + - "--max-batch-tokens" + - "32768" + + # Environment variables + env: + - name: RUST_LOG + value: "info" + - name: MAX_CLIENT_BATCH_SIZE + value: "32" + + # Model caching + modelCache: + enabled: true + size: "20Gi" + accessMode: "ReadWriteOnce" + storageClassName: "fast-ssd" + + # Resource requirements + resources: + limits: + cpu: "4000m" + memory: "8Gi" + requests: + cpu: "2000m" + memory: "4Gi" + + # PodTemplateSpec for advanced pod customization + podTemplateSpec: + metadata: + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "8080" + spec: + # Node selection + nodeSelector: + workload: ml-inference + # Tolerations for dedicated nodes + tolerations: + - key: "ml-workload" + operator: "Equal" + value: "true" + effect: "NoSchedule" + # Affinity rules + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: In + values: + - mcpembedding + topologyKey: kubernetes.io/hostname + # Security context + securityContext: + runAsNonRoot: true + runAsUser: 1000 + fsGroup: 1000 + # Container-specific overrides + containers: + - name: embedding + securityContext: + allowPrivilegeEscalation: false 
+ capabilities: + drop: + - ALL + + # Resource overrides for metadata + resourceOverrides: + deployment: + annotations: + description: "Advanced embedding server with HA configuration" + podTemplateMetadataOverrides: + labels: + app.custom: "ml-embedding" + version: "v1" + service: + annotations: + service.beta.kubernetes.io/aws-load-balancer-type: "nlb" + persistentVolumeClaim: + annotations: + volume.beta.kubernetes.io/storage-class: "fast-ssd" diff --git a/examples/operator/embedding-servers/embedding-with-cache.yaml b/examples/operator/embedding-servers/embedding-with-cache.yaml new file mode 100644 index 0000000000..6595f69f01 --- /dev/null +++ b/examples/operator/embedding-servers/embedding-with-cache.yaml @@ -0,0 +1,42 @@ +# EmbeddingServer with persistent model caching +# This configuration caches downloaded models in a PVC for faster restarts +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: embedding-with-cache + namespace: toolhive-system +spec: + # Model to use + model: "sentence-transformers/all-MiniLM-L6-v2" + + # Container image + image: "text-embeddings-inference:latest" + + # Port configuration + port: 8080 + + # Enable model caching with PVC + modelCache: + enabled: true + # Size of the PVC for model storage + size: "10Gi" + # Access mode for the PVC + accessMode: "ReadWriteOnce" + # Optional: Specify storage class name + # storageClassName: "fast-ssd" + + # Resource requirements + resources: + limits: + cpu: "2000m" + memory: "4Gi" + requests: + cpu: "1000m" + memory: "2Gi" + + # Environment variables + env: + - name: RUST_LOG + value: "info" + - name: MAX_BATCH_TOKENS + value: "16384" diff --git a/test/e2e/chainsaw/operator/multi-tenancy/setup/assert-rbac-clusterrole.yaml b/test/e2e/chainsaw/operator/multi-tenancy/setup/assert-rbac-clusterrole.yaml index feccbeb749..97f45f2407 100644 --- a/test/e2e/chainsaw/operator/multi-tenancy/setup/assert-rbac-clusterrole.yaml +++ 
b/test/e2e/chainsaw/operator/multi-tenancy/setup/assert-rbac-clusterrole.yaml @@ -8,6 +8,7 @@ rules: - "" resources: - configmaps + - persistentvolumeclaims - secrets - serviceaccounts verbs: @@ -121,6 +122,7 @@ rules: - apiGroups: - toolhive.stacklok.dev resources: + - embeddingservers - mcpexternalauthconfigs - mcpgroups - mcpregistries @@ -139,6 +141,7 @@ rules: - apiGroups: - toolhive.stacklok.dev resources: + - embeddingservers/finalizers - mcpexternalauthconfigs/finalizers - mcpgroups/finalizers - mcpregistries/finalizers @@ -149,6 +152,7 @@ rules: - apiGroups: - toolhive.stacklok.dev resources: + - embeddingservers/status - mcpexternalauthconfigs/status - mcpgroups/status - mcpregistries/status diff --git a/test/e2e/chainsaw/operator/multi-tenancy/setup/chainsaw-test.yaml b/test/e2e/chainsaw/operator/multi-tenancy/setup/chainsaw-test.yaml index ecad301c38..4aabcf830a 100644 --- a/test/e2e/chainsaw/operator/multi-tenancy/setup/chainsaw-test.yaml +++ b/test/e2e/chainsaw/operator/multi-tenancy/setup/chainsaw-test.yaml @@ -41,7 +41,7 @@ spec: - --set - operator.rbac.scope=namespace - --set - - operator.rbac.allowedNamespaces={toolhive-system,test-namespace} + - operator.rbac.allowedNamespaces={toolhive-system,test-namespace,toolhive-test-ns-1,toolhive-test-ns-2} - assert: file: assert-operator-ready.yaml - assert: diff --git a/test/e2e/chainsaw/operator/multi-tenancy/setup/namespace.yaml b/test/e2e/chainsaw/operator/multi-tenancy/setup/namespace.yaml index 10dfe35520..1dad25487e 100644 --- a/test/e2e/chainsaw/operator/multi-tenancy/setup/namespace.yaml +++ b/test/e2e/chainsaw/operator/multi-tenancy/setup/namespace.yaml @@ -1,4 +1,14 @@ apiVersion: v1 kind: Namespace metadata: - name: test-namespace \ No newline at end of file + name: test-namespace +--- +apiVersion: v1 +kind: Namespace +metadata: + name: toolhive-test-ns-1 +--- +apiVersion: v1 +kind: Namespace +metadata: + name: toolhive-test-ns-2 \ No newline at end of file diff --git 
a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-deployment-ns1-running.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-deployment-ns1-running.yaml new file mode 100644 index 0000000000..a555c28e15 --- /dev/null +++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-deployment-ns1-running.yaml @@ -0,0 +1,7 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: mt-embedding + namespace: toolhive-test-ns-1 +status: + replicas: 1 diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-deployment-ns2-running.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-deployment-ns2-running.yaml new file mode 100644 index 0000000000..4cf320a779 --- /dev/null +++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-deployment-ns2-running.yaml @@ -0,0 +1,7 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: mt-embedding + namespace: toolhive-test-ns-2 +status: + replicas: 1 diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-embeddingserver-ns1-running.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-embeddingserver-ns1-running.yaml new file mode 100644 index 0000000000..ca17b4bb09 --- /dev/null +++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-embeddingserver-ns1-running.yaml @@ -0,0 +1,7 @@ +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: mt-embedding + namespace: toolhive-test-ns-1 +status: + (contains(['Downloading', 'Running'], phase)): true diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-embeddingserver-ns2-running.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-embeddingserver-ns2-running.yaml new file mode 100644 index 
0000000000..a35c2374c1 --- /dev/null +++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-embeddingserver-ns2-running.yaml @@ -0,0 +1,7 @@ +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: mt-embedding + namespace: toolhive-test-ns-2 +status: + (contains(['Downloading', 'Running'], phase)): true diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-service-ns1-created.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-service-ns1-created.yaml new file mode 100644 index 0000000000..3f5f25ab88 --- /dev/null +++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-service-ns1-created.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Service +metadata: + name: mt-embedding + namespace: toolhive-test-ns-1 +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: 8080 diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-service-ns2-created.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-service-ns2-created.yaml new file mode 100644 index 0000000000..3a74de38e3 --- /dev/null +++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/assert-service-ns2-created.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Service +metadata: + name: mt-embedding + namespace: toolhive-test-ns-2 +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: 8080 diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/chainsaw-test.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/chainsaw-test.yaml new file mode 100644 index 0000000000..2815d0c14d --- /dev/null +++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/chainsaw-test.yaml @@ -0,0 +1,182 @@ +apiVersion: chainsaw.kyverno.io/v1alpha1 +kind: Test +metadata: + name: mt-embeddingserver +spec: + 
description: Tests EmbeddingServer in multi-tenancy mode across namespaces + timeouts: + apply: 30s + assert: 120s + cleanup: 30s + exec: 300s + template: true + bindings: + - name: testPrefix + value: "mt-embedding" + - name: namespace1 + value: "toolhive-test-ns-1" + - name: namespace2 + value: "toolhive-test-ns-2" + steps: + - name: verify-operator + description: Ensure operator is ready before testing + try: + - assert: + file: ../../setup/assert-operator-ready.yaml + + - name: create-namespaces + description: Create test namespaces for multi-tenancy testing + try: + - apply: + file: namespace-1.yaml + - apply: + file: namespace-2.yaml + - assert: + file: namespace-1.yaml + - assert: + file: namespace-2.yaml + + - name: deploy-embeddingserver-ns1 + description: Deploy EmbeddingServer in namespace 1 + try: + - apply: + file: embeddingserver-ns1.yaml + - assert: + file: embeddingserver-ns1.yaml + - assert: + file: assert-embeddingserver-ns1-running.yaml + - assert: + file: assert-deployment-ns1-running.yaml + - assert: + file: assert-service-ns1-created.yaml + + - name: deploy-embeddingserver-ns2 + description: Deploy EmbeddingServer in namespace 2 + try: + - apply: + file: embeddingserver-ns2.yaml + - assert: + file: embeddingserver-ns2.yaml + - assert: + file: assert-embeddingserver-ns2-running.yaml + - assert: + file: assert-deployment-ns2-running.yaml + - assert: + file: assert-service-ns2-created.yaml + + - name: verify-isolation + description: Verify that EmbeddingServers in different namespaces are isolated + try: + - script: + env: + - name: embeddingServerName + value: ($testPrefix) + - name: ns1 + value: ($namespace1) + - name: ns2 + value: ($namespace2) + content: | + echo "Verifying multi-tenancy isolation..." + + # Verify EmbeddingServer exists in namespace 1 + if ! 
kubectl get embeddingserver $embeddingServerName -n $ns1 >/dev/null 2>&1; then + echo "EmbeddingServer not found in namespace 1" + exit 1 + fi + echo "✓ EmbeddingServer found in namespace 1" + + # Verify EmbeddingServer exists in namespace 2 + if ! kubectl get embeddingserver $embeddingServerName -n $ns2 >/dev/null 2>&1; then + echo "EmbeddingServer not found in namespace 2" + exit 1 + fi + echo "✓ EmbeddingServer found in namespace 2" + + # Verify statefulsets are in separate namespaces + STATEFULSET_NAME="$embeddingServerName" + + NS1_STATEFULSET=$(kubectl get statefulset $STATEFULSET_NAME -n $ns1 -o name 2>/dev/null || echo "") + NS2_STATEFULSET=$(kubectl get statefulset $STATEFULSET_NAME -n $ns2 -o name 2>/dev/null || echo "") + + if [ -z "$NS1_STATEFULSET" ]; then + echo "StatefulSet not found in namespace 1" + exit 1 + fi + echo "✓ StatefulSet found in namespace 1" + + if [ -z "$NS2_STATEFULSET" ]; then + echo "StatefulSet not found in namespace 2" + exit 1 + fi + echo "✓ StatefulSet found in namespace 2" + + # Verify services are in separate namespaces + SERVICE_NAME="$embeddingServerName" + + NS1_SERVICE=$(kubectl get svc $SERVICE_NAME -n $ns1 -o name 2>/dev/null || echo "") + NS2_SERVICE=$(kubectl get svc $SERVICE_NAME -n $ns2 -o name 2>/dev/null || echo "") + + if [ -z "$NS1_SERVICE" ]; then + echo "Service not found in namespace 1" + exit 1 + fi + echo "✓ Service found in namespace 1" + + if [ -z "$NS2_SERVICE" ]; then + echo "Service not found in namespace 2" + exit 1 + fi + echo "✓ Service found in namespace 2" + + # Get ClusterIPs to verify they are different + NS1_CLUSTERIP=$(kubectl get svc $SERVICE_NAME -n $ns1 -o jsonpath='{.spec.clusterIP}') + NS2_CLUSTERIP=$(kubectl get svc $SERVICE_NAME -n $ns2 -o jsonpath='{.spec.clusterIP}') + + echo "Namespace 1 ClusterIP: $NS1_CLUSTERIP" + echo "Namespace 2 ClusterIP: $NS2_CLUSTERIP" + + if [ "$NS1_CLUSTERIP" = "$NS2_CLUSTERIP" ]; then + echo "Services have the same ClusterIP - isolation may be compromised" 
+ exit 1 + fi + echo "✓ Services have different ClusterIPs" + + echo "✅ Multi-tenancy isolation verified!" + exit 0 + + - name: test-embedding-endpoints + description: Test both embedding server endpoints + try: + - script: + env: + - name: embeddingServerName + value: ($testPrefix) + - name: ns1 + value: ($namespace1) + - name: ns2 + value: ($namespace2) + content: | + echo "Testing embedding server endpoints in both namespaces..." + + SERVICE_NAME="$embeddingServerName" + + # Test namespace 1 + echo "Testing namespace 1..." + NS1_CLUSTERIP=$(kubectl get svc $SERVICE_NAME -n $ns1 -o jsonpath='{.spec.clusterIP}') + + kubectl run test-curl-ns1-$RANDOM --image=curlimages/curl:latest --rm -i --restart=Never -n $ns1 -- \ + curl -s -o /dev/null -w "%{http_code}" http://$NS1_CLUSTERIP:8080/health || true + + echo "✓ Namespace 1 endpoint test completed" + + # Test namespace 2 + echo "Testing namespace 2..." + NS2_CLUSTERIP=$(kubectl get svc $SERVICE_NAME -n $ns2 -o jsonpath='{.spec.clusterIP}') + + kubectl run test-curl-ns2-$RANDOM --image=curlimages/curl:latest --rm -i --restart=Never -n $ns2 -- \ + curl -s -o /dev/null -w "%{http_code}" http://$NS2_CLUSTERIP:8080/health || true + + echo "✓ Namespace 2 endpoint test completed" + + echo "✅ Multi-tenancy embedding server tests passed!" 
+ exit 0 diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/embeddingserver-ns1.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/embeddingserver-ns1.yaml new file mode 100644 index 0000000000..12e23de197 --- /dev/null +++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/embeddingserver-ns1.yaml @@ -0,0 +1,23 @@ +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: ($testPrefix) + namespace: ($namespace1) +spec: + model: "sentence-transformers/paraphrase-MiniLM-L3-v2" + image: "text-embeddings-inference" + imagePullPolicy: IfNotPresent + port: 8080 + replicas: 1 + resources: + limits: + cpu: "500m" + memory: "512Mi" + requests: + cpu: "250m" + memory: "256Mi" + env: + - name: RUST_LOG + value: "info" + - name: NAMESPACE_IDENTIFIER + value: "namespace-1" diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/embeddingserver-ns2.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/embeddingserver-ns2.yaml new file mode 100644 index 0000000000..260e9532a4 --- /dev/null +++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/embeddingserver-ns2.yaml @@ -0,0 +1,23 @@ +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: ($testPrefix) + namespace: ($namespace2) +spec: + model: "sentence-transformers/paraphrase-MiniLM-L3-v2" + image: "text-embeddings-inference" + imagePullPolicy: IfNotPresent + port: 8080 + replicas: 1 + resources: + limits: + cpu: "500m" + memory: "512Mi" + requests: + cpu: "250m" + memory: "256Mi" + env: + - name: RUST_LOG + value: "info" + - name: NAMESPACE_IDENTIFIER + value: "namespace-2" diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/namespace-1.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/namespace-1.yaml new file mode 100644 index 
0000000000..b170d307d1 --- /dev/null +++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/namespace-1.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: ($namespace1) diff --git a/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/namespace-2.yaml b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/namespace-2.yaml new file mode 100644 index 0000000000..68cf711b48 --- /dev/null +++ b/test/e2e/chainsaw/operator/multi-tenancy/test-scenarios/embeddingserver/namespace-2.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: ($namespace2) diff --git a/test/e2e/chainsaw/operator/single-tenancy/setup/assert-rbac-clusterrole.yaml b/test/e2e/chainsaw/operator/single-tenancy/setup/assert-rbac-clusterrole.yaml index feccbeb749..97f45f2407 100644 --- a/test/e2e/chainsaw/operator/single-tenancy/setup/assert-rbac-clusterrole.yaml +++ b/test/e2e/chainsaw/operator/single-tenancy/setup/assert-rbac-clusterrole.yaml @@ -8,6 +8,7 @@ rules: - "" resources: - configmaps + - persistentvolumeclaims - secrets - serviceaccounts verbs: @@ -121,6 +122,7 @@ rules: - apiGroups: - toolhive.stacklok.dev resources: + - embeddingservers - mcpexternalauthconfigs - mcpgroups - mcpregistries @@ -139,6 +141,7 @@ rules: - apiGroups: - toolhive.stacklok.dev resources: + - embeddingservers/finalizers - mcpexternalauthconfigs/finalizers - mcpgroups/finalizers - mcpregistries/finalizers @@ -149,6 +152,7 @@ rules: - apiGroups: - toolhive.stacklok.dev resources: + - embeddingservers/status - mcpexternalauthconfigs/status - mcpgroups/status - mcpregistries/status diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-deployment-running.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-deployment-running.yaml new file mode 100644 index 0000000000..016a5dad86 --- /dev/null +++ 
b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-deployment-running.yaml @@ -0,0 +1,7 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: st-embedding-basic + namespace: toolhive-system +status: + replicas: 1 diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-embeddingserver-running.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-embeddingserver-running.yaml new file mode 100644 index 0000000000..ff4cf53e37 --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-embeddingserver-running.yaml @@ -0,0 +1,7 @@ +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: st-embedding-basic + namespace: toolhive-system +status: + (contains(['Downloading', 'Running'], phase)): true diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-service-created.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-service-created.yaml new file mode 100644 index 0000000000..bd590bb88e --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/assert-service-created.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Service +metadata: + name: st-embedding-basic + namespace: toolhive-system +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: 8080 diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/chainsaw-test.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/chainsaw-test.yaml new file mode 100644 index 0000000000..aeba429463 --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/chainsaw-test.yaml @@ -0,0 +1,69 @@ +apiVersion: chainsaw.kyverno.io/v1alpha1 +kind: Test +metadata: + name: st-embeddingserver-basic +spec: + 
description: Deploys basic EmbeddingServer and verifies it's running + timeouts: + apply: 30s + assert: 120s + cleanup: 30s + exec: 300s + template: true + bindings: + - name: testPrefix + value: "st-embedding-basic" + steps: + - name: verify-operator + description: Ensure operator is ready before testing + try: + - assert: + file: ../../../setup/assert-operator-ready.yaml + - name: deploy-embeddingserver + description: Deploy a basic EmbeddingServer instance and verify it's ready + try: + - apply: + file: embeddingserver.yaml + - assert: + file: embeddingserver.yaml + - assert: + file: assert-embeddingserver-running.yaml + - assert: + file: assert-deployment-running.yaml + - assert: + file: assert-service-created.yaml + + - name: test-embedding-endpoint + description: Test the embedding server endpoint + try: + - script: + env: + - name: embeddingServerName + value: ($testPrefix) + content: | + # Resolve the Service (named after the EmbeddingServer) and probe its health endpoint + echo "Testing embedding server: $embeddingServerName" + + # Get the service ClusterIP + SERVICE_NAME="$embeddingServerName" + CLUSTER_IP=$(kubectl get svc $SERVICE_NAME -n toolhive-system -o jsonpath='{.spec.clusterIP}' 2>/dev/null || echo "") + + if [ -z "$CLUSTER_IP" ]; then + echo "Service not found or does not have ClusterIP" + kubectl describe svc $SERVICE_NAME -n toolhive-system + exit 1 + fi + + echo "Service ClusterIP: $CLUSTER_IP" + + # Wait for the statefulset pod to be Ready (readyReplicas), not merely created (replicas); abort if it never becomes Ready + echo "Waiting for statefulset to be ready..." + kubectl wait --for=jsonpath='{.status.readyReplicas}'=1 --timeout=120s statefulset/$embeddingServerName -n toolhive-system || exit 1 + + # Probe /health from a throwaway curl pod; -f plus bounded retries make an unreachable or non-2xx endpoint fail the test instead of being ignored + echo "Testing health endpoint..." + kubectl run test-curl-$RANDOM --image=curlimages/curl:latest --rm -i --restart=Never -n toolhive-system -- \ + curl -sf --retry 10 --retry-delay 3 --retry-connrefused -o /dev/null -w "%{http_code}" http://$CLUSTER_IP:8080/health || { echo "Health check failed"; exit 1; } + + echo "✅ Basic embedding server test passed!" 
+ exit 0 diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/embeddingserver.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/embeddingserver.yaml new file mode 100644 index 0000000000..97eb1eada1 --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/basic/embeddingserver.yaml @@ -0,0 +1,22 @@ +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: ($testPrefix) + namespace: toolhive-system +spec: + # Use a very lightweight model for testing (17.4M params); the image tag is pinned so runs are reproducible with IfNotPresent + model: "sentence-transformers/paraphrase-MiniLM-L3-v2" + image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" + imagePullPolicy: IfNotPresent + port: 8080 + replicas: 1 + resources: + limits: + cpu: "500m" + memory: "512Mi" + requests: + cpu: "250m" + memory: "256Mi" + env: + - name: RUST_LOG + value: "info" diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-deployment-running.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-deployment-running.yaml new file mode 100644 index 0000000000..addf6ca69a --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-deployment-running.yaml @@ -0,0 +1,7 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: st-embedding-lifecycle + namespace: toolhive-system +status: + replicas: 1 diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-deployment-scaled.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-deployment-scaled.yaml new file mode 100644 index 0000000000..f20167d663 --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-deployment-scaled.yaml @@ -0,0 +1,7 @@ +apiVersion: apps/v1 
+kind: StatefulSet +metadata: + name: st-embedding-lifecycle + namespace: toolhive-system +status: + replicas: 2 diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-embeddingserver-running.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-embeddingserver-running.yaml new file mode 100644 index 0000000000..0e47d1c7a9 --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-embeddingserver-running.yaml @@ -0,0 +1,7 @@ +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: st-embedding-lifecycle + namespace: toolhive-system +status: + (contains(['Downloading', 'Running'], phase)): true diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-embeddingserver-scaled.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-embeddingserver-scaled.yaml new file mode 100644 index 0000000000..6e3da079c4 --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-embeddingserver-scaled.yaml @@ -0,0 +1,7 @@ +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: st-embedding-lifecycle + namespace: toolhive-system +spec: + replicas: 2 diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-service-created.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-service-created.yaml new file mode 100644 index 0000000000..610e94a7ab --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/assert-service-created.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Service +metadata: + name: st-embedding-lifecycle + namespace: toolhive-system +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: 8080 diff 
--git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/chainsaw-test.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/chainsaw-test.yaml new file mode 100644 index 0000000000..4dc652183c --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/chainsaw-test.yaml @@ -0,0 +1,121 @@ +apiVersion: chainsaw.kyverno.io/v1alpha1 +kind: Test +metadata: + name: st-embeddingserver-lifecycle +spec: + description: Tests EmbeddingServer lifecycle operations (create, update, delete) + timeouts: + apply: 30s + assert: 120s + cleanup: 30s + delete: 60s + exec: 300s + template: true + bindings: + - name: testPrefix + value: "st-embedding-lifecycle" + steps: + - name: verify-operator + description: Ensure operator is ready before testing + try: + - assert: + file: ../../../setup/assert-operator-ready.yaml + + - name: create-embeddingserver + description: Create initial EmbeddingServer + try: + - apply: + file: embeddingserver-initial.yaml + - assert: + file: embeddingserver-initial.yaml + - assert: + file: assert-embeddingserver-running.yaml + - assert: + file: assert-deployment-running.yaml + - assert: + file: assert-service-created.yaml + + - name: update-embeddingserver-env + description: Update EmbeddingServer environment variables + try: + - apply: + file: embeddingserver-updated-env.yaml + - assert: + file: embeddingserver-updated-env.yaml + - script: + env: + - name: embeddingServerName + value: ($testPrefix) + content: | + # Verify the env update reached the statefulset pod template (the statefulset shares the EmbeddingServer's name) + STATEFULSET_NAME="$embeddingServerName" + + # Wait until the statefulset reports 1 replica in .status.replicas (a pod count; this jsonpath does not check readiness) + kubectl wait --for=jsonpath='{.status.replicas}'=1 --timeout=120s statefulset/$STATEFULSET_NAME -n toolhive-system + + # Read MAX_BATCH_TOKENS from the first container of the pod template; an empty string is used if the lookup fails + ENV_VALUE=$(kubectl get statefulset $STATEFULSET_NAME -n toolhive-system -o 
jsonpath='{.spec.template.spec.containers[0].env[?(@.name=="MAX_BATCH_TOKENS")].value}' 2>/dev/null || echo "") + + if [ "$ENV_VALUE" != "16384" ]; then + echo "Environment variable not updated correctly. Expected: 16384, Got: $ENV_VALUE" + kubectl describe statefulset $STATEFULSET_NAME -n toolhive-system + exit 1 + fi + + echo "✓ Environment variable updated successfully" + exit 0 + + - name: delete-embeddingserver + description: Delete EmbeddingServer and verify cleanup + try: + - delete: + ref: + apiVersion: toolhive.stacklok.dev/v1alpha1 + kind: EmbeddingServer + name: ($testPrefix) + namespace: toolhive-system + - script: + env: + - name: embeddingServerName + value: ($testPrefix) + content: | + # Wait for the statefulset and service (both named after the EmbeddingServer) to be cleaned up + STATEFULSET_NAME="$embeddingServerName" + SERVICE_NAME="$embeddingServerName" + + echo "Verifying resource cleanup..." + + # Wait for statefulset to be deleted: poll once per second, up to 30 attempts; any failing "kubectl get" is treated as deleted + timeout=30 + while [ $timeout -gt 0 ]; do + if ! kubectl get statefulset $STATEFULSET_NAME -n toolhive-system 2>/dev/null; then + echo "✓ StatefulSet deleted" + break + fi + sleep 1 + timeout=$((timeout - 1)) + done + + if [ $timeout -eq 0 ]; then + echo "StatefulSet was not deleted within timeout" + exit 1 + fi + + # Wait for service to be deleted: same 30 x 1s polling loop as for the statefulset + timeout=30 + while [ $timeout -gt 0 ]; do + if ! kubectl get svc $SERVICE_NAME -n toolhive-system 2>/dev/null; then + echo "✓ Service deleted" + break + fi + sleep 1 + timeout=$((timeout - 1)) + done + + if [ $timeout -eq 0 ]; then + echo "Service was not deleted within timeout" + exit 1 + fi + + echo "✅ EmbeddingServer lifecycle test passed!" 
+ exit 0 diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-initial.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-initial.yaml new file mode 100644 index 0000000000..da72c25b90 --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-initial.yaml @@ -0,0 +1,21 @@ +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: ($testPrefix) + namespace: toolhive-system +spec: + model: "sentence-transformers/paraphrase-MiniLM-L3-v2" + image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" + imagePullPolicy: IfNotPresent + port: 8080 + replicas: 1 + resources: + limits: + cpu: "500m" + memory: "512Mi" + requests: + cpu: "250m" + memory: "256Mi" + env: + - name: RUST_LOG + value: "info" diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-scaled.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-scaled.yaml new file mode 100644 index 0000000000..48e19545b9 --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-scaled.yaml @@ -0,0 +1,21 @@ +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: ($testPrefix) + namespace: toolhive-system +spec: + model: "sentence-transformers/paraphrase-MiniLM-L3-v2" + image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" + imagePullPolicy: IfNotPresent + port: 8080 + replicas: 2 + resources: + limits: + cpu: "500m" + memory: "512Mi" + requests: + cpu: "250m" + memory: "256Mi" + env: + - name: RUST_LOG + value: "info" diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-updated-env.yaml 
b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-updated-env.yaml new file mode 100644 index 0000000000..4efd73ec44 --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/lifecycle/embeddingserver-updated-env.yaml @@ -0,0 +1,23 @@ +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: ($testPrefix) + namespace: toolhive-system +spec: + model: "sentence-transformers/paraphrase-MiniLM-L3-v2" + image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" + imagePullPolicy: IfNotPresent + port: 8080 + replicas: 1 + resources: + limits: + cpu: "500m" + memory: "512Mi" + requests: + cpu: "250m" + memory: "256Mi" + env: + - name: RUST_LOG + value: "debug" + - name: MAX_BATCH_TOKENS + value: "16384" diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-deployment-running.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-deployment-running.yaml new file mode 100644 index 0000000000..1d9ed74799 --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-deployment-running.yaml @@ -0,0 +1,7 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: st-embedding-cache + namespace: toolhive-system +status: + replicas: 1 diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-embeddingserver-running.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-embeddingserver-running.yaml new file mode 100644 index 0000000000..1bc08dec0a --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-embeddingserver-running.yaml @@ -0,0 +1,7 @@ +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: st-embedding-cache + namespace: toolhive-system 
+status: + (contains(['Downloading', 'Running'], phase)): true diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-pvc-created.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-pvc-created.yaml new file mode 100644 index 0000000000..929e91e5f1 --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-pvc-created.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: model-cache-st-embedding-cache-0 # volumeClaimTemplate-statefulsetName-ordinal, the same name the with-cache test script checks + namespace: toolhive-system +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi +status: + phase: Bound diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-service-created.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-service-created.yaml new file mode 100644 index 0000000000..2d46b96cfa --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/assert-service-created.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Service +metadata: + name: st-embedding-cache + namespace: toolhive-system +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: 8080 diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/chainsaw-test.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/chainsaw-test.yaml new file mode 100644 index 0000000000..e77487a032 --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/chainsaw-test.yaml @@ -0,0 +1,160 @@ +apiVersion: chainsaw.kyverno.io/v1alpha1 +kind: Test +metadata: + name: st-embeddingserver-cache +spec: + description: Deploys EmbeddingServer with model caching and verifies PVC is created + timeouts: + apply: 30s + assert: 120s + cleanup: 30s + exec: 300s + 
template: true + bindings: + - name: testPrefix + value: "st-embedding-cache" + steps: + - name: verify-operator + description: Ensure operator is ready before testing + try: + - assert: + file: ../../../setup/assert-operator-ready.yaml + - name: deploy-embeddingserver-with-cache + description: Deploy EmbeddingServer with model caching enabled + try: + - apply: + file: embeddingserver.yaml + - assert: + file: embeddingserver.yaml + - assert: + file: assert-embeddingserver-running.yaml + - assert: + file: assert-deployment-running.yaml + - assert: + file: assert-service-created.yaml + + - name: verify-model-cache-volume + description: Verify that the PVC is mounted in the statefulset + try: + - script: + env: + - name: embeddingServerName + value: ($testPrefix) + content: | + # The statefulset shares the EmbeddingServer's name + echo "Verifying model cache for embedding server: $embeddingServerName" + + STATEFULSET_NAME="$embeddingServerName" + # StatefulSet PVCs follow the pattern: volumeClaimTemplate-statefulsetName-ordinal + PVC_NAME="model-cache-$embeddingServerName-0" + + # Poll (1s interval, max 60 tries) until the PVC is Bound instead of sleeping a fixed 60 seconds; a missing PVC simply keeps polling + echo "Waiting up to 60 seconds for PVC to provision..." + for i in $(seq 1 60); do [ "$(kubectl get pvc $PVC_NAME -n toolhive-system -o jsonpath='{.status.phase}' 2>/dev/null)" = "Bound" ] && break; sleep 1; done + + # Check if PVC exists and is bound + PVC_STATUS=$(kubectl get pvc $PVC_NAME -n toolhive-system -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound") + + if [ "$PVC_STATUS" != "Bound" ]; then + echo "PVC is not bound. Current status: $PVC_STATUS" + echo "Available PVCs:" + kubectl get pvc -n toolhive-system + exit 1 + fi + + echo "✓ PVC is bound" + + # Check that the statefulset is ready (180s keeps the startup budget the former fixed sleep + 120s wait provided) + if ! kubectl wait --for=jsonpath='{.status.readyReplicas}'=1 --timeout=180s statefulset/$STATEFULSET_NAME -n toolhive-system; then + echo "StatefulSet failed to become ready. Gathering diagnostics..." 
+ echo "StatefulSet status:" + kubectl get statefulset/$STATEFULSET_NAME -n toolhive-system -o yaml + echo "Pod status:" + kubectl get pods -n toolhive-system -l app.kubernetes.io/instance=$STATEFULSET_NAME + echo "Pod describe:" + kubectl describe pods -n toolhive-system -l app.kubernetes.io/instance=$STATEFULSET_NAME + echo "Pod events:" + kubectl get events -n toolhive-system --sort-by='.lastTimestamp' | tail -20 + exit 1 + fi + + echo "✓ StatefulSet is ready" + + # Pick the first Running pod labelled with this instance and inspect /data (presumably the model-cache mount path; confirm against the operator's pod template) + echo "Checking for model files in cache volume..." + POD_NAME=$(kubectl get pods -n toolhive-system -l app.kubernetes.io/instance=$STATEFULSET_NAME --field-selector=status.phase=Running -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || echo "") + + if [ -z "$POD_NAME" ]; then + echo "No running pod found for statefulset" + echo "All pods in namespace:" + kubectl get pods -n toolhive-system -l app.kubernetes.io/instance=$STATEFULSET_NAME + exit 1 + fi + + echo "Checking cache contents in pod: $POD_NAME" + + # Wait for the model download by polling /data for files every 2 seconds until COUNTER reaches MAX_WAIT (pod logs are not inspected here) + echo "Waiting for model to be downloaded..." + MAX_WAIT=60 + COUNTER=0 + MODEL_LOADED=false + + while [ $COUNTER -lt $MAX_WAIT ]; do + # Count regular files under /data inside the pod; "0" is echoed when kubectl exec itself fails + CACHE_CONTENTS=$(kubectl exec -n toolhive-system $POD_NAME -- sh -c 'find /data -type f 2>/dev/null | wc -l' || echo "0") + + if [ "$CACHE_CONTENTS" -gt 0 ]; then + MODEL_LOADED=true + break + fi + + echo "Waiting for model files to appear... ($COUNTER/$MAX_WAIT seconds)" + sleep 2 + COUNTER=$((COUNTER + 2)) + done + + if [ "$MODEL_LOADED" = false ]; then + echo "No model files found in /data after $MAX_WAIT seconds. Cache appears empty." 
+ echo "Listing /data contents:" + kubectl exec -n toolhive-system $POD_NAME -- ls -laR /data || true + echo "Pod logs:" + kubectl logs -n toolhive-system $POD_NAME --tail=50 || true + exit 1 + fi + + echo "✓ Model files found in cache volume" + echo "Cache directory contents:" + kubectl exec -n toolhive-system $POD_NAME -- sh -c 'du -sh /data/* 2>/dev/null' || true + + echo "✅ Model cache verification passed!" + exit 0 + + - name: test-embedding-endpoint + description: Test the embedding server endpoint with cache + try: + - script: + env: + - name: embeddingServerName + value: ($testPrefix) + content: | + # Resolve the Service (named after the EmbeddingServer) and probe its health endpoint + echo "Testing embedding server with cache: $embeddingServerName" + + SERVICE_NAME="$embeddingServerName" + CLUSTER_IP=$(kubectl get svc $SERVICE_NAME -n toolhive-system -o jsonpath='{.spec.clusterIP}' 2>/dev/null || echo "") + + if [ -z "$CLUSTER_IP" ]; then + echo "Service not found or does not have ClusterIP" + kubectl describe svc $SERVICE_NAME -n toolhive-system + exit 1 + fi + + echo "Service ClusterIP: $CLUSTER_IP" + + # Probe /health from a throwaway curl pod; readiness was verified in the previous step, so -f plus bounded retries turn a failing endpoint into a test failure + echo "Testing health endpoint..." + kubectl run test-curl-$RANDOM --image=curlimages/curl:latest --rm -i --restart=Never -n toolhive-system -- \ + curl -sf --retry 10 --retry-delay 3 --retry-connrefused -o /dev/null -w "%{http_code}" http://$CLUSTER_IP:8080/health || { echo "Health check failed"; exit 1; } + + echo "✅ Embedding server with cache test passed!" 
+ exit 0 diff --git a/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/embeddingserver.yaml b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/embeddingserver.yaml new file mode 100644 index 0000000000..28cef57bae --- /dev/null +++ b/test/e2e/chainsaw/operator/single-tenancy/test-scenarios/embeddingserver/with-cache/embeddingserver.yaml @@ -0,0 +1,27 @@ +apiVersion: toolhive.stacklok.dev/v1alpha1 +kind: EmbeddingServer +metadata: + name: ($testPrefix) + namespace: toolhive-system +spec: + # Use a very lightweight model for testing (17.4M params); the image tag is pinned so runs are reproducible with IfNotPresent + model: "sentence-transformers/paraphrase-MiniLM-L3-v2" + image: "ghcr.io/huggingface/text-embeddings-inference:cpu-1.5" + imagePullPolicy: IfNotPresent + port: 8080 + replicas: 1 + # Enable model caching + modelCache: + enabled: true + size: "5Gi" + accessMode: "ReadWriteOnce" + resources: + limits: + cpu: "500m" + memory: "512Mi" + requests: + cpu: "250m" + memory: "256Mi" + env: + - name: RUST_LOG + value: "info"