From 2f10a088b4025067604254f6af8b018889cb302d Mon Sep 17 00:00:00 2001 From: Aravind Nidadavolu Date: Sun, 17 May 2026 16:42:36 -0700 Subject: [PATCH] feat(operator): auto-create KubeRay RBAC for Feast service account When the batch-engine ConfigMap selects the Ray engine in KubeRay mode (type: ray.engine, use_kuberay: true), the Feast service pod uses the CodeFlare SDK to discover RayCluster resources and read mTLS Secrets. Previously the Feast SA had no permissions on either, so the SDK calls returned 403 and users had to apply the Role + RoleBinding by hand before every materialization run. This change makes the operator provision them automatically: - New services/ray_rbac.go reads the batch-engine ConfigMap once per reconcile and, when KubeRay is selected, CreateOrUpdates a namespace-scoped Role + RoleBinding named feast-<name>-kuberay granting the Feast SA: ray.io/rayclusters: get, list, watch core/secrets: get, list, watch, create, update, delete - Both resources are owner-referenced to the FeatureStore so they GC with the CR. When use_kuberay flips back to false (or the batchEngine block is removed), they are deleted on the next reconcile. - The operator's own kubebuilder RBAC markers are widened to match so it can hand those verbs to the Feast SA (k8s RBAC escalation rules require the granter to hold the granted verbs). config/rbac and dist/install.yaml are regenerated accordingly. - New ginkgo suite covers create-on-enable, delete-on-disable, and no-op when batchEngine is absent. - 06-batch-and-jobs.md documents the new auto-RBAC behavior so users know manual setup is no longer required. 
Fixes #6408 Signed-off-by: Aravind Nidadavolu --- .secrets.baseline | 4 +- .../feast-operator/06-batch-and-jobs.md | 27 +++ infra/feast-operator/config/rbac/role.yaml | 10 +- infra/feast-operator/dist/install.yaml | 10 +- .../controller/featurestore_controller.go | 4 +- ...aturestore_controller_kuberay_rbac_test.go | 197 ++++++++++++++++++ .../internal/controller/services/ray_rbac.go | 169 +++++++++++++++ .../internal/controller/services/services.go | 3 + 8 files changed, 419 insertions(+), 5 deletions(-) create mode 100644 infra/feast-operator/internal/controller/featurestore_controller_kuberay_rbac_test.go create mode 100644 infra/feast-operator/internal/controller/services/ray_rbac.go diff --git a/.secrets.baseline b/.secrets.baseline index e0030466f1f..e86a277b0f3 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -1156,7 +1156,7 @@ "filename": "infra/feast-operator/internal/controller/services/services.go", "hashed_secret": "36dc326eb15c7bdd8d91a6b87905bcea20b637d1", "is_verified": false, - "line_number": 179 + "line_number": 182 } ], "infra/feast-operator/internal/controller/services/tls_test.go": [ @@ -1539,5 +1539,5 @@ } ] }, - "generated_at": "2026-05-14T10:20:01Z" + "generated_at": "2026-05-18T04:10:17Z" } diff --git a/docs/how-to-guides/feast-operator/06-batch-and-jobs.md b/docs/how-to-guides/feast-operator/06-batch-and-jobs.md index fd513168c54..39e85d347f7 100644 --- a/docs/how-to-guides/feast-operator/06-batch-and-jobs.md +++ b/docs/how-to-guides/feast-operator/06-batch-and-jobs.md @@ -63,6 +63,33 @@ spec: > For engine-specific YAML options (Spark conf, Ray address, etc.) see the > [Feast SDK — Compute Engine](../reference/compute-engine/) docs. 
+### KubeRay clusters — auto-generated RBAC + +When the batch-engine ConfigMap selects the Ray engine in KubeRay mode: + +```yaml +data: + config: | + type: ray.engine + use_kuberay: true + cluster_name: my-ray-cluster +``` + +the operator creates a namespace-scoped `Role` and `RoleBinding` named +`feast-<name>-kuberay`, owner-referenced to the `FeatureStore`. The Role +grants the Feast service account: + +| API group | Resource | Verbs | +|-----------|----------|-------| +| `ray.io` | `rayclusters` | `get`, `list`, `watch` | +| (core) | `secrets` | `get`, `list`, `watch`, `create`, `update`, `delete` | + +This is what the CodeFlare SDK needs to discover the `RayCluster` and read +the mTLS Secrets used for the Ray client connection. When you flip +`use_kuberay` back to `false` (or remove the `batchEngine` field), the +Role and RoleBinding are deleted on the next reconcile. No manual RBAC +setup is required. + --- ## Scheduled Materialization (`spec.cronJob`) diff --git a/infra/feast-operator/config/rbac/role.yaml b/infra/feast-operator/config/rbac/role.yaml index 0c1bd7be84b..9fdae3aa8e1 100644 --- a/infra/feast-operator/config/rbac/role.yaml +++ b/infra/feast-operator/config/rbac/role.yaml @@ -9,6 +9,7 @@ rules: resources: - configmaps - persistentvolumeclaims + - secrets - serviceaccounts - services verbs: @@ -23,7 +24,6 @@ rules: resources: - namespaces - pods - - secrets verbs: - get - list @@ -124,6 +124,14 @@ rules: - patch - update - watch +- apiGroups: + - ray.io + resources: + - rayclusters + verbs: + - get + - list + - watch - apiGroups: - rbac.authorization.k8s.io resources: diff --git a/infra/feast-operator/dist/install.yaml b/infra/feast-operator/dist/install.yaml index c466442b8e8..2eb21143381 100644 --- a/infra/feast-operator/dist/install.yaml +++ b/infra/feast-operator/dist/install.yaml @@ -20919,6 +20919,7 @@ rules: resources: - configmaps - persistentvolumeclaims + - secrets - serviceaccounts - services verbs: @@ -20933,7 +20934,6 @@ rules: resources: - 
namespaces - pods - - secrets verbs: - get - list @@ -21034,6 +21034,14 @@ rules: - patch - update - watch +- apiGroups: + - ray.io + resources: + - rayclusters + verbs: + - get + - list + - watch - apiGroups: - rbac.authorization.k8s.io resources: diff --git a/infra/feast-operator/internal/controller/featurestore_controller.go b/infra/feast-operator/internal/controller/featurestore_controller.go index ae877447ddb..c57ba6178da 100644 --- a/infra/feast-operator/internal/controller/featurestore_controller.go +++ b/infra/feast-operator/internal/controller/featurestore_controller.go @@ -66,9 +66,11 @@ type FeatureStoreReconciler struct { // +kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;create;update;watch;delete // +kubebuilder:rbac:groups=core,resources=services;configmaps;persistentvolumeclaims;serviceaccounts,verbs=get;list;create;update;watch;delete // +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=roles;rolebindings;clusterroles;clusterrolebindings;subjectaccessreviews,verbs=get;list;create;update;watch;delete -// +kubebuilder:rbac:groups=core,resources=secrets;pods;namespaces,verbs=get;list;watch +// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;delete +// +kubebuilder:rbac:groups=core,resources=pods;namespaces,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=pods/exec,verbs=create // +kubebuilder:rbac:groups=authentication.k8s.io,resources=tokenreviews,verbs=create +// +kubebuilder:rbac:groups=ray.io,resources=rayclusters,verbs=get;list;watch // +kubebuilder:rbac:groups=route.openshift.io,resources=routes,verbs=get;list;create;update;watch;delete // +kubebuilder:rbac:groups=batch,resources=cronjobs,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=autoscaling,resources=horizontalpodautoscalers,verbs=get;list;watch;create;update;patch;delete diff --git a/infra/feast-operator/internal/controller/featurestore_controller_kuberay_rbac_test.go 
b/infra/feast-operator/internal/controller/featurestore_controller_kuberay_rbac_test.go new file mode 100644 index 00000000000..39c13439b4b --- /dev/null +++ b/infra/feast-operator/internal/controller/featurestore_controller_kuberay_rbac_test.go @@ -0,0 +1,197 @@ +/* +Copyright 2024 Feast Community. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controller + +import ( + "context" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + feastdevv1 "github.com/feast-dev/feast/infra/feast-operator/api/v1" + "github.com/feast-dev/feast/infra/feast-operator/internal/controller/services" +) + +var _ = Describe("FeatureStore Controller-KubeRay RBAC", func() { + const ( + batchConfigMapName = "ray-batch-engine" + ) + var pullPolicy = corev1.PullAlways + ctx := context.Background() + + createBatchEngineConfigMap := func(yamlBody string) { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: batchConfigMapName, Namespace: "default"}, + Data: map[string]string{"config": yamlBody}, + } + Expect(k8sClient.Create(ctx, cm)).To(Succeed()) + } + + deleteBatchEngineConfigMap := func() { + cm := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: batchConfigMapName, Namespace: "default"}, + } + _ = k8sClient.Delete(ctx, cm) + } + + 
reconcileOnce := func(name string) { + controllerReconciler := &FeatureStoreReconciler{ + Client: k8sClient, + Scheme: k8sClient.Scheme(), + } + _, err := controllerReconciler.Reconcile(ctx, reconcile.Request{ + NamespacedName: types.NamespacedName{Name: name, Namespace: "default"}, + }) + Expect(err).NotTo(HaveOccurred()) + } + + rbacKey := func(fsName string) types.NamespacedName { + return types.NamespacedName{ + Name: services.GetFeastName(&feastdevv1.FeatureStore{ObjectMeta: metav1.ObjectMeta{Name: fsName}}) + "-kuberay", + Namespace: "default", + } + } + + Context("when batchEngine has type: ray.engine and use_kuberay: true", func() { + const resourceName = "kuberay-rbac-enabled" + + BeforeEach(func() { + createBatchEngineConfigMap("type: ray.engine\nuse_kuberay: true\ncluster_name: my-cluster\n") + resource := createFeatureStoreResource(resourceName, image, pullPolicy, &[]corev1.EnvVar{}, nil) + resource.Spec.BatchEngine = &feastdevv1.BatchEngineConfig{ + ConfigMapRef: &corev1.LocalObjectReference{Name: batchConfigMapName}, + } + Expect(k8sClient.Create(ctx, resource)).To(Succeed()) + }) + + AfterEach(func() { + resource := &feastdevv1.FeatureStore{} + if err := k8sClient.Get(ctx, types.NamespacedName{Name: resourceName, Namespace: "default"}, resource); err == nil { + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + } + deleteBatchEngineConfigMap() + }) + + It("creates a Role with KubeRay rules and a RoleBinding to the Feast SA", func() { + reconcileOnce(resourceName) + + role := &rbacv1.Role{} + Expect(k8sClient.Get(ctx, rbacKey(resourceName), role)).To(Succeed()) + Expect(role.Rules).To(ConsistOf( + rbacv1.PolicyRule{ + APIGroups: []string{"ray.io"}, + Resources: []string{"rayclusters"}, + Verbs: []string{"get", "list", "watch"}, + }, + rbacv1.PolicyRule{ + APIGroups: []string{""}, + Resources: []string{"secrets"}, + Verbs: []string{"get", "list", "watch", "create", "update", "delete"}, + }, + )) + + fs := &feastdevv1.FeatureStore{} + 
Expect(k8sClient.Get(ctx, types.NamespacedName{Name: resourceName, Namespace: "default"}, fs)).To(Succeed()) + Expect(role.OwnerReferences).To(HaveLen(1)) + Expect(role.OwnerReferences[0].UID).To(Equal(fs.UID)) + + binding := &rbacv1.RoleBinding{} + Expect(k8sClient.Get(ctx, rbacKey(resourceName), binding)).To(Succeed()) + Expect(binding.RoleRef).To(Equal(rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "Role", + Name: rbacKey(resourceName).Name, + })) + Expect(binding.Subjects).To(ConsistOf(rbacv1.Subject{ + Kind: rbacv1.ServiceAccountKind, + Name: services.GetFeastName(fs), + Namespace: "default", + })) + Expect(binding.OwnerReferences).To(HaveLen(1)) + Expect(binding.OwnerReferences[0].UID).To(Equal(fs.UID)) + }) + }) + + Context("when batchEngine ConfigMap changes from use_kuberay: true to false", func() { + const resourceName = "kuberay-rbac-disable" + + BeforeEach(func() { + createBatchEngineConfigMap("type: ray.engine\nuse_kuberay: true\n") + resource := createFeatureStoreResource(resourceName, image, pullPolicy, &[]corev1.EnvVar{}, nil) + resource.Spec.BatchEngine = &feastdevv1.BatchEngineConfig{ + ConfigMapRef: &corev1.LocalObjectReference{Name: batchConfigMapName}, + } + Expect(k8sClient.Create(ctx, resource)).To(Succeed()) + }) + + AfterEach(func() { + resource := &feastdevv1.FeatureStore{} + if err := k8sClient.Get(ctx, types.NamespacedName{Name: resourceName, Namespace: "default"}, resource); err == nil { + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + } + deleteBatchEngineConfigMap() + }) + + It("deletes the Role and RoleBinding on the next reconcile", func() { + reconcileOnce(resourceName) + Expect(k8sClient.Get(ctx, rbacKey(resourceName), &rbacv1.Role{})).To(Succeed()) + + cm := &corev1.ConfigMap{} + Expect(k8sClient.Get(ctx, types.NamespacedName{Name: batchConfigMapName, Namespace: "default"}, cm)).To(Succeed()) + cm.Data["config"] = "type: ray.engine\nuse_kuberay: false\n" + Expect(k8sClient.Update(ctx, cm)).To(Succeed()) + + 
reconcileOnce(resourceName) + + err := k8sClient.Get(ctx, rbacKey(resourceName), &rbacv1.Role{}) + Expect(errors.IsNotFound(err)).To(BeTrue(), "expected Role to be deleted, got %v", err) + err = k8sClient.Get(ctx, rbacKey(resourceName), &rbacv1.RoleBinding{}) + Expect(errors.IsNotFound(err)).To(BeTrue(), "expected RoleBinding to be deleted, got %v", err) + }) + }) + + Context("when no batchEngine is configured", func() { + const resourceName = "kuberay-rbac-absent" + + BeforeEach(func() { + resource := createFeatureStoreResource(resourceName, image, pullPolicy, &[]corev1.EnvVar{}, nil) + Expect(k8sClient.Create(ctx, resource)).To(Succeed()) + }) + + AfterEach(func() { + resource := &feastdevv1.FeatureStore{} + if err := k8sClient.Get(ctx, types.NamespacedName{Name: resourceName, Namespace: "default"}, resource); err == nil { + Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) + } + }) + + It("does not create the KubeRay Role or RoleBinding", func() { + reconcileOnce(resourceName) + + err := k8sClient.Get(ctx, rbacKey(resourceName), &rbacv1.Role{}) + Expect(errors.IsNotFound(err)).To(BeTrue(), "expected no Role, got %v", err) + err = k8sClient.Get(ctx, rbacKey(resourceName), &rbacv1.RoleBinding{}) + Expect(errors.IsNotFound(err)).To(BeTrue(), "expected no RoleBinding, got %v", err) + }) + }) +}) diff --git a/infra/feast-operator/internal/controller/services/ray_rbac.go b/infra/feast-operator/internal/controller/services/ray_rbac.go new file mode 100644 index 00000000000..c26263fbf08 --- /dev/null +++ b/infra/feast-operator/internal/controller/services/ray_rbac.go @@ -0,0 +1,169 @@ +/* +Copyright 2024 Feast Community. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package services + +import ( + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// rayEngineType is the Feast Python SDK value for batch_engine.type when the +// Ray compute engine is selected (see RayComputeEngineConfig.type). +const rayEngineType = "ray.engine" + +// kubeRayRBACSuffix is appended to the FeatureStore name to form the namespaced +// Role and RoleBinding that grant the Feast service account access to KubeRay +// resources. +const kubeRayRBACSuffix = "-kuberay" + +// usesKubeRay reports whether the FeatureStore's batch engine is configured to +// connect to a KubeRay cluster. It reads the user-supplied batch engine +// ConfigMap and returns true only when the resolved config has both +// type == "ray.engine" and use_kuberay == true. +func (feast *FeastServices) usesKubeRay() (bool, error) { + spec := feast.Handler.FeatureStore.Status.Applied + if spec.BatchEngine == nil || spec.BatchEngine.ConfigMapRef == nil { + return false, nil + } + cfg, err := feast.extractConfigFromConfigMap(spec.BatchEngine.ConfigMapRef.Name, spec.BatchEngine.ConfigMapKey) + if err != nil { + return false, err + } + if engineType, _ := cfg["type"].(string); engineType != rayEngineType { + return false, nil + } + useKubeRay, _ := cfg["use_kuberay"].(bool) + return useKubeRay, nil +} + +// applyOrDeleteKubeRayRBAC creates the KubeRay Role and RoleBinding when the +// batch engine is configured for KubeRay, and deletes them otherwise. 
The +// resources are owner-referenced to the FeatureStore so they are garbage +// collected with the CR. +func (feast *FeastServices) applyOrDeleteKubeRayRBAC() error { + enabled, err := feast.usesKubeRay() + if err != nil { + return err + } + if !enabled { + if err := feast.Handler.DeleteOwnedFeastObj(feast.initKubeRayRoleBinding()); err != nil { + return err + } + return feast.Handler.DeleteOwnedFeastObj(feast.initKubeRayRole()) + } + if err := feast.createKubeRayRole(); err != nil { + return err + } + return feast.createKubeRayRoleBinding() +} + +func (feast *FeastServices) createKubeRayRole() error { + logger := log.FromContext(feast.Handler.Context) + role := feast.initKubeRayRole() + op, err := controllerutil.CreateOrUpdate(feast.Handler.Context, feast.Handler.Client, role, func() error { + return feast.setKubeRayRole(role) + }) + if err != nil { + return err + } + if op == controllerutil.OperationResultCreated || op == controllerutil.OperationResultUpdated { + logger.Info("Successfully reconciled", "Role", role.Name, "operation", op) + } + return nil +} + +func (feast *FeastServices) createKubeRayRoleBinding() error { + logger := log.FromContext(feast.Handler.Context) + binding := feast.initKubeRayRoleBinding() + op, err := controllerutil.CreateOrUpdate(feast.Handler.Context, feast.Handler.Client, binding, func() error { + return feast.setKubeRayRoleBinding(binding) + }) + if err != nil { + return err + } + if op == controllerutil.OperationResultCreated || op == controllerutil.OperationResultUpdated { + logger.Info("Successfully reconciled", "RoleBinding", binding.Name, "operation", op) + } + return nil +} + +func (feast *FeastServices) initKubeRayRole() *rbacv1.Role { + role := &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: feast.getKubeRayRBACName(), + Namespace: feast.Handler.FeatureStore.Namespace, + }, + } + role.SetGroupVersionKind(rbacv1.SchemeGroupVersion.WithKind("Role")) + return role +} + +func (feast *FeastServices) setKubeRayRole(role 
*rbacv1.Role) error { + role.Labels = feast.getKubeRayLabels() + role.Rules = []rbacv1.PolicyRule{ + { + APIGroups: []string{"ray.io"}, + Resources: []string{"rayclusters"}, + Verbs: []string{"get", "list", "watch"}, + }, + { + APIGroups: []string{""}, + Resources: []string{"secrets"}, + Verbs: []string{"get", "list", "watch", "create", "update", "delete"}, + }, + } + return controllerutil.SetControllerReference(feast.Handler.FeatureStore, role, feast.Handler.Scheme) +} + +func (feast *FeastServices) initKubeRayRoleBinding() *rbacv1.RoleBinding { + binding := &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: feast.getKubeRayRBACName(), + Namespace: feast.Handler.FeatureStore.Namespace, + }, + } + binding.SetGroupVersionKind(rbacv1.SchemeGroupVersion.WithKind("RoleBinding")) + return binding +} + +func (feast *FeastServices) setKubeRayRoleBinding(binding *rbacv1.RoleBinding) error { + binding.Labels = feast.getKubeRayLabels() + binding.Subjects = []rbacv1.Subject{{ + Kind: rbacv1.ServiceAccountKind, + Name: GetFeastName(feast.Handler.FeatureStore), + Namespace: feast.Handler.FeatureStore.Namespace, + }} + binding.RoleRef = rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "Role", + Name: feast.getKubeRayRBACName(), + } + return controllerutil.SetControllerReference(feast.Handler.FeatureStore, binding, feast.Handler.Scheme) +} + +func (feast *FeastServices) getKubeRayRBACName() string { + return GetFeastName(feast.Handler.FeatureStore) + kubeRayRBACSuffix +} + +func (feast *FeastServices) getKubeRayLabels() map[string]string { + return map[string]string{ + NameLabelKey: feast.Handler.FeatureStore.Name, + ManagedByLabelKey: ManagedByLabelValue, + } +} diff --git a/infra/feast-operator/internal/controller/services/services.go b/infra/feast-operator/internal/controller/services/services.go index 47226d460aa..bb79d0a01b7 100644 --- a/infra/feast-operator/internal/controller/services/services.go +++ 
b/infra/feast-operator/internal/controller/services/services.go @@ -78,6 +78,9 @@ func (feast *FeastServices) Deploy() error { if err := feast.createServiceAccount(); err != nil { return err } + if err := feast.applyOrDeleteKubeRayRBAC(); err != nil { + return err + } if err := feast.createDeployment(); err != nil { return err }