diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cb908b3c6..0268797c8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,6 +14,7 @@ on: env: GO_VERSION: '1.24.9' + CERT_MANAGER_VERSION: 'v1.16.2' jobs: detect-noop: @@ -125,6 +126,7 @@ jobs: PROPERTY_PROVIDER: 'azure' RESOURCE_SNAPSHOT_CREATION_MINIMUM_INTERVAL: ${{ matrix.resource-snapshot-creation-minimum-interval }} RESOURCE_CHANGES_COLLECTION_DURATION: ${{ matrix.resource-changes-collection-duration }} + CERT_MANAGER_VERSION: ${{ env.CERT_MANAGER_VERSION }} - name: Collect logs if: always() diff --git a/charts/hub-agent/README.md b/charts/hub-agent/README.md index 3f44d29a4..5d0e395cd 100644 --- a/charts/hub-agent/README.md +++ b/charts/hub-agent/README.md @@ -2,11 +2,33 @@ ## Install Chart +### Default Installation (Self-Signed Certificates) + ```console # Helm install with fleet-system namespace already created helm install hub-agent ./charts/hub-agent/ ``` +### Installation with cert-manager + +When using cert-manager for certificate management, install cert-manager as a prerequisite first: + +```console +# Install cert-manager +helm repo add jetstack https://charts.jetstack.io +helm repo update +helm install cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --create-namespace \ + --version v1.16.2 \ + --set crds.enabled=true + +# Then install hub-agent with cert-manager enabled +helm install hub-agent ./charts/hub-agent --set useCertManager=true --set enableWorkload=true --set enableWebhook=true +``` + +This configures cert-manager to manage webhook certificates. + ## Upgrade Chart ```console @@ -32,6 +54,14 @@ _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documen | `affinity` | Node affinity for hub-agent pods | `{}` | | `tolerations` | Tolerations for hub-agent pods | `[]` | | `logVerbosity` | Log level (klog V logs) | `5` | +| `enableWebhook` | Enable webhook server | `true` | +| `webhookServiceName` | Webhook service name | `fleetwebhook` | +| `enableGuardRail` | Enable guard rail webhook configurations | `true` | +| `webhookClientConnectionType` | Connection type for webhook client (service or url) | `service` | +| `useCertManager` | Use cert-manager for webhook certificate management (requires `enableWebhook=true` and `enableWorkload=true`) | `false` | +| `webhookCertDir` | Directory where webhook certificates are stored/mounted | `/tmp/k8s-webhook-server/serving-certs` | +| `webhookCertName` | Name of the Certificate resource created by cert-manager | `fleet-webhook-server-cert` | +| `webhookCertSecretName` | Name of the Secret containing webhook certificates | `fleet-webhook-server-cert` | | `enableV1Beta1APIs` | Watch for v1beta1 APIs | `true` | | `hubAPIQPS` | QPS for fleet-apiserver (not including events/node heartbeat) | `250` | | `hubAPIBurst` | Burst for fleet-apiserver (not including events/node heartbeat) | `1000` | @@ -41,4 +71,69 @@ _See [helm install](https://helm.sh/docs/helm/helm_install/) for command documen | `MaxFleetSizeSupported` | Max number of member clusters supported | `100` | | `resourceSnapshotCreationMinimumInterval` | The minimum interval at which resource snapshots could be created. | `30s` | | `resourceChangesCollectionDuration` | The duration for collecting resource changes into one snapshot. | `15s` | -| `enableWorkload` | Enable kubernetes builtin workload to run in hub cluster. | `false` | \ No newline at end of file +| `enableWorkload` | Enable kubernetes builtin workload to run in hub cluster. | `false` | + +## Certificate Management + +The hub-agent supports two modes for webhook certificate management: + +### Automatic Certificate Generation (Default) + +By default, the hub-agent generates certificates automatically at startup. This mode: +- Requires no external dependencies +- Works out of the box +- Certificates are valid for 10 years + +### cert-manager (Optional) + +When `useCertManager=true`, certificates are managed by cert-manager. This mode: +- Requires cert-manager to be installed as a prerequisite +- Requires `enableWorkload=true` to allow cert-manager pods to run in the hub cluster (without this, pod creation would be blocked by the webhook) +- Requires `enableWebhook=true` because cert-manager is only used for webhook certificate management +- Handles certificate rotation automatically (90-day certificates) +- Follows industry-standard certificate management practices +- Suitable for production environments + +To switch to cert-manager mode: +```console +# Install cert-manager first +helm repo add jetstack https://charts.jetstack.io +helm repo update +helm install cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --create-namespace \ + --version v1.16.2 \ + --set crds.enabled=true + +# Then install hub-agent with cert-manager enabled +helm install hub-agent ./charts/hub-agent --set useCertManager=true --set enableWorkload=true --set enableWebhook=true +``` + +### Certificate Directory Configuration + +The `webhookCertDir` parameter allows you to customize where webhook certificates are stored: +- Default: `/tmp/k8s-webhook-server/serving-certs` +- Must match the volumeMount path when using cert-manager +- Configurable via both Helm values and `--webhook-cert-dir` flag + +The `webhookCertName` parameter specifies the Certificate resource name: +- Default: `fleet-webhook-server-cert` +- When using cert-manager, this is the name of the Certificate resource +- Referenced in the `cert-manager.io/inject-ca-from` annotation +- Configurable via both Helm values and `--webhook-cert-name` flag + +The `webhookCertSecretName` parameter specifies the Secret name for webhook certificates: +- Default: `fleet-webhook-server-cert` +- When using cert-manager, this is the Secret name created by the Certificate resource +- Must match the secretName in the Certificate spec +- Configurable via both Helm values and `--webhook-cert-secret-name` flag + +Example with custom certificate directory and names: +```console +helm install hub-agent ./charts/hub-agent \ + --set useCertManager=true \ + --set enableWorkload=true \ + --set webhookCertDir=/custom/cert/path \ + --set webhookCertName=my-webhook-certificate \ + --set webhookCertSecretName=my-webhook-secret +``` \ No newline at end of file diff --git a/charts/hub-agent/templates/certificate.yaml b/charts/hub-agent/templates/certificate.yaml new file mode 100644 index 000000000..75a6921cf --- /dev/null +++ b/charts/hub-agent/templates/certificate.yaml @@ -0,0 +1,62 @@ +{{- if and .Values.enableWebhook .Values.useCertManager }} +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ .Values.webhookCertName }} + namespace: {{ .Values.namespace }} + labels: + {{- include "hub-agent.labels" . | nindent 4 }} +spec: + # Secret name where cert-manager will store the certificate + secretName: {{ .Values.webhookCertSecretName }} + + # Certificate duration (90 days is cert-manager's default and recommended) + duration: 2160h # 90 days + + # Renew certificate 30 days before expiry + renewBefore: 720h # 30 days + + # Subject configuration + subject: + organizations: + - KubeFleet + + # Common name + commonName: fleet-webhook.{{ .Values.namespace }}.svc + + # DNS names for the certificate + dnsNames: + - {{ .Values.webhookServiceName }} + - {{ .Values.webhookServiceName }}.{{ .Values.namespace }} + - {{ .Values.webhookServiceName }}.{{ .Values.namespace }}.svc + - {{ .Values.webhookServiceName }}.{{ .Values.namespace }}.svc.cluster.local + + # Issuer reference - using self-signed issuer + issuerRef: + name: fleet-selfsigned-issuer + kind: Issuer + group: cert-manager.io + + # Private key configuration + privateKey: + algorithm: ECDSA + size: 256 + + # Key usages + usages: + - digital signature + - key encipherment + - server auth +--- +# Self-signed issuer for generating the certificate +apiVersion: cert-manager.io/v1 +kind: Issuer +metadata: + name: fleet-selfsigned-issuer + namespace: {{ .Values.namespace }} + labels: + {{- include "hub-agent.labels" . | nindent 4 }} +spec: + selfSigned: {} +{{- end }} diff --git a/charts/hub-agent/templates/deployment.yaml b/charts/hub-agent/templates/deployment.yaml index 7ed6ab7ba..48d96aa11 100644 --- a/charts/hub-agent/templates/deployment.yaml +++ b/charts/hub-agent/templates/deployment.yaml @@ -1,3 +1,6 @@ +{{- if and (not .Values.useCertManager) (gt (.Values.replicaCount | int) 1) }} +{{- fail "ERROR: replicaCount > 1 requires useCertManager=true (self-signed certificates cannot be shared across replicas)" }} +{{- end }} apiVersion: apps/v1 kind: Deployment metadata: @@ -6,6 +9,7 @@ metadata: labels: {{- include "hub-agent.labels" . | nindent 4 }} spec: + replicas: {{ .Values.replicaCount }} selector: matchLabels: {{- include "hub-agent.selectorLabels" . | nindent 6 }} @@ -25,6 +29,10 @@ spec: - --webhook-service-name={{ .Values.webhookServiceName }} - --enable-guard-rail={{ .Values.enableGuardRail }} - --enable-workload={{ .Values.enableWorkload }} + - --use-cert-manager={{ .Values.useCertManager }} + - --webhook-cert-dir={{ .Values.webhookCertDir }} + - --webhook-cert-name={{ .Values.webhookCertName }} + - --webhook-cert-secret-name={{ .Values.webhookCertSecretName }} - --whitelisted-users=system:serviceaccount:fleet-system:hub-agent-sa - --webhook-client-connection-type={{.Values.webhookClientConnectionType}} - --v={{ .Values.logVerbosity }} @@ -73,6 +81,21 @@ spec: fieldPath: metadata.namespace resources: {{- toYaml .Values.resources | nindent 12 }} + {{- if .Values.useCertManager }} + volumeMounts: + - name: webhook-cert + mountPath: {{ .Values.webhookCertDir }} + readOnly: true + {{- end }} + {{- if .Values.useCertManager }} + volumes: + - name: webhook-cert + secret: + secretName: {{ .Values.webhookCertSecretName }} + # defaultMode 0444 (read for all) allows the container process to read the certs + # regardless of the user/group it runs as + defaultMode: 0444 + {{- end }} {{- with .Values.affinity }} affinity: {{- toYaml . | nindent 8 }} diff --git a/charts/hub-agent/values.yaml b/charts/hub-agent/values.yaml index a423c6270..bd2562353 100644 --- a/charts/hub-agent/values.yaml +++ b/charts/hub-agent/values.yaml @@ -17,13 +17,26 @@ webhookServiceName: fleetwebhook enableGuardRail: true webhookClientConnectionType: service enableWorkload: false +# useCertManager enables cert-manager for webhook certificate management +# When enabled, cert-manager must be installed as a prerequisite (it is not installed automatically by this chart) +# and a Certificate resource will be created +useCertManager: false +# webhookCertDir is the directory where webhook certificates are mounted +# This is configurable via --webhook-cert-dir flag and must match the volumeMount path in deployment +webhookCertDir: /tmp/k8s-webhook-server/serving-certs +# webhookCertName is the name of the Certificate resource created by cert-manager +# This is referenced in the cert-manager.io/inject-ca-from annotation +webhookCertName: fleet-webhook-server-cert +# webhookCertSecretName is the name of the Secret containing webhook certificates +# When using cert-manager, this is the Secret name created by the Certificate resource +webhookCertSecretName: fleet-webhook-server-cert + forceDeleteWaitTime: 15m0s clusterUnhealthyThreshold: 3m0s resourceSnapshotCreationMinimumInterval: 30s resourceChangesCollectionDuration: 15s -namespace: - fleet-system +namespace: fleet-system resources: limits: diff --git a/cmd/hubagent/main.go b/cmd/hubagent/main.go index d69d1d867..c64685f47 100644 --- a/cmd/hubagent/main.go +++ b/cmd/hubagent/main.go @@ -20,6 +20,7 @@ import ( "flag" "fmt" "math" + "net/http" "os" "strings" "sync" @@ -65,8 +66,7 @@ var ( ) const ( - FleetWebhookCertDir = "/tmp/k8s-webhook-server/serving-certs" - FleetWebhookPort = 9443 + FleetWebhookPort = 9443 ) func init() { @@ -121,7 +121,7 @@ func main() { }, WebhookServer: ctrlwebhook.NewServer(ctrlwebhook.Options{ Port: FleetWebhookPort, - CertDir: FleetWebhookCertDir, + CertDir: opts.WebhookCertDir, }), } if opts.EnablePprof { @@ -158,11 +158,25 @@ func main() { if opts.EnableWebhook { whiteListedUsers := strings.Split(opts.WhiteListedUsers, ",") - if err := SetupWebhook(mgr, options.WebhookClientConnectionType(opts.WebhookClientConnectionType), opts.WebhookServiceName, whiteListedUsers, - opts.EnableGuardRail, opts.EnableV1Beta1APIs, opts.DenyModifyMemberClusterLabels, opts.EnableWorkload, opts.NetworkingAgentsEnabled); err != nil { + webhookConfig, err := SetupWebhook(mgr, options.WebhookClientConnectionType(opts.WebhookClientConnectionType), opts.WebhookServiceName, whiteListedUsers, + opts.EnableGuardRail, opts.EnableV1Beta1APIs, opts.DenyModifyMemberClusterLabels, opts.EnableWorkload, opts.NetworkingAgentsEnabled, opts.UseCertManager, opts.WebhookCertDir, opts.WebhookCertName, opts.WebhookCertSecretName) + if err != nil { klog.ErrorS(err, "unable to set up webhook") exitWithErrorFunc() } + + // When using cert-manager, add a readiness check to ensure CA bundles are injected before marking ready. + // This prevents the pod from accepting traffic before cert-manager has populated the webhook CA bundles, + // which would cause webhook calls to fail. + if opts.UseCertManager { + if err := mgr.AddReadyzCheck("cert-manager-ca-injection", func(req *http.Request) error { + return webhookConfig.CheckCAInjection(req.Context()) + }); err != nil { + klog.ErrorS(err, "unable to set up cert-manager CA injection readiness check") + exitWithErrorFunc() + } + klog.V(2).InfoS("Added cert-manager CA injection readiness check") + } } ctx := ctrl.SetupSignalHandler() @@ -201,21 +215,22 @@ func main() { } // SetupWebhook generates the webhook cert and then set up the webhook configurator. +// Returns the webhook Config so it can be used for readiness checks. func SetupWebhook(mgr manager.Manager, webhookClientConnectionType options.WebhookClientConnectionType, webhookServiceName string, - whiteListedUsers []string, enableGuardRail, isFleetV1Beta1API bool, denyModifyMemberClusterLabels bool, enableWorkload bool, networkingAgentsEnabled bool) error { - // Generate self-signed key and crt files in FleetWebhookCertDir for the webhook server to start. - w, err := webhook.NewWebhookConfig(mgr, webhookServiceName, FleetWebhookPort, &webhookClientConnectionType, FleetWebhookCertDir, enableGuardRail, denyModifyMemberClusterLabels, enableWorkload) + whiteListedUsers []string, enableGuardRail, isFleetV1Beta1API bool, denyModifyMemberClusterLabels bool, enableWorkload bool, networkingAgentsEnabled bool, useCertManager bool, webhookCertDir string, webhookCertName string, webhookCertSecretName string) (*webhook.Config, error) { + // Generate self-signed key and crt files in webhookCertDir for the webhook server to start. + w, err := webhook.NewWebhookConfig(mgr, webhookServiceName, FleetWebhookPort, &webhookClientConnectionType, webhookCertDir, enableGuardRail, denyModifyMemberClusterLabels, enableWorkload, useCertManager, webhookCertName, webhookCertSecretName) if err != nil { klog.ErrorS(err, "fail to generate WebhookConfig") - return err + return nil, err } if err = mgr.Add(w); err != nil { klog.ErrorS(err, "unable to add WebhookConfig") - return err + return nil, err } if err = webhook.AddToManager(mgr, whiteListedUsers, denyModifyMemberClusterLabels, networkingAgentsEnabled); err != nil { klog.ErrorS(err, "unable to register webhooks to the manager") - return err + return nil, err } - return nil + return w, nil } diff --git a/cmd/hubagent/options/options.go b/cmd/hubagent/options/options.go index 6573789c1..593b85220 100644 --- a/cmd/hubagent/options/options.go +++ b/cmd/hubagent/options/options.go @@ -110,6 +110,18 @@ type Options struct { // EnableWorkload enables workload resources (pods and replicasets) to be created in the hub cluster. // When set to true, the pod and replicaset validating webhooks are disabled. EnableWorkload bool + // UseCertManager indicates whether to use cert-manager for webhook certificate management. + // When enabled, webhook certificates are managed by cert-manager instead of self-signed generation. + UseCertManager bool + // WebhookCertDir is the directory where webhook certificates are stored/mounted. + // This must match the mountPath in the Helm chart deployment when using cert-manager. + WebhookCertDir string + // WebhookCertName is the name of the Certificate resource created by cert-manager. + // This is referenced in the cert-manager.io/inject-ca-from annotation. + WebhookCertName string + // WebhookCertSecretName is the name of the Secret containing webhook certificates. + // When using cert-manager, this is the Secret name created by the Certificate resource. + WebhookCertSecretName string // ResourceSnapshotCreationMinimumInterval is the minimum interval at which resource snapshots could be created. // Whether the resource snapshot is created or not depends on the both ResourceSnapshotCreationMinimumInterval and ResourceChangesCollectionDuration. ResourceSnapshotCreationMinimumInterval time.Duration @@ -185,6 +197,10 @@ func (o *Options) AddFlags(flags *flag.FlagSet) { flags.IntVar(&o.PprofPort, "pprof-port", 6065, "The port for pprof profiling.") flags.BoolVar(&o.DenyModifyMemberClusterLabels, "deny-modify-member-cluster-labels", false, "If set, users not in the system:masters cannot modify member cluster labels.") flags.BoolVar(&o.EnableWorkload, "enable-workload", false, "If set, workloads (pods and replicasets) can be created in the hub cluster. This disables the pod and replicaset validating webhooks.") + flags.BoolVar(&o.UseCertManager, "use-cert-manager", false, "If set, cert-manager will be used for webhook certificate management instead of self-signed certificates.") + flags.StringVar(&o.WebhookCertDir, "webhook-cert-dir", "/tmp/k8s-webhook-server/serving-certs", "The directory where webhook certificates are stored. Must match the volumeMount path in deployment when using cert-manager.") + flags.StringVar(&o.WebhookCertName, "webhook-cert-name", "fleet-webhook-server-cert", "The name of the Certificate resource created by cert-manager. Referenced in cert-manager.io/inject-ca-from annotation.") + flags.StringVar(&o.WebhookCertSecretName, "webhook-cert-secret-name", "fleet-webhook-server-cert", "The name of the Secret containing webhook certificates. Must match the secretName in deployment and Certificate resource when using cert-manager.") flags.DurationVar(&o.ResourceSnapshotCreationMinimumInterval, "resource-snapshot-creation-minimum-interval", 30*time.Second, "The minimum interval at which resource snapshots could be created.") flags.DurationVar(&o.ResourceChangesCollectionDuration, "resource-changes-collection-duration", 15*time.Second, "The duration for collecting resource changes into one snapshot. The default is 15 seconds, which means that the controller will collect resource changes for 15 seconds before creating a resource snapshot.") diff --git a/cmd/hubagent/options/validation.go b/cmd/hubagent/options/validation.go index df5af530f..4258f24e2 100644 --- a/cmd/hubagent/options/validation.go +++ b/cmd/hubagent/options/validation.go @@ -52,6 +52,14 @@ func (o *Options) Validate() field.ErrorList { errs = append(errs, field.Invalid(newPath.Child("WebhookServiceName"), o.WebhookServiceName, "Webhook service name is required when webhook is enabled")) } + if o.UseCertManager && !o.EnableWebhook { + errs = append(errs, field.Invalid(newPath.Child("UseCertManager"), o.UseCertManager, "UseCertManager requires EnableWebhook to be true (cert-manager is only used for webhook certificate management)")) + } + + if o.UseCertManager && !o.EnableWorkload { + errs = append(errs, field.Invalid(newPath.Child("UseCertManager"), o.UseCertManager, "UseCertManager requires EnableWorkload to be true (when EnableWorkload is false, a validating webhook blocks pod creation except for certain system pods; cert-manager controller pods must be allowed to run in the hub cluster)")) + } + connectionType := o.WebhookClientConnectionType if _, err := parseWebhookClientConnectionString(connectionType); err != nil { errs = append(errs, field.Invalid(newPath.Child("WebhookClientConnectionType"), o.WebhookClientConnectionType, err.Error())) diff --git a/cmd/hubagent/options/validation_test.go b/cmd/hubagent/options/validation_test.go index b4cf80b9c..342f6e2a2 100644 --- a/cmd/hubagent/options/validation_test.go +++ b/cmd/hubagent/options/validation_test.go @@ -27,6 +27,10 @@ import ( "k8s.io/apimachinery/pkg/util/validation/field" ) +const ( + testWebhookServiceName = "test-webhook" +) + // a callback function to modify options type ModifyOptions func(option *Options) @@ -93,6 +97,42 @@ func TestValidateControllerManagerConfiguration(t *testing.T) { }), want: field.ErrorList{field.Invalid(newPath.Child("WebhookServiceName"), "", "Webhook service name is required when webhook is enabled")}, }, + "UseCertManager without EnableWebhook": { + opt: newTestOptions(func(option *Options) { + option.EnableWebhook = false + option.UseCertManager = true + }), + want: field.ErrorList{ + field.Invalid(newPath.Child("UseCertManager"), true, "UseCertManager requires EnableWebhook to be true (cert-manager is only used for webhook certificate management)"), + field.Invalid(newPath.Child("UseCertManager"), true, "UseCertManager requires EnableWorkload to be true (when EnableWorkload is false, a validating webhook blocks pod creation except for certain system pods; cert-manager controller pods must be allowed to run in the hub cluster)"), + }, + }, + "UseCertManager with EnableWebhook": { + opt: newTestOptions(func(option *Options) { + option.EnableWebhook = true + option.WebhookServiceName = testWebhookServiceName + option.UseCertManager = true + }), + want: field.ErrorList{field.Invalid(newPath.Child("UseCertManager"), true, "UseCertManager requires EnableWorkload to be true (when EnableWorkload is false, a validating webhook blocks pod creation except for certain system pods; cert-manager controller pods must be allowed to run in the hub cluster)")}, + }, + "UseCertManager without EnableWorkload": { + opt: newTestOptions(func(option *Options) { + option.EnableWebhook = true + option.WebhookServiceName = testWebhookServiceName + option.UseCertManager = true + option.EnableWorkload = false + }), + want: field.ErrorList{field.Invalid(newPath.Child("UseCertManager"), true, "UseCertManager requires EnableWorkload to be true (when EnableWorkload is false, a validating webhook blocks pod creation except for certain system pods; cert-manager controller pods must be allowed to run in the hub cluster)")}, + }, + "UseCertManager with EnableWebhook and EnableWorkload": { + opt: newTestOptions(func(option *Options) { + option.EnableWebhook = true + option.WebhookServiceName = testWebhookServiceName + option.UseCertManager = true + option.EnableWorkload = true + }), + want: field.ErrorList{}, + }, } for name, tc := range testCases { diff --git a/cmd/hubagent/workload/setup.go b/cmd/hubagent/workload/setup.go index dbb47de4f..1397a2f1a 100644 --- a/cmd/hubagent/workload/setup.go +++ b/cmd/hubagent/workload/setup.go @@ -496,7 +496,23 @@ func SetupControllers(ctx context.Context, wg *sync.WaitGroup, mgr ctrl.Manager, } resourceChangeController := controller.NewController(resourceChangeControllerName, controller.ClusterWideKeyFunc, rcr.Reconcile, rateLimiter) + // Set up the InformerPopulator that runs on ALL pods (leader and followers) + // This ensures all pods have synced informer caches for webhook validation + klog.Info("Setting up informer populator") + informerPopulator := &resourcewatcher.InformerPopulator{ + DiscoveryClient: discoverClient, + RESTMapper: mgr.GetRESTMapper(), + InformerManager: dynamicInformerManager, + ResourceConfig: resourceConfig, + } + + if err := mgr.Add(informerPopulator); err != nil { + klog.ErrorS(err, "Failed to setup informer populator") + return err + } + // Set up a runner that starts all the custom controllers we created above + // This runs ONLY on the leader and adds event handlers to the informers created by InformerPopulator resourceChangeDetector := &resourcewatcher.ChangeDetector{ DiscoveryClient: discoverClient, RESTMapper: mgr.GetRESTMapper(), diff --git a/pkg/resourcewatcher/change_dector.go b/pkg/resourcewatcher/change_dector.go index 9a4011628..0dcfc41e6 100644 --- a/pkg/resourcewatcher/change_dector.go +++ b/pkg/resourcewatcher/change_dector.go @@ -23,7 +23,6 @@ import ( "golang.org/x/sync/errgroup" "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/discovery" "k8s.io/client-go/tools/cache" @@ -45,7 +44,7 @@ var ( // ChangeDetector is a resource watcher which watches all types of resources in the cluster and reconcile the events. type ChangeDetector struct { // DiscoveryClient is used to do resource discovery. - DiscoveryClient *discovery.DiscoveryClient + DiscoveryClient discovery.DiscoveryInterface // RESTMapper is used to convert between GVK and GVR RESTMapper meta.RESTMapper @@ -137,43 +136,20 @@ func (d *ChangeDetector) discoverAPIResourcesLoop(ctx context.Context, period ti }, period) } -// discoverResources goes through all the api resources in the cluster and create informers on selected types +// discoverResources goes through all the api resources in the cluster and adds event handlers to informers func (d *ChangeDetector) discoverResources(dynamicResourceEventHandler cache.ResourceEventHandler) { - newResources, err := d.getWatchableResources() - var dynamicResources []informer.APIResourceMeta - if err != nil { - klog.ErrorS(err, "Failed to get all the api resources from the cluster") - } - for _, res := range newResources { - // all the static resources are disabled by default - if d.shouldWatchResource(res.GroupVersionResource) { - dynamicResources = append(dynamicResources, res) - } + resourcesToWatch := discoverWatchableResources(d.DiscoveryClient, d.RESTMapper, d.ResourceConfig) + + // On the leader, add event handlers to informers that were already created by InformerPopulator + // The informers exist on all pods, but only the leader adds handlers and processes events + for _, res := range resourcesToWatch { + d.InformerManager.AddEventHandlerToInformer(res.GroupVersionResource, dynamicResourceEventHandler) } - d.InformerManager.AddDynamicResources(dynamicResources, dynamicResourceEventHandler, err == nil) + // this will start the newly added informers if there is any d.InformerManager.Start() -} - -// gvrDisabled returns whether GroupVersionResource is disabled. -func (d *ChangeDetector) shouldWatchResource(gvr schema.GroupVersionResource) bool { - // By default, all of the APIs are allowed. - if d.ResourceConfig == nil { - return true - } - gvks, err := d.RESTMapper.KindsFor(gvr) - if err != nil { - klog.ErrorS(err, "gvr transform failed", "gvr", gvr.String()) - return false - } - for _, gvk := range gvks { - if d.ResourceConfig.IsResourceDisabled(gvk) { - klog.V(4).InfoS("Skip watch resource", "group version kind", gvk.String()) - return false - } - } - return true + klog.V(2).InfoS("Change detector: discovered resources", "count", len(resourcesToWatch)) } // dynamicResourceFilter filters out resources that we don't want to watch diff --git a/pkg/resourcewatcher/change_detector_test.go b/pkg/resourcewatcher/change_detector_test.go new file mode 100644 index 000000000..e0f83e3bf --- /dev/null +++ b/pkg/resourcewatcher/change_detector_test.go @@ -0,0 +1,200 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourcewatcher + +import ( + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + fakediscovery "k8s.io/client-go/discovery/fake" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/restmapper" + "k8s.io/client-go/tools/cache" + + "github.com/kubefleet-dev/kubefleet/pkg/utils" + testinformer "github.com/kubefleet-dev/kubefleet/test/utils/informer" +) + +func TestChangeDetector_discoverResources(t *testing.T) { + tests := []struct { + name string + discoveryResources []*metav1.APIResourceList + resourceConfig *utils.ResourceConfig + }{ + { + name: "discovers and adds handlers for watchable resources", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + { + Name: "configmaps", + Kind: "ConfigMap", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + { + Name: "secrets", + Kind: "Secret", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + }, + resourceConfig: nil, // Allow all resources + }, + { + name: "skips resources without list/watch verbs", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + { + Name: "configmaps", + Kind: "ConfigMap", + Namespaced: true, + Verbs: []string{"get", "delete"}, // Missing list/watch + }, + }, + }, + }, + resourceConfig: nil, + }, + { + name: "respects resource config filtering", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + { + Name: "configmaps", + Kind: "ConfigMap", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + { + Name: "secrets", + Kind: "Secret", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + }, + resourceConfig: func() *utils.ResourceConfig { + rc := utils.NewResourceConfig(false) // Skip mode + _ = rc.Parse("v1/Secret") // Skip secrets + return rc + }(), + }, + { + name: "discovers apps group resources", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "apps/v1", + APIResources: []metav1.APIResource{ + { + Name: "deployments", + Kind: "Deployment", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + { + Name: "statefulsets", + Kind: "StatefulSet", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + }, + resourceConfig: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create fake discovery client + fakeClient := fake.NewSimpleClientset() + fakeDiscovery, ok := fakeClient.Discovery().(*fakediscovery.FakeDiscovery) + if !ok { + t.Fatal("Failed to cast to FakeDiscovery") + } + fakeDiscovery.Resources = tt.discoveryResources + + // Create REST mapper + groupResources := []*restmapper.APIGroupResources{} + for _, resourceList := range tt.discoveryResources { + gv, err := schema.ParseGroupVersion(resourceList.GroupVersion) + if err != nil { + t.Fatalf("Failed to parse group version: %v", err) + } + + groupResources = append(groupResources, &restmapper.APIGroupResources{ + Group: metav1.APIGroup{ + Name: gv.Group, + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: resourceList.GroupVersion, Version: gv.Version}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: resourceList.GroupVersion, + Version: gv.Version, + }, + }, + VersionedResources: map[string][]metav1.APIResource{ + gv.Version: resourceList.APIResources, + }, + }) + } + restMapper := restmapper.NewDiscoveryRESTMapper(groupResources) + + // Create fake informer manager + fakeInformerManager := &testinformer.FakeManager{ + APIResources: make(map[schema.GroupVersionKind]bool), + } + + // Track handler additions + testHandler := cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) {}, + } + + // Create ChangeDetector with the interface type + detector := &ChangeDetector{ + DiscoveryClient: fakeDiscovery, + RESTMapper: restMapper, + InformerManager: fakeInformerManager, + ResourceConfig: tt.resourceConfig, + } + + // Test discoverResources which discovers resources and adds handlers + detector.discoverResources(testHandler) + + // The main goal is to verify no panics occur during discovery and handler addition + }) + } +} + +func TestChangeDetector_NeedLeaderElection(t *testing.T) { + detector := &ChangeDetector{} + + // ChangeDetector SHOULD need leader election so only the leader processes events + if !detector.NeedLeaderElection() { + t.Error("ChangeDetector should need leader election") + } +} diff --git a/pkg/resourcewatcher/informer_populator.go b/pkg/resourcewatcher/informer_populator.go new file mode 100644 index 000000000..8162f606a --- /dev/null +++ b/pkg/resourcewatcher/informer_populator.go @@ -0,0 +1,102 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourcewatcher + +import ( + "context" + "time" + + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/discovery" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/manager" + + "github.com/kubefleet-dev/kubefleet/pkg/utils" + "github.com/kubefleet-dev/kubefleet/pkg/utils/informer" +) + +const ( + // informerPopulatorDiscoveryPeriod is how often the InformerPopulator rediscovers API resources + informerPopulatorDiscoveryPeriod = 30 * time.Second +) + +// make sure that our InformerPopulator implements controller runtime interfaces +var ( + _ manager.Runnable = &InformerPopulator{} + _ manager.LeaderElectionRunnable = &InformerPopulator{} +) + +// InformerPopulator discovers API resources and creates informers for them WITHOUT adding event handlers. +// This allows follower pods to have synced informer caches for webhook validation while the leader's +// ChangeDetector adds event handlers and runs controllers. +type InformerPopulator struct { + // DiscoveryClient is used to do resource discovery. + DiscoveryClient discovery.DiscoveryInterface + + // RESTMapper is used to convert between GVK and GVR + RESTMapper meta.RESTMapper + + // InformerManager manages all the dynamic informers created by the discovery client + InformerManager informer.Manager + + // ResourceConfig contains all the API resources that we won't select based on the allowed or skipped propagating APIs option. + ResourceConfig *utils.ResourceConfig +} + +// Start runs the informer populator, discovering resources and creating informers. +// This runs on ALL pods (leader and followers) to ensure all have synced caches. +func (p *InformerPopulator) Start(ctx context.Context) error { + klog.InfoS("Starting the informer populator") + defer klog.InfoS("The informer populator is stopped") + + // Run initial discovery to create informers + p.discoverAndCreateInformers() + + // Wait for initial cache sync + p.InformerManager.WaitForCacheSync() + klog.InfoS("Informer populator: initial cache sync complete") + + // Continue discovering resources periodically to handle CRD installations + wait.UntilWithContext(ctx, func(ctx context.Context) { + p.discoverAndCreateInformers() + }, informerPopulatorDiscoveryPeriod) + + return nil +} + +// discoverAndCreateInformers discovers API resources and creates informers WITHOUT adding event handlers +func (p *InformerPopulator) discoverAndCreateInformers() { + resourcesToWatch := discoverWatchableResources(p.DiscoveryClient, p.RESTMapper, p.ResourceConfig) + + // Create informers directly without adding event handlers. + // This avoids adding any event handlers on follower pods + for _, res := range resourcesToWatch { + p.InformerManager.CreateInformerForResource(res) + } + + // Start any newly created informers + p.InformerManager.Start() + + klog.V(2).InfoS("Informer populator: discovered resources", "count", len(resourcesToWatch)) +} + +// NeedLeaderElection implements LeaderElectionRunnable interface. +// Returns false so this runs on ALL pods (leader and followers). +func (p *InformerPopulator) NeedLeaderElection() bool { + return false +} diff --git a/pkg/resourcewatcher/informer_populator_test.go b/pkg/resourcewatcher/informer_populator_test.go new file mode 100644 index 000000000..592be39b2 --- /dev/null +++ b/pkg/resourcewatcher/informer_populator_test.go @@ -0,0 +1,437 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourcewatcher + +import ( + "context" + "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + fakediscovery "k8s.io/client-go/discovery/fake" + "k8s.io/client-go/kubernetes/fake" + "k8s.io/client-go/restmapper" + + "github.com/kubefleet-dev/kubefleet/pkg/utils" + testinformer "github.com/kubefleet-dev/kubefleet/test/utils/informer" +) + +const ( + // testTimeout is the timeout for test operations + testTimeout = 200 * time.Millisecond + // testSleep is how long to sleep to allow periodic operations + testSleep = 150 * time.Millisecond +) + +func TestInformerPopulator_NeedLeaderElection(t *testing.T) { + populator := &InformerPopulator{} + + // InformerPopulator should NOT need leader election so it runs on all pods + if populator.NeedLeaderElection() { + t.Error("InformerPopulator should not need leader election") + } +} + +func TestInformerPopulator_discoverAndCreateInformers(t *testing.T) { + tests := []struct { + name string + discoveryResources []*metav1.APIResourceList + resourceConfig *utils.ResourceConfig + expectedInformerCreated bool + expectedResourceCount int + }{ + { + name: "creates informers for watchable resources", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + { + Name: "configmaps", + Kind: "ConfigMap", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + }, + resourceConfig: nil, // Allow all resources + expectedInformerCreated: true, + expectedResourceCount: 1, + }, + { + name: "skips resources without list/watch verbs", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + { + Name: "configmaps", + Kind: "ConfigMap", + Namespaced: true, + Verbs: []string{"get", "delete"}, // Missing list/watch + }, + }, + }, + }, + resourceConfig: nil, + expectedInformerCreated: false, + expectedResourceCount: 0, + }, + { + name: "respects resource config filtering", + discoveryResources: []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + { + Name: "secrets", + Kind: "Secret", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + }, + resourceConfig: func() *utils.ResourceConfig { + rc := utils.NewResourceConfig(false) // Skip mode + _ = rc.Parse("v1/Secret") // Skip secrets + return rc + }(), + expectedInformerCreated: false, + expectedResourceCount: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Create fake discovery client + fakeClient := fake.NewSimpleClientset() + fakeDiscovery, ok := fakeClient.Discovery().(*fakediscovery.FakeDiscovery) + if !ok { + t.Fatal("Failed to cast to FakeDiscovery") + } + fakeDiscovery.Resources = tt.discoveryResources + + // Create REST mapper + groupResources := []*restmapper.APIGroupResources{} + for _, resourceList := range tt.discoveryResources { + gv, err := schema.ParseGroupVersion(resourceList.GroupVersion) + if err != nil { + t.Fatalf("Failed to parse group version: %v", err) + } + + apiResources := []metav1.APIResource{} + apiResources = append(apiResources, resourceList.APIResources...) + + groupResources = append(groupResources, &restmapper.APIGroupResources{ + Group: metav1.APIGroup{ + Name: gv.Group, + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: resourceList.GroupVersion, Version: gv.Version}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: resourceList.GroupVersion, + Version: gv.Version, + }, + }, + VersionedResources: map[string][]metav1.APIResource{ + gv.Version: apiResources, + }, + }) + } + restMapper := restmapper.NewDiscoveryRESTMapper(groupResources) + + // Create fake informer manager + fakeInformerManager := &testinformer.FakeManager{ + APIResources: make(map[schema.GroupVersionKind]bool), + } + + // Track calls to CreateInformerForResource + populator := &InformerPopulator{ + DiscoveryClient: fakeDiscovery, + RESTMapper: restMapper, + InformerManager: fakeInformerManager, + ResourceConfig: tt.resourceConfig, + } + + // Run discovery + populator.discoverAndCreateInformers() + + // Note: FakeManager doesn't track calls, so we verify no panics occurred + }) + } +} + +func TestInformerPopulator_Start(t *testing.T) { + // Create fake discovery client with some resources + fakeClient := fake.NewSimpleClientset() + fakeDiscovery, ok := fakeClient.Discovery().(*fakediscovery.FakeDiscovery) + if !ok { + t.Fatal("Failed to cast to FakeDiscovery") + } + + fakeDiscovery.Resources = []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + { + Name: "configmaps", + Kind: "ConfigMap", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + } + + // Create REST mapper + gv := schema.GroupVersion{Group: "", Version: "v1"} + groupResources := []*restmapper.APIGroupResources{ + { + Group: metav1.APIGroup{ + Name: "", + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: "v1", Version: "v1"}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: "v1", + Version: "v1", + }, + }, + VersionedResources: map[string][]metav1.APIResource{ + "v1": { + { + Name: "configmaps", + Kind: "ConfigMap", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + }, + } + restMapper := restmapper.NewDiscoveryRESTMapper(groupResources) + + // Create fake informer manager + fakeInformerManager := &testinformer.FakeManager{ + APIResources: map[schema.GroupVersionKind]bool{ + gv.WithKind("ConfigMap"): true, + }, + IsClusterScopedResource: false, + } + + populator := &InformerPopulator{ + DiscoveryClient: fakeDiscovery, + RESTMapper: restMapper, + InformerManager: fakeInformerManager, + ResourceConfig: nil, + } + + // Create a context that will cancel after a short time + // Use half of testTimeout to ensure we have time to verify after cancellation + ctx, cancel := context.WithTimeout(context.Background(), testTimeout/2) + defer cancel() + + // Start the populator in a goroutine + done := make(chan error, 1) + go func() { + done <- populator.Start(ctx) + }() + + // Wait for context to cancel or error + select { + case err := <-done: + // Should return nil when context is canceled + if err != nil { + t.Errorf("Start should not return error on context cancellation: %v", err) + } + case <-time.After(testTimeout): + t.Fatal("Start did not exit after context cancellation") + } +} + +func TestInformerPopulator_Integration(t *testing.T) { + // This test verifies the integration between InformerPopulator and the informer manager + + // Create fake discovery with multiple resource types + fakeClient := fake.NewSimpleClientset() + fakeDiscovery, ok := fakeClient.Discovery().(*fakediscovery.FakeDiscovery) + if !ok { + t.Fatal("Failed to cast to FakeDiscovery") + } + + fakeDiscovery.Resources = []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + { + Name: "configmaps", + Kind: "ConfigMap", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + { + Name: "secrets", + Kind: "Secret", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + { + GroupVersion: "apps/v1", + APIResources: []metav1.APIResource{ + { + Name: "deployments", + Kind: "Deployment", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + } + + // Create REST mapper + groupResources := []*restmapper.APIGroupResources{ + { + Group: metav1.APIGroup{ + Name: "", + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: "v1", Version: "v1"}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: "v1", + Version: "v1", + }, + }, + VersionedResources: map[string][]metav1.APIResource{ + "v1": fakeDiscovery.Resources[0].APIResources, + }, + }, + { + Group: metav1.APIGroup{ + Name: "apps", + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: "apps/v1", Version: "v1"}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: "apps/v1", + Version: "v1", + }, + }, + VersionedResources: map[string][]metav1.APIResource{ + "v1": fakeDiscovery.Resources[1].APIResources, + }, + }, + } + restMapper := restmapper.NewDiscoveryRESTMapper(groupResources) + + // Create resource config that skips secrets + resourceConfig := utils.NewResourceConfig(false) + err := resourceConfig.Parse("v1/Secret") + if err != nil { + t.Fatalf("Failed to parse resource config: %v", err) + } + + fakeInformerManager := &testinformer.FakeManager{ + APIResources: make(map[schema.GroupVersionKind]bool), + IsClusterScopedResource: false, + } + + populator := &InformerPopulator{ + DiscoveryClient: fakeDiscovery, + RESTMapper: restMapper, + InformerManager: fakeInformerManager, + ResourceConfig: resourceConfig, + } + + // Run discovery + populator.discoverAndCreateInformers() + + // Note: FakeManager doesn't track calls, so we just verify no panics +} + +func TestInformerPopulator_PeriodicDiscovery(t *testing.T) { + // This test verifies that the populator continues to discover resources periodically + + fakeClient := fake.NewSimpleClientset() + fakeDiscovery, ok := fakeClient.Discovery().(*fakediscovery.FakeDiscovery) + if !ok { + t.Fatal("Failed to cast to FakeDiscovery") + } + + fakeDiscovery.Resources = []*metav1.APIResourceList{ + { + GroupVersion: "v1", + APIResources: []metav1.APIResource{ + { + Name: "configmaps", + Kind: "ConfigMap", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + } + + groupResources := []*restmapper.APIGroupResources{ + { + Group: metav1.APIGroup{ + Name: "", + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: "v1", Version: "v1"}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: "v1", + Version: "v1", + }, + }, + VersionedResources: map[string][]metav1.APIResource{ + "v1": fakeDiscovery.Resources[0].APIResources, + }, + }, + } + restMapper := restmapper.NewDiscoveryRESTMapper(groupResources) + + fakeInformerManager := &testinformer.FakeManager{ + APIResources: make(map[schema.GroupVersionKind]bool), + IsClusterScopedResource: false, + } + + populator := &InformerPopulator{ + DiscoveryClient: fakeDiscovery, + RESTMapper: restMapper, + InformerManager: fakeInformerManager, + ResourceConfig: nil, + } + + // Override the discovery period for testing + ctx, cancel := context.WithTimeout(context.Background(), testTimeout) + defer cancel() + + // Start the populator + go func() { + _ = populator.Start(ctx) + }() + + // Wait a bit to allow multiple discovery cycles + time.Sleep(testSleep) + + // Note: FakeManager doesn't track calls, so we just verify successful execution +} diff --git a/pkg/resourcewatcher/resource_collector.go b/pkg/resourcewatcher/resource_collector.go index 86e2cf235..b740bfc8c 100644 --- a/pkg/resourcewatcher/resource_collector.go +++ b/pkg/resourcewatcher/resource_collector.go @@ -17,12 +17,14 @@ limitations under the License. package resourcewatcher import ( + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/errors" "k8s.io/client-go/discovery" "k8s.io/klog/v2" metricsV1beta1 "k8s.io/metrics/pkg/apis/metrics/v1beta1" + "github.com/kubefleet-dev/kubefleet/pkg/utils" "github.com/kubefleet-dev/kubefleet/pkg/utils/informer" ) @@ -30,10 +32,11 @@ import ( // More specifically, all api resources which support the 'list', and 'watch' verbs. // All discovery errors are considered temporary. Upon encountering any error, // getWatchableResources will log and return any discovered resources it was able to process (which may be none). -func (d *ChangeDetector) getWatchableResources() ([]informer.APIResourceMeta, error) { +// This is a standalone function that can be used by both ChangeDetector and InformerPopulator. +func getWatchableResources(discoveryClient discovery.ServerResourcesInterface) ([]informer.APIResourceMeta, error) { // Get all the resources this cluster has. We only need to care about the preferred version as the informers watch // the preferred version will get watch event for resources on the other versions since there is only one version in etcd. - allResources, discoverError := d.DiscoveryClient.ServerPreferredResources() + allResources, discoverError := discoveryClient.ServerPreferredResources() allErr := make([]error, 0) if discoverError != nil { if discovery.IsGroupDiscoveryFailedError(discoverError) { @@ -82,3 +85,44 @@ func (d *ChangeDetector) getWatchableResources() ([]informer.APIResourceMeta, er return watchableGroupVersionResources, errors.NewAggregate(allErr) } + +// shouldWatchResource returns whether a GroupVersionResource should be watched. +// This is a standalone function that can be used by both ChangeDetector and InformerPopulator. +func shouldWatchResource(gvr schema.GroupVersionResource, restMapper meta.RESTMapper, resourceConfig *utils.ResourceConfig) bool { + // By default, all of the APIs are allowed. + if resourceConfig == nil { + return true + } + + gvks, err := restMapper.KindsFor(gvr) + if err != nil { + klog.ErrorS(err, "gvr transform failed", "gvr", gvr.String()) + return false + } + for _, gvk := range gvks { + if resourceConfig.IsResourceDisabled(gvk) { + klog.V(4).InfoS("Skip watch resource", "group version kind", gvk.String()) + return false + } + } + return true +} + +// discoverWatchableResources discovers all API resources in the cluster and filters them +// based on the resource configuration. This is a shared helper used by both InformerPopulator +// and ChangeDetector to ensure consistent resource discovery logic. +func discoverWatchableResources(discoveryClient discovery.DiscoveryInterface, restMapper meta.RESTMapper, resourceConfig *utils.ResourceConfig) []informer.APIResourceMeta { + newResources, err := getWatchableResources(discoveryClient) + if err != nil { + klog.ErrorS(err, "Failed to get all the api resources from the cluster") + } + + var resourcesToWatch []informer.APIResourceMeta + for _, res := range newResources { + if shouldWatchResource(res.GroupVersionResource, restMapper, resourceConfig) { + resourcesToWatch = append(resourcesToWatch, res) + } + } + + return resourcesToWatch +} diff --git a/pkg/resourcewatcher/resource_collector_test.go b/pkg/resourcewatcher/resource_collector_test.go new file mode 100644 index 000000000..33a5d1d1f --- /dev/null +++ b/pkg/resourcewatcher/resource_collector_test.go @@ -0,0 +1,221 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package resourcewatcher + +import ( + "testing" + + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/restmapper" + + "github.com/kubefleet-dev/kubefleet/pkg/utils" +) + +func TestShouldWatchResource(t *testing.T) { + tests := []struct { + name string + gvr schema.GroupVersionResource + resourceConfig *utils.ResourceConfig + setupMapper func() meta.RESTMapper + expected bool + }{ + { + name: "returns true when resourceConfig is nil", + gvr: schema.GroupVersionResource{ + Group: "", + Version: "v1", + Resource: "configmaps", + }, + resourceConfig: nil, + setupMapper: func() meta.RESTMapper { + groupResources := []*restmapper.APIGroupResources{ + { + Group: metav1.APIGroup{ + Name: "", + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: "v1", Version: "v1"}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: "v1", + Version: "v1", + }, + }, + VersionedResources: map[string][]metav1.APIResource{ + "v1": { + { + Name: "configmaps", + Kind: "ConfigMap", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + }, + } + return restmapper.NewDiscoveryRESTMapper(groupResources) + }, + expected: true, + }, + { + name: "returns true when resource is not disabled", + gvr: schema.GroupVersionResource{ + Group: "", + Version: "v1", + Resource: "configmaps", + }, + resourceConfig: func() *utils.ResourceConfig { + rc := utils.NewResourceConfig(false) + // Disable secrets, but not configmaps + _ = rc.Parse("v1/Secret") + return rc + }(), + setupMapper: func() meta.RESTMapper { + groupResources := []*restmapper.APIGroupResources{ + { + Group: metav1.APIGroup{ + Name: "", + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: "v1", Version: "v1"}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: "v1", + Version: "v1", + }, + }, + VersionedResources: map[string][]metav1.APIResource{ + "v1": { + { + Name: "configmaps", + Kind: "ConfigMap", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + }, + } + return restmapper.NewDiscoveryRESTMapper(groupResources) + }, + expected: true, + }, + { + name: "returns false when resource is disabled", + gvr: schema.GroupVersionResource{ + Group: "", + Version: "v1", + Resource: "secrets", + }, + resourceConfig: func() *utils.ResourceConfig { + rc := utils.NewResourceConfig(false) + _ = rc.Parse("v1/Secret") + return rc + }(), + setupMapper: func() meta.RESTMapper { + groupResources := []*restmapper.APIGroupResources{ + { + Group: metav1.APIGroup{ + Name: "", + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: "v1", Version: "v1"}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: "v1", + Version: "v1", + }, + }, + VersionedResources: map[string][]metav1.APIResource{ + "v1": { + { + Name: "secrets", + Kind: "Secret", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + }, + } + return restmapper.NewDiscoveryRESTMapper(groupResources) + }, + expected: false, + }, + { + name: "returns false when GVR mapping fails", + gvr: schema.GroupVersionResource{ + Group: "invalid.group", + Version: "v1", + Resource: "nonexistent", + }, + resourceConfig: utils.NewResourceConfig(false), + setupMapper: func() meta.RESTMapper { + // Empty mapper - will fail to map the GVR + groupResources := []*restmapper.APIGroupResources{} + return restmapper.NewDiscoveryRESTMapper(groupResources) + }, + expected: false, + }, + { + name: "handles apps group resources correctly", + gvr: schema.GroupVersionResource{ + Group: "apps", + Version: "v1", + Resource: "deployments", + }, + resourceConfig: nil, + setupMapper: func() meta.RESTMapper { + groupResources := []*restmapper.APIGroupResources{ + { + Group: metav1.APIGroup{ + Name: "apps", + Versions: []metav1.GroupVersionForDiscovery{ + {GroupVersion: "apps/v1", Version: "v1"}, + }, + PreferredVersion: metav1.GroupVersionForDiscovery{ + GroupVersion: "apps/v1", + Version: "v1", + }, + }, + VersionedResources: map[string][]metav1.APIResource{ + "v1": { + { + Name: "deployments", + Kind: "Deployment", + Namespaced: true, + Verbs: []string{"list", "watch", "get"}, + }, + }, + }, + }, + } + return restmapper.NewDiscoveryRESTMapper(groupResources) + }, + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + restMapper := tt.setupMapper() + result := shouldWatchResource(tt.gvr, restMapper, tt.resourceConfig) + if result != tt.expected { + t.Errorf("shouldWatchResource() = %v, want %v", result, tt.expected) + } + }) + } +} diff --git a/pkg/utils/informer/informermanager.go b/pkg/utils/informer/informermanager.go index 53da3343a..984a6d392 100644 --- a/pkg/utils/informer/informermanager.go +++ b/pkg/utils/informer/informermanager.go @@ -33,11 +33,6 @@ import ( // InformerManager manages dynamic shared informer for all resources, include Kubernetes resource and // custom resources defined by CustomResourceDefinition. type Manager interface { - // AddDynamicResources builds a dynamicInformer for each resource in the resources list with the event handler. - // A resource is dynamic if its definition can be created/deleted/updated during runtime. - // Normally, it is a custom resource that is installed by users. The handler should not be nil. - AddDynamicResources(resources []APIResourceMeta, handler cache.ResourceEventHandler, listComplete bool) - // AddStaticResource creates a dynamicInformer for the static 'resource' and set its event handler. // A resource is static if its definition is pre-determined and immutable during runtime. // Normally, it is a resource that is pre-installed by the system. @@ -72,6 +67,16 @@ type Manager interface { // GetClient returns the dynamic dynamicClient. GetClient() dynamic.Interface + + // AddEventHandlerToInformer adds an event handler to an existing informer for the given resource. + // If the informer doesn't exist, it will be created. This is used by the leader's ChangeDetector + // to add event handlers to informers that were created by the InformerPopulator. + AddEventHandlerToInformer(resource schema.GroupVersionResource, handler cache.ResourceEventHandler) + + // CreateInformerForResource creates an informer for the given resource without adding any event handlers. + // This is used by InformerPopulator to create informers on all pods (leader and followers) so they have + // synced caches for webhook validation. The leader's ChangeDetector will add event handlers later. + CreateInformerForResource(resource APIResourceMeta) } // NewInformerManager constructs a new instance of informerManagerImpl. @@ -124,61 +129,6 @@ type informerManagerImpl struct { resourcesLock sync.RWMutex } -func (s *informerManagerImpl) AddDynamicResources(dynResources []APIResourceMeta, handler cache.ResourceEventHandler, listComplete bool) { - newGVKs := make(map[schema.GroupVersionKind]bool, len(dynResources)) - - addInformerFunc := func(newRes APIResourceMeta) { - dynRes, exist := s.apiResources[newRes.GroupVersionKind] - if !exist { - newRes.isPresent = true - s.apiResources[newRes.GroupVersionKind] = &newRes - // TODO (rzhang): remember the ResourceEventHandlerRegistration and remove it when the resource is deleted - // TODO: handle error which only happens if the informer is stopped - informer := s.informerFactory.ForResource(newRes.GroupVersionResource).Informer() - // Strip away the ManagedFields info from objects to save memory. - // - // TO-DO (chenyu1): evaluate if there are other fields, e.g., owner refs, status, that can also be stripped - // away to save memory. - if err := informer.SetTransform(ctrlcache.TransformStripManagedFields()); err != nil { - // The SetTransform func would only fail if the informer has already started. In this case, - // no further action is needed. - klog.ErrorS(err, "Failed to set transform func for informer", "gvr", newRes.GroupVersionResource) - } - _, _ = informer.AddEventHandler(handler) - klog.InfoS("Added an informer for a new resource", "res", newRes) - } else if !dynRes.isPresent { - // we just mark it as enabled as we should not add another eventhandler to the informer as it's still - // in the informerFactory - // TODO: add the Event handler back - dynRes.isPresent = true - klog.InfoS("Reactivated an informer for a reappeared resource", "res", dynRes) - } - } - - s.resourcesLock.Lock() - defer s.resourcesLock.Unlock() - - // Add the new dynResources that do not exist yet while build a map to speed up lookup - for _, newRes := range dynResources { - newGVKs[newRes.GroupVersionKind] = true - addInformerFunc(newRes) - } - - if !listComplete { - // do not disable any informer if we know the resource list is not complete - return - } - - // mark the disappeared dynResources from the handler map - for gvk, dynRes := range s.apiResources { - if !newGVKs[gvk] && !dynRes.isStaticResource && dynRes.isPresent { - // TODO: Remove the Event handler from the informer using the resourceEventHandlerRegistration during creat - dynRes.isPresent = false - klog.InfoS("Disabled an informer for a disappeared resource", "res", dynRes) - } - } -} - func (s *informerManagerImpl) AddStaticResource(resource APIResourceMeta, handler cache.ResourceEventHandler) { s.resourcesLock.Lock() defer s.resourcesLock.Unlock() @@ -255,6 +205,46 @@ func (s *informerManagerImpl) Stop() { s.cancel() } +func (s *informerManagerImpl) AddEventHandlerToInformer(resource schema.GroupVersionResource, handler cache.ResourceEventHandler) { + // Get or create the informer (this is idempotent - if it exists, we get the same instance) + // The idempotent behavior is important because it is called by both the change detector and informer populator, + // which run concurrently on the leader hub agent. + informer := s.informerFactory.ForResource(resource).Informer() + + // Add the event handler to the informer + _, _ = informer.AddEventHandler(handler) + + klog.V(2).InfoS("Added event handler to informer", "gvr", resource) +} + +func (s *informerManagerImpl) CreateInformerForResource(resource APIResourceMeta) { + s.resourcesLock.Lock() + defer s.resourcesLock.Unlock() + + dynRes, exist := s.apiResources[resource.GroupVersionKind] + if !exist { + // Register this resource in our tracking map + resource.isPresent = true + s.apiResources[resource.GroupVersionKind] = &resource + + // Create the informer without adding any event handler + informer := s.informerFactory.ForResource(resource.GroupVersionResource).Informer() + + // Strip away the ManagedFields info from objects to save memory + if err := informer.SetTransform(ctrlcache.TransformStripManagedFields()); err != nil { + // The SetTransform func would only fail if the informer has already started. + // In this case, no further action is needed. + klog.ErrorS(err, "Failed to set transform func for informer", "gvr", resource.GroupVersionResource) + } + + klog.V(3).InfoS("Created informer without handler", "res", resource) + } else if !dynRes.isPresent { + // Mark it as present again (resource reappeared) + dynRes.isPresent = true + klog.V(3).InfoS("Reactivated informer for reappeared resource", "res", dynRes) + } +} + // ContextForChannel derives a child context from a parent channel. // // The derived context's Done channel is closed when the returned cancel function diff --git a/pkg/utils/informer/informermanager_test.go b/pkg/utils/informer/informermanager_test.go index 53f2ce74a..5c8b1c764 100644 --- a/pkg/utils/informer/informermanager_test.go +++ b/pkg/utils/informer/informermanager_test.go @@ -22,6 +22,8 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/dynamic/fake" "k8s.io/client-go/kubernetes/scheme" + + testhandler "github.com/kubefleet-dev/kubefleet/test/utils/handler" ) func TestGetAllResources(t *testing.T) { @@ -287,3 +289,218 @@ func TestGetAllResources_NotPresent(t *testing.T) { t.Errorf("GetAllResources()[0] = %v, want %v", got, presentRes.GroupVersionResource) } } + +func TestAddEventHandlerToInformer(t *testing.T) { + tests := []struct { + name string + gvr schema.GroupVersionResource + addTwice bool // Test adding handler twice to same informer + }{ + { + name: "add handler to new informer", + gvr: schema.GroupVersionResource{ + Group: "", + Version: "v1", + Resource: "configmaps", + }, + addTwice: false, + }, + { + name: "add multiple handlers to same informer", + gvr: schema.GroupVersionResource{ + Group: "apps", + Version: "v1", + Resource: "deployments", + }, + addTwice: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeClient := fake.NewSimpleDynamicClient(scheme.Scheme) + stopCh := make(chan struct{}) + defer close(stopCh) + + mgr := NewInformerManager(fakeClient, 0, stopCh) + + // Track handler calls + callCount := 0 + handler := &testhandler.TestHandler{ + OnAddFunc: func() { callCount++ }, + } + + // Add the handler + mgr.AddEventHandlerToInformer(tt.gvr, handler) + + // Verify informer was created + implMgr := mgr.(*informerManagerImpl) + informer := implMgr.informerFactory.ForResource(tt.gvr).Informer() + if informer == nil { + t.Fatal("Expected informer to be created") + } + + if tt.addTwice { + // Add another handler to the same informer + handler2 := &testhandler.TestHandler{ + OnAddFunc: func() { callCount++ }, + } + mgr.AddEventHandlerToInformer(tt.gvr, handler2) + } + + // Test is successful if no panic occurred and informer exists + }) + } +} + +func TestCreateInformerForResource(t *testing.T) { + tests := []struct { + name string + resource APIResourceMeta + createTwice bool + markNotPresent bool // Mark resource as not present before second create + }{ + { + name: "create new informer", + resource: APIResourceMeta{ + GroupVersionKind: schema.GroupVersionKind{ + Group: "", + Version: "v1", + Kind: "ConfigMap", + }, + GroupVersionResource: schema.GroupVersionResource{ + Group: "", + Version: "v1", + Resource: "configmaps", + }, + IsClusterScoped: false, + }, + createTwice: false, + }, + { + name: "create informer twice (idempotent)", + resource: APIResourceMeta{ + GroupVersionKind: schema.GroupVersionKind{ + Group: "apps", + Version: "v1", + Kind: "Deployment", + }, + GroupVersionResource: schema.GroupVersionResource{ + Group: "apps", + Version: "v1", + Resource: "deployments", + }, + IsClusterScoped: false, + }, + createTwice: true, + }, + { + name: "recreate informer for reappeared resource", + resource: APIResourceMeta{ + GroupVersionKind: schema.GroupVersionKind{ + Group: "", + Version: "v1", + Kind: "Secret", + }, + GroupVersionResource: schema.GroupVersionResource{ + Group: "", + Version: "v1", + Resource: "secrets", + }, + IsClusterScoped: false, + }, + createTwice: true, + markNotPresent: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + fakeClient := fake.NewSimpleDynamicClient(scheme.Scheme) + stopCh := make(chan struct{}) + defer close(stopCh) + + mgr := NewInformerManager(fakeClient, 0, stopCh) + implMgr := mgr.(*informerManagerImpl) + + // Create the informer + mgr.CreateInformerForResource(tt.resource) + + // Verify resource is tracked + resMeta, exists := implMgr.apiResources[tt.resource.GroupVersionKind] + if !exists { + t.Fatal("Expected resource to be tracked in apiResources map") + } + if !resMeta.isPresent { + t.Error("Expected resource to be marked as present") + } + if resMeta.IsClusterScoped != tt.resource.IsClusterScoped { + t.Errorf("IsClusterScoped = %v, want %v", resMeta.IsClusterScoped, tt.resource.IsClusterScoped) + } + + // Verify informer was created + informer := implMgr.informerFactory.ForResource(tt.resource.GroupVersionResource).Informer() + if informer == nil { + t.Fatal("Expected informer to be created") + } + + if tt.createTwice { + if tt.markNotPresent { + // Mark as not present (simulating resource deletion) + resMeta.isPresent = false + } + + // Create again + mgr.CreateInformerForResource(tt.resource) + + // Verify it's marked as present again + if !resMeta.isPresent { + t.Error("Expected resource to be marked as present after recreation") + } + } + }) + } +} + +func TestCreateInformerForResource_IsIdempotent(t *testing.T) { + // Test that creating the same informer multiple times doesn't cause issues + fakeClient := fake.NewSimpleDynamicClient(scheme.Scheme) + stopCh := make(chan struct{}) + defer close(stopCh) + + mgr := NewInformerManager(fakeClient, 0, stopCh) + implMgr := mgr.(*informerManagerImpl) + + resource := APIResourceMeta{ + GroupVersionKind: schema.GroupVersionKind{ + Group: "", + Version: "v1", + Kind: "Pod", + }, + GroupVersionResource: schema.GroupVersionResource{ + Group: "", + Version: "v1", + Resource: "pods", + }, + IsClusterScoped: false, + } + + // Create multiple times + for i := 0; i < 3; i++ { + mgr.CreateInformerForResource(resource) + } + + // Should only have one entry in apiResources + if len(implMgr.apiResources) != 1 { + t.Errorf("Expected 1 resource in apiResources, got %d", len(implMgr.apiResources)) + } + + // Verify resource is still tracked correctly + resMeta, exists := implMgr.apiResources[resource.GroupVersionKind] + if !exists { + t.Fatal("Expected resource to be tracked") + } + if !resMeta.isPresent { + t.Error("Expected resource to be marked as present") + } +} diff --git a/pkg/utils/informer/readiness/readiness.go b/pkg/utils/informer/readiness/readiness.go index 23e12cf86..749be5592 100644 --- a/pkg/utils/informer/readiness/readiness.go +++ b/pkg/utils/informer/readiness/readiness.go @@ -37,7 +37,7 @@ func InformerReadinessChecker(resourceInformer informer.Manager) func(*http.Requ // Require ALL informer caches to be synced before marking ready allResources := resourceInformer.GetAllResources() if len(allResources) == 0 { - // This can happen during startup when the ResourceInformer is created but the ChangeDetector + // This can happen during startup when the ResourceInformer is created but the InformerPopulator // hasn't discovered and registered any resources yet via AddDynamicResources(). return fmt.Errorf("resource informer not ready: no resources registered") } diff --git a/pkg/webhook/webhook.go b/pkg/webhook/webhook.go index 4f4c89b56..7dd698c89 100644 --- a/pkg/webhook/webhook.go +++ b/pkg/webhook/webhook.go @@ -70,6 +70,7 @@ import ( const ( fleetWebhookCertFileName = "tls.crt" fleetWebhookKeyFileName = "tls.key" + fleetWebhookCertSecretName = "fleet-webhook-server-cert" //nolint:gosec // This is a Secret name, not a credential fleetValidatingWebhookCfgName = "fleet-validating-webhook-configuration" fleetGuardRailWebhookCfgName = "fleet-guard-rail-webhook-configuration" fleetMutatingWebhookCfgName = "fleet-mutating-webhook-configuration" @@ -162,9 +163,16 @@ type Config struct { denyModifyMemberClusterLabels bool enableWorkload bool + // useCertManager indicates whether cert-manager is used for certificate management + useCertManager bool + // webhookCertName is the name of the Certificate resource created by cert-manager. + // This is referenced in the cert-manager.io/inject-ca-from annotation. + webhookCertName string + // webhookCertSecretName is the name of the Secret containing webhook certificates + webhookCertSecretName string } -func NewWebhookConfig(mgr manager.Manager, webhookServiceName string, port int32, clientConnectionType *options.WebhookClientConnectionType, certDir string, enableGuardRail bool, denyModifyMemberClusterLabels bool, enableWorkload bool) (*Config, error) { +func NewWebhookConfig(mgr manager.Manager, webhookServiceName string, port int32, clientConnectionType *options.WebhookClientConnectionType, certDir string, enableGuardRail bool, denyModifyMemberClusterLabels bool, enableWorkload bool, useCertManager bool, webhookCertName string, webhookCertSecretName string) (*Config, error) { // We assume the Pod namespace should be passed to env through downward API in the Pod spec. namespace := os.Getenv("POD_NAMESPACE") if namespace == "" { @@ -180,13 +188,26 @@ func NewWebhookConfig(mgr manager.Manager, webhookServiceName string, port int32 enableGuardRail: enableGuardRail, denyModifyMemberClusterLabels: denyModifyMemberClusterLabels, enableWorkload: enableWorkload, + useCertManager: useCertManager, + webhookCertName: webhookCertName, + webhookCertSecretName: webhookCertSecretName, + } + + if useCertManager { + // When using cert-manager, the CA bundle is automatically injected by cert-manager's CA injector + // based on the cert-manager.io/inject-ca-from annotation. We don't need to load or set the CA here. + // The certificates (tls.crt and tls.key) are mounted by Kubernetes and used automatically by the webhook server. + klog.V(2).InfoS("Using cert-manager for certificate management", "certDir", certDir) + } else { + // Use self-signed certificate generation (original flow) + caPEM, err := w.genCertificate(certDir) + if err != nil { + return nil, fmt.Errorf("failed to generate self-signed certificate: %w", err) + } + w.caPEM = caPEM } - caPEM, err := w.genCertificate(certDir) - if err != nil { - return nil, err - } - w.caPEM = caPEM - return &w, err + + return &w, nil } func (w *Config) Start(ctx context.Context) error { @@ -198,6 +219,56 @@ func (w *Config) Start(ctx context.Context) error { return nil } +// CheckCAInjection verifies that cert-manager has injected the CA bundle into all webhook configurations. +// This is used as a readiness check when useCertManager is enabled. +// Returns nil when CA bundles are injected, or an error if they are missing. +func (w *Config) CheckCAInjection(ctx context.Context) error { + if !w.useCertManager { + // Not using cert-manager, no need to check + return nil + } + + cl := w.mgr.GetClient() + + // Check mutating webhook configuration + var mutatingCfg admv1.MutatingWebhookConfiguration + if err := cl.Get(ctx, client.ObjectKey{Name: fleetMutatingWebhookCfgName}, &mutatingCfg); err != nil { + return fmt.Errorf("failed to get MutatingWebhookConfiguration %s: %w", fleetMutatingWebhookCfgName, err) + } + for _, webhook := range mutatingCfg.Webhooks { + if len(webhook.ClientConfig.CABundle) == 0 { + return fmt.Errorf("MutatingWebhookConfiguration %s webhook %s is missing CA bundle (cert-manager injection pending)", fleetMutatingWebhookCfgName, webhook.Name) + } + } + + // Check validating webhook configuration + var validatingCfg admv1.ValidatingWebhookConfiguration + if err := cl.Get(ctx, client.ObjectKey{Name: fleetValidatingWebhookCfgName}, &validatingCfg); err != nil { + return fmt.Errorf("failed to get ValidatingWebhookConfiguration %s: %w", fleetValidatingWebhookCfgName, err) + } + for _, webhook := range validatingCfg.Webhooks { + if len(webhook.ClientConfig.CABundle) == 0 { + return fmt.Errorf("ValidatingWebhookConfiguration %s webhook %s is missing CA bundle (cert-manager injection pending)", fleetValidatingWebhookCfgName, webhook.Name) + } + } + + // Check guard rail webhook configuration if enabled + if w.enableGuardRail { + var guardRailCfg admv1.ValidatingWebhookConfiguration + if err := cl.Get(ctx, client.ObjectKey{Name: fleetGuardRailWebhookCfgName}, &guardRailCfg); err != nil { + return fmt.Errorf("failed to get ValidatingWebhookConfiguration %s: %w", fleetGuardRailWebhookCfgName, err) + } + for _, webhook := range guardRailCfg.Webhooks { + if len(webhook.ClientConfig.CABundle) == 0 { + return fmt.Errorf("ValidatingWebhookConfiguration %s webhook %s is missing CA bundle (cert-manager injection pending)", fleetGuardRailWebhookCfgName, webhook.Name) + } + } + } + + klog.V(2).InfoS("All webhook configurations have CA bundles injected by cert-manager") + return nil +} + // createFleetWebhookConfiguration creates the ValidatingWebhookConfiguration object for the webhook. func (w *Config) createFleetWebhookConfiguration(ctx context.Context) error { if err := w.createMutatingWebhookConfiguration(ctx, w.buildFleetMutatingWebhooks(), fleetMutatingWebhookCfgName); err != nil { @@ -214,14 +285,29 @@ func (w *Config) createFleetWebhookConfiguration(ctx context.Context) error { return nil } +// buildWebhookAnnotations creates annotations for webhook configurations. +// When using cert-manager, adds the inject-ca-from annotation to automatically inject the CA bundle. +func (w *Config) buildWebhookAnnotations() map[string]string { + annotations := map[string]string{} + if w.useCertManager { + // Tell cert-manager's CA injector to automatically inject the CA bundle from the Certificate resource. + // Format: "namespace/certificate-name" - references the Certificate resource, not the Secret. + annotations["cert-manager.io/inject-ca-from"] = fmt.Sprintf("%s/%s", w.serviceNamespace, w.webhookCertName) + } + return annotations +} + // createMutatingWebhookConfiguration creates the MutatingWebhookConfiguration object for the webhook. func (w *Config) createMutatingWebhookConfiguration(ctx context.Context, webhooks []admv1.MutatingWebhook, configName string) error { + annotations := w.buildWebhookAnnotations() + mutatingWebhookConfig := admv1.MutatingWebhookConfiguration{ ObjectMeta: metav1.ObjectMeta{ Name: configName, Labels: map[string]string{ "admissions.enforcer/disabled": "true", }, + Annotations: annotations, }, Webhooks: webhooks, } @@ -269,12 +355,15 @@ func (w *Config) buildFleetMutatingWebhooks() []admv1.MutatingWebhook { } func (w *Config) createValidatingWebhookConfiguration(ctx context.Context, webhooks []admv1.ValidatingWebhook, configName string) error { + annotations := w.buildWebhookAnnotations() + validatingWebhookConfig := admv1.ValidatingWebhookConfiguration{ ObjectMeta: metav1.ObjectMeta{ Name: configName, Labels: map[string]string{ "admissions.enforcer/disabled": "true", }, + Annotations: annotations, }, Webhooks: webhooks, } @@ -631,9 +720,14 @@ func (w *Config) createClientConfig(validationPath string) admv1.WebhookClientCo } serviceEndpoint := w.serviceURL + validationPath serviceRef.Path = ptr.To(validationPath) - config := admv1.WebhookClientConfig{ - CABundle: w.caPEM, + config := admv1.WebhookClientConfig{} + + // When using cert-manager, leave CABundle empty so cert-manager's CA injector can populate it. + // The cert-manager.io/inject-ca-from annotation triggers automatic CA injection. + if !w.useCertManager { + config.CABundle = w.caPEM } + switch *w.clientConnectionType { case options.Service: config.Service = &serviceRef diff --git a/pkg/webhook/webhook_test.go b/pkg/webhook/webhook_test.go index f1975b30b..d42a69790 100644 --- a/pkg/webhook/webhook_test.go +++ b/pkg/webhook/webhook_test.go @@ -1,15 +1,23 @@ package webhook import ( + "context" + "strings" "testing" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/assert" + admv1 "k8s.io/api/admissionregistration/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/manager" "github.com/kubefleet-dev/kubefleet/cmd/hubagent/options" "github.com/kubefleet-dev/kubefleet/pkg/utils" + testmanager "github.com/kubefleet-dev/kubefleet/test/utils/manager" ) func TestBuildFleetMutatingWebhooks(t *testing.T) { @@ -110,6 +118,7 @@ func TestNewWebhookConfig(t *testing.T) { enableGuardRail bool denyModifyMemberClusterLabels bool enableWorkload bool + useCertManager bool want *Config wantErr bool }{ @@ -119,10 +128,11 @@ func TestNewWebhookConfig(t *testing.T) { webhookServiceName: "test-webhook", port: 8080, clientConnectionType: nil, - certDir: "/tmp/cert", + certDir: t.TempDir(), enableGuardRail: true, denyModifyMemberClusterLabels: true, enableWorkload: false, + useCertManager: false, want: &Config{ serviceNamespace: "test-namespace", serviceName: "test-webhook", @@ -131,6 +141,30 @@ func TestNewWebhookConfig(t *testing.T) { enableGuardRail: true, denyModifyMemberClusterLabels: true, enableWorkload: false, + useCertManager: false, + }, + wantErr: false, + }, + { + name: "valid input with cert-manager", + mgr: nil, + webhookServiceName: "test-webhook", + port: 8080, + clientConnectionType: nil, + certDir: t.TempDir(), + enableGuardRail: true, + denyModifyMemberClusterLabels: true, + enableWorkload: false, + useCertManager: true, + want: &Config{ + serviceNamespace: "test-namespace", + serviceName: "test-webhook", + servicePort: 8080, + clientConnectionType: nil, + enableGuardRail: true, + denyModifyMemberClusterLabels: true, + enableWorkload: false, + useCertManager: true, }, wantErr: false, }, @@ -138,8 +172,8 @@ func TestNewWebhookConfig(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { t.Setenv("POD_NAMESPACE", "test-namespace") - defer t.Setenv("POD_NAMESPACE", "") - got, err := NewWebhookConfig(tt.mgr, tt.webhookServiceName, tt.port, tt.clientConnectionType, tt.certDir, tt.enableGuardRail, tt.denyModifyMemberClusterLabels, tt.enableWorkload) + + got, err := NewWebhookConfig(tt.mgr, tt.webhookServiceName, tt.port, tt.clientConnectionType, tt.certDir, tt.enableGuardRail, tt.denyModifyMemberClusterLabels, tt.enableWorkload, tt.useCertManager, "fleet-webhook-server-cert", "fleet-webhook-server-cert") if (err != nil) != tt.wantErr { t.Errorf("NewWebhookConfig() error = %v, wantErr %v", err, tt.wantErr) return @@ -161,3 +195,400 @@ func TestNewWebhookConfig(t *testing.T) { }) } } + +func TestNewWebhookConfig_SelfSignedCertError(t *testing.T) { + t.Setenv("POD_NAMESPACE", "test-namespace") + + // Use an invalid certDir (read-only location) to force genCertificate to fail + invalidCertDir := "/proc/invalid-cert-dir" + + clientConnectionType := options.Service + _, err := NewWebhookConfig( + nil, + "test-service", + 443, + &clientConnectionType, + invalidCertDir, + false, // enableGuardRail + false, // denyModifyMemberClusterLabels + false, // enableWorkload + false, // useCertManager = false to trigger self-signed path + "fleet-webhook-server-cert", // webhookCertName + "fleet-webhook-server-cert", // webhookCertSecretName + ) + + if err == nil { + t.Fatal("Expected error when genCertificate fails, got nil") + } + + expectedErrMsg := "failed to generate self-signed certificate" + if !strings.Contains(err.Error(), expectedErrMsg) { + t.Errorf("Expected error to contain '%s', got: %v", expectedErrMsg, err) + } +} + +func TestBuildWebhookAnnotations(t *testing.T) { + testCases := map[string]struct { + config Config + expectedAnnotions map[string]string + }{ + "useCertManager is false - returns empty map": { + config: Config{ + useCertManager: false, + serviceNamespace: "fleet-system", + webhookCertName: "fleet-webhook-server-cert", + webhookCertSecretName: "fleet-webhook-server-cert", + }, + expectedAnnotions: map[string]string{}, + }, + "useCertManager is true - returns annotation with correct format": { + config: Config{ + useCertManager: true, + serviceNamespace: "fleet-system", + webhookCertName: "fleet-webhook-server-cert", + webhookCertSecretName: "fleet-webhook-server-cert", + }, + expectedAnnotions: map[string]string{ + "cert-manager.io/inject-ca-from": "fleet-system/fleet-webhook-server-cert", + }, + }, + "useCertManager is true with custom namespace and cert name": { + config: Config{ + useCertManager: true, + serviceNamespace: "custom-namespace", + webhookCertName: "custom-webhook-cert", + webhookCertSecretName: "custom-webhook-secret", + }, + expectedAnnotions: map[string]string{ + "cert-manager.io/inject-ca-from": "custom-namespace/custom-webhook-cert", + }, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + got := tc.config.buildWebhookAnnotations() + if diff := cmp.Diff(tc.expectedAnnotions, got); diff != "" { + t.Errorf("buildWebhookAnnotations() mismatch (-want +got):\n%s", diff) + } + }) + } +} + +func TestCreateClientConfig(t *testing.T) { + serviceConnectionType := options.Service + testCases := map[string]struct { + config Config + validationPath string + expectCABundle bool + }{ + "useCertManager is false - CABundle should be set": { + config: Config{ + useCertManager: false, + caPEM: []byte("test-ca-bundle"), + serviceNamespace: "fleet-system", + serviceName: "fleet-webhook-service", + servicePort: 443, + clientConnectionType: &serviceConnectionType, + }, + validationPath: "/validate", + expectCABundle: true, + }, + "useCertManager is true - CABundle should be empty for cert-manager injection": { + config: Config{ + useCertManager: true, + caPEM: []byte("test-ca-bundle"), + serviceNamespace: "fleet-system", + serviceName: "fleet-webhook-service", + servicePort: 443, + clientConnectionType: &serviceConnectionType, + }, + validationPath: "/validate", + expectCABundle: false, + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + clientConfig := tc.config.createClientConfig(tc.validationPath) + + if tc.expectCABundle { + if len(clientConfig.CABundle) == 0 { + t.Errorf("Expected CABundle to be set, but it was empty") + } + if diff := cmp.Diff(tc.config.caPEM, clientConfig.CABundle); diff != "" { + t.Errorf("CABundle mismatch (-want +got):\n%s", diff) + } + } else { + if len(clientConfig.CABundle) != 0 { + t.Errorf("Expected CABundle to be empty for cert-manager injection, but got: %v", clientConfig.CABundle) + } + } + + // Verify service reference is set correctly + if clientConfig.Service == nil { + t.Errorf("Expected Service to be set") + } else { + if clientConfig.Service.Namespace != tc.config.serviceNamespace { + t.Errorf("Expected Service.Namespace=%s, got %s", tc.config.serviceNamespace, clientConfig.Service.Namespace) + } + if clientConfig.Service.Name != tc.config.serviceName { + t.Errorf("Expected Service.Name=%s, got %s", tc.config.serviceName, clientConfig.Service.Name) + } + if *clientConfig.Service.Port != tc.config.servicePort { + t.Errorf("Expected Service.Port=%d, got %d", tc.config.servicePort, *clientConfig.Service.Port) + } + if *clientConfig.Service.Path != tc.validationPath { + t.Errorf("Expected Service.Path=%s, got %s", tc.validationPath, *clientConfig.Service.Path) + } + } + }) + } +} + +func TestCheckCAInjection(t *testing.T) { + testCases := map[string]struct { + config Config + existingObjects []client.Object + expectError bool + expectedErrorMsg string + }{ + "useCertManager is false - returns nil without checking": { + config: Config{ + useCertManager: false, + enableGuardRail: false, + }, + expectError: false, + }, + "useCertManager is true, all CA bundles present": { + config: Config{ + useCertManager: true, + enableGuardRail: false, + }, + existingObjects: []client.Object{ + &admv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetMutatingWebhookCfgName}, + Webhooks: []admv1.MutatingWebhook{ + { + Name: "test-mutating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetValidatingWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-validating-webhook-1", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + { + Name: "test-validating-webhook-2", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + }, + expectError: false, + }, + "useCertManager is true, mutating webhook missing CA bundle": { + config: Config{ + useCertManager: true, + enableGuardRail: false, + }, + existingObjects: []client.Object{ + &admv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetMutatingWebhookCfgName}, + Webhooks: []admv1.MutatingWebhook{ + { + Name: "test-mutating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: nil, // Missing CA bundle + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetValidatingWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-validating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + }, + expectError: true, + expectedErrorMsg: "test-mutating-webhook is missing CA bundle", + }, + "useCertManager is true, validating webhook missing CA bundle": { + config: Config{ + useCertManager: true, + enableGuardRail: false, + }, + existingObjects: []client.Object{ + &admv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetMutatingWebhookCfgName}, + Webhooks: []admv1.MutatingWebhook{ + { + Name: "test-mutating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetValidatingWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-validating-webhook-1", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + { + Name: "test-validating-webhook-2", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte{}, // Empty CA bundle + }, + }, + }, + }, + }, + expectError: true, + expectedErrorMsg: "test-validating-webhook-2 is missing CA bundle", + }, + "useCertManager is true with guard rail, all CA bundles present": { + config: Config{ + useCertManager: true, + enableGuardRail: true, + }, + existingObjects: []client.Object{ + &admv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetMutatingWebhookCfgName}, + Webhooks: []admv1.MutatingWebhook{ + { + Name: "test-mutating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetValidatingWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-validating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetGuardRailWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-guard-rail-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + }, + expectError: false, + }, + "useCertManager is true with guard rail, guard rail webhook missing CA bundle": { + config: Config{ + useCertManager: true, + enableGuardRail: true, + }, + existingObjects: []client.Object{ + &admv1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetMutatingWebhookCfgName}, + Webhooks: []admv1.MutatingWebhook{ + { + Name: "test-mutating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetValidatingWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-validating-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: []byte("fake-ca-bundle"), + }, + }, + }, + }, + &admv1.ValidatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{Name: fleetGuardRailWebhookCfgName}, + Webhooks: []admv1.ValidatingWebhook{ + { + Name: "test-guard-rail-webhook", + ClientConfig: admv1.WebhookClientConfig{ + CABundle: nil, // Missing CA bundle + }, + }, + }, + }, + }, + expectError: true, + expectedErrorMsg: "test-guard-rail-webhook is missing CA bundle", + }, + "useCertManager is true, webhook configuration not found": { + config: Config{ + useCertManager: true, + enableGuardRail: false, + }, + existingObjects: []client.Object{}, + expectError: true, + expectedErrorMsg: "failed to get MutatingWebhookConfiguration", + }, + } + + for name, tc := range testCases { + t.Run(name, func(t *testing.T) { + // Create a fake client with a proper scheme + scheme := runtime.NewScheme() + _ = admv1.AddToScheme(scheme) + fakeClient := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(tc.existingObjects...). + Build() + + // Create a fake manager that returns our fake client + tc.config.mgr = &testmanager.FakeManager{Client: fakeClient} + + err := tc.config.CheckCAInjection(context.Background()) + + if tc.expectError { + if err == nil { + t.Errorf("Expected error but got nil") + } else if !strings.Contains(err.Error(), tc.expectedErrorMsg) { + t.Errorf("Expected error message to contain %q, but got: %v", tc.expectedErrorMsg, err) + } + } else { + if err != nil { + t.Errorf("Expected no error but got: %v", err) + } + } + }) + } +} diff --git a/test/e2e/setup.sh b/test/e2e/setup.sh index d4b6b8a2a..5edc75480 100755 --- a/test/e2e/setup.sh +++ b/test/e2e/setup.sh @@ -28,6 +28,7 @@ export PROPERTY_PROVIDER="${PROPERTY_PROVIDER:-azure}" export USE_PREDEFINED_REGIONS="${USE_PREDEFINED_REGIONS:-false}" export RESOURCE_SNAPSHOT_CREATION_MINIMUM_INTERVAL="${RESOURCE_SNAPSHOT_CREATION_MINIMUM_INTERVAL:-0m}" export RESOURCE_CHANGES_COLLECTION_DURATION="${RESOURCE_CHANGES_COLLECTION_DURATION:-0m}" +export CERT_MANAGER_VERSION="${CERT_MANAGER_VERSION:-v1.16.2}" # The pre-defined regions; if the AKS property provider is used. # @@ -114,14 +115,31 @@ done # Install the helm charts -# Install the hub agent to the hub cluster kind export kubeconfig --name $HUB_CLUSTER + +# Install cert-manager first (required for webhook certificates) +echo "Installing cert-manager..." + +# Install cert-manager using Helm to avoid ownership conflicts with hub-agent chart +helm repo add jetstack https://charts.jetstack.io --force-update +helm repo update +helm install cert-manager jetstack/cert-manager \ + --namespace cert-manager \ + --create-namespace \ + --version $CERT_MANAGER_VERSION \ + --set crds.enabled=true \ + --wait \ + --timeout=300s + +# Install the hub agent to the hub cluster helm install hub-agent ../../charts/hub-agent/ \ --set image.pullPolicy=Never \ --set image.repository=$REGISTRY/$HUB_AGENT_IMAGE \ --set image.tag=$TAG \ --set namespace=fleet-system \ --set logVerbosity=5 \ + --set replicaCount=3 \ + --set useCertManager=true \ --set enableWebhook=true \ --set enableWorkload=true \ --set webhookClientConnectionType=service \ diff --git a/test/utils/handler/handler.go b/test/utils/handler/handler.go new file mode 100644 index 000000000..67d382f48 --- /dev/null +++ b/test/utils/handler/handler.go @@ -0,0 +1,47 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package handler provides test utilities for Kubernetes event handlers. +package handler + +// TestHandler is a simple implementation of cache.ResourceEventHandler for testing. +// It allows tests to track when specific event handler methods are called. +type TestHandler struct { + OnAddFunc func() + OnUpdateFunc func() + OnDeleteFunc func() +} + +// OnAdd is called when an object is added. +func (h *TestHandler) OnAdd(obj interface{}, isInInitialList bool) { + if h.OnAddFunc != nil { + h.OnAddFunc() + } +} + +// OnUpdate is called when an object is updated. +func (h *TestHandler) OnUpdate(oldObj, newObj interface{}) { + if h.OnUpdateFunc != nil { + h.OnUpdateFunc() + } +} + +// OnDelete is called when an object is deleted. +func (h *TestHandler) OnDelete(obj interface{}) { + if h.OnDeleteFunc != nil { + h.OnDeleteFunc() + } +} diff --git a/test/utils/informer/manager.go b/test/utils/informer/manager.go index fde757292..004ef9364 100644 --- a/test/utils/informer/manager.go +++ b/test/utils/informer/manager.go @@ -185,3 +185,10 @@ func (m *FakeManager) WaitForCacheSync() { func (m *FakeManager) GetClient() dynamic.Interface { return nil } +func (m *FakeManager) AddEventHandlerToInformer(_ schema.GroupVersionResource, _ cache.ResourceEventHandler) { + // No-op for testing +} + +func (m *FakeManager) CreateInformerForResource(_ informer.APIResourceMeta) { + // No-op for testing +} diff --git a/test/utils/manager/manager.go b/test/utils/manager/manager.go new file mode 100644 index 000000000..adceb6f37 --- /dev/null +++ b/test/utils/manager/manager.go @@ -0,0 +1,35 @@ +/* +Copyright 2025 The KubeFleet Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Package manager provides a fake controller-runtime manager for testing. +package manager + +import ( + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/manager" +) + +// FakeManager is a fake controller-runtime manager for testing. +type FakeManager struct { + manager.Manager + // Client is the Kubernetes client used by this manager. + Client client.Client +} + +// GetClient returns the configured client. +func (f *FakeManager) GetClient() client.Client { + return f.Client +}