From adfa9f6e54e3940296bca3bc24fb174a0375dba3 Mon Sep 17 00:00:00 2001 From: Daniel Orbach Date: Tue, 5 May 2026 16:05:18 +0300 Subject: [PATCH 1/7] feat(helm): add opt-in controller metrics Service Surface the controller manager's /metrics endpoint through a dedicated Service so cluster scrapers can target it without port-forwarding to individual pods. The new resource is gated on `controller.metrics.enabled` and defaults off so existing installations are unchanged. The Service mirrors the kmcp sub-chart layout (separate metrics Service, named `https` port) to keep the umbrella chart internally consistent and to let users author one ServiceMonitor pattern that targets both controllers. RBAC and Deployment wiring follow in subsequent commits. Signed-off-by: Daniel Orbach --- .../templates/controller-metrics-service.yaml | 18 ++++ .../controller-metrics-service_test.yaml | 93 +++++++++++++++++++ helm/kagent/values.yaml | 13 +++ 3 files changed, 124 insertions(+) create mode 100644 helm/kagent/templates/controller-metrics-service.yaml create mode 100644 helm/kagent/tests/controller-metrics-service_test.yaml diff --git a/helm/kagent/templates/controller-metrics-service.yaml b/helm/kagent/templates/controller-metrics-service.yaml new file mode 100644 index 000000000..622a1a2bb --- /dev/null +++ b/helm/kagent/templates/controller-metrics-service.yaml @@ -0,0 +1,18 @@ +{{- if .Values.controller.metrics.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "kagent.fullname" . }}-controller-metrics + namespace: {{ include "kagent.namespace" . }} + labels: + {{- include "kagent.controller.labels" . | nindent 4 }} +spec: + type: {{ .Values.controller.metrics.service.type }} + ports: + - name: {{ ternary "https" "http-metrics" .Values.controller.metrics.secureServing }} + port: {{ .Values.controller.metrics.service.port }} + targetPort: {{ .Values.controller.metrics.service.targetPort }} + protocol: TCP + selector: + {{- include "kagent.controller.selectorLabels" . | nindent 4 }} +{{- end }} diff --git a/helm/kagent/tests/controller-metrics-service_test.yaml b/helm/kagent/tests/controller-metrics-service_test.yaml new file mode 100644 index 000000000..c09cbba5b --- /dev/null +++ b/helm/kagent/tests/controller-metrics-service_test.yaml @@ -0,0 +1,93 @@ +suite: test controller metrics service +templates: + - controller-metrics-service.yaml +tests: + - it: should not render by default + asserts: + - hasDocuments: + count: 0 + + - it: should render when metrics are enabled + set: + controller.metrics.enabled: true + asserts: + - isKind: + of: Service + - equal: + path: metadata.name + value: RELEASE-NAME-controller-metrics + - equal: + path: spec.type + value: ClusterIP + - hasDocuments: + count: 1 + + - it: should expose https port from values + set: + controller.metrics.enabled: true + asserts: + - equal: + path: spec.ports[0].name + value: https + - equal: + path: spec.ports[0].port + value: 8443 + - equal: + path: spec.ports[0].targetPort + value: 8443 + - equal: + path: spec.ports[0].protocol + value: TCP + + - it: should respect custom port and targetPort + set: + controller.metrics.enabled: true + controller.metrics.service.port: 9443 + controller.metrics.service.targetPort: 9443 + asserts: + - equal: + path: spec.ports[0].port + value: 9443 + - equal: + path: spec.ports[0].targetPort + value: 9443 + + - it: should rename port when secure serving disabled + set: + controller.metrics.enabled: true + controller.metrics.secureServing: false + asserts: + - equal: + path: spec.ports[0].name + value: http-metrics + + - it: should select controller pods + set: + controller.metrics.enabled: true + asserts: + - equal: + path: spec.selector["app.kubernetes.io/name"] + value: kagent + - equal: + path: spec.selector["app.kubernetes.io/instance"] + value: RELEASE-NAME + - equal: + path: spec.selector["app.kubernetes.io/component"] + value: controller + + - it: should be in correct namespace + set: + controller.metrics.enabled: true + asserts: + - equal: + path: metadata.namespace + value: NAMESPACE + + - it: should use custom namespace when overridden + set: + controller.metrics.enabled: true + namespaceOverride: "custom-namespace" + asserts: + - equal: + path: metadata.namespace + value: custom-namespace diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index 446cc54e7..37150e863 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -222,6 +222,19 @@ controller: ports: port: 8083 targetPort: 8083 + # -- Prometheus-style /metrics endpoint for the controller manager. + # When enabled, provisions a dedicated metrics Service plus the ClusterRoles + # required for authenticated scrapes. Bind `-metrics-reader` to + # your Prometheus ServiceAccount to grant scrape access. + # @default -- disabled + metrics: + enabled: false + bindAddress: ":8443" + secureServing: true + service: + type: ClusterIP + port: 8443 + targetPort: 8443 env: [] envFrom: [] From f958b5500829baa7a7af18a190abb9179066402e Mon Sep 17 00:00:00 2001 From: Daniel Orbach Date: Tue, 5 May 2026 16:06:07 +0300 Subject: [PATCH 2/7] feat(helm): add RBAC for authenticated controller metrics scrapes The controller manager already wires controller-runtime's WithAuthenticationAndAuthorization filter on the metrics endpoint when --metrics-secure is true (the default). For that filter to validate incoming scrape requests it needs permission to issue TokenReview and SubjectAccessReview calls; ship those rights as a dedicated auth role bound to the controller ServiceAccount. Also ship an unbound `-metrics-reader` ClusterRole granting `get` on the `/metrics` non-resource URL. The chart deliberately leaves the binding to the cluster operator: which ServiceAccount (Prometheus, VictoriaMetrics, OpenTelemetry, etc.) is allowed to scrape is a deployment-time choice, not a chart-time one. The auth role and its binding are gated additionally on `controller.metrics.secureServing` since they are no-ops when the filter is disabled. All resources are gated on `controller.metrics.enabled` and default off. Signed-off-by: Daniel Orbach --- .../rbac/metrics-auth-clusterrole.yaml | 21 ++++ .../rbac/metrics-auth-clusterrolebinding.yaml | 16 +++ .../rbac/metrics-reader-clusterrole.yaml | 13 ++ .../tests/controller-metrics-rbac_test.yaml | 113 ++++++++++++++++++ 4 files changed, 163 insertions(+) create mode 100644 helm/kagent/templates/rbac/metrics-auth-clusterrole.yaml create mode 100644 helm/kagent/templates/rbac/metrics-auth-clusterrolebinding.yaml create mode 100644 helm/kagent/templates/rbac/metrics-reader-clusterrole.yaml create mode 100644 helm/kagent/tests/controller-metrics-rbac_test.yaml diff --git a/helm/kagent/templates/rbac/metrics-auth-clusterrole.yaml b/helm/kagent/templates/rbac/metrics-auth-clusterrole.yaml new file mode 100644 index 000000000..64ca06f38 --- /dev/null +++ b/helm/kagent/templates/rbac/metrics-auth-clusterrole.yaml @@ -0,0 +1,21 @@ +{{- if and .Values.controller.metrics.enabled .Values.controller.metrics.secureServing }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "kagent.fullname" . }}-metrics-auth-role + labels: + {{- include "kagent.controller.labels" . | nindent 4 }} +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +{{- end }} diff --git a/helm/kagent/templates/rbac/metrics-auth-clusterrolebinding.yaml b/helm/kagent/templates/rbac/metrics-auth-clusterrolebinding.yaml new file mode 100644 index 000000000..2baab225f --- /dev/null +++ b/helm/kagent/templates/rbac/metrics-auth-clusterrolebinding.yaml @@ -0,0 +1,16 @@ +{{- if and .Values.controller.metrics.enabled .Values.controller.metrics.secureServing }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "kagent.fullname" . }}-metrics-auth-rolebinding + labels: + {{- include "kagent.controller.labels" . | nindent 4 }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "kagent.fullname" . }}-metrics-auth-role +subjects: + - kind: ServiceAccount + name: {{ include "kagent.fullname" . }}-controller + namespace: {{ include "kagent.namespace" . }} +{{- end }} diff --git a/helm/kagent/templates/rbac/metrics-reader-clusterrole.yaml b/helm/kagent/templates/rbac/metrics-reader-clusterrole.yaml new file mode 100644 index 000000000..06c171d9e --- /dev/null +++ b/helm/kagent/templates/rbac/metrics-reader-clusterrole.yaml @@ -0,0 +1,13 @@ +{{- if .Values.controller.metrics.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "kagent.fullname" . }}-metrics-reader + labels: + {{- include "kagent.controller.labels" . | nindent 4 }} +rules: + - nonResourceURLs: + - "/metrics" + verbs: + - get +{{- end }} diff --git a/helm/kagent/tests/controller-metrics-rbac_test.yaml b/helm/kagent/tests/controller-metrics-rbac_test.yaml new file mode 100644 index 000000000..f721172ae --- /dev/null +++ b/helm/kagent/tests/controller-metrics-rbac_test.yaml @@ -0,0 +1,113 @@ +suite: test controller metrics rbac +templates: + - rbac/metrics-reader-clusterrole.yaml + - rbac/metrics-auth-clusterrole.yaml + - rbac/metrics-auth-clusterrolebinding.yaml +tests: + - it: should not render any metrics rbac by default + asserts: + - hasDocuments: + count: 0 + template: rbac/metrics-reader-clusterrole.yaml + - hasDocuments: + count: 0 + template: rbac/metrics-auth-clusterrole.yaml + - hasDocuments: + count: 0 + template: rbac/metrics-auth-clusterrolebinding.yaml + + - it: should render metrics-reader when metrics enabled + set: + controller.metrics.enabled: true + template: rbac/metrics-reader-clusterrole.yaml + asserts: + - isKind: + of: ClusterRole + - equal: + path: metadata.name + value: RELEASE-NAME-metrics-reader + - equal: + path: rules[0].nonResourceURLs[0] + value: /metrics + - equal: + path: rules[0].verbs[0] + value: get + + - it: should render auth role and binding when secure serving enabled + set: + controller.metrics.enabled: true + asserts: + - isKind: + of: ClusterRole + template: rbac/metrics-auth-clusterrole.yaml + - equal: + path: metadata.name + value: RELEASE-NAME-metrics-auth-role + template: rbac/metrics-auth-clusterrole.yaml + - isKind: + of: ClusterRoleBinding + template: rbac/metrics-auth-clusterrolebinding.yaml + - equal: + path: metadata.name + value: RELEASE-NAME-metrics-auth-rolebinding + template: rbac/metrics-auth-clusterrolebinding.yaml + + - it: auth role should grant token and subject access reviews + set: + controller.metrics.enabled: true + template: rbac/metrics-auth-clusterrole.yaml + asserts: + - equal: + path: rules[0].apiGroups[0] + value: authentication.k8s.io + - equal: + path: rules[0].resources[0] + value: tokenreviews + - equal: + path: rules[0].verbs[0] + value: create + - equal: + path: rules[1].apiGroups[0] + value: authorization.k8s.io + - equal: + path: rules[1].resources[0] + value: subjectaccessreviews + - equal: + path: rules[1].verbs[0] + value: create + + - it: auth rolebinding should reference controller serviceaccount + set: + controller.metrics.enabled: true + template: rbac/metrics-auth-clusterrolebinding.yaml + asserts: + - equal: + path: roleRef.kind + value: ClusterRole + - equal: + path: roleRef.name + value: RELEASE-NAME-metrics-auth-role + - equal: + path: subjects[0].kind + value: ServiceAccount + - equal: + path: subjects[0].name + value: RELEASE-NAME-controller + - equal: + path: subjects[0].namespace + value: NAMESPACE + + - it: should skip auth role when secure serving disabled + set: + controller.metrics.enabled: true + controller.metrics.secureServing: false + asserts: + - hasDocuments: + count: 1 + template: rbac/metrics-reader-clusterrole.yaml + - hasDocuments: + count: 0 + template: rbac/metrics-auth-clusterrole.yaml + - hasDocuments: + count: 0 + template: rbac/metrics-auth-clusterrolebinding.yaml From baac33ca3e6c0aa401ce131992f8662ede56c778 Mon Sep 17 00:00:00 2001 From: Daniel Orbach Date: Tue, 5 May 2026 16:07:50 +0300 Subject: [PATCH 3/7] feat(helm): wire controller deployment for opt-in metrics Add METRICS_BIND_ADDRESS and METRICS_SECURE env vars and a `metrics` containerPort to the controller pod, all gated on `controller.metrics.enabled`. The new env vars are emitted before the `controller.env` passthrough so user-supplied values continue to win (later entries take precedence in env arrays), preserving the existing escape hatch. The controller binary already understands METRICS_BIND_ADDRESS and METRICS_SECURE via its env-var loader (see go/core/pkg/app/app.go), so no controller changes are required. Signed-off-by: Daniel Orbach --- .../templates/controller-deployment.yaml | 11 +++ .../tests/controller-deployment_test.yaml | 84 +++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/helm/kagent/templates/controller-deployment.yaml b/helm/kagent/templates/controller-deployment.yaml index ee7119b8e..4687316bc 100644 --- a/helm/kagent/templates/controller-deployment.yaml +++ b/helm/kagent/templates/controller-deployment.yaml @@ -84,6 +84,12 @@ spec: {{- else }} {{ fail "No database connection configured. Set database.postgres.url, database.postgres.urlFile, or enable database.postgres.bundled." }} {{- end }} + {{- if .Values.controller.metrics.enabled }} + - name: METRICS_BIND_ADDRESS + value: {{ .Values.controller.metrics.bindAddress | quote }} + - name: METRICS_SECURE + value: {{ .Values.controller.metrics.secureServing | quote }} + {{- end }} {{- with .Values.controller.env }} {{- toYaml . | nindent 12 }} {{- end }} @@ -97,6 +103,11 @@ spec: - name: http containerPort: {{ .Values.controller.service.ports.targetPort }} protocol: TCP + {{- if .Values.controller.metrics.enabled }} + - name: metrics + containerPort: {{ .Values.controller.metrics.service.targetPort }} + protocol: TCP + {{- end }} resources: {{- toYaml .Values.controller.resources | nindent 12 }} {{- with (.Values.controller.securityContext | default .Values.securityContext) }} diff --git a/helm/kagent/tests/controller-deployment_test.yaml b/helm/kagent/tests/controller-deployment_test.yaml index a35a9c227..bcdece19a 100644 --- a/helm/kagent/tests/controller-deployment_test.yaml +++ b/helm/kagent/tests/controller-deployment_test.yaml @@ -496,3 +496,87 @@ tests: path: spec.template.spec.containers[0].env content: name: POSTGRES_PASSWORD + + - it: should not expose metrics container port by default + template: controller-deployment.yaml + asserts: + - lengthEqual: + path: spec.template.spec.containers[0].ports + count: 1 + - notContains: + path: spec.template.spec.containers[0].ports + content: + name: metrics + + - it: should not set metrics env vars by default + template: controller-deployment.yaml + asserts: + - notContains: + path: spec.template.spec.containers[0].env + content: + name: METRICS_BIND_ADDRESS + any: true + - notContains: + path: spec.template.spec.containers[0].env + content: + name: METRICS_SECURE + any: true + + - it: should expose metrics container port when enabled + template: controller-deployment.yaml + set: + controller.metrics.enabled: true + asserts: + - contains: + path: spec.template.spec.containers[0].ports + content: + name: metrics + containerPort: 8443 + protocol: TCP + + - it: should set metrics env vars when enabled + template: controller-deployment.yaml + set: + controller.metrics.enabled: true + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: METRICS_BIND_ADDRESS + value: ":8443" + - contains: + path: spec.template.spec.containers[0].env + content: + name: METRICS_SECURE + value: "true" + + - it: should reflect insecure serving in env + template: controller-deployment.yaml + set: + controller.metrics.enabled: true + controller.metrics.secureServing: false + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: METRICS_SECURE + value: "false" + + - it: should reflect custom bind address and target port + template: controller-deployment.yaml + set: + controller.metrics.enabled: true + controller.metrics.bindAddress: ":9443" + controller.metrics.service.targetPort: 9443 + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: METRICS_BIND_ADDRESS + value: ":9443" + - contains: + path: spec.template.spec.containers[0].ports + content: + name: metrics + containerPort: 9443 + protocol: TCP From 2260bda6c4b0799b6dcbf61ee95fc5d9df16e94a Mon Sep 17 00:00:00 2001 From: Daniel Orbach Date: Wed, 6 May 2026 03:41:40 +0300 Subject: [PATCH 4/7] fix(helm): derive metrics targetPort from bindAddress The earlier commits exposed both `controller.metrics.bindAddress` and `controller.metrics.service.targetPort` as independent values, so a user could shift the runtime bind without moving the Service target and end up with a Service that points at a port the controller binary is not listening on. Drop the separate `service.targetPort` knob and derive both the container port and the Service targetPort from `bindAddress` via `regexFind`, matching the `kmcp` sub-chart's pattern. Add a test that asserts the derived ports stay in sync when `bindAddress` is customised, plus a regression test that documents the `controller.env` escape hatch (user-supplied env vars are emitted after chart defaults, so the user's value wins at runtime). The escape hatch still cannot move the Service target; values.yaml now flags this. Addresses review feedback on #1803. Signed-off-by: Daniel Orbach --- .../templates/controller-deployment.yaml | 2 +- .../templates/controller-metrics-service.yaml | 2 +- .../tests/controller-deployment_test.yaml | 22 +++++++++++++++++-- .../controller-metrics-service_test.yaml | 4 ++-- helm/kagent/values.yaml | 8 +++++-- 5 files changed, 30 insertions(+), 8 deletions(-) diff --git a/helm/kagent/templates/controller-deployment.yaml b/helm/kagent/templates/controller-deployment.yaml index 4687316bc..ebdfdd36f 100644 --- a/helm/kagent/templates/controller-deployment.yaml +++ b/helm/kagent/templates/controller-deployment.yaml @@ -105,7 +105,7 @@ spec: protocol: TCP {{- if .Values.controller.metrics.enabled }} - name: metrics - containerPort: {{ .Values.controller.metrics.service.targetPort }} + containerPort: {{ .Values.controller.metrics.bindAddress | regexFind "[0-9]+" | int }} protocol: TCP {{- end }} resources: diff --git a/helm/kagent/templates/controller-metrics-service.yaml b/helm/kagent/templates/controller-metrics-service.yaml index 622a1a2bb..7c0a3ac1c 100644 --- a/helm/kagent/templates/controller-metrics-service.yaml +++ b/helm/kagent/templates/controller-metrics-service.yaml @@ -11,7 +11,7 @@ spec: ports: - name: {{ ternary "https" "http-metrics" .Values.controller.metrics.secureServing }} port: {{ .Values.controller.metrics.service.port }} - targetPort: {{ .Values.controller.metrics.service.targetPort }} + targetPort: {{ .Values.controller.metrics.bindAddress | regexFind "[0-9]+" | int }} protocol: TCP selector: {{- include "kagent.controller.selectorLabels" . | nindent 4 }} diff --git a/helm/kagent/tests/controller-deployment_test.yaml b/helm/kagent/tests/controller-deployment_test.yaml index bcdece19a..de9826431 100644 --- a/helm/kagent/tests/controller-deployment_test.yaml +++ b/helm/kagent/tests/controller-deployment_test.yaml @@ -562,12 +562,11 @@ tests: name: METRICS_SECURE value: "false" - - it: should reflect custom bind address and target port + - it: should derive metrics container port from bindAddress template: controller-deployment.yaml set: controller.metrics.enabled: true controller.metrics.bindAddress: ":9443" - controller.metrics.service.targetPort: 9443 asserts: - contains: path: spec.template.spec.containers[0].env @@ -580,3 +579,22 @@ tests: name: metrics containerPort: 9443 protocol: TCP + + - it: should let controller.env override the chart-supplied metrics env + template: controller-deployment.yaml + set: + controller.metrics.enabled: true + controller.env: + - name: METRICS_BIND_ADDRESS + value: "0" + asserts: + - contains: + path: spec.template.spec.containers[0].env + content: + name: METRICS_BIND_ADDRESS + value: ":8443" + - contains: + path: spec.template.spec.containers[0].env + content: + name: METRICS_BIND_ADDRESS + value: "0" diff --git a/helm/kagent/tests/controller-metrics-service_test.yaml b/helm/kagent/tests/controller-metrics-service_test.yaml index c09cbba5b..f231af425 100644 --- a/helm/kagent/tests/controller-metrics-service_test.yaml +++ b/helm/kagent/tests/controller-metrics-service_test.yaml @@ -39,11 +39,11 @@ tests: path: spec.ports[0].protocol value: TCP - - it: should respect custom port and targetPort + - it: should derive targetPort from bindAddress set: controller.metrics.enabled: true + controller.metrics.bindAddress: ":9443" controller.metrics.service.port: 9443 - controller.metrics.service.targetPort: 9443 asserts: - equal: path: spec.ports[0].port diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index 37150e863..0129b0f39 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -225,7 +225,12 @@ controller: # -- Prometheus-style /metrics endpoint for the controller manager. # When enabled, provisions a dedicated metrics Service plus the ClusterRoles # required for authenticated scrapes. Bind `-metrics-reader` to - # your Prometheus ServiceAccount to grant scrape access. + # your Prometheus ServiceAccount to grant scrape access. The Service + # targetPort and pod containerPort are derived from `bindAddress` so the + # two cannot drift; only the cluster-facing `service.port` is independently + # configurable. Overriding `METRICS_BIND_ADDRESS` via `controller.env` + # changes the runtime bind but not the Service — keep `service.port` + # aligned manually if you take that escape hatch. # @default -- disabled metrics: enabled: false @@ -234,7 +239,6 @@ controller: service: type: ClusterIP port: 8443 - targetPort: 8443 env: [] envFrom: [] From 72aafde3129eec734f8ddf9265ef9bdf1cc956b9 Mon Sep 17 00:00:00 2001 From: Daniel Orbach Date: Wed, 6 May 2026 03:53:47 +0300 Subject: [PATCH 5/7] fix(helm): anchor metrics port regex so host-qualified addresses parse `regexFind "[0-9]+"` returns the first digit run, so a perfectly valid bindAddress like `127.0.0.1:8443` would extract `127` and the rendered Service would point at the wrong port. Anchor the match to the end of the string with `[0-9]+$` so every Go-style address form yields the trailing port: bare `:port`, host-qualified `host:port`, and bracketed IPv6 `[::1]:port` all parse correctly. The shared logic lives behind a `kagent.controller.metricsPort` helper in `_helpers.tpl` so the deployment containerPort and the Service targetPort cannot fall out of sync if the parsing rule evolves. Disabling metrics is exclusively the job of `controller.metrics.enabled=false`; the chart does not engage with the controller binary's `bindAddress=0` disable sentinel, since wiring two ways to express the same intent only invites contradictory configuration. Addresses review feedback on #1803. Signed-off-by: Daniel Orbach --- helm/kagent/templates/_helpers.tpl | 14 ++++++++++ .../templates/controller-deployment.yaml | 2 +- .../templates/controller-metrics-service.yaml | 2 +- .../tests/controller-deployment_test.yaml | 26 +++++++++++++++++++ .../controller-metrics-service_test.yaml | 20 ++++++++++++++ 5 files changed, 62 insertions(+), 2 deletions(-) diff --git a/helm/kagent/templates/_helpers.tpl b/helm/kagent/templates/_helpers.tpl index 2c517ba21..8b0e4a9bb 100644 --- a/helm/kagent/templates/_helpers.tpl +++ b/helm/kagent/templates/_helpers.tpl @@ -132,6 +132,20 @@ Check if leader election should be enabled (more than 1 replica) {{- gt (.Values.controller.replicas | int) 1 -}} {{- end -}} +{{/* +Extract the TCP port from controller.metrics.bindAddress. + +Anchors the digit run to the end of the string so every Go-style +address form the controller binary accepts is handled correctly: bare +":port", host-qualified "host:port", and bracketed IPv6 "[::1]:port" +all yield the trailing port. Disabling metrics is exclusively the job +of controller.metrics.enabled=false; bindAddress is expected to carry +a real, non-zero port whenever the metrics resources render. +*/}} +{{- define "kagent.controller.metricsPort" -}} +{{- regexFind "[0-9]+$" (.Values.controller.metrics.bindAddress | toString) -}} +{{- end -}} + {{/* PostgreSQL service name for the bundled postgres instance */}} diff --git a/helm/kagent/templates/controller-deployment.yaml b/helm/kagent/templates/controller-deployment.yaml index ebdfdd36f..5be1b8303 100644 --- a/helm/kagent/templates/controller-deployment.yaml +++ b/helm/kagent/templates/controller-deployment.yaml @@ -105,7 +105,7 @@ spec: protocol: TCP {{- if .Values.controller.metrics.enabled }} - name: metrics - containerPort: {{ .Values.controller.metrics.bindAddress | regexFind "[0-9]+" | int }} + containerPort: {{ include "kagent.controller.metricsPort" . | int }} protocol: TCP {{- end }} resources: diff --git a/helm/kagent/templates/controller-metrics-service.yaml b/helm/kagent/templates/controller-metrics-service.yaml index 7c0a3ac1c..4b5f5a74f 100644 --- a/helm/kagent/templates/controller-metrics-service.yaml +++ b/helm/kagent/templates/controller-metrics-service.yaml @@ -11,7 +11,7 @@ spec: ports: - name: {{ ternary "https" "http-metrics" .Values.controller.metrics.secureServing }} port: {{ .Values.controller.metrics.service.port }} - targetPort: {{ .Values.controller.metrics.bindAddress | regexFind "[0-9]+" | int }} + targetPort: {{ include "kagent.controller.metricsPort" . | int }} protocol: TCP selector: {{- include "kagent.controller.selectorLabels" . | nindent 4 }} diff --git a/helm/kagent/tests/controller-deployment_test.yaml b/helm/kagent/tests/controller-deployment_test.yaml index de9826431..d3f50b8e5 100644 --- a/helm/kagent/tests/controller-deployment_test.yaml +++ b/helm/kagent/tests/controller-deployment_test.yaml @@ -580,6 +580,32 @@ tests: containerPort: 9443 protocol: TCP + - it: should derive metrics container port from a host-qualified bindAddress + template: controller-deployment.yaml + set: + controller.metrics.enabled: true + controller.metrics.bindAddress: "127.0.0.1:9443" + asserts: + - contains: + path: spec.template.spec.containers[0].ports + content: + name: metrics + containerPort: 9443 + protocol: TCP + + - it: should derive metrics container port from a bracketed IPv6 bindAddress + template: controller-deployment.yaml + set: + controller.metrics.enabled: true + controller.metrics.bindAddress: "[::1]:9443" + asserts: + - contains: + path: spec.template.spec.containers[0].ports + content: + name: metrics + containerPort: 9443 + protocol: TCP + - it: should let controller.env override the chart-supplied metrics env template: controller-deployment.yaml set: diff --git a/helm/kagent/tests/controller-metrics-service_test.yaml b/helm/kagent/tests/controller-metrics-service_test.yaml index f231af425..ed4280739 100644 --- a/helm/kagent/tests/controller-metrics-service_test.yaml +++ b/helm/kagent/tests/controller-metrics-service_test.yaml @@ -52,6 +52,26 @@ tests: path: spec.ports[0].targetPort value: 9443 + - it: should derive targetPort from a host-qualified bindAddress + set: + controller.metrics.enabled: true + controller.metrics.bindAddress: "127.0.0.1:9443" + controller.metrics.service.port: 9443 + asserts: + - equal: + path: spec.ports[0].targetPort + value: 9443 + + - it: should derive targetPort from a bracketed IPv6 bindAddress + set: + controller.metrics.enabled: true + controller.metrics.bindAddress: "[::1]:9443" + controller.metrics.service.port: 9443 + asserts: + - equal: + path: spec.ports[0].targetPort + value: 9443 + - it: should rename port when secure serving disabled set: controller.metrics.enabled: true From 77e9a92647c5406dd9045aab3e3a6fbe1371d440 Mon Sep 17 00:00:00 2001 From: Daniel Orbach Date: Wed, 6 May 2026 03:54:52 +0300 Subject: [PATCH 6/7] docs(helm): correct the metrics bindAddress override note MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The earlier note suggested keeping `service.port` aligned with a runtime override of `METRICS_BIND_ADDRESS`, but `service.port` is the cluster-facing front-door port and has no effect on the Service backend. The Service `targetPort` and the pod `containerPort` are both derived from `bindAddress` at template time, so overriding the env var via `controller.env` cannot move the rendered Service at all — that operational consequence is what the comment now states, with `bindAddress` flagged as the only chart-side knob that moves the listener and the Service together. Addresses review feedback on #1803. Signed-off-by: Daniel Orbach --- helm/kagent/values.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index 0129b0f39..9c3e2ba91 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -223,14 +223,14 @@ controller: port: 8083 targetPort: 8083 # -- Prometheus-style /metrics endpoint for the controller manager. - # When enabled, provisions a dedicated metrics Service plus the ClusterRoles - # required for authenticated scrapes. Bind `-metrics-reader` to - # your Prometheus ServiceAccount to grant scrape access. The Service - # targetPort and pod containerPort are derived from `bindAddress` so the - # two cannot drift; only the cluster-facing `service.port` is independently - # configurable. Overriding `METRICS_BIND_ADDRESS` via `controller.env` - # changes the runtime bind but not the Service — keep `service.port` - # aligned manually if you take that escape hatch. + # When enabled, provisions a dedicated metrics Service plus the + # ClusterRoles required for authenticated scrapes. Bind + # `-metrics-reader` to your Prometheus ServiceAccount to grant + # scrape access. Use `bindAddress` for any port change: the Service + # `targetPort` and the pod `containerPort` are derived from it at + # template time, so overriding `METRICS_BIND_ADDRESS` via + # `controller.env` shifts only the runtime listener and leaves the + # rendered Service pointing at the chart-time port. # @default -- disabled metrics: enabled: false From 7405b044dc560985fde61106593310573891bbd0 Mon Sep 17 00:00:00 2001 From: Daniel Orbach Date: Wed, 6 May 2026 04:03:36 +0300 Subject: [PATCH 7/7] feat(helm): honour bindAddress=0 as a metrics disable signal The `controller.metrics.bindAddress` value mirrors the controller's `--metrics-bind-address` flag, and that flag's documented contract treats `0` as "disable the metrics server" (see go/core/pkg/app/app.go:145-146). Letting the chart silently render an invalid `containerPort: 0` when the operator follows that contract is surprising; treat the binary's disable sentinel as another way of saying `controller.metrics.enabled=false`. Wrap the existing `kagent.controller.metricsPort` helper with a new `kagent.controller.metricsEnabled` helper that returns truthy only when `enabled` is true *and* the resolved port is a real, non-zero value. Switch the Service, both ClusterRoles, the ClusterRoleBinding, and the Deployment env/port wiring over to the new helper so the two disable signals are equivalent in their effect on rendered manifests. The escape-hatch behaviour is unchanged: setting `METRICS_BIND_ADDRESS` via `controller.env` still shifts only the runtime listener and cannot move the chart-rendered Service. Addresses review feedback on #1803. Signed-off-by: Daniel Orbach --- helm/kagent/templates/_helpers.tpl | 19 ++++++++++++++--- .../templates/controller-deployment.yaml | 2 +- .../templates/controller-metrics-service.yaml | 2 +- .../rbac/metrics-auth-clusterrole.yaml | 2 +- .../rbac/metrics-auth-clusterrolebinding.yaml | 2 +- .../rbac/metrics-reader-clusterrole.yaml | 2 +- .../tests/controller-deployment_test.yaml | 21 +++++++++++++++++++ .../tests/controller-metrics-rbac_test.yaml | 15 +++++++++++++ .../controller-metrics-service_test.yaml | 16 ++++++++++++++ helm/kagent/values.yaml | 5 ++++- 10 files changed, 77 insertions(+), 9 deletions(-) diff --git a/helm/kagent/templates/_helpers.tpl b/helm/kagent/templates/_helpers.tpl index 8b0e4a9bb..36c5c3a54 100644 --- a/helm/kagent/templates/_helpers.tpl +++ b/helm/kagent/templates/_helpers.tpl @@ -138,14 +138,27 @@ Extract the TCP port from controller.metrics.bindAddress. Anchors the digit run to the end of the string so every Go-style address form the controller binary accepts is handled correctly: bare ":port", host-qualified "host:port", and bracketed IPv6 "[::1]:port" -all yield the trailing port. Disabling metrics is exclusively the job -of controller.metrics.enabled=false; bindAddress is expected to carry -a real, non-zero port whenever the metrics resources render. +all yield the trailing port. Returns "0" or "" when the binary's +disable sentinel is in use; callers must consult +`kagent.controller.metricsEnabled` before rendering manifests. */}} {{- define "kagent.controller.metricsPort" -}} {{- regexFind "[0-9]+$" (.Values.controller.metrics.bindAddress | toString) -}} {{- end -}} +{{/* +Returns "1" when the controller metrics resources (Service, RBAC, +container port, env vars) should render, empty otherwise. Honours both +disable signals: `controller.metrics.enabled=false` and the binary's +own `--metrics-bind-address=0` sentinel reached through `bindAddress`. +The two are equivalent so the field name keeps faith with the binary's +documented contract (see go/core/pkg/app/app.go). +*/}} +{{- define "kagent.controller.metricsEnabled" -}} +{{- $port := include "kagent.controller.metricsPort" . -}} +{{- if and .Values.controller.metrics.enabled $port (ne $port "0") -}}1{{- end -}} +{{- end -}} + {{/* PostgreSQL service name for the bundled postgres instance */}} diff --git a/helm/kagent/templates/controller-deployment.yaml b/helm/kagent/templates/controller-deployment.yaml index 5be1b8303..28cfa39a1 100644 --- a/helm/kagent/templates/controller-deployment.yaml +++ b/helm/kagent/templates/controller-deployment.yaml @@ -84,7 +84,7 @@ spec: {{- else }} {{ fail "No database connection configured. Set database.postgres.url, database.postgres.urlFile, or enable database.postgres.bundled." }} {{- end }} - {{- if .Values.controller.metrics.enabled }} + {{- if include "kagent.controller.metricsEnabled" . }} - name: METRICS_BIND_ADDRESS value: {{ .Values.controller.metrics.bindAddress | quote }} - name: METRICS_SECURE diff --git a/helm/kagent/templates/controller-metrics-service.yaml b/helm/kagent/templates/controller-metrics-service.yaml index 4b5f5a74f..973645cfb 100644 --- a/helm/kagent/templates/controller-metrics-service.yaml +++ b/helm/kagent/templates/controller-metrics-service.yaml @@ -1,4 +1,4 @@ -{{- if .Values.controller.metrics.enabled }} +{{- if include "kagent.controller.metricsEnabled" . }} apiVersion: v1 kind: Service metadata: diff --git a/helm/kagent/templates/rbac/metrics-auth-clusterrole.yaml b/helm/kagent/templates/rbac/metrics-auth-clusterrole.yaml index 64ca06f38..a97d5425f 100644 --- a/helm/kagent/templates/rbac/metrics-auth-clusterrole.yaml +++ b/helm/kagent/templates/rbac/metrics-auth-clusterrole.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.controller.metrics.enabled .Values.controller.metrics.secureServing }} +{{- if and (include "kagent.controller.metricsEnabled" .) .Values.controller.metrics.secureServing }} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: diff --git a/helm/kagent/templates/rbac/metrics-auth-clusterrolebinding.yaml b/helm/kagent/templates/rbac/metrics-auth-clusterrolebinding.yaml index 2baab225f..0f9d655dc 100644 --- a/helm/kagent/templates/rbac/metrics-auth-clusterrolebinding.yaml +++ b/helm/kagent/templates/rbac/metrics-auth-clusterrolebinding.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.controller.metrics.enabled .Values.controller.metrics.secureServing }} +{{- if and (include "kagent.controller.metricsEnabled" .) .Values.controller.metrics.secureServing }} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: diff --git a/helm/kagent/templates/rbac/metrics-reader-clusterrole.yaml b/helm/kagent/templates/rbac/metrics-reader-clusterrole.yaml index 06c171d9e..434500923 100644 --- a/helm/kagent/templates/rbac/metrics-reader-clusterrole.yaml +++ b/helm/kagent/templates/rbac/metrics-reader-clusterrole.yaml @@ -1,4 +1,4 @@ -{{- if .Values.controller.metrics.enabled }} +{{- if include "kagent.controller.metricsEnabled" . }} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: diff --git a/helm/kagent/tests/controller-deployment_test.yaml b/helm/kagent/tests/controller-deployment_test.yaml index d3f50b8e5..a26c8e937 100644 --- a/helm/kagent/tests/controller-deployment_test.yaml +++ b/helm/kagent/tests/controller-deployment_test.yaml @@ -606,6 +606,27 @@ tests: containerPort: 9443 protocol: TCP + - it: should not gain metrics wiring when bindAddress disables metrics + template: controller-deployment.yaml + set: + controller.metrics.enabled: true + controller.metrics.bindAddress: "0" + asserts: + - notContains: + path: spec.template.spec.containers[0].env + content: + name: METRICS_BIND_ADDRESS + any: true + - notContains: + path: spec.template.spec.containers[0].env + content: + name: METRICS_SECURE + any: true + - notContains: + path: spec.template.spec.containers[0].ports + content: + name: metrics + - it: should let controller.env override the chart-supplied metrics env template: controller-deployment.yaml set: diff --git a/helm/kagent/tests/controller-metrics-rbac_test.yaml b/helm/kagent/tests/controller-metrics-rbac_test.yaml index f721172ae..87a3fce36 100644 --- a/helm/kagent/tests/controller-metrics-rbac_test.yaml +++ b/helm/kagent/tests/controller-metrics-rbac_test.yaml @@ -111,3 +111,18 @@ tests: - hasDocuments: count: 0 template: rbac/metrics-auth-clusterrolebinding.yaml + + - it: should skip all metrics rbac when bindAddress disables metrics + set: + controller.metrics.enabled: true + controller.metrics.bindAddress: "0" + asserts: + - hasDocuments: + count: 0 + template: rbac/metrics-reader-clusterrole.yaml + - hasDocuments: + count: 0 + template: rbac/metrics-auth-clusterrole.yaml + - hasDocuments: + count: 0 + template: rbac/metrics-auth-clusterrolebinding.yaml diff --git a/helm/kagent/tests/controller-metrics-service_test.yaml b/helm/kagent/tests/controller-metrics-service_test.yaml index ed4280739..751f8ee7e 100644 --- a/helm/kagent/tests/controller-metrics-service_test.yaml +++ b/helm/kagent/tests/controller-metrics-service_test.yaml @@ -72,6 +72,22 @@ tests: path: spec.ports[0].targetPort value: 9443 + - it: should not render when bindAddress disables metrics + set: + controller.metrics.enabled: true + controller.metrics.bindAddress: "0" + asserts: + - hasDocuments: + count: 0 + + - it: should not render when bindAddress is empty + set: + controller.metrics.enabled: true + controller.metrics.bindAddress: "" + asserts: + - hasDocuments: + count: 0 + - it: should rename port when secure serving disabled set: controller.metrics.enabled: true diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index 9c3e2ba91..07c1978a9 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -230,7 +230,10 @@ controller: # `targetPort` and the pod `containerPort` are derived from it at # template time, so overriding `METRICS_BIND_ADDRESS` via # `controller.env` shifts only the runtime listener and leaves the - # rendered Service pointing at the chart-time port. + # rendered Service pointing at the chart-time port. Setting + # `bindAddress: "0"` (or empty) is treated as a disable signal — + # equivalent to `enabled: false` — to keep faith with the controller + # binary's documented contract for `--metrics-bind-address`. # @default -- disabled metrics: enabled: false