diff --git a/core/helm-charts/genai-gateway/templates/ingress.yaml b/core/helm-charts/genai-gateway/templates/ingress.yaml index 67980d4a..6ecf959f 100644 --- a/core/helm-charts/genai-gateway/templates/ingress.yaml +++ b/core/helm-charts/genai-gateway/templates/ingress.yaml @@ -1,34 +1,24 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.ingress.enabled }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/proxy-body-size: 10m - nginx.ingress.kubernetes.io/ssl-redirect: "true" - generation: 1 labels: app.kubernetes.io/managed-by: Helm - name: genai-gateway-ingress + name: genai-gateway-httproute spec: - ingressClassName: nginx - rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - backend: - service: - name: genai-gateway-service - port: - number: 4000 - path: / - pathType: ImplementationSpecific - tls: - - hosts: + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretname }} -status: - loadBalancer: {} + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: genai-gateway-service + port: 4000 {{- end }} diff --git a/core/helm-charts/genai-gateway/templates/ingress_eks.yaml b/core/helm-charts/genai-gateway/templates/ingress_eks.yaml index 367a5396..39fc872c 100644 --- a/core/helm-charts/genai-gateway/templates/ingress_eks.yaml +++ b/core/helm-charts/genai-gateway/templates/ingress_eks.yaml @@ -1,4 +1,4 @@ -{{- if .Values.ingress.enabled }} +{{- if and .Values.ingress.enabled (eq .Values.platform "eks") }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: diff --git a/core/helm-charts/istio/peer-auth-ingress.yaml b/core/helm-charts/istio/peer-auth-ingress.yaml index 043cbd87..ebc99076 100644 --- 
a/core/helm-charts/istio/peer-auth-ingress.yaml +++ b/core/helm-charts/istio/peer-auth-ingress.yaml @@ -4,11 +4,11 @@ apiVersion: security.istio.io/v1 kind: PeerAuthentication metadata: name: peer-auth-ingress - namespace: ingress-nginx + namespace: envoy-gateway-system spec: selector: matchLabels: - app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/name: envoy mtls: mode: STRICT portLevelMtls: diff --git a/core/helm-charts/keycloak/templates/ingress.yaml b/core/helm-charts/keycloak/templates/ingress.yaml index 992360b0..0d567248 100644 --- a/core/helm-charts/keycloak/templates/ingress.yaml +++ b/core/helm-charts/keycloak/templates/ingress.yaml @@ -1,31 +1,23 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if not .Values.apisixRoute.enabled }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - name: {{ .Release.Name }}-keycloak-apisix + name: {{ .Release.Name }}-keycloak-apisix-httproute namespace: auth-apisix - annotations: - kubernetes.io/ingress.class: "nginx" - nginx.ingress.kubernetes.io/ssl-redirect: "true" - nginx.ingress.kubernetes.io/backend-protocol: "HTTP" - nginx.ingress.kubernetes.io/proxy-pass-headers: "Content-Type, Authorization" spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - {{ .Values.ingress.host }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - path: /token - pathType: Exact - backend: - service: - name: {{ .Values.ingress.serviceName }} - port: - number: {{ .Values.ingress.servicePort }} - tls: - - hosts: - - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretName }} + - matches: + - path: + type: Exact + value: /token + backendRefs: + - name: {{ .Values.ingress.serviceName }} + port: {{ .Values.ingress.servicePort }} {{- end }} diff --git a/core/helm-charts/keycloak/templates/ingress_eks.yaml 
b/core/helm-charts/keycloak/templates/ingress_eks.yaml index 4af2999c..4467579e 100644 --- a/core/helm-charts/keycloak/templates/ingress_eks.yaml +++ b/core/helm-charts/keycloak/templates/ingress_eks.yaml @@ -1,5 +1,6 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +{{- if eq .Values.platform "eks" }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: @@ -32,3 +33,4 @@ spec: - hosts: - {{ .Values.ingress.host }} secretName: {{ .Values.ingress.secretName }} +{{- end }} diff --git a/core/helm-charts/mcp-server-template/templates/ingress.yaml b/core/helm-charts/mcp-server-template/templates/ingress.yaml index 2bc1a77b..ad7f6b44 100644 --- a/core/helm-charts/mcp-server-template/templates/ingress.yaml +++ b/core/helm-charts/mcp-server-template/templates/ingress.yaml @@ -1,46 +1,32 @@ {{- if .Values.ingress.enabled }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - name: {{ include "mcp-demo.fullname" . }} + name: {{ include "mcp-demo.fullname" . }}-httproute namespace: {{ .Values.apisix.enabled | ternary "auth-apisix" (.Values.ingress.namespace | default .Release.Namespace) }} labels: {{- include "mcp-demo.labels" . | nindent 4 }} - annotations: - kubernetes.io/ingress.class: {{ .Values.ingress.className }} - nginx.ingress.kubernetes.io/use-regex: "true" - nginx.ingress.kubernetes.io/proxy-buffering: "off" - nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" - nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" - {{- with .Values.ingress.annotations }} - {{- toYaml . 
| nindent 4 }} - {{- end }} spec: - ingressClassName: {{ .Values.ingress.className }} - {{- if .Values.ingress.tls.enabled }} - tls: - - hosts: + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.tls.secretName }} - {{- end }} {{- $svcName := ternary "auth-apisix-gateway" (include "mcp-demo.fullname" .) .Values.apisix.enabled }} {{- $svcPort := ternary 80 .Values.service.port .Values.apisix.enabled }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - backend: - service: - name: {{ $svcName }} - port: - number: {{ $svcPort }} - path: /health - pathType: Prefix - - backend: - service: - name: {{ $svcName }} - port: - number: {{ $svcPort }} - path: {{ .Values.ingress.path }} - pathType: Prefix + - matches: + - path: + type: PathPrefix + value: /health + backendRefs: + - name: {{ $svcName }} + port: {{ $svcPort }} + - matches: + - path: + type: PathPrefix + value: {{ .Values.ingress.path }} + backendRefs: + - name: {{ $svcName }} + port: {{ $svcPort }} {{- end }} diff --git a/core/helm-charts/ovms/templates/ingress.yaml b/core/helm-charts/ovms/templates/ingress.yaml index c5f0865e..bbcbb919 100644 --- a/core/helm-charts/ovms/templates/ingress.yaml +++ b/core/helm-charts/ovms/templates/ingress.yaml @@ -2,32 +2,33 @@ # SPDX-License-Identifier: Apache-2.0 {{- if and .Values.ingress.enabled .Values.modelSource }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - name: {{ include "ovms-model-server.fullname" . }} + name: {{ include "ovms-model-server.fullname" . }}-httproute namespace: {{ .Values.ingress.namespace }} labels: {{- include "ovms-model-server.labels" . 
| nindent 4 }} - annotations: - nginx.ingress.kubernetes.io/rewrite-target: /{{ .Values.modelName }}-ovms/$1 spec: - ingressClassName: {{ .Values.ingress.className }} - {{- if .Values.ingress.secretname }} - tls: - - hosts: + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretname }} - {{- end }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - path: /{{ .Values.modelName }}-ovms/(.*) - pathType: ImplementationSpecific - backend: - service: - name: {{- if .Values.apisixRoute.enabled }} auth-apisix-gateway{{- else }} {{ include "ovms-model-server.fullname" . }}{{- end }} - port: - number: 80 + - matches: + - path: + type: PathPrefix + value: /{{ .Values.modelName }}-ovms + {{- if not .Values.apisixRoute.enabled }} + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + {{- end }} + backendRefs: + - name: {{- if .Values.apisixRoute.enabled }} auth-apisix-gateway{{- else }} {{ include "ovms-model-server.fullname" . }}{{- end }} + port: 80 {{- end }} diff --git a/core/helm-charts/tei/templates/ingress.yaml b/core/helm-charts/tei/templates/ingress.yaml index aba43f9a..39c2c547 100644 --- a/core/helm-charts/tei/templates/ingress.yaml +++ b/core/helm-charts/tei/templates/ingress.yaml @@ -1,60 +1,48 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Please edit the object below. Lines beginning with a '#' will be ignored, -# and an empty file will abort the edit. If an error occurs while saving this file will be -# reopened with the relevant failures. 
{{- if or .Values.ingress.enabled .Values.apisix.enabled }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +{{- $modelName := .Values.EMBEDDING_MODEL_ID | splitList "/" | last }} +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - annotations: - kubernetes.io/ingress.class: nginx - # nginx.ingress.kubernetes.io/rewrite-target: /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}/$1 - nginx.ingress.kubernetes.io/rewrite-target: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}/$1 - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}-teicpu/$1 - {{- else if not .Values.apisix.enabled }} - /$1 - {{- end }} - creationTimestamp: "2024-11-22T03:27:37Z" - generation: 1 labels: {{- include "tei.labels" . | nindent 4 }} - name: {{ include "tei.fullname" . }}-ingress + name: {{ include "tei.fullname" . }}-httproute namespace: {{- if .Values.apisix.enabled }} auth-apisix {{- else }} default {{- end }} - resourceVersion: "244487" - uid: df2b31a1-6653-4d71-9de0-4df33cb93ad1 spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - {{ .Values.ingress.host }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - backend: - service: - name: {{- if .Values.apisix.enabled }} - auth-apisix-gateway - {{- else }} - {{ include "tei.fullname" . 
}}-service - {{- end }} - port: - number: 80 - # path: /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}/(.*) - path: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}/(.*) - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}-teicpu/(.*) - {{- else if not .Values.apisix.enabled }} - /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}/(.*) + - matches: + - path: + type: PathPrefix + value: {{- if and .Values.apisix.enabled .Values.accelDevice }} + /{{ $modelName }} + {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} + /{{ $modelName }}-teicpu + {{- else }} + /{{ $modelName }} + {{- end }} + {{- if not .Values.apisix.enabled }} + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + {{- end }} + backendRefs: + - name: {{- if .Values.apisix.enabled }} + auth-apisix-gateway + {{- else }} + {{ include "tei.fullname" . 
}}-service {{- end }} - pathType: ImplementationSpecific - tls: - - hosts: - - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretname }} + port: 80 {{- end }} \ No newline at end of file diff --git a/core/helm-charts/tei/templates/ingress_eks.yaml b/core/helm-charts/tei/templates/ingress_eks.yaml index 462c767e..5c6b4982 100644 --- a/core/helm-charts/tei/templates/ingress_eks.yaml +++ b/core/helm-charts/tei/templates/ingress_eks.yaml @@ -1,6 +1,6 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -{{- if or .Values.ingress.enabled .Values.apisix.enabled }} +{{- if and (or .Values.ingress.enabled .Values.apisix.enabled) (eq .Values.platform "eks") }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: diff --git a/core/helm-charts/teirerank/templates/ingress.yaml b/core/helm-charts/teirerank/templates/ingress.yaml index 402d05ab..359574c5 100644 --- a/core/helm-charts/teirerank/templates/ingress.yaml +++ b/core/helm-charts/teirerank/templates/ingress.yaml @@ -1,61 +1,48 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 - -# Please edit the object below. Lines beginning with a '#' will be ignored, -# and an empty file will abort the edit. If an error occurs while saving this file will be -# reopened with the relevant failures. 
-{{- if .Values.ingress.enabled}} -apiVersion: networking.k8s.io/v1 -kind: Ingress +{{- if .Values.ingress.enabled }} +{{- $modelName := .Values.RERANK_MODEL_ID | splitList "/" | last }} +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - annotations: - kubernetes.io/ingress.class: nginx - # nginx.ingress.kubernetes.io/rewrite-target: /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}/$1 - nginx.ingress.kubernetes.io/rewrite-target: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}/$1 - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}-teirerankcpu/$1 - {{- else if not .Values.apisix.enabled }} - /$1 - {{- end }} - creationTimestamp: "2024-11-22T03:27:37Z" - generation: 1 labels: {{- include "teirerank.labels" . | nindent 4 }} - name: {{ include "teirerank.fullname" . }}-ingress + name: {{ include "teirerank.fullname" . }}-httproute namespace: {{- if .Values.apisix.enabled }} auth-apisix {{- else }} default {{- end }} - resourceVersion: "244487" - uid: df2b31a1-6653-4d71-9de0-4df33cb93ad1 spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - {{ .Values.ingress.host }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - backend: - service: - name: {{- if .Values.apisix.enabled }} - auth-apisix-gateway - {{- else }} - {{ include "teirerank.fullname" . 
}}-service - {{- end }} - port: - number: 80 - # path: /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}/(.*) - path: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}/(.*) - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}-teirerankcpu/(.*) - {{- else if not .Values.apisix.enabled }} - /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}/(.*) + - matches: + - path: + type: PathPrefix + value: {{- if and .Values.apisix.enabled .Values.accelDevice }} + /{{ $modelName }} + {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} + /{{ $modelName }}-teirerankcpu + {{- else }} + /{{ $modelName }} + {{- end }} + {{- if not .Values.apisix.enabled }} + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + {{- end }} + backendRefs: + - name: {{- if .Values.apisix.enabled }} + auth-apisix-gateway + {{- else }} + {{ include "teirerank.fullname" . 
}}-service {{- end }} - pathType: ImplementationSpecific - tls: - - hosts: - - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretname }} + port: 80 {{- end }} \ No newline at end of file diff --git a/core/helm-charts/teirerank/templates/ingress_eks.yaml b/core/helm-charts/teirerank/templates/ingress_eks.yaml index 202afd6d..25bfe05d 100644 --- a/core/helm-charts/teirerank/templates/ingress_eks.yaml +++ b/core/helm-charts/teirerank/templates/ingress_eks.yaml @@ -1,6 +1,6 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -{{- if .Values.ingress.enabled }} +{{- if and .Values.ingress.enabled (eq .Values.platform "eks") }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: diff --git a/core/helm-charts/tgi/templates/ingress.yaml b/core/helm-charts/tgi/templates/ingress.yaml index 2e1deac1..363f68d9 100644 --- a/core/helm-charts/tgi/templates/ingress.yaml +++ b/core/helm-charts/tgi/templates/ingress.yaml @@ -1,60 +1,48 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Please edit the object below. Lines beginning with a '#' will be ignored, -# and an empty file will abort the edit. If an error occurs while saving this file will be -# reopened with the relevant failures. 
-{{- if .Values.ingress.enabled}} -apiVersion: networking.k8s.io/v1 -kind: Ingress +{{- if .Values.ingress.enabled }} +{{- $modelName := .Values.LLM_MODEL_ID | splitList "/" | last }} +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - annotations: - kubernetes.io/ingress.class: nginx - # nginx.ingress.kubernetes.io/rewrite-target: /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}/$1 - nginx.ingress.kubernetes.io/rewrite-target: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}/$1 - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}-tgicpu/$1 - {{- else if not .Values.apisix.enabled }} - /$1 - {{- end }} - creationTimestamp: "2024-11-22T03:27:37Z" - generation: 1 labels: {{- include "tgi.labels" . | nindent 4 }} - name: {{ include "tgi.fullname" . }}-ingress + name: {{ include "tgi.fullname" . }}-httproute namespace: {{- if .Values.apisix.enabled }} auth-apisix {{- else }} default {{- end }} - resourceVersion: "244487" - uid: df2b31a1-6653-4d71-9de0-4df33cb93ad1 spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - {{ .Values.ingress.host }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - backend: - service: - name: {{- if .Values.apisix.enabled }} - auth-apisix-gateway - {{- else }} - {{ include "tgi.fullname" . 
}}-service - {{- end }} - port: - number: 80 - # path: /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}/(.*) - path: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}/(.*) - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}-tgicpu/(.*) - {{- else if not .Values.apisix.enabled }} - /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}/(.*) + - matches: + - path: + type: PathPrefix + value: {{- if and .Values.apisix.enabled .Values.accelDevice }} + /{{ $modelName }} + {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} + /{{ $modelName }}-tgicpu + {{- else }} + /{{ $modelName }} + {{- end }} + {{- if not .Values.apisix.enabled }} + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + {{- end }} + backendRefs: + - name: {{- if .Values.apisix.enabled }} + auth-apisix-gateway + {{- else }} + {{ include "tgi.fullname" . 
}}-service {{- end }} - pathType: ImplementationSpecific - tls: - - hosts: - - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretname }} + port: 80 {{- end }} \ No newline at end of file diff --git a/core/helm-charts/tgi/templates/ingress_eks.yaml b/core/helm-charts/tgi/templates/ingress_eks.yaml index d0cddf64..f719db5e 100644 --- a/core/helm-charts/tgi/templates/ingress_eks.yaml +++ b/core/helm-charts/tgi/templates/ingress_eks.yaml @@ -1,6 +1,6 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -{{- if .Values.ingress.enabled }} +{{- if and .Values.ingress.enabled (eq .Values.platform "eks") }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: diff --git a/core/helm-charts/vllm/templates/ingress.yaml b/core/helm-charts/vllm/templates/ingress.yaml index 5ef5caf5..d1afe076 100644 --- a/core/helm-charts/vllm/templates/ingress.yaml +++ b/core/helm-charts/vllm/templates/ingress.yaml @@ -1,61 +1,48 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Please edit the object below. Lines beginning with a '#' will be ignored, -# and an empty file will abort the edit. If an error occurs while saving this file will be -# reopened with the relevant failures. 
-{{- if .Values.ingress.enabled}} +{{- if .Values.ingress.enabled }} {{- $modelName := (default .Values.LLM_MODEL_ID .Values.SERVED_MODEL_NAME) | splitList "/" | last }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - annotations: - kubernetes.io/ingress.class: nginx - # nginx.ingress.kubernetes.io/rewrite-target: /{{ $modelName }}/$1 - nginx.ingress.kubernetes.io/rewrite-target: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ $modelName }}/$1 - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ $modelName }}-vllmcpu/$1 - {{- else if not .Values.apisix.enabled }} - /$1 - {{- end }} - creationTimestamp: "2024-11-22T03:27:37Z" - generation: 1 labels: {{- include "vllm.labels" . | nindent 4 }} - name: {{ include "vllm.fullname" . }}-ingress + name: {{ include "vllm.fullname" . }}-httproute namespace: {{- if .Values.apisix.enabled }} auth-apisix {{- else }} default {{- end }} - resourceVersion: "244487" - uid: df2b31a1-6653-4d71-9de0-4df33cb93ad1 spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - {{ .Values.ingress.host }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - backend: - service: - name: {{- if .Values.apisix.enabled }} - auth-apisix-gateway - {{- else }} - {{ include "vllm.fullname" . 
}}-service - {{- end }} - port: - number: 80 - # path: /{{ $modelName }}/(.*) - path: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ $modelName }}/(.*) - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ $modelName }}-vllmcpu/(.*) - {{- else if not .Values.apisix.enabled }} - /{{ $modelName }}/(.*) + - matches: + - path: + type: PathPrefix + value: {{- if and .Values.apisix.enabled .Values.accelDevice }} + /{{ $modelName }} + {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} + /{{ $modelName }}-vllmcpu + {{- else }} + /{{ $modelName }} + {{- end }} + {{- if not .Values.apisix.enabled }} + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + {{- end }} + backendRefs: + - name: {{- if .Values.apisix.enabled }} + auth-apisix-gateway + {{- else }} + {{ include "vllm.fullname" . }}-service {{- end }} - pathType: ImplementationSpecific - tls: - - hosts: - - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretname }} + port: 80 {{- end }} \ No newline at end of file diff --git a/core/helm-charts/vllm/templates/ingress_eks.yaml b/core/helm-charts/vllm/templates/ingress_eks.yaml index 51710c16..6ce50e32 100644 --- a/core/helm-charts/vllm/templates/ingress_eks.yaml +++ b/core/helm-charts/vllm/templates/ingress_eks.yaml @@ -1,6 +1,6 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -{{- if .Values.ingress.enabled }} +{{- if and .Values.ingress.enabled (eq .Values.platform "eks") }} {{- $modelName := (default .Values.LLM_MODEL_ID .Values.SERVED_MODEL_NAME) | splitList "/" | last }} apiVersion: networking.k8s.io/v1 kind: Ingress diff --git a/core/inventory/inference-config.cfg b/core/inventory/inference-config.cfg index e63552d4..3255b8ed 100644 --- a/core/inventory/inference-config.cfg +++ b/core/inventory/inference-config.cfg @@ -20,4 +20,9 @@ deploy_istio=off uninstall_ceph=off # Agentic AI Plugin 
-deploy_agenticai_plugin=off \ No newline at end of file +deploy_agenticai_plugin=off + +# Proxy Configuration (central source — leave empty if no proxy needed) +http_proxy= +https_proxy= +no_proxy= diff --git a/core/inventory/metadata/inference-metadata.cfg b/core/inventory/metadata/inference-metadata.cfg index 48b01376..0861b2e7 100644 --- a/core/inventory/metadata/inference-metadata.cfg +++ b/core/inventory/metadata/inference-metadata.cfg @@ -1,7 +1,7 @@ gaudi2_operator="1.22.0-740" gaudi3_operator="1.22.0-740" python3_interpreter="/usr/bin/python3" -ingress_controller="4.12.2" +envoy_gateway_version="v1.2.0" keycloak_chart_version="22.1.0" apisix_chart_version="0.1.0" kubespray_version="v2.27.0" diff --git a/core/lib/cluster/deployment/fresh-install.sh b/core/lib/cluster/deployment/fresh-install.sh index e4c78c79..99d5c35c 100644 --- a/core/lib/cluster/deployment/fresh-install.sh +++ b/core/lib/cluster/deployment/fresh-install.sh @@ -93,11 +93,11 @@ fresh_installation() { fi if [[ "$deploy_ingress_controller" == "yes" ]]; then - execute_and_check "Deploying Ingress NGINX Controller..." run_ingress_nginx_playbook \ - "Ingress NGINX Controller is deployed successfully." \ - "Failed to deploy Ingress NGINX Controller. Exiting." + execute_and_check "Deploying Envoy Gateway Edge Controller..." run_edge_gateway_playbook \ + "Envoy Gateway Edge Controller is deployed successfully." \ + "Failed to deploy Envoy Gateway Edge Controller. Exiting." else - echo "Skipping Ingress NGINX Controller deployment..." + echo "Skipping Edge Gateway Controller deployment..." 
fi if [[ "$deploy_keycloak" == "yes" || "$deploy_apisix" == "yes" ]]; then diff --git a/core/lib/components/ingress-controller.sh b/core/lib/components/ingress-controller.sh index fc15d7a4..070dba20 100644 --- a/core/lib/components/ingress-controller.sh +++ b/core/lib/components/ingress-controller.sh @@ -1,7 +1,7 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -run_ingress_nginx_playbook() { - echo "Deploying the Ingress NGINX Controller..." - ansible-playbook -i "${INVENTORY_PATH}" playbooks/deploy-ingress-controller.yml --extra-vars "secret_name=${cluster_url} cert_file=${cert_file} key_file=${key_file} ingress_controller=${ingress_controller}" +run_edge_gateway_playbook() { + echo "Deploying the Envoy Gateway Edge Controller..." + ansible-playbook -i "${INVENTORY_PATH}" playbooks/deploy-ingress-controller.yml --extra-vars "secret_name=${cluster_url} cert_file=${cert_file} key_file=${key_file} envoy_gateway_version=${envoy_gateway_version:-v1.2.0}" } \ No newline at end of file diff --git a/core/lib/models/install-model.sh b/core/lib/models/install-model.sh index 40321f8d..4f20d66e 100644 --- a/core/lib/models/install-model.sh +++ b/core/lib/models/install-model.sh @@ -27,10 +27,10 @@ deploy_inference_llm_models_playbook() { else apisix_enabled="true" fi - if [ "$deploy_keycloak" == "no" ]; then - ingress_enabled="false" - else + if [ "$deploy_ingress_controller" == "yes" ]; then ingress_enabled="true" + else + ingress_enabled="false" fi if [ "$deploy_observability" == "yes" ]; then vllm_metrics_enabled="true" diff --git a/core/lib/system/precheck/read-config-file.sh b/core/lib/system/precheck/read-config-file.sh index 45c6baf1..b9a8c43c 100644 --- a/core/lib/system/precheck/read-config-file.sh +++ b/core/lib/system/precheck/read-config-file.sh @@ -46,17 +46,32 @@ read_config_file() { sed -i -E "s|^[[:space:]]*#?[[:space:]]*http_proxy:.*|http_proxy: \"$http_proxy\"|" "$INVENTORY_ALL_FILE" sed -i -E 
"/^env_proxy:/,/^[^[:space:]]/s|^[[:space:]]*http_proxy:.*| http_proxy: \"$http_proxy\"|" "$INVENTORY_ALL_FILE" export http_proxy + else + sed -i -E "s|^[[:space:]]*#?[[:space:]]*http_proxy:.*|http_proxy: \"\"|" "$INVENTORY_ALL_FILE" + sed -i -E "/^env_proxy:/,/^[^[:space:]]/s|^[[:space:]]*http_proxy:.*| http_proxy: \"\"|" "$INVENTORY_ALL_FILE" + unset http_proxy 2>/dev/null || true fi if [[ -n "$https_proxy" ]]; then sed -i -E "s|^[[:space:]]*#?[[:space:]]*https_proxy:.*|https_proxy: \"$https_proxy\"|" "$INVENTORY_ALL_FILE" sed -i -E "/^env_proxy:/,/^[^[:space:]]/s|^[[:space:]]*https_proxy:.*| https_proxy: \"$https_proxy\"|" "$INVENTORY_ALL_FILE" export https_proxy + else + sed -i -E "s|^[[:space:]]*#?[[:space:]]*https_proxy:.*|https_proxy: \"\"|" "$INVENTORY_ALL_FILE" + sed -i -E "/^env_proxy:/,/^[^[:space:]]/s|^[[:space:]]*https_proxy:.*| https_proxy: \"\"|" "$INVENTORY_ALL_FILE" + unset https_proxy 2>/dev/null || true fi if [[ -n "$no_proxy" ]]; then + # Ensure .svc.cluster.local is always in no_proxy for in-cluster traffic + if [[ "$no_proxy" != *".svc.cluster.local"* ]]; then + no_proxy="${no_proxy},.svc.cluster.local" + fi sed -i -E "/^env_proxy:/,/^[^[:space:]]/s|^[[:space:]]*no_proxy:.*| no_proxy: \"$no_proxy\"|" "$INVENTORY_ALL_FILE" export no_proxy + else + sed -i -E "/^env_proxy:/,/^[^[:space:]]/s|^[[:space:]]*no_proxy:.*| no_proxy: \"\"|" "$INVENTORY_ALL_FILE" + unset no_proxy 2>/dev/null || true fi diff --git a/core/lib/system/precheck/readiness-check.sh b/core/lib/system/precheck/readiness-check.sh index 0cdf4f5b..6d6c99db 100644 --- a/core/lib/system/precheck/readiness-check.sh +++ b/core/lib/system/precheck/readiness-check.sh @@ -5,6 +5,10 @@ run_infrastructure_readiness_check() { echo "Running infrastructure readiness check..." echo "This will verify system compatibility and infrastructure requirements." + if [ ! 
-f "$HOMEDIR/inventory/hosts.yaml" ]; then + echo -e "${YELLOW}Inventory file not found — auto-generating hosts.yaml for single-node...${NC}" + bash "$HOMEDIR/scripts/generate-hosts.sh" + fi if [ ! -f "$HOMEDIR/inventory/hosts.yaml" ]; then echo -e "${RED}Error: Inventory file not found at $HOMEDIR/inventory/hosts.yaml${NC}" echo -e "${YELLOW}Please ensure the inventory file exists and contains the correct host information.${NC}" diff --git a/core/lib/system/setup-env.sh b/core/lib/system/setup-env.sh index d92caf36..40c36f13 100644 --- a/core/lib/system/setup-env.sh +++ b/core/lib/system/setup-env.sh @@ -90,6 +90,13 @@ setup_initial_env() { echo -e "${GREEN} Enterprise Inference requirements installed.${NC}" cp -r "$HOMEDIR"/helm-charts "$HOMEDIR"/scripts "$KUBESPRAYDIR"/ cp -r "$KUBESPRAYDIR"/inventory/sample/ "$KUBESPRAYDIR"/inventory/mycluster + + # Auto-generate hosts.yaml if it doesn't exist (single-node auto-detect) + if [ ! -f "$HOMEDIR/inventory/hosts.yaml" ]; then + echo -e "${YELLOW}No hosts.yaml found — auto-generating for single-node deployment...${NC}" + bash "$HOMEDIR/scripts/generate-hosts.sh" + fi + cp "$HOMEDIR"/inventory/hosts.yaml $KUBESPRAYDIR/inventory/mycluster/ cp "$HOMEDIR"/inventory/metadata/addons.yml $KUBESPRAYDIR/inventory/mycluster/group_vars/k8s_cluster/addons.yml cp "$HOMEDIR"/playbooks/* "$KUBESPRAYDIR"/playbooks/ diff --git a/core/lib/user-menu/parse-user-prompts.sh b/core/lib/user-menu/parse-user-prompts.sh index 0928bdf2..200d92e1 100644 --- a/core/lib/user-menu/parse-user-prompts.sh +++ b/core/lib/user-menu/parse-user-prompts.sh @@ -35,9 +35,9 @@ prompt_for_input() { echo "Proceeding with the setup of Habana AI Operator: $deploy_habana_ai_operator" fi if [ -z "$deploy_ingress_controller" ]; then - read -p "Do you want to proceed with deploying Ingress NGINX Controller? (yes/no): " deploy_ingress_controller + read -p "Do you want to proceed with deploying Envoy Gateway Edge Controller? 
(yes/no): " deploy_ingress_controller else - echo "Proceeding with the setup of Ingress Controller: $deploy_ingress_controller" + echo "Proceeding with the setup of Edge Gateway Controller: $deploy_ingress_controller" fi if [ -z "$deploy_keycloak" ]; then read -p "Do you want to proceed with deploying Keycloak & APISIX? (yes/no): " deploy_keycloak diff --git a/core/playbooks/deploy-cluster-config.yml b/core/playbooks/deploy-cluster-config.yml index bcd66d3f..3cbc00dd 100644 --- a/core/playbooks/deploy-cluster-config.yml +++ b/core/playbooks/deploy-cluster-config.yml @@ -29,35 +29,35 @@ tls.key: "{{ lookup('file', key_file) | b64encode }}" register: kubectl_output tags: deploy_cluster_dashboard - - name: Create Admin dashboard ingress + - name: Create Admin dashboard HTTPRoute community.kubernetes.k8s: state: present definition: - apiVersion: networking.k8s.io/v1 - kind: Ingress + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute metadata: - annotations: - nginx.ingress.kubernetes.io/backend-protocol: HTTPS - nginx.ingress.kubernetes.io/rewrite-target: /$2 - name: kubernetes-dashboard-ingress + name: kubernetes-dashboard-httproute namespace: kube-system spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - "{{ secret_name }}" rules: - - host: "{{ secret_name }}" - http: - paths: - - backend: - service: - name: kubernetes-dashboard - port: - number: 443 - path: /dashboard(/|$)(.*) - pathType: ImplementationSpecific - tls: - - hosts: - - "{{ secret_name }}" - secretName: "{{ secret_name }}" + - matches: + - path: + type: PathPrefix + value: /dashboard + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + backendRefs: + - name: kubernetes-dashboard + port: 443 when: brownfield_deployment != "yes" tags: deploy_cluster_dashboard - name: Create ServiceAccount for Admin Dashboard diff --git a/core/playbooks/deploy-genai-gateway.yml 
b/core/playbooks/deploy-genai-gateway.yml index d4394f6e..ca39b803 100644 --- a/core/playbooks/deploy-genai-gateway.yml +++ b/core/playbooks/deploy-genai-gateway.yml @@ -95,7 +95,7 @@ state: absent when: kubernetes_platform != "eks" run_once: true - - name: Delete Ingress Resource + - name: Delete stale Ingress/HTTPRoute Resource kubernetes.core.k8s: state: absent api_version: networking.k8s.io/v1 @@ -137,6 +137,11 @@ --set route.tls.termination=edge --set route.tls.insecureEdgeTerminationPolicy=Redirect --set ingress.enabled=false + {% elif kubernetes_platform == 'eks' %} + --set ingress.enabled=true + --set ingress.host={{ secret_name }} + --set ingress.secretname={{ secret_name }} + --set route.enabled=false {% else %} --set ingress.enabled=true --set ingress.host={{ secret_name }} @@ -232,15 +237,19 @@ --set langfuse.route.tls.insecureEdgeTerminationPolicy=Redirect --set langfuse.ingress.enabled=false --set langfuse.nextauth.url=https://trace-{{ secret_name }} - {% else %} + {% elif kubernetes_platform == 'eks' %} --set langfuse.ingress.enabled=true - --set langfuse.ingress.className={{ 'alb' if kubernetes_platform == 'eks' else 'nginx' }} + --set langfuse.ingress.className=alb --set langfuse.ingress.tls.enabled=true --set langfuse.ingress.hosts[0].host=trace-{{ secret_name }} --set langfuse.ingress.hosts[0].paths[0].pathType=Prefix --set langfuse.ingress.tls.secretName=trace-{{ secret_name }} --set langfuse.nextauth.url=https://trace-{{ secret_name }} --set langfuse.route.enabled=false + {% else %} + --set langfuse.ingress.enabled=false + --set langfuse.nextauth.url=https://trace-{{ secret_name }} + --set langfuse.route.enabled=false {% endif %} {% if kubernetes_platform == 'eks' %} -f {{ remote_helm_charts_base }}/genai-gateway-trace/eks-ingress-annotations.yaml @@ -287,3 +296,29 @@ targetPort: 3000 when: kubernetes_platform == "openshift" run_once: true + + - name: Create HTTPRoute for GenAI Gateway Trace (non-EKS, non-OpenShift) + kubernetes.core.k8s: + 
state: present + definition: + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: genai-gateway-trace-httproute + namespace: genai-gateway + spec: + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - "trace-{{ secret_name }}" + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: genai-gateway-trace-web + port: 3000 + when: kubernetes_platform != "openshift" and kubernetes_platform != "eks" + run_once: true diff --git a/core/playbooks/deploy-ingress-controller.yml b/core/playbooks/deploy-ingress-controller.yml index 5e0fd648..5adebf12 100644 --- a/core/playbooks/deploy-ingress-controller.yml +++ b/core/playbooks/deploy-ingress-controller.yml @@ -1,7 +1,7 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- -- name: Deploy Ingress NGINX using Helm +- name: Deploy Envoy Gateway and Enterprise Edge Gateway hosts: "{{ inference_delegate | default('kube_control_plane') }}" gather_facts: false any_errors_fatal: "{{ any_errors_fatal | default(true) }}" @@ -11,19 +11,10 @@ roles: - role: inference-tools tasks: - - name: Add the Ingress-NGINX Helm repository - community.kubernetes.helm_repository: - name: ingress-nginx - repo_url: https://kubernetes.github.io/ingress-nginx - state: present - - name: Validate if the Helm repositories are configured correctly - ansible.builtin.command: helm repo list - register: helm_repo_list - failed_when: false - changed_when: false - - name: Update Helm repositories to fetch the latest charts - ansible.builtin.shell: helm repo update - when: helm_repo_list.stdout != "" + - name: Install Gateway API CRDs + ansible.builtin.shell: > + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.2.0/standard-install.yaml + run_once: true - name: Retrieving the Number of Infrastructure Nodes (ei-infra-eligible) ansible.builtin.shell: kubectl get nodes 
--selector=ei-infra-eligible=true -o jsonpath='{.items[*].metadata.name}' | wc -w @@ -36,7 +27,7 @@ changed_when: false when: inference_node_count.stdout | int == 0 - - name: Determine Replica Node + - name: Determine Replica Count set_fact: inference_infra_replica_count: >- {{ @@ -45,62 +36,155 @@ else 1) }} - - name: Display the Number of Nodes on which Ingress is deployed + - name: Display the Number of Gateway Proxy Replicas debug: - msg: "Number of Nodes on which Ingress is deployed: {{ inference_infra_replica_count }}" + msg: "Number of Envoy Gateway proxy replicas: {{ inference_infra_replica_count }}" run_once: true - - name: Deploy Ingress Nginx Controller - community.kubernetes.helm: - name: ingress-nginx - chart_ref: ingress-nginx/ingress-nginx - release_namespace: ingress-nginx - create_namespace: true - chart_version: "{{ ingress_controller | default('4.12.2') }}" - state: present - values: - controller: - progressDeadlineSeconds: 300 - minReadySeconds: 0 - replicaCount: "{{ inference_infra_replica_count | int }}" - hostPort: - enabled: true - ports: - http: 80 - https: 443 - tolerations: - - key: node-role.kubernetes.io/control-plane - operator: Exists - effect: NoSchedule - - key: node-role.kubernetes.io/master - operator: Exists - effect: NoSchedule - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: ei-infra-eligible - operator: In - values: ["true"] - - matchExpressions: + - name: Deploy Envoy Gateway Controller + ansible.builtin.shell: > + helm upgrade --install eg oci://docker.io/envoyproxy/gateway-helm + --version {{ envoy_gateway_version | default('v1.2.0') }} + --namespace envoy-gateway-system + --create-namespace + --wait + run_once: true + + - name: Create EnvoyProxy configuration for proxy pod placement + ansible.builtin.shell: | + cat <<'INNEREOF' | sed 's/__REPLICAS__/{{ inference_infra_replica_count | int }}/' | kubectl apply -f - + apiVersion: 
gateway.envoyproxy.io/v1alpha1 + kind: EnvoyProxy + metadata: + name: enterprise-proxy-config + namespace: envoy-gateway-system + spec: + provider: + type: Kubernetes + kubernetes: + envoyDeployment: + replicas: __REPLICAS__ + pod: + tolerations: - key: node-role.kubernetes.io/control-plane operator: Exists - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchLabels: - app: ingress-nginx - topologyKey: "kubernetes.io/hostname" + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: ei-infra-eligible + operator: In + values: ["true"] + - matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: Exists + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/name: envoy + topologyKey: "kubernetes.io/hostname" + patch: + type: StrategicMerge + value: + spec: + template: + spec: + containers: + - name: envoy + ports: + - containerPort: 10443 + hostPort: 443 + name: https-443 + protocol: TCP + - containerPort: 10080 + hostPort: 80 + name: http-80 + protocol: TCP + - containerPort: 19001 + hostPort: 19001 + name: metrics + protocol: TCP + envoyService: + type: ClusterIP + INNEREOF run_once: true - - name: Pause to Allow Controller to Initialize + + - name: Create GatewayClass + kubernetes.core.k8s: + state: present + definition: + apiVersion: gateway.networking.k8s.io/v1 + kind: GatewayClass + metadata: + name: envoy + spec: + controllerName: gateway.envoyproxy.io/gatewayclass-controller + parametersRef: + group: gateway.envoyproxy.io + kind: EnvoyProxy + name: enterprise-proxy-config + namespace: envoy-gateway-system + run_once: true + + - name: Create TLS Secret in Gateway namespace + 
community.kubernetes.k8s: + state: present + definition: + apiVersion: v1 + kind: Secret + metadata: + name: "{{ secret_name }}" + namespace: envoy-gateway-system + type: kubernetes.io/tls + data: + tls.crt: "{{ lookup('file', cert_file) | b64encode }}" + tls.key: "{{ lookup('file', key_file) | b64encode }}" + run_once: true + + - name: Create Enterprise Edge Gateway + kubernetes.core.k8s: + state: present + definition: + apiVersion: gateway.networking.k8s.io/v1 + kind: Gateway + metadata: + name: enterprise-edge-gateway + namespace: envoy-gateway-system + spec: + gatewayClassName: envoy + listeners: + - name: https + protocol: HTTPS + port: 443 + tls: + mode: Terminate + certificateRefs: + - name: "{{ secret_name }}" + allowedRoutes: + namespaces: + from: All + - name: http + protocol: HTTP + port: 80 + allowedRoutes: + namespaces: + from: All + run_once: true + + - name: Pause to Allow Gateway to Initialize pause: - seconds: 10 - - name: Wait for all ingress-nginx pods to be in a running and ready state + seconds: 15 + - name: Wait for all Envoy Gateway pods to be in a running and ready state shell: | - kubectl get pods -n ingress-nginx -o json | jq -r ' + kubectl get pods -n envoy-gateway-system -o json | jq -r ' .items[] | select(.status.phase != "Running" or (.status.containerStatuses[] | select(.ready != true))) | .metadata.name' | wc -l @@ -109,6 +193,6 @@ retries: 160 delay: 10 failed_when: pod_status.rc != 0 and pod_status.stdout != "0" - - name: Ingress Controller Deployment status + - name: Envoy Gateway Deployment status debug: - msg: "All Ingress NGINX Controller pods are running and ready." + msg: "Envoy Gateway and Enterprise Edge Gateway are running and ready." 
diff --git a/core/playbooks/deploy-istio-openshift.yml b/core/playbooks/deploy-istio-openshift.yml index 9841d9d0..82e7107d 100644 --- a/core/playbooks/deploy-istio-openshift.yml +++ b/core/playbooks/deploy-istio-openshift.yml @@ -9,7 +9,7 @@ vars: test_ns: test-ns default_ns: default - ingress_ns: ingress-nginx + ingress_ns: envoy-gateway-system genai_gw_ns: genai-gateway observability_ns: observability habana_ns: habana-ai-operator @@ -348,12 +348,12 @@ when: observability_ns_check.rc == 0 run_once: true - - name: "[OpenShift] Label ingress-nginx namespace for ambient mode" + - name: "[OpenShift] Label envoy-gateway-system namespace for ambient mode" command: kubectl label namespace {{ ingress_ns }} istio.io/dataplane-mode=ambient --overwrite when: is_openshift | bool run_once: true - - name: Apply peer-auth-ingress.yaml to ingress-nginx namespace + - name: Apply peer-auth-ingress.yaml to envoy-gateway-system namespace command: kubectl apply -f {{ peer_auth_ingress_path }} -n {{ ingress_ns }} run_once: true diff --git a/core/playbooks/deploy-istio.yml b/core/playbooks/deploy-istio.yml index 2c4aff9c..a7912a6d 100644 --- a/core/playbooks/deploy-istio.yml +++ b/core/playbooks/deploy-istio.yml @@ -9,7 +9,7 @@ vars: test_ns: test-ns default_ns: default - ingress_ns: ingress-nginx + ingress_ns: envoy-gateway-system genai_gw_ns: genai-gateway observability_ns: observability habana_ns: habana-ai-operator @@ -177,11 +177,11 @@ when: observability_ns_check.rc == 0 run_once: true - - name: Label ingress-nginx namespace for ambient mode + - name: Label envoy-gateway-system namespace for ambient mode command: kubectl label namespace {{ ingress_ns }} istio.io/dataplane-mode=ambient --overwrite run_once: true - - name: Apply peer-auth-ingress.yaml to ingress-nginx namespace + - name: Apply peer-auth-ingress.yaml to envoy-gateway-system namespace command: kubectl apply -f {{ peer_auth_ingress_path }} -n {{ ingress_ns }} run_once: true diff --git 
a/core/playbooks/deploy-keycloak-controller.yml b/core/playbooks/deploy-keycloak-controller.yml index de0220c6..3e49127a 100644 --- a/core/playbooks/deploy-keycloak-controller.yml +++ b/core/playbooks/deploy-keycloak-controller.yml @@ -12,20 +12,6 @@ roles: - role: inference-tools tasks: - - name: Add Ingress-Nginx repository - community.kubernetes.helm_repository: - name: ingress-nginx - repo_url: https://kubernetes.github.io/ingress-nginx - state: present - when: delete_pv_on_purge == "no" - - name: Verify repository availability - ansible.builtin.command: helm repo list - register: helm_repo_list - failed_when: false - changed_when: false - - name: Synchronize repositories - ansible.builtin.shell: helm repo update - when: helm_repo_list.stdout != "" and delete_pv_on_purge == "no" - name: Remove existing Keycloak PersistentVolume data directory ansible.builtin.file: path: /mnt/local-path-provisioner/ diff --git a/core/playbooks/deploy-keycloak-service.yml b/core/playbooks/deploy-keycloak-service.yml index 30219569..c173bb4f 100644 --- a/core/playbooks/deploy-keycloak-service.yml +++ b/core/playbooks/deploy-keycloak-service.yml @@ -11,16 +11,3 @@ roles: - role: inference-tools tasks: - - name: Add ingress-nginx repository using Helm module - community.kubernetes.helm_repository: - name: ingress-nginx - repo_url: https://kubernetes.github.io/ingress-nginx - state: present - - name: Add Ingress-Nginx repository - ansible.builtin.command: helm repo list - register: helm_repo_list - failed_when: false - changed_when: false - - name: Synchronize repositories - ansible.builtin.shell: helm repo update - when: helm_repo_list.stdout != "" diff --git a/core/playbooks/deploy-keycloak-tls-cert.yml b/core/playbooks/deploy-keycloak-tls-cert.yml index 65836378..de9e8de5 100644 --- a/core/playbooks/deploy-keycloak-tls-cert.yml +++ b/core/playbooks/deploy-keycloak-tls-cert.yml @@ -67,7 +67,7 @@ msg: "Number of ei-infra-eligible nodes configured: {{ inference_infra_replica_count 
}}" run_once: true when: deploy_keycloak == "yes" - - name: Delete genai-gateway-ingress resource + - name: Delete stale genai-gateway-ingress resource kubernetes.core.k8s: state: absent kind: Ingress @@ -156,10 +156,10 @@ name: memory targetAverageUtilization: 60 ingress: - enabled: "{{ false if kubernetes_platform == 'openshift' or kubernetes_platform == 'eks' else true }}" + enabled: "{{ true if kubernetes_platform == 'eks' else false }}" hostname: "{{ secret_name }}" tls: true - ingressClassName: "nginx" + ingressClassName: "{{ 'alb' if kubernetes_platform == 'eks' else 'nginx' }}" proxyAddressForwarding: true annotations: nginx.ingress.kubernetes.io/ssl-redirect: "true" @@ -321,6 +321,34 @@ - deploy_keycloak == "yes" - kubernetes_platform == "eks" + - name: Create HTTPRoute for Keycloak (non-EKS, non-OpenShift) + kubernetes.core.k8s: + state: present + definition: + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: keycloak-httproute + namespace: default + spec: + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - "{{ secret_name }}" + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: keycloak + port: 80 + when: + - deploy_keycloak == "yes" + - kubernetes_platform != "eks" + - kubernetes_platform != "openshift" + - name: Ensure Remote Directory Exists ansible.builtin.file: path: "{{ remote_helm_charts_base }}" diff --git a/core/playbooks/deploy-observability.yml b/core/playbooks/deploy-observability.yml index c1e5863a..4d646940 100644 --- a/core/playbooks/deploy-observability.yml +++ b/core/playbooks/deploy-observability.yml @@ -206,36 +206,31 @@ - "{{ secret_name }}" secretName: "{{ secret_name }}" - - name: Create Grafana observability ingress with nginx (non-EKS) + - name: Create Grafana observability HTTPRoute (non-EKS) tags: deploy_observability when: kubernetes_platform is not defined or kubernetes_platform != "eks" community.kubernetes.k8s: 
state: present definition: - apiVersion: networking.k8s.io/v1 - kind: Ingress + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute metadata: - annotations: - nginx.ingress.kubernetes.io/backend-protocol: HTTP # Assuming Grafana is served over HTTP - name: observability-grafana-ingress - namespace: observability # Change this to the namespace where Grafana is deployed + name: observability-grafana-httproute + namespace: observability spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - "{{ secret_name }}" rules: - - host: "{{ secret_name }}" # Replace with your desired hostname for Grafana - http: - paths: - - backend: - service: - name: observability-grafana - port: - number: 80 - path: /observability(/|$)(.*) - pathType: ImplementationSpecific - tls: - - hosts: - - "{{ secret_name }}" - secretName: "{{ secret_name }}" + - matches: + - path: + type: PathPrefix + value: /observability + backendRefs: + - name: observability-grafana + port: 80 - name: Add Bitnami Helm repository tags: deploy_logging diff --git a/docs/envoy-gateway-deployment-guide.md b/docs/envoy-gateway-deployment-guide.md new file mode 100644 index 00000000..c86469b8 --- /dev/null +++ b/docs/envoy-gateway-deployment-guide.md @@ -0,0 +1,237 @@ +# Single Node Deployment Guide (Envoy Gateway) + +This guide provides step-by-step instructions to deploy Intel® AI for Enterprise +Inference on a single node using Envoy Gateway as the ingress controller. + +## Prerequisites + +1. [SSH Key Setup](./prerequisites.md#ssh-key-setup) +2. [SSL/TLS Certificate Setup for Development Environment](./prerequisites.md#development-environment) +3. 
[Hugging Face Token Generation](./prerequisites.md#hugging-face-token-generation) + +## Deployment + +### Step 1: Configure the Automation Config File + +Clone the Enterprise Inference repo and set up the config: + +```bash +cd ~ +git clone https://github.com/opea-project/Enterprise-Inference.git +cd Enterprise-Inference +cp -f docs/examples/single-node/inference-config.cfg core/inventory/inference-config.cfg +``` + +Edit `core/inventory/inference-config.cfg` and update the following fields: + +| Field | Description | Example | +|---|---|---| +| `cluster_url` | DNS hostname for the cluster | `api.example.com` | +| `cert_file` | Path to TLS certificate | `~/certs/cert.pem` | +| `key_file` | Path to TLS private key | `~/certs/key.pem` | +| `keycloak_client_id` | Keycloak OAuth2 client ID | `my-client-id` | +| `keycloak_admin_user` | Keycloak admin username | `your-keycloak-admin-user` | +| `keycloak_admin_password` | Keycloak admin password | `changeme` | + +For systems behind a proxy, set the proxy fields accordingly and ensure +`cluster_url` (e.g. `api.example.com`) is included in the `no_proxy` list. + +### Step 2: Update `hosts.yaml` File + +```bash +cp -f docs/examples/single-node/hosts.yaml core/inventory/hosts.yaml +``` + +Update the `ansible_user` field to the actual username. + +### Step 3: Update `/etc/hosts` + +Add the `cluster_url` hostname pointing to the node's IP: + +```bash +echo "NODE_IP api.example.com" | sudo tee -a /etc/hosts +``` + +Replace `NODE_IP` with the actual node IP address (e.g. `10.75.129.152`). + +> **Note:** Do NOT map `cluster_url` to `127.0.0.1`. The Envoy Gateway uses +> `hostPort` bindings which are accessible on the node IP, not loopback.
+ +### Step 4: Run the Automation + +```bash +cd core +chmod +x inference-stack-deploy.sh +export HUGGINGFACE_TOKEN="YOUR_HUGGING_FACE_TOKEN"  # replace with your own token +``` + +#### CPU Only + +```bash +./inference-stack-deploy.sh --models "21" --cpu-or-gpu "cpu" --hugging-face-token $HUGGINGFACE_TOKEN +``` + +#### Intel® AI Accelerators + +```bash +./inference-stack-deploy.sh --models "1" --cpu-or-gpu "gpu" --hugging-face-token $HUGGINGFACE_TOKEN +``` + +Select Option 1 and confirm the Yes/No prompt. + +## Architecture + +The traffic flow through the system is: + +``` +Client (HTTPS:443) → Envoy Gateway → APISIX (auth + rewrite) → vLLM Service +``` + +- **Envoy Gateway** – Edge proxy, terminates TLS on port 443 (hostPort), routes + based on path and hostname. +- **APISIX** – Handles authentication (OpenID Connect token introspection via + Keycloak) and path rewriting. +- **Keycloak** – Identity provider, issues and validates OAuth2 tokens. +- **vLLM** – Model inference backend. + +## Testing Inference + +### Step 1: Get the Keycloak Client Secret + +Retrieve the client secret from the deployed Kubernetes secret (set `MODEL_RELEASE_NAME` to the model's release name first): + +```bash +export CLIENT_SECRET=$(kubectl get secret "${MODEL_RELEASE_NAME}-secret" -n default \ + -o jsonpath='{.data.client_secret}' | base64 -d) +``` + +For example, with Llama 3.1 8B on CPU: + +```bash +export CLIENT_SECRET=$(kubectl get secret vllm-llama-8b-cpu-secret -n default \ + -o jsonpath='{.data.client_secret}' | base64 -d) +``` + +### Step 2: Generate an Access Token + +Generate a token via the internal Keycloak service. This ensures the token +issuer matches what APISIX expects for introspection.
+ +```bash +export KEYCLOAK_IP=$(kubectl get svc keycloak -n default -o jsonpath='{.spec.clusterIP}') +export KEYCLOAK_CLIENT_ID=my-client-id + +export TOKEN=$(curl -s --noproxy '*' \ + -H "Host: keycloak.default.svc.cluster.local" \ + http://${KEYCLOAK_IP}/realms/master/protocol/openid-connect/token \ + -X POST \ + -H 'Content-Type: application/x-www-form-urlencoded' \ + -d "grant_type=client_credentials&client_id=${KEYCLOAK_CLIENT_ID}&client_secret=${CLIENT_SECRET}" \ + | jq -r .access_token) + +echo "Token generated (length: ${#TOKEN})" +``` + +> **Important:** The token must be generated through Keycloak's internal cluster +> service URL (`keycloak.default.svc.cluster.local`) so the token issuer matches +> the APISIX OIDC introspection endpoint. Generating the token via the external +> URL (`https://api.example.com`) will result in an issuer mismatch and `401` +> errors. + +### Step 3: Test Inference + +Set the base URL: + +```bash +export BASE_URL=api.example.com +``` + +#### CPU Model (vLLM CPU) + +Note: `-vllmcpu` is appended to the model path for CPU deployments. 
+ +**Completions:** + +```bash +curl -sk https://${BASE_URL}/Llama-3.1-8B-Instruct-vllmcpu/v1/completions \ + -X POST \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer $TOKEN" \ + -d '{ + "model": "meta-llama/Llama-3.1-8B-Instruct", + "prompt": "What is Deep Learning?", + "max_tokens": 50, + "temperature": 0 + }' +``` + +**Chat Completions:** + +```bash +curl -sk https://${BASE_URL}/Llama-3.1-8B-Instruct-vllmcpu/v1/chat/completions \ + -X POST \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer $TOKEN" \ + -d '{ + "model": "meta-llama/Llama-3.1-8B-Instruct", + "messages": [{"role": "user", "content": "What is Deep Learning?"}], + "max_tokens": 50, + "temperature": 0 + }' +``` + +**List Models:** + +```bash +curl -sk https://${BASE_URL}/Llama-3.1-8B-Instruct-vllmcpu/v1/models \ + -H "Authorization: Bearer $TOKEN" +``` + +#### Intel® AI Accelerator Model + +```bash +curl -sk https://${BASE_URL}/Llama-3.1-8B-Instruct/v1/completions \ + -X POST \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer $TOKEN" \ + -d '{ + "model": "meta-llama/Llama-3.1-8B-Instruct", + "prompt": "What is Deep Learning?", + "max_tokens": 50, + "temperature": 0 + }' +``` + +### List Deployed Routes + +To see all available model routes: + +```bash +kubectl get apisixroutes -A +kubectl get httproute -A +``` + +## Troubleshooting + +### Token returns `401 Authorization Required` + +- Ensure the token was generated via the **internal** Keycloak service, not the + external URL. The issuer in the JWT (`iss` claim) must be + `http://keycloak.default.svc.cluster.local/realms/master`. +- Verify the client secret matches (with `MODEL_RELEASE_NAME` set to the model's release name): `kubectl get secret "${MODEL_RELEASE_NAME}-secret" -n default -o jsonpath='{.data.client_secret}' | base64 -d` + +### Cannot reach `https://api.example.com` + +- Verify `/etc/hosts` maps `api.example.com` to the **node IP** (not `127.0.0.1`). +- Ensure `api.example.com` is in the `no_proxy` environment variable.
+- Verify the Envoy Gateway pod is running: `kubectl get pods -n envoy-gateway-system` +- Confirm port 443 is accessible: `curl -sk https://api.example.com/ -o /dev/null -w '%{http_code}'` + +### vLLM pod stuck at `0/1 Running` + +- The model may still be downloading or loading. Check logs: + `kubectl logs -f VLLM_POD_NAME` (replace `VLLM_POD_NAME` with the pod name shown by `kubectl get pods`) +- CPU model loading for Llama 3.1 8B can take 20-30 minutes on first deploy + (downloading ~15GB + CPU weight loading). +- Verify the readiness probe failure count has not hit the threshold: + `kubectl describe pod VLLM_POD_NAME` diff --git a/docs/ingress-to-envoy-gateway-migration.md b/docs/ingress-to-envoy-gateway-migration.md new file mode 100644 index 00000000..edb67c70 --- /dev/null +++ b/docs/ingress-to-envoy-gateway-migration.md @@ -0,0 +1,453 @@ +# Ingress to Envoy Gateway Migration Guide + +> **Enterprise Inference — Edge Traffic Migration** +> NGINX Ingress Controller (`networking.k8s.io/v1 Ingress`) → Envoy Gateway (`gateway.networking.k8s.io/v1 HTTPRoute`) + +--- + +## Table of Contents + +1. [Why This Migration](#why-this-migration) +2. [Architecture — Before (NGINX Ingress)](#architecture--before-nginx-ingress) +3. [Architecture — After (Envoy Gateway)](#architecture--after-envoy-gateway) +4. [Concept Mapping](#concept-mapping) +5. [What Changed](#what-changed) +6. [What Did NOT Change](#what-did-not-change) +7. [File-by-File Change Inventory](#file-by-file-change-inventory) +8. [Deployment Workflow](#deployment-workflow) +9. [Configuration](#configuration) +10. [Route Mapping Reference](#route-mapping-reference) +11. [Platform Matrix](#platform-matrix) +12. [Rollback Procedure](#rollback-procedure) + +--- + +## Why This Migration + +Kubernetes `networking.k8s.io/v1 Ingress` is feature-frozen: it remains available, but no new capabilities are being added to it.
The Kubernetes community has standardized on the **Gateway API** (`gateway.networking.k8s.io/v1`) as the successor, offering: + +| Aspect | Ingress (Old) | Gateway API (New) | +|--------|---------------|-------------------| +| API maturity | Feature-frozen, EOL path | GA since K8s 1.26, actively developed | +| Routing | Single host/path rules, vendor annotations | Rich match types (headers, methods, query params) | +| TLS | Per-Ingress resource | Centralized at Gateway listener | +| Multi-tenancy | Flat, single namespace | Role-based: Infra → GatewayClass, Cluster → Gateway, App → HTTPRoute | +| URL rewriting | Vendor-specific annotation (`nginx.ingress.kubernetes.io/rewrite-target`) | Standard `URLRewrite` filter | +| Vendor lock-in | NGINX-specific annotations | Portable across Envoy, Istio, Traefik, etc. | + +--- + +## Architecture — Before (NGINX Ingress) + +``` +┌───────────┐ +│ Client │ +│ (HTTPS) │ +└─────┬─────┘ + │ :443 + ▼ +┌─────────────────────────────────────────────────┐ +│ NGINX Ingress Controller │ +│ namespace: ingress-nginx │ +│ Helm chart: ingress-nginx/ingress-nginx v4.12.2│ +│ hostPort: 80, 443 │ +│ Tolerations: control-plane │ +│ Affinity: ei-infra-eligible nodes │ +└────┬────┬────┬────┬────┬────┬────┬──────────────┘ + │ │ │ │ │ │ │ + ▼ ▼ ▼ ▼ ▼ ▼ ▼ + ┌──────────────────────────────────────────┐ + │ networking.k8s.io/v1 Ingress resources │ + │ ingressClassName: nginx │ + │ nginx.ingress.kubernetes.io/* annotations│ + ├──────────────────────────────────────────┤ + │ • model-ingress → vLLM/TGI/TEI svc │ + │ • genai-gw-ingress → LiteLLM :4000 │ + │ • keycloak-ingress → Keycloak/APISIX │ + │ • dashboard-ingress → K8s Dashboard │ + │ • grafana-ingress → Grafana :80 │ + │ • flowise-root → Flowise :3000 │ + │ • mcp-server → MCP Server │ + └──────────────────────────────────────────┘ +``` + +### Key Characteristics (Before) + +- **Controller:** NGINX Ingress Controller deployed via Helm (`ingress-nginx` chart v4.12.2) +- **Namespace:** `ingress-nginx` 
+- **TLS:** Each Ingress resource carried its own `tls:` block with `secretName` +- **Rewriting:** `nginx.ingress.kubernetes.io/rewrite-target: /$1` annotation with regex capture groups +- **Pod placement:** `hostPort: 80/443` with control-plane tolerations and `ei-infra-eligible` node affinity +- **EKS variant:** Separate `ingress_eks.yaml` templates with `ingressClassName: alb` and ALB annotations + +--- + +## Architecture — After (Envoy Gateway) + +``` +┌───────────┐ +│ Client │ +│ (HTTPS) │ +└─────┬─────┘ + │ :443 + ▼ +┌─────────────────────────────────────────────────┐ +│ Envoy Gateway │ +│ namespace: envoy-gateway-system │ +│ │ +│ ┌─────────────────────────────────────────┐ │ +│ │ GatewayClass: envoy │ │ +│ │ controller: gateway.envoyproxy.io │ │ +│ │ parametersRef → EnvoyProxy │ │ +│ └─────────────────────────────────────────┘ │ +│ ┌─────────────────────────────────────────┐ │ +│ │ EnvoyProxy: enterprise-proxy-config │ │ +│ │ hostNetwork: true │ │ +│ │ Tolerations: control-plane │ │ +│ │ Affinity: ei-infra-eligible nodes │ │ +│ │ podAntiAffinity: spread across hosts │ │ +│ └─────────────────────────────────────────┘ │ +│ ┌─────────────────────────────────────────┐ │ +│ │ Gateway: enterprise-edge-gateway │ │ +│ │ Listeners: │ │ +│ │ - https :443 (TLS Terminate) │ │ +│ │ - http :80 │ │ +│ │ allowedRoutes: All namespaces │ │ +│ │ TLS cert: secret │ │ +│ └─────────────────────────────────────────┘ │ +└────┬────┬────┬────┬────┬────┬────┬───────────────┘ + │ │ │ │ │ │ │ + ▼ ▼ ▼ ▼ ▼ ▼ ▼ + ┌──────────────────────────────────────────┐ + │ gateway.networking.k8s.io/v1 HTTPRoutes │ + │ parentRefs: enterprise-edge-gateway │ + │ Standard filters (URLRewrite, etc.) 
│ + ├──────────────────────────────────────────┤ + │ • model-httproute → vLLM/TGI/TEI svc │ + │ • genai-gw-httproute → LiteLLM :4000 │ + │ • keycloak-httproute → Keycloak/APISIX │ + │ • dashboard-httproute→ K8s Dashboard │ + │ • grafana-httproute → Grafana :80 │ + │ • flowise-root → Flowise :3000 │ + │ • mcp-httproute → MCP Server │ + └──────────────────────────────────────────┘ +``` + +### Key Characteristics (After) + +- **Controller:** Envoy Gateway deployed via OCI Helm chart (`gateway-helm v1.2.0`) +- **Namespace:** `envoy-gateway-system` +- **TLS:** Centralized at the Gateway listener — HTTPRoutes do NOT carry TLS config +- **Rewriting:** Standard Gateway API `URLRewrite` filter with `ReplacePrefixMatch` +- **Pod placement:** `hostPort` 80/443 on the Envoy container (added through an `EnvoyProxy` StrategicMerge patch) with same tolerations and node affinity as before +- **EKS variant:** ALB `ingress_eks.yaml` templates kept as-is (separate migration path) + +--- + +## Concept Mapping + +| NGINX Ingress Concept | Envoy Gateway Equivalent | Notes | +|----------------------|--------------------------|-------| +| `ingress-nginx` Helm chart | `gateway-helm` OCI chart | Deployed to `envoy-gateway-system` | +| `IngressClass: nginx` | `GatewayClass: envoy` | References `EnvoyProxy` for pod config | +| — | `EnvoyProxy` CR | New: configures proxy pod placement, replicas, hostPort bindings | +| — | `Gateway` CR | New: defines listeners (HTTPS/HTTP), TLS termination | +| `Ingress` resource | `HTTPRoute` resource | 1:1 replacement per service | +| `ingressClassName: nginx` | `parentRefs: [{name: enterprise-edge-gateway}]` | Routes reference the Gateway, not a class | +| `tls:` block on each Ingress | TLS on Gateway listener only | Eliminates per-route TLS duplication | +| `nginx.ingress.kubernetes.io/rewrite-target: /$1` | `filters: [{type: URLRewrite, urlRewrite: {path: {type: ReplacePrefixMatch}}}]` | Standard API, no vendor annotation | +| `nginx.ingress.kubernetes.io/backend-protocol: HTTPS` | (handled at Backend/service level) | — | +|
`hostPort: 80, 443` | `hostPort: 80, 443` on the `envoy` container, added through the `EnvoyProxy` StrategicMerge patch | Same node-level port binding; Envoy listens on container ports 10080/10443 | +| `pathType: ImplementationSpecific` + regex | `path.type: PathPrefix` | Gateway API uses structured prefix matching | + +--- + +## What Changed + +### Files Modified (21 entries, 23 files) + +| # | Category | File | Summary | +|---|----------|------|---------| +| 1 | **Controller Playbook** | `core/playbooks/deploy-ingress-controller.yml` | Entire file: NGINX Helm → Envoy Gateway + GatewayClass + EnvoyProxy + Gateway | +| 2 | **Shell Script** | `core/lib/components/ingress-controller.sh` | Updated message and `--extra-vars` | +| 3 | **Shell Script** | `core/lib/cluster/deployment/fresh-install.sh` | Updated log messages | +| 4 | **Shell Script** | `core/lib/user-menu/parse-user-prompts.sh` | Updated interactive prompt text | +| 5 | **Metadata** | `core/inventory/metadata/inference-metadata.cfg` | `ingress_controller=4.12.2` → `envoy_gateway_version=v1.2.0` | +| 6 | **Helm Template** | `core/helm-charts/vllm/templates/ingress.yaml` | `Ingress` → `HTTPRoute` with `URLRewrite` filter | +| 7 | **Helm Template** | `core/helm-charts/tgi/templates/ingress.yaml` | `Ingress` → `HTTPRoute` with `URLRewrite` filter | +| 8 | **Helm Template** | `core/helm-charts/tei/templates/ingress.yaml` | `Ingress` → `HTTPRoute` with `URLRewrite` filter | +| 9 | **Helm Template** | `core/helm-charts/teirerank/templates/ingress.yaml` | `Ingress` → `HTTPRoute` with `URLRewrite` filter | +| 10 | **Helm Template** | `core/helm-charts/ovms/templates/ingress.yaml` | `Ingress` → `HTTPRoute` with `URLRewrite` filter | +| 11 | **Helm Template** | `core/helm-charts/genai-gateway/templates/ingress.yaml` | `Ingress` → `HTTPRoute` | +| 12 | **Helm Template** | `core/helm-charts/keycloak/templates/ingress.yaml` | `Ingress` → `HTTPRoute` | +| 13 | **Helm Template** | `core/helm-charts/mcp-server-template/templates/ingress.yaml` | `Ingress` → `HTTPRoute` | +| 14 | **Playbook** | `core/playbooks/deploy-cluster-config.yml` |
Dashboard Ingress → HTTPRoute | +| 15 | **Playbook** | `core/playbooks/deploy-observability.yml` | Grafana Ingress (non-EKS) → HTTPRoute | +| 16 | **Playbook** | `core/playbooks/deploy-genai-gateway.yml` | Langfuse: disabled built-in Ingress, added HTTPRoute | +| 17 | **Playbook** | `core/playbooks/deploy-keycloak-tls-cert.yml` | Keycloak Ingress disabled for non-EKS, added HTTPRoute | +| 18 | **Playbook** | `core/playbooks/deploy-keycloak-controller.yml` & `deploy-keycloak-service.yml` | Helm repo refs: ingress-nginx → envoy-gateway | +| 19 | **Istio** | `core/playbooks/deploy-istio.yml` & `deploy-istio-openshift.yml` | Namespace `ingress-nginx` → `envoy-gateway-system` | +| 20 | **Istio** | `core/helm-charts/istio/peer-auth-ingress.yaml` | PeerAuth target: `ingress-nginx` → `envoy-gateway-system` pods | +| 21 | **Plugin** | `plugins/agenticai/playbooks/deploy-agenticai-plugin.yml` | Flowise Ingress → HTTPRoute | + +--- + +## What Did NOT Change + +| Item | Reason | +|------|--------| +| **`values.yaml`** in all Helm charts | Keys `ingress.enabled`, `ingress.host`, `ingress.secretname` kept identical | +| **`inference-config.cfg`** | `deploy_ingress_controller=on` still controls edge gateway deployment | +| **Template filenames** | All `ingress.yaml` filenames kept (only content changed to HTTPRoute) | +| **Shell function name** | **Exception — this did change:** renamed to `run_edge_gateway_playbook()` (previously `run_ingress_nginx_playbook()`) | +| **EKS ALB templates** | `ingress_eks.yaml` variants with `ingressClassName: alb` keep their ALB routing unchanged (the genai-gateway variant only gains an `eq .Values.platform "eks"` render guard) | +| **OpenShift Routes** | `route.yaml` templates are not Ingress — unaffected | +| **APISIX integration** | APISIX catch-all routing through HTTPRoutes works the same way | +| **Model deployment logic** | `install-model.sh`, `deploy-inference-models.yml` — no changes to `ingress_enabled` logic | +| **Brownfield detection** | `setup-bastion.yml` pre-flight checks kept (informational only) | + +--- + +## Deployment Workflow + +The deployment sequence is
**unchanged**. The `deploy_ingress_controller=on` flag in `inference-config.cfg` triggers the edge gateway step: + +``` +inference-stack-deploy.sh + └── fresh-install.sh + ├── 1. Kubernetes cluster setup (if deploy_kubernetes_fresh=on) + ├── 2. Cluster config (dashboard) ← HTTPRoute for dashboard + ├── 3. NRI CPU Balloons (if cpu deployment) + ├── 4. Habana AI Operator (if GPU) + ├── 5. Ceph storage (if deploy_ceph=on) + │ + ├── 6. Edge Gateway Controller (if deploy_ingress_controller=on) + │ └── deploy-ingress-controller.yml + │ ├── Install Gateway API CRDs (v1.2.0) + │ ├── Deploy Envoy Gateway Helm chart + │ ├── Create EnvoyProxy (pod placement config) + │ ├── Create GatewayClass: envoy + │ ├── Create TLS Secret in envoy-gateway-system + │ └── Create Gateway: enterprise-edge-gateway + │ + ├── 7. Keycloak + APISIX (if deploy_keycloak_apisix=on) + │ └── HTTPRoute for Keycloak created here + ├── 8. GenAI Gateway (LiteLLM) (if deploy_genai_gateway=on) + │ └── HTTPRoute for LiteLLM + Langfuse trace + ├── 9. Observability (Grafana) (if deploy_observability=on) + │ └── HTTPRoute for Grafana + ├── 10. Agentic AI Plugin (if deploy_agenticai_plugin=on) + │ └── HTTPRoute for Flowise + ├── 11. Istio (if deploy_istio=on) + │ └── Labels envoy-gateway-system for ambient mode + └── 12. 
LLM Model Deployment (if deploy_llm_models=on) + └── HTTPRoutes created per model via Helm templates +``` + +--- + +## Configuration + +### No inference-config.cfg Changes Required + +The existing config toggle works the same way: + +```ini +# Controls edge gateway deployment (formerly NGINX, now Envoy Gateway) +deploy_ingress_controller=on +``` + +### Metadata Version + +In `core/inventory/metadata/inference-metadata.cfg`: + +```ini +# Before: +# ingress_controller="4.12.2" + +# After: +envoy_gateway_version="v1.2.0" +``` + +### Helm Chart values.yaml — No Changes + +All `values.yaml` files retain the same `ingress:` block: + +```yaml +ingress: + enabled: false # Set to true to enable the HTTPRoute resource + host: "" + namespace: default + secretname: "" # (used by EKS ALB variant only) +``` + +--- + +## Route Mapping Reference + +### Model Serving (vLLM, TGI, TEI, TEI-Rerank, OVMS) + +| Before (Ingress) | After (HTTPRoute) | +|-------------------|-------------------| +| `ingressClassName: nginx` | `parentRefs: [{name: enterprise-edge-gateway, namespace: envoy-gateway-system}]` | +| `nginx.ingress.kubernetes.io/rewrite-target: /$1` | `filters: [{type: URLRewrite, urlRewrite: {path: {type: ReplacePrefixMatch, replacePrefixMatch: /}}}]` | +| `path: /model-name/(.*)` | `path: {type: PathPrefix, value: /model-name}` | +| `pathType: ImplementationSpecific` | (PathPrefix is the type) | +| `tls: [{hosts: [host], secretName: secret}]` | (TLS handled at Gateway level) | + +**Example — vLLM HTTPRoute:** + +```yaml +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: vllm-model-httproute +spec: + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - api.example.com + rules: + - matches: + - path: + type: PathPrefix + value: /Meta-Llama-3.1-8B-Instruct + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + backendRefs: + - name: vllm-model-service + port: 
80 +``` + +### Infrastructure Services + +| Service | Path | Type | Notes | +|---------|------|------|-------| +| GenAI Gateway (LiteLLM) | `/` | PathPrefix | Full catch-all for LiteLLM API | +| Keycloak (via APISIX) | `/token` | Exact | Token endpoint only | +| Kubernetes Dashboard | `/dashboard` | PathPrefix | URLRewrite strips prefix | +| Grafana | `/observability` | PathPrefix | `serve_from_sub_path: true` in Grafana | +| Flowise | `/` (subdomain) | PathPrefix | Hostname: `flowise-<cluster_url>` | +| MCP Server | `/health`, `/sse` | PathPrefix | SSE-optimized (no special annotation needed) | +| Langfuse Trace | `/` (subdomain) | PathPrefix | Hostname: `trace-<cluster_url>` | + +--- + +## Platform Matrix + +| Platform | Edge Gateway | Model Routes | Infra Routes | Auth Mode | +|----------|-------------|-------------|-------------|-----------| +| **Vanilla K8s** | Envoy Gateway (HTTPRoutes) | HTTPRoute | HTTPRoute | Keycloak or LiteLLM | +| **EKS** | AWS ALB (Ingress with `ingressClassName: alb`) | ALB Ingress | ALB Ingress | Same | +| **OpenShift** | OpenShift Routes (`route.yaml`) | Route | Route | Same | + +> **Note:** EKS ALB and OpenShift Routes are **not affected** by this migration. Only vanilla Kubernetes deployments use the new Envoy Gateway path. + +--- + +## Key Resources Created by deploy-ingress-controller.yml + +```yaml +# 1. Gateway API CRDs (from upstream) +kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.2.0/standard-install.yaml + +# 2. Envoy Gateway Controller (Helm) +helm upgrade --install eg oci://docker.io/envoyproxy/gateway-helm \ + --version v1.2.0 \ + --namespace envoy-gateway-system + +# 3.
EnvoyProxy — proxy pod configuration +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: EnvoyProxy +metadata: + name: enterprise-proxy-config + namespace: envoy-gateway-system +spec: + provider: + type: Kubernetes + kubernetes: + envoyDeployment: + replicas: <replica-count> + pod: + tolerations: [control-plane, master] + affinity: {ei-infra-eligible nodes, pod anti-affinity} + patch: + spec: + template: + spec: + hostNetwork: true # Binds ports 80/443 to node + dnsPolicy: ClusterFirstWithHostNet + envoyService: + type: ClusterIP + +# 4. GatewayClass +apiVersion: gateway.networking.k8s.io/v1 +kind: GatewayClass +metadata: + name: envoy +spec: + controllerName: gateway.envoyproxy.io/gatewayclass-controller + parametersRef: {EnvoyProxy: enterprise-proxy-config} + +# 5. Gateway +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: enterprise-edge-gateway + namespace: envoy-gateway-system +spec: + gatewayClassName: envoy + listeners: + - name: https + protocol: HTTPS + port: 443 + tls: + mode: Terminate + certificateRefs: [{name: <tls-secret-name>}] + - name: http + protocol: HTTP + port: 80 + allowedRoutes: + namespaces: {from: All} +``` + +--- + +## Rollback Procedure + +If a rollback to NGINX Ingress is needed: + +1. **Revert the code** — `git checkout` the prior commit for the files listed under "Files Modified" +2. **Remove Envoy Gateway resources:** + ```bash + kubectl delete gateway enterprise-edge-gateway -n envoy-gateway-system + kubectl delete gatewayclass envoy + kubectl delete envoyproxy enterprise-proxy-config -n envoy-gateway-system + helm uninstall eg -n envoy-gateway-system + kubectl delete namespace envoy-gateway-system + ``` +3. **Re-deploy** — run `inference-stack-deploy.sh` which will install NGINX Ingress Controller and create Ingress resources + +--- + +## FAQ + +**Q: Do I need to change `inference-config.cfg`?** +A: No. `deploy_ingress_controller=on` works exactly as before.
+ +**Q: Will my existing model deployments break?** +A: If upgrading in-place, you need to run the edge gateway deployment step first, then re-deploy models so HTTPRoutes replace the old Ingress resources. + +**Q: What about EKS deployments?** +A: EKS uses the AWS ALB Ingress Controller (`ingressClassName: alb`). This migration does not affect EKS deployments. + +**Q: What about the APISIX integration?** +A: APISIX still works the same way. When `apisix.enabled=true`, the HTTPRoute backend points to `auth-apisix-gateway:80` instead of the model service directly — identical behavior to the old Ingress. + +**Q: Where is TLS configured now?** +A: TLS terminates at the `enterprise-edge-gateway` Gateway listener in `envoy-gateway-system`. Individual HTTPRoutes no longer carry TLS configuration. diff --git a/docs/ovms-model-deploy-guide.md b/docs/ovms-model-deploy-guide.md index a298430b..a8fc63c3 100644 --- a/docs/ovms-model-deploy-guide.md +++ b/docs/ovms-model-deploy-guide.md @@ -194,16 +194,22 @@ echo "Access Token: $TOKEN" ```bash # Test chat completions endpoint -For Inferencing with Qwen3-4B-int4-ov: -curl -k ${BASE_URL}/qwen3-4b-ovms/v3/chat/completions -X POST -d '{"messages": [{"role": "system","content": "You are helpful assistant"},{"role": "user","content": "what is photosynthesis"}],"model": "qwen3-4b","max_tokens": 32,"temperature": 0.4}' -H 'Content-Type: application/json' -sS -H "Authorization: Bearer $TOKEN" -For Inferencing with Mistral-7B-Instruct-v0.3-int4-cw-ov: -curl -k ${BASE_URL}/mistral-7b-ovms/v3/chat/completions -X POST -d '{"messages": [{"role": "system","content": "You are helpful assistant"},{"role": "user","content": "what is photosynthesis"}],"model": "mistral-7b","max_tokens": 32,"temperature": 0.4}' -H 'Content-Type: application/json' -sS -H "Authorization: Bearer $TOKEN" +# To run inference against any deployed model, list the APISIX routes to find the model's route name +kubectl get apisixroute -A +``` +![APISIX route list](pictures/apisix-route.png) +```bash +export
MODEL_APISIX_ROUTE="qwen3-4b-ovms" +export MODEL_ID=OpenVINO/Qwen3-4B-int4-ov -For Inferencing with meta-llama/Llama-3.2-3B-Instruct: -curl -k ${BASE_URL}/llama-3.2-3b-instruct/v3/chat/completions -X POST -d '{"messages": [{"role": "system","content": "You are helpful assistant"},{"role": "user","content": "what is api"}],"model": "llama-3.2-3b-instruct","max_tokens": 32,"temperature": 0.4}' -H 'Content-Type: application/json' -sS -H "Authorization: Bearer $TOKEN" +curl -k ${BASE_URL}/${MODEL_APISIX_ROUTE}/v3/chat/completions -X POST \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer $TOKEN" \ + -d '{"messages": [{"role": "system","content": "You are helpful assistant"},{"role": "user","content": "what is api"}],"model": "'"$MODEL_ID"'","max_tokens": 32,"temperature": 0.4}' ``` +**NOTE:** Set `MODEL_APISIX_ROUTE` and `MODEL_ID` to the route name and model ID of the model you want to test. --- ## Undeployment diff --git a/docs/pictures/apisix-route.png b/docs/pictures/apisix-route.png new file mode 100644 index 00000000..dc707487 Binary files /dev/null and b/docs/pictures/apisix-route.png differ diff --git a/plugins/agenticai/playbooks/deploy-agenticai-plugin.yml b/plugins/agenticai/playbooks/deploy-agenticai-plugin.yml index fd3d8d26..e4ba638a 100644 --- a/plugins/agenticai/playbooks/deploy-agenticai-plugin.yml +++ b/plugins/agenticai/playbooks/deploy-agenticai-plugin.yml @@ -145,34 +145,31 @@ run_once: true ########################################################################### - # Root Ingress (SUBDOMAIN MODE) + # Root HTTPRoute (SUBDOMAIN MODE) ########################################################################### - - name: Create Flowise Root Ingress (Subdomain) + - name: Create Flowise Root HTTPRoute (Subdomain) kubernetes.core.k8s: state: present definition: - apiVersion: networking.k8s.io/v1 - kind: Ingress + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute metadata: name: flowise-root namespace: "{{ agenticai_namespace }}" spec: -
ingressClassName: "{{ agenticai_ingress_class }}" - tls: - - secretName: "flowise-{{ cluster_url }}" - hosts: - - "flowise-{{ cluster_url }}" + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - "flowise-{{ cluster_url }}" rules: - - host: "flowise-{{ cluster_url }}" - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: flowise - port: - number: 3000 + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: flowise + port: 3000 when: kubernetes_platform != 'openshift' and agenticai_ingress_enabled | bool run_once: true