From 2e7d300ce05864988e2be35fd4488701234d215f Mon Sep 17 00:00:00 2001 From: Mushtaq-BGA Date: Wed, 1 Apr 2026 06:04:47 +0000 Subject: [PATCH] feat: Migrate from NGINX Ingress to Envoy Gateway - Replace NGINX Ingress with Envoy Gateway as cluster edge provider - Convert all ingress.yaml helm templates to HTTPRoute resources - Add GatewayClass, Gateway, and EnvoyProxy configuration - Update deploy-ingress-controller.yml for Envoy Gateway deployment - Add Keycloak, Grafana, GenAI Gateway Trace HTTPRoute integration - Gate EKS ingress_eks.yaml templates with platform check - Rename run_ingress_nginx_playbook() to run_edge_gateway_playbook() - Add envoy-gateway-deployment-guide and migration docs - Update observability, genai-gateway, and model chart templates - Increase Langfuse probe thresholds for single-node stability - Add single-node ClickHouse/Redis resource limits - Update proxy handling in read-config-file.sh - Rename Gaudi references to Intel AI Accelerator in docs - Update OVMS model deploy guide with generic model routing --- .../genai-gateway/templates/ingress.yaml | 40 +- .../genai-gateway/templates/ingress_eks.yaml | 2 +- core/helm-charts/istio/peer-auth-ingress.yaml | 4 +- .../keycloak/templates/ingress.yaml | 38 +- .../keycloak/templates/ingress_eks.yaml | 2 + .../templates/ingress.yaml | 56 +-- core/helm-charts/ovms/templates/ingress.yaml | 43 +- core/helm-charts/tei/templates/ingress.yaml | 78 ++- .../tei/templates/ingress_eks.yaml | 2 +- .../teirerank/templates/ingress.yaml | 81 ++-- .../teirerank/templates/ingress_eks.yaml | 2 +- core/helm-charts/tgi/templates/ingress.yaml | 80 ++-- .../tgi/templates/ingress_eks.yaml | 2 +- core/helm-charts/vllm/templates/ingress.yaml | 79 ++- .../vllm/templates/ingress_eks.yaml | 2 +- core/inventory/inference-config.cfg | 7 +- .../inventory/metadata/inference-metadata.cfg | 2 +- core/lib/cluster/deployment/fresh-install.sh | 8 +- core/lib/components/ingress-controller.sh | 6 +- core/lib/models/install-model.sh | 6 
+- core/lib/system/precheck/read-config-file.sh | 15 + core/lib/system/precheck/readiness-check.sh | 4 + core/lib/system/setup-env.sh | 7 + core/lib/user-menu/parse-user-prompts.sh | 4 +- core/playbooks/deploy-cluster-config.yml | 44 +- core/playbooks/deploy-genai-gateway.yml | 41 +- core/playbooks/deploy-ingress-controller.yml | 214 ++++++--- core/playbooks/deploy-istio-openshift.yml | 6 +- core/playbooks/deploy-istio.yml | 6 +- core/playbooks/deploy-keycloak-controller.yml | 14 - core/playbooks/deploy-keycloak-service.yml | 13 - core/playbooks/deploy-keycloak-tls-cert.yml | 34 +- core/playbooks/deploy-observability.yml | 39 +- docs/README.md | 2 +- docs/configuring-inference-config-cfg-file.md | 2 +- docs/cpu-optimization-guide.md | 4 +- docs/deploy-llm-model-from-hugging-face.md | 2 +- docs/envoy-gateway-deployment-guide.md | 237 +++++++++ docs/examples/single-node/README.md | 13 +- ... einf-singlenode-intel-ai-accelerator.yml} | 6 +- .../single-node/einf-singlenode-xeon.yml | 4 +- docs/ingress-to-envoy-gateway-migration.md | 453 ++++++++++++++++++ ... 
=> intel-ai-accelerator-prerequisites.md} | 14 +- docs/inventory-design-guide.md | 68 +-- docs/multi-node-deployment.md | 2 +- docs/ovms-model-deploy-guide.md | 18 +- ...e-Intel-AI-Accelerator-Driver-version.png} | Bin ...Intel-AI-Accelerator-Firmware-version.png} | Bin ...e-Intel-AI-Accelerator-Habana-version.png} | Bin ...ce-Intel-AI-Accelerator-Observability.png} | Bin ...tor-Utilization-Cluster-Observability.png} | Bin docs/pictures/apisix-route.png | Bin 0 -> 32306 bytes docs/prerequisites.md | 8 +- docs/single-node-deployment.md | 10 +- .../playbooks/deploy-agenticai-plugin.yml | 35 +- 55 files changed, 1310 insertions(+), 549 deletions(-) create mode 100644 docs/envoy-gateway-deployment-guide.md rename docs/examples/single-node/{einf-singlenode-gaudi.yml => einf-singlenode-intel-ai-accelerator.yml} (97%) create mode 100644 docs/ingress-to-envoy-gateway-migration.md rename docs/{gaudi-prerequisites.md => intel-ai-accelerator-prerequisites.md} (86%) rename docs/pictures/{Enterprise-Inference-Gaudi-Driver-version.png => Enterprise-Inference-Intel-AI-Accelerator-Driver-version.png} (100%) rename docs/pictures/{Enterprise-Inference-Gaudi-Firmware-version.png => Enterprise-Inference-Intel-AI-Accelerator-Firmware-version.png} (100%) rename docs/pictures/{Enterprise-Inference-Gaudi-Habana-version.png => Enterprise-Inference-Intel-AI-Accelerator-Habana-version.png} (100%) rename docs/pictures/{Enterprise-Inference-Gaudi-Observability.png => Enterprise-Inference-Intel-AI-Accelerator-Observability.png} (100%) rename docs/pictures/{Enterprise-Inference-Gaudi-Utilization-Cluster-Observability.png => Enterprise-Inference-Intel-AI-Accelerator-Utilization-Cluster-Observability.png} (100%) create mode 100644 docs/pictures/apisix-route.png diff --git a/core/helm-charts/genai-gateway/templates/ingress.yaml b/core/helm-charts/genai-gateway/templates/ingress.yaml index 67980d4a..6ecf959f 100644 --- a/core/helm-charts/genai-gateway/templates/ingress.yaml +++ 
b/core/helm-charts/genai-gateway/templates/ingress.yaml @@ -1,34 +1,24 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if .Values.ingress.enabled }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/proxy-body-size: 10m - nginx.ingress.kubernetes.io/ssl-redirect: "true" - generation: 1 labels: app.kubernetes.io/managed-by: Helm - name: genai-gateway-ingress + name: genai-gateway-httproute spec: - ingressClassName: nginx - rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - backend: - service: - name: genai-gateway-service - port: - number: 4000 - path: / - pathType: ImplementationSpecific - tls: - - hosts: + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretname }} -status: - loadBalancer: {} + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: genai-gateway-service + port: 4000 {{- end }} diff --git a/core/helm-charts/genai-gateway/templates/ingress_eks.yaml b/core/helm-charts/genai-gateway/templates/ingress_eks.yaml index 367a5396..39fc872c 100644 --- a/core/helm-charts/genai-gateway/templates/ingress_eks.yaml +++ b/core/helm-charts/genai-gateway/templates/ingress_eks.yaml @@ -1,4 +1,4 @@ -{{- if .Values.ingress.enabled }} +{{- if and .Values.ingress.enabled (eq .Values.platform "eks") }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: diff --git a/core/helm-charts/istio/peer-auth-ingress.yaml b/core/helm-charts/istio/peer-auth-ingress.yaml index 043cbd87..ebc99076 100644 --- a/core/helm-charts/istio/peer-auth-ingress.yaml +++ b/core/helm-charts/istio/peer-auth-ingress.yaml @@ -4,11 +4,11 @@ apiVersion: security.istio.io/v1 kind: PeerAuthentication metadata: name: peer-auth-ingress - namespace: ingress-nginx + 
namespace: envoy-gateway-system spec: selector: matchLabels: - app.kubernetes.io/name: ingress-nginx + app.kubernetes.io/name: envoy mtls: mode: STRICT portLevelMtls: diff --git a/core/helm-charts/keycloak/templates/ingress.yaml b/core/helm-charts/keycloak/templates/ingress.yaml index 992360b0..0d567248 100644 --- a/core/helm-charts/keycloak/templates/ingress.yaml +++ b/core/helm-charts/keycloak/templates/ingress.yaml @@ -1,31 +1,23 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 {{- if not .Values.apisixRoute.enabled }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - name: {{ .Release.Name }}-keycloak-apisix + name: {{ .Release.Name }}-keycloak-apisix-httproute namespace: auth-apisix - annotations: - kubernetes.io/ingress.class: "nginx" - nginx.ingress.kubernetes.io/ssl-redirect: "true" - nginx.ingress.kubernetes.io/backend-protocol: "HTTP" - nginx.ingress.kubernetes.io/proxy-pass-headers: "Content-Type, Authorization" spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - {{ .Values.ingress.host }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - path: /token - pathType: Exact - backend: - service: - name: {{ .Values.ingress.serviceName }} - port: - number: {{ .Values.ingress.servicePort }} - tls: - - hosts: - - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretName }} + - matches: + - path: + type: Exact + value: /token + backendRefs: + - name: {{ .Values.ingress.serviceName }} + port: {{ .Values.ingress.servicePort }} {{- end }} diff --git a/core/helm-charts/keycloak/templates/ingress_eks.yaml b/core/helm-charts/keycloak/templates/ingress_eks.yaml index 4af2999c..4467579e 100644 --- a/core/helm-charts/keycloak/templates/ingress_eks.yaml +++ b/core/helm-charts/keycloak/templates/ingress_eks.yaml @@ -1,5 +1,6 @@ # Copyright (C) 2025-2026 Intel 
Corporation # SPDX-License-Identifier: Apache-2.0 +{{- if eq .Values.platform "eks" }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: @@ -32,3 +33,4 @@ spec: - hosts: - {{ .Values.ingress.host }} secretName: {{ .Values.ingress.secretName }} +{{- end }} diff --git a/core/helm-charts/mcp-server-template/templates/ingress.yaml b/core/helm-charts/mcp-server-template/templates/ingress.yaml index 2bc1a77b..ad7f6b44 100644 --- a/core/helm-charts/mcp-server-template/templates/ingress.yaml +++ b/core/helm-charts/mcp-server-template/templates/ingress.yaml @@ -1,46 +1,32 @@ {{- if .Values.ingress.enabled }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - name: {{ include "mcp-demo.fullname" . }} + name: {{ include "mcp-demo.fullname" . }}-httproute namespace: {{ .Values.apisix.enabled | ternary "auth-apisix" (.Values.ingress.namespace | default .Release.Namespace) }} labels: {{- include "mcp-demo.labels" . | nindent 4 }} - annotations: - kubernetes.io/ingress.class: {{ .Values.ingress.className }} - nginx.ingress.kubernetes.io/use-regex: "true" - nginx.ingress.kubernetes.io/proxy-buffering: "off" - nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" - nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" - {{- with .Values.ingress.annotations }} - {{- toYaml . | nindent 4 }} - {{- end }} spec: - ingressClassName: {{ .Values.ingress.className }} - {{- if .Values.ingress.tls.enabled }} - tls: - - hosts: + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.tls.secretName }} - {{- end }} {{- $svcName := ternary "auth-apisix-gateway" (include "mcp-demo.fullname" .) 
.Values.apisix.enabled }} {{- $svcPort := ternary 80 .Values.service.port .Values.apisix.enabled }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - backend: - service: - name: {{ $svcName }} - port: - number: {{ $svcPort }} - path: /health - pathType: Prefix - - backend: - service: - name: {{ $svcName }} - port: - number: {{ $svcPort }} - path: {{ .Values.ingress.path }} - pathType: Prefix + - matches: + - path: + type: PathPrefix + value: /health + backendRefs: + - name: {{ $svcName }} + port: {{ $svcPort }} + - matches: + - path: + type: PathPrefix + value: {{ .Values.ingress.path }} + backendRefs: + - name: {{ $svcName }} + port: {{ $svcPort }} {{- end }} diff --git a/core/helm-charts/ovms/templates/ingress.yaml b/core/helm-charts/ovms/templates/ingress.yaml index c5f0865e..bbcbb919 100644 --- a/core/helm-charts/ovms/templates/ingress.yaml +++ b/core/helm-charts/ovms/templates/ingress.yaml @@ -2,32 +2,33 @@ # SPDX-License-Identifier: Apache-2.0 {{- if and .Values.ingress.enabled .Values.modelSource }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - name: {{ include "ovms-model-server.fullname" . }} + name: {{ include "ovms-model-server.fullname" . }}-httproute namespace: {{ .Values.ingress.namespace }} labels: {{- include "ovms-model-server.labels" . 
| nindent 4 }} - annotations: - nginx.ingress.kubernetes.io/rewrite-target: /{{ .Values.modelName }}-ovms/$1 spec: - ingressClassName: {{ .Values.ingress.className }} - {{- if .Values.ingress.secretname }} - tls: - - hosts: + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretname }} - {{- end }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - path: /{{ .Values.modelName }}-ovms/(.*) - pathType: ImplementationSpecific - backend: - service: - name: {{- if .Values.apisixRoute.enabled }} auth-apisix-gateway{{- else }} {{ include "ovms-model-server.fullname" . }}{{- end }} - port: - number: 80 + - matches: + - path: + type: PathPrefix + value: /{{ .Values.modelName }}-ovms + {{- if not .Values.apisixRoute.enabled }} + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + {{- end }} + backendRefs: + - name: {{- if .Values.apisixRoute.enabled }} auth-apisix-gateway{{- else }} {{ include "ovms-model-server.fullname" . }}{{- end }} + port: 80 {{- end }} diff --git a/core/helm-charts/tei/templates/ingress.yaml b/core/helm-charts/tei/templates/ingress.yaml index aba43f9a..39c2c547 100644 --- a/core/helm-charts/tei/templates/ingress.yaml +++ b/core/helm-charts/tei/templates/ingress.yaml @@ -1,60 +1,48 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Please edit the object below. Lines beginning with a '#' will be ignored, -# and an empty file will abort the edit. If an error occurs while saving this file will be -# reopened with the relevant failures. 
{{- if or .Values.ingress.enabled .Values.apisix.enabled }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +{{- $modelName := .Values.EMBEDDING_MODEL_ID | splitList "/" | last }} +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - annotations: - kubernetes.io/ingress.class: nginx - # nginx.ingress.kubernetes.io/rewrite-target: /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}/$1 - nginx.ingress.kubernetes.io/rewrite-target: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}/$1 - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}-teicpu/$1 - {{- else if not .Values.apisix.enabled }} - /$1 - {{- end }} - creationTimestamp: "2024-11-22T03:27:37Z" - generation: 1 labels: {{- include "tei.labels" . | nindent 4 }} - name: {{ include "tei.fullname" . }}-ingress + name: {{ include "tei.fullname" . }}-httproute namespace: {{- if .Values.apisix.enabled }} auth-apisix {{- else }} default {{- end }} - resourceVersion: "244487" - uid: df2b31a1-6653-4d71-9de0-4df33cb93ad1 spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - {{ .Values.ingress.host }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - backend: - service: - name: {{- if .Values.apisix.enabled }} - auth-apisix-gateway - {{- else }} - {{ include "tei.fullname" . 
}}-service - {{- end }} - port: - number: 80 - # path: /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}/(.*) - path: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}/(.*) - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}-teicpu/(.*) - {{- else if not .Values.apisix.enabled }} - /{{ .Values.EMBEDDING_MODEL_ID | splitList "/" | last }}/(.*) + - matches: + - path: + type: PathPrefix + value: {{- if and .Values.apisix.enabled .Values.accelDevice }} + /{{ $modelName }} + {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} + /{{ $modelName }}-teicpu + {{- else }} + /{{ $modelName }} + {{- end }} + {{- if not .Values.apisix.enabled }} + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + {{- end }} + backendRefs: + - name: {{- if .Values.apisix.enabled }} + auth-apisix-gateway + {{- else }} + {{ include "tei.fullname" . 
}}-service {{- end }} - pathType: ImplementationSpecific - tls: - - hosts: - - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretname }} + port: 80 {{- end }} \ No newline at end of file diff --git a/core/helm-charts/tei/templates/ingress_eks.yaml b/core/helm-charts/tei/templates/ingress_eks.yaml index 462c767e..5c6b4982 100644 --- a/core/helm-charts/tei/templates/ingress_eks.yaml +++ b/core/helm-charts/tei/templates/ingress_eks.yaml @@ -1,6 +1,6 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -{{- if or .Values.ingress.enabled .Values.apisix.enabled }} +{{- if and (or .Values.ingress.enabled .Values.apisix.enabled) (eq .Values.platform "eks") }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: diff --git a/core/helm-charts/teirerank/templates/ingress.yaml b/core/helm-charts/teirerank/templates/ingress.yaml index 402d05ab..359574c5 100644 --- a/core/helm-charts/teirerank/templates/ingress.yaml +++ b/core/helm-charts/teirerank/templates/ingress.yaml @@ -1,61 +1,48 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 - -# Please edit the object below. Lines beginning with a '#' will be ignored, -# and an empty file will abort the edit. If an error occurs while saving this file will be -# reopened with the relevant failures. 
-{{- if .Values.ingress.enabled}} -apiVersion: networking.k8s.io/v1 -kind: Ingress +{{- if .Values.ingress.enabled }} +{{- $modelName := .Values.RERANK_MODEL_ID | splitList "/" | last }} +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - annotations: - kubernetes.io/ingress.class: nginx - # nginx.ingress.kubernetes.io/rewrite-target: /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}/$1 - nginx.ingress.kubernetes.io/rewrite-target: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}/$1 - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}-teirerankcpu/$1 - {{- else if not .Values.apisix.enabled }} - /$1 - {{- end }} - creationTimestamp: "2024-11-22T03:27:37Z" - generation: 1 labels: {{- include "teirerank.labels" . | nindent 4 }} - name: {{ include "teirerank.fullname" . }}-ingress + name: {{ include "teirerank.fullname" . }}-httproute namespace: {{- if .Values.apisix.enabled }} auth-apisix {{- else }} default {{- end }} - resourceVersion: "244487" - uid: df2b31a1-6653-4d71-9de0-4df33cb93ad1 spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - {{ .Values.ingress.host }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - backend: - service: - name: {{- if .Values.apisix.enabled }} - auth-apisix-gateway - {{- else }} - {{ include "teirerank.fullname" . 
}}-service - {{- end }} - port: - number: 80 - # path: /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}/(.*) - path: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}/(.*) - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}-teirerankcpu/(.*) - {{- else if not .Values.apisix.enabled }} - /{{ .Values.RERANK_MODEL_ID | splitList "/" | last }}/(.*) + - matches: + - path: + type: PathPrefix + value: {{- if and .Values.apisix.enabled .Values.accelDevice }} + /{{ $modelName }} + {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} + /{{ $modelName }}-teirerankcpu + {{- else }} + /{{ $modelName }} + {{- end }} + {{- if not .Values.apisix.enabled }} + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + {{- end }} + backendRefs: + - name: {{- if .Values.apisix.enabled }} + auth-apisix-gateway + {{- else }} + {{ include "teirerank.fullname" . 
}}-service {{- end }} - pathType: ImplementationSpecific - tls: - - hosts: - - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretname }} + port: 80 {{- end }} \ No newline at end of file diff --git a/core/helm-charts/teirerank/templates/ingress_eks.yaml b/core/helm-charts/teirerank/templates/ingress_eks.yaml index 202afd6d..25bfe05d 100644 --- a/core/helm-charts/teirerank/templates/ingress_eks.yaml +++ b/core/helm-charts/teirerank/templates/ingress_eks.yaml @@ -1,6 +1,6 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -{{- if .Values.ingress.enabled }} +{{- if and .Values.ingress.enabled (eq .Values.platform "eks") }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: diff --git a/core/helm-charts/tgi/templates/ingress.yaml b/core/helm-charts/tgi/templates/ingress.yaml index 2e1deac1..363f68d9 100644 --- a/core/helm-charts/tgi/templates/ingress.yaml +++ b/core/helm-charts/tgi/templates/ingress.yaml @@ -1,60 +1,48 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Please edit the object below. Lines beginning with a '#' will be ignored, -# and an empty file will abort the edit. If an error occurs while saving this file will be -# reopened with the relevant failures. 
-{{- if .Values.ingress.enabled}} -apiVersion: networking.k8s.io/v1 -kind: Ingress +{{- if .Values.ingress.enabled }} +{{- $modelName := .Values.LLM_MODEL_ID | splitList "/" | last }} +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - annotations: - kubernetes.io/ingress.class: nginx - # nginx.ingress.kubernetes.io/rewrite-target: /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}/$1 - nginx.ingress.kubernetes.io/rewrite-target: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}/$1 - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}-tgicpu/$1 - {{- else if not .Values.apisix.enabled }} - /$1 - {{- end }} - creationTimestamp: "2024-11-22T03:27:37Z" - generation: 1 labels: {{- include "tgi.labels" . | nindent 4 }} - name: {{ include "tgi.fullname" . }}-ingress + name: {{ include "tgi.fullname" . }}-httproute namespace: {{- if .Values.apisix.enabled }} auth-apisix {{- else }} default {{- end }} - resourceVersion: "244487" - uid: df2b31a1-6653-4d71-9de0-4df33cb93ad1 spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - {{ .Values.ingress.host }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - backend: - service: - name: {{- if .Values.apisix.enabled }} - auth-apisix-gateway - {{- else }} - {{ include "tgi.fullname" . 
}}-service - {{- end }} - port: - number: 80 - # path: /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}/(.*) - path: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}/(.*) - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}-tgicpu/(.*) - {{- else if not .Values.apisix.enabled }} - /{{ .Values.LLM_MODEL_ID | splitList "/" | last }}/(.*) + - matches: + - path: + type: PathPrefix + value: {{- if and .Values.apisix.enabled .Values.accelDevice }} + /{{ $modelName }} + {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} + /{{ $modelName }}-tgicpu + {{- else }} + /{{ $modelName }} + {{- end }} + {{- if not .Values.apisix.enabled }} + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + {{- end }} + backendRefs: + - name: {{- if .Values.apisix.enabled }} + auth-apisix-gateway + {{- else }} + {{ include "tgi.fullname" . 
}}-service {{- end }} - pathType: ImplementationSpecific - tls: - - hosts: - - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretname }} + port: 80 {{- end }} \ No newline at end of file diff --git a/core/helm-charts/tgi/templates/ingress_eks.yaml b/core/helm-charts/tgi/templates/ingress_eks.yaml index d0cddf64..f719db5e 100644 --- a/core/helm-charts/tgi/templates/ingress_eks.yaml +++ b/core/helm-charts/tgi/templates/ingress_eks.yaml @@ -1,6 +1,6 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -{{- if .Values.ingress.enabled }} +{{- if and .Values.ingress.enabled (eq .Values.platform "eks") }} apiVersion: networking.k8s.io/v1 kind: Ingress metadata: diff --git a/core/helm-charts/vllm/templates/ingress.yaml b/core/helm-charts/vllm/templates/ingress.yaml index 5ef5caf5..d1afe076 100644 --- a/core/helm-charts/vllm/templates/ingress.yaml +++ b/core/helm-charts/vllm/templates/ingress.yaml @@ -1,61 +1,48 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Please edit the object below. Lines beginning with a '#' will be ignored, -# and an empty file will abort the edit. If an error occurs while saving this file will be -# reopened with the relevant failures. 
-{{- if .Values.ingress.enabled}} +{{- if .Values.ingress.enabled }} {{- $modelName := (default .Values.LLM_MODEL_ID .Values.SERVED_MODEL_NAME) | splitList "/" | last }} -apiVersion: networking.k8s.io/v1 -kind: Ingress +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute metadata: - annotations: - kubernetes.io/ingress.class: nginx - # nginx.ingress.kubernetes.io/rewrite-target: /{{ $modelName }}/$1 - nginx.ingress.kubernetes.io/rewrite-target: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ $modelName }}/$1 - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ $modelName }}-vllmcpu/$1 - {{- else if not .Values.apisix.enabled }} - /$1 - {{- end }} - creationTimestamp: "2024-11-22T03:27:37Z" - generation: 1 labels: {{- include "vllm.labels" . | nindent 4 }} - name: {{ include "vllm.fullname" . }}-ingress + name: {{ include "vllm.fullname" . }}-httproute namespace: {{- if .Values.apisix.enabled }} auth-apisix {{- else }} default {{- end }} - resourceVersion: "244487" - uid: df2b31a1-6653-4d71-9de0-4df33cb93ad1 spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - {{ .Values.ingress.host }} rules: - - host: {{ .Values.ingress.host }} - http: - paths: - - backend: - service: - name: {{- if .Values.apisix.enabled }} - auth-apisix-gateway - {{- else }} - {{ include "vllm.fullname" . 
}}-service - {{- end }} - port: - number: 80 - # path: /{{ $modelName }}/(.*) - path: {{- if and .Values.apisix.enabled .Values.accelDevice }} - /{{ $modelName }}/(.*) - {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} - /{{ $modelName }}-vllmcpu/(.*) - {{- else if not .Values.apisix.enabled }} - /{{ $modelName }}/(.*) + - matches: + - path: + type: PathPrefix + value: {{- if and .Values.apisix.enabled .Values.accelDevice }} + /{{ $modelName }} + {{- else if and .Values.apisix.enabled (not .Values.accelDevice) }} + /{{ $modelName }}-vllmcpu + {{- else }} + /{{ $modelName }} + {{- end }} + {{- if not .Values.apisix.enabled }} + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + {{- end }} + backendRefs: + - name: {{- if .Values.apisix.enabled }} + auth-apisix-gateway + {{- else }} + {{ include "vllm.fullname" . }}-service {{- end }} - pathType: ImplementationSpecific - tls: - - hosts: - - {{ .Values.ingress.host }} - secretName: {{ .Values.ingress.secretname }} + port: 80 {{- end }} \ No newline at end of file diff --git a/core/helm-charts/vllm/templates/ingress_eks.yaml b/core/helm-charts/vllm/templates/ingress_eks.yaml index 51710c16..6ce50e32 100644 --- a/core/helm-charts/vllm/templates/ingress_eks.yaml +++ b/core/helm-charts/vllm/templates/ingress_eks.yaml @@ -1,6 +1,6 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -{{- if .Values.ingress.enabled }} +{{- if and .Values.ingress.enabled (eq .Values.platform "eks") }} {{- $modelName := (default .Values.LLM_MODEL_ID .Values.SERVED_MODEL_NAME) | splitList "/" | last }} apiVersion: networking.k8s.io/v1 kind: Ingress diff --git a/core/inventory/inference-config.cfg b/core/inventory/inference-config.cfg index e63552d4..3255b8ed 100644 --- a/core/inventory/inference-config.cfg +++ b/core/inventory/inference-config.cfg @@ -20,4 +20,9 @@ deploy_istio=off uninstall_ceph=off # Agentic AI Plugin 
-deploy_agenticai_plugin=off \ No newline at end of file +deploy_agenticai_plugin=off + +# Proxy Configuration (central source — leave empty if no proxy needed) +http_proxy= +https_proxy= +no_proxy= diff --git a/core/inventory/metadata/inference-metadata.cfg b/core/inventory/metadata/inference-metadata.cfg index 48b01376..0861b2e7 100644 --- a/core/inventory/metadata/inference-metadata.cfg +++ b/core/inventory/metadata/inference-metadata.cfg @@ -1,7 +1,7 @@ gaudi2_operator="1.22.0-740" gaudi3_operator="1.22.0-740" python3_interpreter="/usr/bin/python3" -ingress_controller="4.12.2" +envoy_gateway_version="v1.2.0" keycloak_chart_version="22.1.0" apisix_chart_version="0.1.0" kubespray_version="v2.27.0" diff --git a/core/lib/cluster/deployment/fresh-install.sh b/core/lib/cluster/deployment/fresh-install.sh index e4c78c79..99d5c35c 100644 --- a/core/lib/cluster/deployment/fresh-install.sh +++ b/core/lib/cluster/deployment/fresh-install.sh @@ -93,11 +93,11 @@ fresh_installation() { fi if [[ "$deploy_ingress_controller" == "yes" ]]; then - execute_and_check "Deploying Ingress NGINX Controller..." run_ingress_nginx_playbook \ - "Ingress NGINX Controller is deployed successfully." \ - "Failed to deploy Ingress NGINX Controller. Exiting." + execute_and_check "Deploying Envoy Gateway Edge Controller..." run_edge_gateway_playbook \ + "Envoy Gateway Edge Controller is deployed successfully." \ + "Failed to deploy Envoy Gateway Edge Controller. Exiting." else - echo "Skipping Ingress NGINX Controller deployment..." + echo "Skipping Edge Gateway Controller deployment..." 
fi if [[ "$deploy_keycloak" == "yes" || "$deploy_apisix" == "yes" ]]; then diff --git a/core/lib/components/ingress-controller.sh b/core/lib/components/ingress-controller.sh index fc15d7a4..070dba20 100644 --- a/core/lib/components/ingress-controller.sh +++ b/core/lib/components/ingress-controller.sh @@ -1,7 +1,7 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -run_ingress_nginx_playbook() { - echo "Deploying the Ingress NGINX Controller..." - ansible-playbook -i "${INVENTORY_PATH}" playbooks/deploy-ingress-controller.yml --extra-vars "secret_name=${cluster_url} cert_file=${cert_file} key_file=${key_file} ingress_controller=${ingress_controller}" +run_edge_gateway_playbook() { + echo "Deploying the Envoy Gateway Edge Controller..." + ansible-playbook -i "${INVENTORY_PATH}" playbooks/deploy-ingress-controller.yml --extra-vars "secret_name=${cluster_url} cert_file=${cert_file} key_file=${key_file} envoy_gateway_version=${envoy_gateway_version:-v1.2.0}" } \ No newline at end of file diff --git a/core/lib/models/install-model.sh b/core/lib/models/install-model.sh index 40321f8d..4f20d66e 100644 --- a/core/lib/models/install-model.sh +++ b/core/lib/models/install-model.sh @@ -27,10 +27,10 @@ deploy_inference_llm_models_playbook() { else apisix_enabled="true" fi - if [ "$deploy_keycloak" == "no" ]; then - ingress_enabled="false" - else + if [ "$deploy_ingress_controller" == "yes" ]; then ingress_enabled="true" + else + ingress_enabled="false" fi if [ "$deploy_observability" == "yes" ]; then vllm_metrics_enabled="true" diff --git a/core/lib/system/precheck/read-config-file.sh b/core/lib/system/precheck/read-config-file.sh index 45c6baf1..b9a8c43c 100644 --- a/core/lib/system/precheck/read-config-file.sh +++ b/core/lib/system/precheck/read-config-file.sh @@ -46,17 +46,32 @@ read_config_file() { sed -i -E "s|^[[:space:]]*#?[[:space:]]*http_proxy:.*|http_proxy: \"$http_proxy\"|" "$INVENTORY_ALL_FILE" sed -i -E 
"/^env_proxy:/,/^[^[:space:]]/s|^[[:space:]]*http_proxy:.*| http_proxy: \"$http_proxy\"|" "$INVENTORY_ALL_FILE" export http_proxy + else + sed -i -E "s|^[[:space:]]*#?[[:space:]]*http_proxy:.*|http_proxy: \"\"|" "$INVENTORY_ALL_FILE" + sed -i -E "/^env_proxy:/,/^[^[:space:]]/s|^[[:space:]]*http_proxy:.*| http_proxy: \"\"|" "$INVENTORY_ALL_FILE" + unset http_proxy 2>/dev/null || true fi if [[ -n "$https_proxy" ]]; then sed -i -E "s|^[[:space:]]*#?[[:space:]]*https_proxy:.*|https_proxy: \"$https_proxy\"|" "$INVENTORY_ALL_FILE" sed -i -E "/^env_proxy:/,/^[^[:space:]]/s|^[[:space:]]*https_proxy:.*| https_proxy: \"$https_proxy\"|" "$INVENTORY_ALL_FILE" export https_proxy + else + sed -i -E "s|^[[:space:]]*#?[[:space:]]*https_proxy:.*|https_proxy: \"\"|" "$INVENTORY_ALL_FILE" + sed -i -E "/^env_proxy:/,/^[^[:space:]]/s|^[[:space:]]*https_proxy:.*| https_proxy: \"\"|" "$INVENTORY_ALL_FILE" + unset https_proxy 2>/dev/null || true fi if [[ -n "$no_proxy" ]]; then + # Ensure .svc.cluster.local is always in no_proxy for in-cluster traffic + if [[ "$no_proxy" != *".svc.cluster.local"* ]]; then + no_proxy="${no_proxy},.svc.cluster.local" + fi sed -i -E "/^env_proxy:/,/^[^[:space:]]/s|^[[:space:]]*no_proxy:.*| no_proxy: \"$no_proxy\"|" "$INVENTORY_ALL_FILE" export no_proxy + else + sed -i -E "/^env_proxy:/,/^[^[:space:]]/s|^[[:space:]]*no_proxy:.*| no_proxy: \"\"|" "$INVENTORY_ALL_FILE" + unset no_proxy 2>/dev/null || true fi diff --git a/core/lib/system/precheck/readiness-check.sh b/core/lib/system/precheck/readiness-check.sh index 0cdf4f5b..6d6c99db 100644 --- a/core/lib/system/precheck/readiness-check.sh +++ b/core/lib/system/precheck/readiness-check.sh @@ -5,6 +5,10 @@ run_infrastructure_readiness_check() { echo "Running infrastructure readiness check..." echo "This will verify system compatibility and infrastructure requirements." + if [ ! 
-f "$HOMEDIR/inventory/hosts.yaml" ]; then + echo -e "${YELLOW}Inventory file not found — auto-generating hosts.yaml for single-node...${NC}" + bash "$HOMEDIR/scripts/generate-hosts.sh" + fi if [ ! -f "$HOMEDIR/inventory/hosts.yaml" ]; then echo -e "${RED}Error: Inventory file not found at $HOMEDIR/inventory/hosts.yaml${NC}" echo -e "${YELLOW}Please ensure the inventory file exists and contains the correct host information.${NC}" diff --git a/core/lib/system/setup-env.sh b/core/lib/system/setup-env.sh index d92caf36..40c36f13 100644 --- a/core/lib/system/setup-env.sh +++ b/core/lib/system/setup-env.sh @@ -90,6 +90,13 @@ setup_initial_env() { echo -e "${GREEN} Enterprise Inference requirements installed.${NC}" cp -r "$HOMEDIR"/helm-charts "$HOMEDIR"/scripts "$KUBESPRAYDIR"/ cp -r "$KUBESPRAYDIR"/inventory/sample/ "$KUBESPRAYDIR"/inventory/mycluster + + # Auto-generate hosts.yaml if it doesn't exist (single-node auto-detect) + if [ ! -f "$HOMEDIR/inventory/hosts.yaml" ]; then + echo -e "${YELLOW}No hosts.yaml found — auto-generating for single-node deployment...${NC}" + bash "$HOMEDIR/scripts/generate-hosts.sh" + fi + cp "$HOMEDIR"/inventory/hosts.yaml $KUBESPRAYDIR/inventory/mycluster/ cp "$HOMEDIR"/inventory/metadata/addons.yml $KUBESPRAYDIR/inventory/mycluster/group_vars/k8s_cluster/addons.yml cp "$HOMEDIR"/playbooks/* "$KUBESPRAYDIR"/playbooks/ diff --git a/core/lib/user-menu/parse-user-prompts.sh b/core/lib/user-menu/parse-user-prompts.sh index 0928bdf2..200d92e1 100644 --- a/core/lib/user-menu/parse-user-prompts.sh +++ b/core/lib/user-menu/parse-user-prompts.sh @@ -35,9 +35,9 @@ prompt_for_input() { echo "Proceeding with the setup of Habana AI Operator: $deploy_habana_ai_operator" fi if [ -z "$deploy_ingress_controller" ]; then - read -p "Do you want to proceed with deploying Ingress NGINX Controller? (yes/no): " deploy_ingress_controller + read -p "Do you want to proceed with deploying Envoy Gateway Edge Controller? 
(yes/no): " deploy_ingress_controller else - echo "Proceeding with the setup of Ingress Controller: $deploy_ingress_controller" + echo "Proceeding with the setup of Edge Gateway Controller: $deploy_ingress_controller" fi if [ -z "$deploy_keycloak" ]; then read -p "Do you want to proceed with deploying Keycloak & APISIX? (yes/no): " deploy_keycloak diff --git a/core/playbooks/deploy-cluster-config.yml b/core/playbooks/deploy-cluster-config.yml index bcd66d3f..3cbc00dd 100644 --- a/core/playbooks/deploy-cluster-config.yml +++ b/core/playbooks/deploy-cluster-config.yml @@ -29,35 +29,35 @@ tls.key: "{{ lookup('file', key_file) | b64encode }}" register: kubectl_output tags: deploy_cluster_dashboard - - name: Create Admin dashboard ingress + - name: Create Admin dashboard HTTPRoute community.kubernetes.k8s: state: present definition: - apiVersion: networking.k8s.io/v1 - kind: Ingress + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute metadata: - annotations: - nginx.ingress.kubernetes.io/backend-protocol: HTTPS - nginx.ingress.kubernetes.io/rewrite-target: /$2 - name: kubernetes-dashboard-ingress + name: kubernetes-dashboard-httproute namespace: kube-system spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - "{{ secret_name }}" rules: - - host: "{{ secret_name }}" - http: - paths: - - backend: - service: - name: kubernetes-dashboard - port: - number: 443 - path: /dashboard(/|$)(.*) - pathType: ImplementationSpecific - tls: - - hosts: - - "{{ secret_name }}" - secretName: "{{ secret_name }}" + - matches: + - path: + type: PathPrefix + value: /dashboard + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + backendRefs: + - name: kubernetes-dashboard + port: 443 when: brownfield_deployment != "yes" tags: deploy_cluster_dashboard - name: Create ServiceAccount for Admin Dashboard diff --git a/core/playbooks/deploy-genai-gateway.yml 
b/core/playbooks/deploy-genai-gateway.yml index d4394f6e..ca39b803 100644 --- a/core/playbooks/deploy-genai-gateway.yml +++ b/core/playbooks/deploy-genai-gateway.yml @@ -95,7 +95,7 @@ state: absent when: kubernetes_platform != "eks" run_once: true - - name: Delete Ingress Resource + - name: Delete stale Ingress/HTTPRoute Resource kubernetes.core.k8s: state: absent api_version: networking.k8s.io/v1 @@ -137,6 +137,11 @@ --set route.tls.termination=edge --set route.tls.insecureEdgeTerminationPolicy=Redirect --set ingress.enabled=false + {% elif kubernetes_platform == 'eks' %} + --set ingress.enabled=true + --set ingress.host={{ secret_name }} + --set ingress.secretname={{ secret_name }} + --set route.enabled=false {% else %} --set ingress.enabled=true --set ingress.host={{ secret_name }} @@ -232,15 +237,19 @@ --set langfuse.route.tls.insecureEdgeTerminationPolicy=Redirect --set langfuse.ingress.enabled=false --set langfuse.nextauth.url=https://trace-{{ secret_name }} - {% else %} + {% elif kubernetes_platform == 'eks' %} --set langfuse.ingress.enabled=true - --set langfuse.ingress.className={{ 'alb' if kubernetes_platform == 'eks' else 'nginx' }} + --set langfuse.ingress.className=alb --set langfuse.ingress.tls.enabled=true --set langfuse.ingress.hosts[0].host=trace-{{ secret_name }} --set langfuse.ingress.hosts[0].paths[0].pathType=Prefix --set langfuse.ingress.tls.secretName=trace-{{ secret_name }} --set langfuse.nextauth.url=https://trace-{{ secret_name }} --set langfuse.route.enabled=false + {% else %} + --set langfuse.ingress.enabled=false + --set langfuse.nextauth.url=https://trace-{{ secret_name }} + --set langfuse.route.enabled=false {% endif %} {% if kubernetes_platform == 'eks' %} -f {{ remote_helm_charts_base }}/genai-gateway-trace/eks-ingress-annotations.yaml @@ -287,3 +296,29 @@ targetPort: 3000 when: kubernetes_platform == "openshift" run_once: true + + - name: Create HTTPRoute for GenAI Gateway Trace (non-EKS, non-OpenShift) + kubernetes.core.k8s: + 
state: present + definition: + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: genai-gateway-trace-httproute + namespace: genai-gateway + spec: + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - "trace-{{ secret_name }}" + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: genai-gateway-trace-web + port: 3000 + when: kubernetes_platform != "openshift" and kubernetes_platform != "eks" + run_once: true diff --git a/core/playbooks/deploy-ingress-controller.yml b/core/playbooks/deploy-ingress-controller.yml index 5e0fd648..5adebf12 100644 --- a/core/playbooks/deploy-ingress-controller.yml +++ b/core/playbooks/deploy-ingress-controller.yml @@ -1,7 +1,7 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 --- -- name: Deploy Ingress NGINX using Helm +- name: Deploy Envoy Gateway and Enterprise Edge Gateway hosts: "{{ inference_delegate | default('kube_control_plane') }}" gather_facts: false any_errors_fatal: "{{ any_errors_fatal | default(true) }}" @@ -11,19 +11,10 @@ roles: - role: inference-tools tasks: - - name: Add the Ingress-NGINX Helm repository - community.kubernetes.helm_repository: - name: ingress-nginx - repo_url: https://kubernetes.github.io/ingress-nginx - state: present - - name: Validate if the Helm repositories are configured correctly - ansible.builtin.command: helm repo list - register: helm_repo_list - failed_when: false - changed_when: false - - name: Update Helm repositories to fetch the latest charts - ansible.builtin.shell: helm repo update - when: helm_repo_list.stdout != "" + - name: Install Gateway API CRDs + ansible.builtin.shell: > + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.2.0/standard-install.yaml + run_once: true - name: Retrieving the Number of Infrastructure Nodes (ei-infra-eligible) ansible.builtin.shell: kubectl get nodes 
--selector=ei-infra-eligible=true -o jsonpath='{.items[*].metadata.name}' | wc -w @@ -36,7 +27,7 @@ changed_when: false when: inference_node_count.stdout | int == 0 - - name: Determine Replica Node + - name: Determine Replica Count set_fact: inference_infra_replica_count: >- {{ @@ -45,62 +36,155 @@ else 1) }} - - name: Display the Number of Nodes on which Ingress is deployed + - name: Display the Number of Gateway Proxy Replicas debug: - msg: "Number of Nodes on which Ingress is deployed: {{ inference_infra_replica_count }}" + msg: "Number of Envoy Gateway proxy replicas: {{ inference_infra_replica_count }}" run_once: true - - name: Deploy Ingress Nginx Controller - community.kubernetes.helm: - name: ingress-nginx - chart_ref: ingress-nginx/ingress-nginx - release_namespace: ingress-nginx - create_namespace: true - chart_version: "{{ ingress_controller | default('4.12.2') }}" - state: present - values: - controller: - progressDeadlineSeconds: 300 - minReadySeconds: 0 - replicaCount: "{{ inference_infra_replica_count | int }}" - hostPort: - enabled: true - ports: - http: 80 - https: 443 - tolerations: - - key: node-role.kubernetes.io/control-plane - operator: Exists - effect: NoSchedule - - key: node-role.kubernetes.io/master - operator: Exists - effect: NoSchedule - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: ei-infra-eligible - operator: In - values: ["true"] - - matchExpressions: + - name: Deploy Envoy Gateway Controller + ansible.builtin.shell: > + helm upgrade --install eg oci://docker.io/envoyproxy/gateway-helm + --version {{ envoy_gateway_version | default('v1.2.0') }} + --namespace envoy-gateway-system + --create-namespace + --wait + run_once: true + + - name: Create EnvoyProxy configuration for proxy pod placement + ansible.builtin.shell: | + cat <<'INNEREOF' | sed 's/__REPLICAS__/{{ inference_infra_replica_count | int }}/' | kubectl apply -f - + apiVersion: 
gateway.envoyproxy.io/v1alpha1 + kind: EnvoyProxy + metadata: + name: enterprise-proxy-config + namespace: envoy-gateway-system + spec: + provider: + type: Kubernetes + kubernetes: + envoyDeployment: + replicas: __REPLICAS__ + pod: + tolerations: - key: node-role.kubernetes.io/control-plane operator: Exists - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchLabels: - app: ingress-nginx - topologyKey: "kubernetes.io/hostname" + effect: NoSchedule + - key: node-role.kubernetes.io/master + operator: Exists + effect: NoSchedule + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: ei-infra-eligible + operator: In + values: ["true"] + - matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: Exists + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchLabels: + app.kubernetes.io/name: envoy + topologyKey: "kubernetes.io/hostname" + patch: + type: StrategicMerge + value: + spec: + template: + spec: + containers: + - name: envoy + ports: + - containerPort: 10443 + hostPort: 443 + name: https-443 + protocol: TCP + - containerPort: 10080 + hostPort: 80 + name: http-80 + protocol: TCP + - containerPort: 19001 + hostPort: 19001 + name: metrics + protocol: TCP + envoyService: + type: ClusterIP + INNEREOF run_once: true - - name: Pause to Allow Controller to Initialize + + - name: Create GatewayClass + kubernetes.core.k8s: + state: present + definition: + apiVersion: gateway.networking.k8s.io/v1 + kind: GatewayClass + metadata: + name: envoy + spec: + controllerName: gateway.envoyproxy.io/gatewayclass-controller + parametersRef: + group: gateway.envoyproxy.io + kind: EnvoyProxy + name: enterprise-proxy-config + namespace: envoy-gateway-system + run_once: true + + - name: Create TLS Secret in Gateway namespace + 
community.kubernetes.k8s: + state: present + definition: + apiVersion: v1 + kind: Secret + metadata: + name: "{{ secret_name }}" + namespace: envoy-gateway-system + type: kubernetes.io/tls + data: + tls.crt: "{{ lookup('file', cert_file) | b64encode }}" + tls.key: "{{ lookup('file', key_file) | b64encode }}" + run_once: true + + - name: Create Enterprise Edge Gateway + kubernetes.core.k8s: + state: present + definition: + apiVersion: gateway.networking.k8s.io/v1 + kind: Gateway + metadata: + name: enterprise-edge-gateway + namespace: envoy-gateway-system + spec: + gatewayClassName: envoy + listeners: + - name: https + protocol: HTTPS + port: 443 + tls: + mode: Terminate + certificateRefs: + - name: "{{ secret_name }}" + allowedRoutes: + namespaces: + from: All + - name: http + protocol: HTTP + port: 80 + allowedRoutes: + namespaces: + from: All + run_once: true + + - name: Pause to Allow Gateway to Initialize pause: - seconds: 10 - - name: Wait for all ingress-nginx pods to be in a running and ready state + seconds: 15 + - name: Wait for all Envoy Gateway pods to be in a running and ready state shell: | - kubectl get pods -n ingress-nginx -o json | jq -r ' + kubectl get pods -n envoy-gateway-system -o json | jq -r ' .items[] | select(.status.phase != "Running" or (.status.containerStatuses[] | select(.ready != true))) | .metadata.name' | wc -l @@ -109,6 +193,6 @@ retries: 160 delay: 10 failed_when: pod_status.rc != 0 and pod_status.stdout != "0" - - name: Ingress Controller Deployment status + - name: Envoy Gateway Deployment status debug: - msg: "All Ingress NGINX Controller pods are running and ready." + msg: "Envoy Gateway and Enterprise Edge Gateway are running and ready." 
diff --git a/core/playbooks/deploy-istio-openshift.yml b/core/playbooks/deploy-istio-openshift.yml index 9841d9d0..82e7107d 100644 --- a/core/playbooks/deploy-istio-openshift.yml +++ b/core/playbooks/deploy-istio-openshift.yml @@ -9,7 +9,7 @@ vars: test_ns: test-ns default_ns: default - ingress_ns: ingress-nginx + ingress_ns: envoy-gateway-system genai_gw_ns: genai-gateway observability_ns: observability habana_ns: habana-ai-operator @@ -348,12 +348,12 @@ when: observability_ns_check.rc == 0 run_once: true - - name: "[OpenShift] Label ingress-nginx namespace for ambient mode" + - name: "[OpenShift] Label envoy-gateway-system namespace for ambient mode" command: kubectl label namespace {{ ingress_ns }} istio.io/dataplane-mode=ambient --overwrite when: is_openshift | bool run_once: true - - name: Apply peer-auth-ingress.yaml to ingress-nginx namespace + - name: Apply peer-auth-ingress.yaml to envoy-gateway-system namespace command: kubectl apply -f {{ peer_auth_ingress_path }} -n {{ ingress_ns }} run_once: true diff --git a/core/playbooks/deploy-istio.yml b/core/playbooks/deploy-istio.yml index 2c4aff9c..a7912a6d 100644 --- a/core/playbooks/deploy-istio.yml +++ b/core/playbooks/deploy-istio.yml @@ -9,7 +9,7 @@ vars: test_ns: test-ns default_ns: default - ingress_ns: ingress-nginx + ingress_ns: envoy-gateway-system genai_gw_ns: genai-gateway observability_ns: observability habana_ns: habana-ai-operator @@ -177,11 +177,11 @@ when: observability_ns_check.rc == 0 run_once: true - - name: Label ingress-nginx namespace for ambient mode + - name: Label envoy-gateway-system namespace for ambient mode command: kubectl label namespace {{ ingress_ns }} istio.io/dataplane-mode=ambient --overwrite run_once: true - - name: Apply peer-auth-ingress.yaml to ingress-nginx namespace + - name: Apply peer-auth-ingress.yaml to envoy-gateway-system namespace command: kubectl apply -f {{ peer_auth_ingress_path }} -n {{ ingress_ns }} run_once: true diff --git 
a/core/playbooks/deploy-keycloak-controller.yml b/core/playbooks/deploy-keycloak-controller.yml index de0220c6..3e49127a 100644 --- a/core/playbooks/deploy-keycloak-controller.yml +++ b/core/playbooks/deploy-keycloak-controller.yml @@ -12,20 +12,6 @@ roles: - role: inference-tools tasks: - - name: Add Ingress-Nginx repository - community.kubernetes.helm_repository: - name: ingress-nginx - repo_url: https://kubernetes.github.io/ingress-nginx - state: present - when: delete_pv_on_purge == "no" - - name: Verify repository availability - ansible.builtin.command: helm repo list - register: helm_repo_list - failed_when: false - changed_when: false - - name: Synchronize repositories - ansible.builtin.shell: helm repo update - when: helm_repo_list.stdout != "" and delete_pv_on_purge == "no" - name: Remove existing Keycloak PersistentVolume data directory ansible.builtin.file: path: /mnt/local-path-provisioner/ diff --git a/core/playbooks/deploy-keycloak-service.yml b/core/playbooks/deploy-keycloak-service.yml index 30219569..c173bb4f 100644 --- a/core/playbooks/deploy-keycloak-service.yml +++ b/core/playbooks/deploy-keycloak-service.yml @@ -11,16 +11,3 @@ roles: - role: inference-tools tasks: - - name: Add ingress-nginx repository using Helm module - community.kubernetes.helm_repository: - name: ingress-nginx - repo_url: https://kubernetes.github.io/ingress-nginx - state: present - - name: Add Ingress-Nginx repository - ansible.builtin.command: helm repo list - register: helm_repo_list - failed_when: false - changed_when: false - - name: Synchronize repositories - ansible.builtin.shell: helm repo update - when: helm_repo_list.stdout != "" diff --git a/core/playbooks/deploy-keycloak-tls-cert.yml b/core/playbooks/deploy-keycloak-tls-cert.yml index 65836378..de9e8de5 100644 --- a/core/playbooks/deploy-keycloak-tls-cert.yml +++ b/core/playbooks/deploy-keycloak-tls-cert.yml @@ -67,7 +67,7 @@ msg: "Number of ei-infra-eligible nodes configured: {{ inference_infra_replica_count 
}}" run_once: true when: deploy_keycloak == "yes" - - name: Delete genai-gateway-ingress resource + - name: Delete stale genai-gateway-ingress resource kubernetes.core.k8s: state: absent kind: Ingress @@ -156,10 +156,10 @@ name: memory targetAverageUtilization: 60 ingress: - enabled: "{{ false if kubernetes_platform == 'openshift' or kubernetes_platform == 'eks' else true }}" + enabled: "{{ true if kubernetes_platform == 'eks' else false }}" hostname: "{{ secret_name }}" tls: true - ingressClassName: "nginx" + ingressClassName: "{{ 'alb' if kubernetes_platform == 'eks' else 'nginx' }}" proxyAddressForwarding: true annotations: nginx.ingress.kubernetes.io/ssl-redirect: "true" @@ -321,6 +321,34 @@ - deploy_keycloak == "yes" - kubernetes_platform == "eks" + - name: Create HTTPRoute for Keycloak (non-EKS, non-OpenShift) + kubernetes.core.k8s: + state: present + definition: + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute + metadata: + name: keycloak-httproute + namespace: default + spec: + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - "{{ secret_name }}" + rules: + - matches: + - path: + type: PathPrefix + value: / + backendRefs: + - name: keycloak + port: 80 + when: + - deploy_keycloak == "yes" + - kubernetes_platform != "eks" + - kubernetes_platform != "openshift" + - name: Ensure Remote Directory Exists ansible.builtin.file: path: "{{ remote_helm_charts_base }}" diff --git a/core/playbooks/deploy-observability.yml b/core/playbooks/deploy-observability.yml index c1e5863a..4d646940 100644 --- a/core/playbooks/deploy-observability.yml +++ b/core/playbooks/deploy-observability.yml @@ -206,36 +206,31 @@ - "{{ secret_name }}" secretName: "{{ secret_name }}" - - name: Create Grafana observability ingress with nginx (non-EKS) + - name: Create Grafana observability HTTPRoute (non-EKS) tags: deploy_observability when: kubernetes_platform is not defined or kubernetes_platform != "eks" community.kubernetes.k8s: 
state: present definition: - apiVersion: networking.k8s.io/v1 - kind: Ingress + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute metadata: - annotations: - nginx.ingress.kubernetes.io/backend-protocol: HTTP # Assuming Grafana is served over HTTP - name: observability-grafana-ingress - namespace: observability # Change this to the namespace where Grafana is deployed + name: observability-grafana-httproute + namespace: observability spec: - ingressClassName: nginx + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - "{{ secret_name }}" rules: - - host: "{{ secret_name }}" # Replace with your desired hostname for Grafana - http: - paths: - - backend: - service: - name: observability-grafana - port: - number: 80 - path: /observability(/|$)(.*) - pathType: ImplementationSpecific - tls: - - hosts: - - "{{ secret_name }}" - secretName: "{{ secret_name }}" + - matches: + - path: + type: PathPrefix + value: /observability + backendRefs: + - name: observability-grafana + port: 80 - name: Add Bitnami Helm repository tags: deploy_logging diff --git a/docs/README.md b/docs/README.md index 8c3032da..498be368 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,7 +1,7 @@ # Quick Start To set up prerequisities and quickly deploy Intel® AI for Enterprise Inference on a single node, follow the steps in the [**Single Node Deployment Guide**](./single-node-deployment.md). Otherwise, proceed to the section below for all deployment options. -> 🚀 **New**: Automated Gaudi firmware and driver management! See [Gaudi Prerequisites](./gaudi-prerequisites.md) for automated setup scripts. +> 🚀 **New**: Automated Intel® AI Accelerator firmware and driver management! See [Intel® AI Accelerator Prerequisites](./intel-ai-accelerator-prerequisites.md) for automated setup scripts. 
# Complete Intel® AI for Enterprise Inference Cluster Setup diff --git a/docs/configuring-inference-config-cfg-file.md b/docs/configuring-inference-config-cfg-file.md index ab2bd657..bdce2b5d 100644 --- a/docs/configuring-inference-config-cfg-file.md +++ b/docs/configuring-inference-config-cfg-file.md @@ -37,7 +37,7 @@ Make sure to update the values in the inference-config.cfg file according to you > - If `deploy_keycloak_apisix` is set to `off`, the `keycloak_client_id`, `keycloak_admin_user`, and `keycloak_admin_password` values will have no effect. > - The `hugging_face_token` is the token used for pulling LLM models from Hugging Face. > - If `deploy_llm_models` is set to `off`, the `hugging_face_token` value will be ignored. -> - The `cpu_or_gpu` value specifies whether to deploy models for CPU or Intel Gaudi. +> - The `cpu_or_gpu` value specifies whether to deploy models for CPU or Intel® AI Accelerator. > For running behind corporate proxy, please refer to this [guide](./running-behind-proxy.md) diff --git a/docs/cpu-optimization-guide.md b/docs/cpu-optimization-guide.md index dadb6888..6a32fc8b 100644 --- a/docs/cpu-optimization-guide.md +++ b/docs/cpu-optimization-guide.md @@ -49,7 +49,7 @@ resources: For single-node Xeon clusters, **Keycloak** and **APISIX** are recommended. -For Gaudi or large multi-node Xeon clusters, the GenAI Gateway is well-suited. +For Intel® AI Accelerator or large multi-node Xeon clusters, the GenAI Gateway is well-suited. 
## Status Verification @@ -74,4 +74,4 @@ If models aren't performing optimally: CPU optimization runs automatically and provides: - Dedicated CPU cores for each model - Consistent performance -- Optimal resource utilization \ No newline at end of file +- Optimal resource utilization diff --git a/docs/deploy-llm-model-from-hugging-face.md b/docs/deploy-llm-model-from-hugging-face.md index 7fd6c9ef..7c104bdd 100644 --- a/docs/deploy-llm-model-from-hugging-face.md +++ b/docs/deploy-llm-model-from-hugging-face.md @@ -17,6 +17,6 @@ This option allows you to deploy any Hugging Face-hosted LLM on the Inference Cl 3. When prompted, provide: - **Hugging Face Model ID** (e.g., `meta-llama/Meta-Llama-3-8B`) - **Model Deployment Name** (e.g., `metallama-8b`) - - **Tensor Parallel Size** (based on available Gaudi cards) + - **Tensor Parallel Size** (based on available Intel® AI Accelerator cards) > **Note**: This deploys a model that has **not** been pre-validated. Make sure the tensor parallel size is configured correctly. An incorrect value can result in the model being stuck in a "not ready" state. diff --git a/docs/envoy-gateway-deployment-guide.md b/docs/envoy-gateway-deployment-guide.md new file mode 100644 index 00000000..c86469b8 --- /dev/null +++ b/docs/envoy-gateway-deployment-guide.md @@ -0,0 +1,237 @@ +# Single Node Deployment Guide (Envoy Gateway) + +This guide provides step-by-step instructions to deploy Intel® AI for Enterprise +Inference on a single node using Envoy Gateway as the ingress controller. + +## Prerequisites + +1. [SSH Key Setup](./prerequisites.md#ssh-key-setup) +2. [SSL/TLS Certificate Setup for Development Environment](./prerequisites.md#development-environment) +3. 
[Hugging Face Token Generation](./prerequisites.md#hugging-face-token-generation)
+
+## Deployment
+
+### Step 1: Configure the Automation Config File
+
+Clone the Enterprise Inference repo and set up the config:
+
+```bash
+cd ~
+git clone https://github.com/opea-project/Enterprise-Inference.git
+cd Enterprise-Inference
+cp -f docs/examples/single-node/inference-config.cfg core/inventory/inference-config.cfg
+```
+
+Edit `core/inventory/inference-config.cfg` and update the following fields:
+
+| Field | Description | Example |
+|---|---|---|
+| `cluster_url` | DNS hostname for the cluster | `api.example.com` |
+| `cert_file` | Path to TLS certificate | `~/certs/cert.pem` |
+| `key_file` | Path to TLS private key | `~/certs/key.pem` |
+| `keycloak_client_id` | Keycloak OAuth2 client ID | `my-client-id` |
+| `keycloak_admin_user` | Keycloak admin username | `your-keycloak-admin-user` |
+| `keycloak_admin_password` | Keycloak admin password | `changeme` |
+
+For systems behind a proxy, set the proxy fields accordingly and ensure
+`cluster_url` (e.g. `api.example.com`) is included in the `no_proxy` list.
+
+### Step 2: Update `hosts.yaml` File
+
+```bash
+cp -f docs/examples/single-node/hosts.yaml core/inventory/hosts.yaml
+```
+
+Update the `ansible_user` field to the actual username.
+
+### Step 3: Update `/etc/hosts`
+
+Add the `cluster_url` hostname pointing to the node's IP:
+
+```bash
+echo "<node-ip> api.example.com" | sudo tee -a /etc/hosts
+```
+
+Replace `<node-ip>` with the actual node IP address (e.g. `10.75.129.152`).
+
+> **Note:** Do NOT map `cluster_url` to `127.0.0.1`. The Envoy Gateway uses
+> `hostPort` bindings which are accessible on the node IP, not loopback.
+
+### Step 4: Run the Automation
+
+```bash
+cd core
+chmod +x inference-stack-deploy.sh
+export HUGGINGFACE_TOKEN=<your-huggingface-token>
+```
+
+#### CPU Only
+
+```bash
+./inference-stack-deploy.sh --models "21" --cpu-or-gpu "cpu" --hugging-face-token $HUGGINGFACE_TOKEN
+```
+
+#### Intel® AI Accelerators
+
+```bash
+./inference-stack-deploy.sh --models "1" --cpu-or-gpu "gpu" --hugging-face-token $HUGGINGFACE_TOKEN
+```
+
+Select Option 1 and confirm the Yes/No prompt.
+
+## Architecture
+
+The traffic flow through the system is:
+
+```
+Client (HTTPS:443) → Envoy Gateway → APISIX (auth + rewrite) → vLLM Service
+```
+
+- **Envoy Gateway** – Edge proxy, terminates TLS on port 443 (hostPort), routes
+  based on path and hostname.
+- **APISIX** – Handles authentication (OpenID Connect token introspection via
+  Keycloak) and path rewriting.
+- **Keycloak** – Identity provider, issues and validates OAuth2 tokens.
+- **vLLM** – Model inference backend.
+
+## Testing Inference
+
+### Step 1: Get the Keycloak Client Secret
+
+Retrieve the client secret from the deployed Kubernetes secret:
+
+```bash
+export CLIENT_SECRET=$(kubectl get secret <model-name>-secret -n default \
+  -o jsonpath='{.data.client_secret}' | base64 -d)
+```
+
+For example, with Llama 3.1 8B on CPU:
+
+```bash
+export CLIENT_SECRET=$(kubectl get secret vllm-llama-8b-cpu-secret -n default \
+  -o jsonpath='{.data.client_secret}' | base64 -d)
+```
+
+### Step 2: Generate an Access Token
+
+Generate a token via the internal Keycloak service. This ensures the token
+issuer matches what APISIX expects for introspection.
+ +```bash +export KEYCLOAK_IP=$(kubectl get svc keycloak -n default -o jsonpath='{.spec.clusterIP}') +export KEYCLOAK_CLIENT_ID=my-client-id + +export TOKEN=$(curl -s --noproxy '*' \ + -H "Host: keycloak.default.svc.cluster.local" \ + http://${KEYCLOAK_IP}/realms/master/protocol/openid-connect/token \ + -X POST \ + -H 'Content-Type: application/x-www-form-urlencoded' \ + -d "grant_type=client_credentials&client_id=${KEYCLOAK_CLIENT_ID}&client_secret=${CLIENT_SECRET}" \ + | jq -r .access_token) + +echo "Token generated (length: ${#TOKEN})" +``` + +> **Important:** The token must be generated through Keycloak's internal cluster +> service URL (`keycloak.default.svc.cluster.local`) so the token issuer matches +> the APISIX OIDC introspection endpoint. Generating the token via the external +> URL (`https://api.example.com`) will result in an issuer mismatch and `401` +> errors. + +### Step 3: Test Inference + +Set the base URL: + +```bash +export BASE_URL=api.example.com +``` + +#### CPU Model (vLLM CPU) + +Note: `-vllmcpu` is appended to the model path for CPU deployments. 
+
+**Completions:**
+
+```bash
+curl -sk https://${BASE_URL}/Llama-3.1-8B-Instruct-vllmcpu/v1/completions \
+  -X POST \
+  -H 'Content-Type: application/json' \
+  -H "Authorization: Bearer $TOKEN" \
+  -d '{
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "prompt": "What is Deep Learning?",
+    "max_tokens": 50,
+    "temperature": 0
+  }'
+```
+
+**Chat Completions:**
+
+```bash
+curl -sk https://${BASE_URL}/Llama-3.1-8B-Instruct-vllmcpu/v1/chat/completions \
+  -X POST \
+  -H 'Content-Type: application/json' \
+  -H "Authorization: Bearer $TOKEN" \
+  -d '{
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "messages": [{"role": "user", "content": "What is Deep Learning?"}],
+    "max_tokens": 50,
+    "temperature": 0
+  }'
+```
+
+**List Models:**
+
+```bash
+curl -sk https://${BASE_URL}/Llama-3.1-8B-Instruct-vllmcpu/v1/models \
+  -H "Authorization: Bearer $TOKEN"
+```
+
+#### Intel® AI Accelerator Model
+
+```bash
+curl -sk https://${BASE_URL}/Llama-3.1-8B-Instruct/v1/completions \
+  -X POST \
+  -H 'Content-Type: application/json' \
+  -H "Authorization: Bearer $TOKEN" \
+  -d '{
+    "model": "meta-llama/Llama-3.1-8B-Instruct",
+    "prompt": "What is Deep Learning?",
+    "max_tokens": 50,
+    "temperature": 0
+  }'
+```
+
+### List Deployed Routes
+
+To see all available model routes:
+
+```bash
+kubectl get apisixroutes -A
+kubectl get httproute -A
+```
+
+## Troubleshooting
+
+### Token returns `401 Authorization Required`
+
+- Ensure the token was generated via the **internal** Keycloak service, not the
+  external URL. The issuer in the JWT (`iss` claim) must be
+  `http://keycloak.default.svc.cluster.local/realms/master`.
+- Verify the client secret matches: `kubectl get secret <model-name>-secret -o jsonpath='{.data.client_secret}' | base64 -d`
+
+### Cannot reach `https://api.example.com`
+
+- Verify `/etc/hosts` maps `api.example.com` to the **node IP** (not `127.0.0.1`).
+- Ensure `api.example.com` is in the `no_proxy` environment variable.
+- Verify the Envoy Gateway pod is running: `kubectl get pods -n envoy-gateway-system`
+- Confirm port 443 is accessible: `curl -sk https://api.example.com/ -o /dev/null -w '%{http_code}'`
+
+### vLLM pod stuck at `0/1 Running`
+
+- The model may still be downloading or loading. Check logs:
+  `kubectl logs -f <pod-name>`
+- CPU model loading for Llama 3.1 8B can take 20-30 minutes on first deploy
+  (downloading ~15GB + CPU weight loading).
+- Verify the readiness probe failure count has not hit the threshold:
+  `kubectl describe pod <pod-name>`
diff --git a/docs/examples/single-node/README.md b/docs/examples/single-node/README.md
index c0bf6a1a..8a0786ac 100644
--- a/docs/examples/single-node/README.md
+++ b/docs/examples/single-node/README.md
@@ -1,16 +1,16 @@
 # Setup Single Node Using Ansible
 
-These playbooks sets up a single node inference environment on either a Intel® Gaudi or Intel® Xeon node using Ansible. It is designed to be run on the Intel® Gaudi or Intel® Xeon node where the Intel® AI for Enterprise Inference Service will be deployed. The playbooks installs all necessary dependencies, configures the environment, and prepares the system for the Intel® AI for Enterprise Inference Service. If you are going to use Intel® Gaudi, you will need to have the Gaudi drivers and firmware installed on the system before running this playbook, for more information on installing the Gaudi drivers and firmware, refer to the [Gaudi Drivers Installation Guide](https://github.com/opea-project/Enterprise-Inference/blob/main/core/catalog/docs/gaudi/gaudi-prerequisites.md).
+These playbooks set up a single node inference environment on either an Intel® AI Accelerator or Intel® Xeon node using Ansible. It is designed to be run on the Intel® AI Accelerator or Intel® Xeon node where the Intel® AI for Enterprise Inference Service will be deployed. The playbooks install all necessary dependencies, configure the environment, and prepare the system for the Intel® AI for Enterprise Inference Service. 
If you are going to use Intel® AI Accelerator, you will need to have the Intel® AI Accelerator drivers and firmware installed on the system before running this playbook, for more information on installing the Intel® AI Accelerator drivers and firmware, refer to the [Intel® AI Accelerator Drivers Installation Guide](../../intel-ai-accelerator-prerequisites.md). Many of the defaults are setup to work out of the box, but you will need to update the **`cluster_ip`** and provide the **`hf_token`** for downloading models from Hugging Face. There is also a template directory that contains a set of templates for the various configuration files that are used by the AI Inference Service. These templates are used to generate the final configuration files based on the variables defined in the playbook. Do not modify these files directly. -Depending on the deployment type or the size of the models used, the playbook may run up to 25 minutes, at the end of the playbook running it will output the results of the installation script. The models will be available sometime after the playbook is done, the models selected by default for the Intel® Gaudi deployment can take up to an hour for all four of them to be available. If you change the models that will be used, the start up time may be different. +Depending on the deployment type or the size of the models used, the playbook may run up to 25 minutes, at the end of the playbook running it will output the results of the installation script. The models will be available sometime after the playbook is done, the models selected by default for the Intel® AI Accelerator deployment can take up to an hour for all four of them to be available. If you change the models that will be used, the start up time may be different. 
| Deployment Type | Playbook File | |------------------|----------------| -| Gaudi Single Node Playbook | einf-singlenode-gaudi.yml | +| Intel® AI Accelerator Single Node Playbook | einf-singlenode-intel-ai-accelerator.yml | | Xeon Single Node Playbook | einf-singlenode-xeon.yml | @@ -66,12 +66,13 @@ These settings are all set to `on` by default in the playbook, change these vari 2. **Run the Playbook** - Execute the Gaudi playbook using the following command: + Execute the Intel® AI Accelerator playbook using the following command: ```bash git clone https://github.com/opea-project/Enterprise-Inference.git cd Enterprise-Inference/docs/examples/single-node - sudo ansible-playbook einf-singlenode-gaudi.yml + sudo ansible-playbook einf-singlenode-intel-ai-accelerator.yml + ``` Execute the Xeon playbook using the following command: @@ -154,4 +155,4 @@ curl -k ${BASE_URL}/Meta-Llama-3.1-70B-Instruct/v1/completions -X POST -d '{"mod --- -For more information on how to access the models, refer to the [Accessing Deployed Models](/docs/accessing-deployed-models.md) documentation. \ No newline at end of file +For more information on how to access the models, refer to the [Accessing Deployed Models](/docs/accessing-deployed-models.md) documentation. 
diff --git a/docs/examples/single-node/einf-singlenode-gaudi.yml b/docs/examples/single-node/einf-singlenode-intel-ai-accelerator.yml similarity index 97% rename from docs/examples/single-node/einf-singlenode-gaudi.yml rename to docs/examples/single-node/einf-singlenode-intel-ai-accelerator.yml index 9500fd45..e0b47205 100644 --- a/docs/examples/single-node/einf-singlenode-gaudi.yml +++ b/docs/examples/single-node/einf-singlenode-intel-ai-accelerator.yml @@ -1,7 +1,7 @@ # Copyright (C) 2025-2026 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Ansible Playbook to install and configure the Enterprise Inference Service on a Single Gaudi node running Ubuntu 22.04+ +# Ansible Playbook to install and configure the Enterprise Inference Service on a Single node of Intel® AI Accelerator running Ubuntu 22.04+ # Needs to run as root or with sudo privileges # Installs version: --- @@ -11,7 +11,7 @@ gather_facts: true vars: cluster_url: "api.example.com" # Cluster name, change if you want to use a different DNS name for the service - cluster_ip: "127.0.0.1" # Cluster IP, this should be the IP of the Gaudi node that will be used to access the service + cluster_ip: "127.0.0.1" # Cluster IP, this should be the IP of the node that will be used to access the service ai_user: "ai-inference" # Enterprise Inference Service OS user, change if you want to use a different user ssh_key_file: "/home/{{ ai_user }}/.ssh/id_rsa" # Path to your private key, this playbook will create this keycloak_client_id: "api" # Keycloak client ID @@ -20,7 +20,7 @@ hf_token: "YourHuggingFaceToken" # Hugging Face token for all models, you need to supply your Hugging Face token to download models hf_token_falcon3: "YourHuggingFaceToken" # Hugging Face token for Falcon 3, can be the same as hf_token models: "2,5,8,9" # Comma-separated list of model IDs, see repo - cpu_or_gpu: "gpu" # "cpu" or "gpu", set to "gpu" for Gaudi nodes + cpu_or_gpu: "gpu" # "cpu" or "gpu", set to "gpu" for Intel® AI 
Accelerator nodes
     deploy_kubernetes_fresh: "on"
     deploy_ingress_controller: "on"
     deploy_keycloak_apisix: "on"
diff --git a/docs/examples/single-node/einf-singlenode-xeon.yml b/docs/examples/single-node/einf-singlenode-xeon.yml
index 54bd4ee9..43a7c7e9 100644
--- a/docs/examples/single-node/einf-singlenode-xeon.yml
+++ b/docs/examples/single-node/einf-singlenode-xeon.yml
@@ -11,7 +11,7 @@
   gather_facts: true
   vars:
     cluster_url: "api.example.com" # Cluster name, change if you want to use a different DNS name for the service
-    cluster_ip: "127.0.0.1" # Cluster IP, this should be the IP of the Gaudi node that will be used to access the service
+    cluster_ip: "127.0.0.1" # Cluster IP, this should be the IP of the node that will be used to access the service
     ai_user: "ai-inference" # Enterprise Inference Service OS user, change if you want to use a different user
     ssh_key_file: "/home/{{ ai_user }}/.ssh/id_rsa" # Path to your private key, this playbook will create this
     keycloak_client_id: "api" # Keycloak client ID
@@ -20,7 +20,7 @@
     hf_token: "YourHuggingFaceToken" # Hugging Face token for all models, you need to supply your Hugging Face token to download models
     hf_token_falcon3: "YourHuggingFaceToken" # Hugging Face token for Falcon 3, can be the same as hf_token
     models: "21" # Comma-separated list of model IDs, see repo
-    cpu_or_gpu: "cpu" # "cpu" or "gpu", set to "gpu" for Gaudi nodes
+    cpu_or_gpu: "cpu" # "cpu" or "gpu", set to "gpu" for Intel® AI Accelerator nodes
     deploy_kubernetes_fresh: "on"
     deploy_ingress_controller: "on"
     deploy_keycloak_apisix: "on"
diff --git a/docs/ingress-to-envoy-gateway-migration.md b/docs/ingress-to-envoy-gateway-migration.md
new file mode 100644
index 00000000..edb67c70
--- /dev/null
+++ b/docs/ingress-to-envoy-gateway-migration.md
@@ -0,0 +1,453 @@
+# Ingress to Envoy Gateway Migration Guide
+
+> **Enterprise Inference — Edge Traffic Migration**
+> NGINX Ingress Controller (`networking.k8s.io/v1 Ingress`) → Envoy Gateway 
(`gateway.networking.k8s.io/v1 HTTPRoute`) + +--- + +## Table of Contents + +1. [Why This Migration](#why-this-migration) +2. [Architecture — Before (NGINX Ingress)](#architecture--before-nginx-ingress) +3. [Architecture — After (Envoy Gateway)](#architecture--after-envoy-gateway) +4. [Concept Mapping](#concept-mapping) +5. [What Changed](#what-changed) +6. [What Did NOT Change](#what-did-not-change) +7. [File-by-File Change Inventory](#file-by-file-change-inventory) +8. [Deployment Workflow](#deployment-workflow) +9. [Configuration](#configuration) +10. [Route Mapping Reference](#route-mapping-reference) +11. [Platform Matrix](#platform-matrix) +12. [Rollback Procedure](#rollback-procedure) + +--- + +## Why This Migration + +Kubernetes `networking.k8s.io/v1 Ingress` is approaching End-of-Life. The Kubernetes community has standardized on the **Gateway API** (`gateway.networking.k8s.io/v1`) as the successor, offering: + +| Aspect | Ingress (Old) | Gateway API (New) | +|--------|---------------|-------------------| +| API maturity | Feature-frozen, EOL path | GA since K8s 1.26, actively developed | +| Routing | Single host/path rules, vendor annotations | Rich match types (headers, methods, query params) | +| TLS | Per-Ingress resource | Centralized at Gateway listener | +| Multi-tenancy | Flat, single namespace | Role-based: Infra → GatewayClass, Cluster → Gateway, App → HTTPRoute | +| URL rewriting | Vendor-specific annotation (`nginx.ingress.kubernetes.io/rewrite-target`) | Standard `URLRewrite` filter | +| Vendor lock-in | NGINX-specific annotations | Portable across Envoy, Istio, Traefik, etc. 
| + +--- + +## Architecture — Before (NGINX Ingress) + +``` +┌───────────┐ +│ Client │ +│ (HTTPS) │ +└─────┬─────┘ + │ :443 + ▼ +┌─────────────────────────────────────────────────┐ +│ NGINX Ingress Controller │ +│ namespace: ingress-nginx │ +│ Helm chart: ingress-nginx/ingress-nginx v4.12.2│ +│ hostPort: 80, 443 │ +│ Tolerations: control-plane │ +│ Affinity: ei-infra-eligible nodes │ +└────┬────┬────┬────┬────┬────┬────┬──────────────┘ + │ │ │ │ │ │ │ + ▼ ▼ ▼ ▼ ▼ ▼ ▼ + ┌──────────────────────────────────────────┐ + │ networking.k8s.io/v1 Ingress resources │ + │ ingressClassName: nginx │ + │ nginx.ingress.kubernetes.io/* annotations│ + ├──────────────────────────────────────────┤ + │ • model-ingress → vLLM/TGI/TEI svc │ + │ • genai-gw-ingress → LiteLLM :4000 │ + │ • keycloak-ingress → Keycloak/APISIX │ + │ • dashboard-ingress → K8s Dashboard │ + │ • grafana-ingress → Grafana :80 │ + │ • flowise-root → Flowise :3000 │ + │ • mcp-server → MCP Server │ + └──────────────────────────────────────────┘ +``` + +### Key Characteristics (Before) + +- **Controller:** NGINX Ingress Controller deployed via Helm (`ingress-nginx` chart v4.12.2) +- **Namespace:** `ingress-nginx` +- **TLS:** Each Ingress resource carried its own `tls:` block with `secretName` +- **Rewriting:** `nginx.ingress.kubernetes.io/rewrite-target: /$1` annotation with regex capture groups +- **Pod placement:** `hostPort: 80/443` with control-plane tolerations and `ei-infra-eligible` node affinity +- **EKS variant:** Separate `ingress_eks.yaml` templates with `ingressClassName: alb` and ALB annotations + +--- + +## Architecture — After (Envoy Gateway) + +``` +┌───────────┐ +│ Client │ +│ (HTTPS) │ +└─────┬─────┘ + │ :443 + ▼ +┌─────────────────────────────────────────────────┐ +│ Envoy Gateway │ +│ namespace: envoy-gateway-system │ +│ │ +│ ┌─────────────────────────────────────────┐ │ +│ │ GatewayClass: envoy │ │ +│ │ controller: gateway.envoyproxy.io │ │ +│ │ parametersRef → EnvoyProxy │ │ +│ 
└─────────────────────────────────────────┘ │ +│ ┌─────────────────────────────────────────┐ │ +│ │ EnvoyProxy: enterprise-proxy-config │ │ +│ │ hostNetwork: true │ │ +│ │ Tolerations: control-plane │ │ +│ │ Affinity: ei-infra-eligible nodes │ │ +│ │ podAntiAffinity: spread across hosts │ │ +│ └─────────────────────────────────────────┘ │ +│ ┌─────────────────────────────────────────┐ │ +│ │ Gateway: enterprise-edge-gateway │ │ +│ │ Listeners: │ │ +│ │ - https :443 (TLS Terminate) │ │ +│ │ - http :80 │ │ +│ │ allowedRoutes: All namespaces │ │ +│ │ TLS cert: secret │ │ +│ └─────────────────────────────────────────┘ │ +└────┬────┬────┬────┬────┬────┬────┬───────────────┘ + │ │ │ │ │ │ │ + ▼ ▼ ▼ ▼ ▼ ▼ ▼ + ┌──────────────────────────────────────────┐ + │ gateway.networking.k8s.io/v1 HTTPRoutes │ + │ parentRefs: enterprise-edge-gateway │ + │ Standard filters (URLRewrite, etc.) │ + ├──────────────────────────────────────────┤ + │ • model-httproute → vLLM/TGI/TEI svc │ + │ • genai-gw-httproute → LiteLLM :4000 │ + │ • keycloak-httproute → Keycloak/APISIX │ + │ • dashboard-httproute→ K8s Dashboard │ + │ • grafana-httproute → Grafana :80 │ + │ • flowise-root → Flowise :3000 │ + │ • mcp-httproute → MCP Server │ + └──────────────────────────────────────────┘ +``` + +### Key Characteristics (After) + +- **Controller:** Envoy Gateway deployed via OCI Helm chart (`gateway-helm v1.2.0`) +- **Namespace:** `envoy-gateway-system` +- **TLS:** Centralized at the Gateway listener — HTTPRoutes do NOT carry TLS config +- **Rewriting:** Standard Gateway API `URLRewrite` filter with `ReplacePrefixMatch` +- **Pod placement:** `hostNetwork: true` with same tolerations and node affinity as before +- **EKS variant:** ALB `ingress_eks.yaml` templates kept as-is (separate migration path) + +--- + +## Concept Mapping + +| NGINX Ingress Concept | Envoy Gateway Equivalent | Notes | +|----------------------|--------------------------|-------| +| `ingress-nginx` Helm chart | `gateway-helm` OCI chart | 
Deployed to `envoy-gateway-system` | +| `IngressClass: nginx` | `GatewayClass: envoy` | References `EnvoyProxy` for pod config | +| — | `EnvoyProxy` CR | New: configures proxy pod placement, replicas, hostNetwork | +| — | `Gateway` CR | New: defines listeners (HTTPS/HTTP), TLS termination | +| `Ingress` resource | `HTTPRoute` resource | 1:1 replacement per service | +| `ingressClassName: nginx` | `parentRefs: [{name: enterprise-edge-gateway}]` | Routes reference the Gateway, not a class | +| `tls:` block on each Ingress | TLS on Gateway listener only | Eliminates per-route TLS duplication | +| `nginx.ingress.kubernetes.io/rewrite-target: /$1` | `filters: [{type: URLRewrite, urlRewrite: {path: {type: ReplacePrefixMatch}}}]` | Standard API, no vendor annotation | +| `nginx.ingress.kubernetes.io/backend-protocol: HTTPS` | (handled at Backend/service level) | — | +| `hostPort: 80, 443` | `hostNetwork: true` on EnvoyProxy | Equivalent node-level port binding | +| `pathType: ImplementationSpecific` + regex | `path.type: PathPrefix` | Gateway API uses structured prefix matching | + +--- + +## What Changed + +### Files Modified (20 files) + +| # | Category | File | Summary | +|---|----------|------|---------| +| 1 | **Controller Playbook** | `core/playbooks/deploy-ingress-controller.yml` | Entire file: NGINX Helm → Envoy Gateway + GatewayClass + EnvoyProxy + Gateway | +| 2 | **Shell Script** | `core/lib/components/ingress-controller.sh` | Updated message and `--extra-vars` | +| 3 | **Shell Script** | `core/lib/cluster/deployment/fresh-install.sh` | Updated log messages | +| 4 | **Shell Script** | `core/lib/user-menu/parse-user-prompts.sh` | Updated interactive prompt text | +| 5 | **Metadata** | `core/inventory/metadata/inference-metadata.cfg` | `ingress_controller=4.12.2` → `envoy_gateway_version=v1.2.0` | +| 6 | **Helm Template** | `core/helm-charts/vllm/templates/ingress.yaml` | `Ingress` → `HTTPRoute` with `URLRewrite` filter | +| 7 | **Helm Template** | 
`core/helm-charts/tgi/templates/ingress.yaml` | `Ingress` → `HTTPRoute` with `URLRewrite` filter | +| 8 | **Helm Template** | `core/helm-charts/tei/templates/ingress.yaml` | `Ingress` → `HTTPRoute` with `URLRewrite` filter | +| 9 | **Helm Template** | `core/helm-charts/teirerank/templates/ingress.yaml` | `Ingress` → `HTTPRoute` with `URLRewrite` filter | +| 10 | **Helm Template** | `core/helm-charts/ovms/templates/ingress.yaml` | `Ingress` → `HTTPRoute` with `URLRewrite` filter | +| 11 | **Helm Template** | `core/helm-charts/genai-gateway/templates/ingress.yaml` | `Ingress` → `HTTPRoute` | +| 12 | **Helm Template** | `core/helm-charts/keycloak/templates/ingress.yaml` | `Ingress` → `HTTPRoute` | +| 13 | **Helm Template** | `core/helm-charts/mcp-server-template/templates/ingress.yaml` | `Ingress` → `HTTPRoute` | +| 14 | **Playbook** | `core/playbooks/deploy-cluster-config.yml` | Dashboard Ingress → HTTPRoute | +| 15 | **Playbook** | `core/playbooks/deploy-observability.yml` | Grafana Ingress (non-EKS) → HTTPRoute | +| 16 | **Playbook** | `core/playbooks/deploy-genai-gateway.yml` | Langfuse: disabled built-in Ingress, added HTTPRoute | +| 17 | **Playbook** | `core/playbooks/deploy-keycloak-tls-cert.yml` | Keycloak Ingress disabled for non-EKS, added HTTPRoute | +| 18 | **Playbook** | `core/playbooks/deploy-keycloak-controller.yml` & `deploy-keycloak-service.yml` | Helm repo refs: ingress-nginx → envoy-gateway | +| 19 | **Istio** | `core/playbooks/deploy-istio.yml` & `deploy-istio-openshift.yml` | Namespace `ingress-nginx` → `envoy-gateway-system` | +| 20 | **Istio** | `core/helm-charts/istio/peer-auth-ingress.yaml` | PeerAuth target: `ingress-nginx` → `envoy-gateway-system` pods | +| 21 | **Plugin** | `plugins/agenticai/playbooks/deploy-agenticai-plugin.yml` | Flowise Ingress → HTTPRoute | + +--- + +## What Did NOT Change + +| Item | Reason | +|------|--------| +| **`values.yaml`** in all Helm charts | Keys `ingress.enabled`, `ingress.host`, `ingress.secretname` kept 
identical | +| **`inference-config.cfg`** | `deploy_ingress_controller=on` still controls edge gateway deployment | +| **Template filenames** | All `ingress.yaml` filenames kept (only content changed to HTTPRoute) | +| **Shell function name** | Renamed to `run_edge_gateway_playbook()` (previously `run_ingress_nginx_playbook()`) | +| **EKS ALB templates** | `ingress_eks.yaml` variants with `ingressClassName: alb` are untouched | +| **OpenShift Routes** | `route.yaml` templates are not Ingress — unaffected | +| **APISIX integration** | APISIX catch-all routing through HTTPRoutes works the same way | +| **Model deployment logic** | `install-model.sh`, `deploy-inference-models.yml` — no changes to `ingress_enabled` logic | +| **Brownfield detection** | `setup-bastion.yml` pre-flight checks kept (informational only) | + +--- + +## Deployment Workflow + +The deployment sequence is **unchanged**. The `deploy_ingress_controller=on` flag in `inference-config.cfg` triggers the edge gateway step: + +``` +inference-stack-deploy.sh + └── fresh-install.sh + ├── 1. Kubernetes cluster setup (if deploy_kubernetes_fresh=on) + ├── 2. Cluster config (dashboard) ← HTTPRoute for dashboard + ├── 3. NRI CPU Balloons (if cpu deployment) + ├── 4. Habana AI Operator (if GPU) + ├── 5. Ceph storage (if deploy_ceph=on) + │ + ├── 6. Edge Gateway Controller (if deploy_ingress_controller=on) + │ └── deploy-ingress-controller.yml + │ ├── Install Gateway API CRDs (v1.2.0) + │ ├── Deploy Envoy Gateway Helm chart + │ ├── Create EnvoyProxy (pod placement config) + │ ├── Create GatewayClass: envoy + │ ├── Create TLS Secret in envoy-gateway-system + │ └── Create Gateway: enterprise-edge-gateway + │ + ├── 7. Keycloak + APISIX (if deploy_keycloak_apisix=on) + │ └── HTTPRoute for Keycloak created here + ├── 8. GenAI Gateway (LiteLLM) (if deploy_genai_gateway=on) + │ └── HTTPRoute for LiteLLM + Langfuse trace + ├── 9. 
Observability (Grafana) (if deploy_observability=on) + │ └── HTTPRoute for Grafana + ├── 10. Agentic AI Plugin (if deploy_agenticai_plugin=on) + │ └── HTTPRoute for Flowise + ├── 11. Istio (if deploy_istio=on) + │ └── Labels envoy-gateway-system for ambient mode + └── 12. LLM Model Deployment (if deploy_llm_models=on) + └── HTTPRoutes created per model via Helm templates +``` + +--- + +## Configuration + +### No inference-config.cfg Changes Required + +The existing config toggle works the same way: + +```ini +# Controls edge gateway deployment (formerly NGINX, now Envoy Gateway) +deploy_ingress_controller=on +``` + +### Metadata Version + +In `core/inventory/metadata/inference-metadata.cfg`: + +```ini +# Before: +# ingress_controller="4.12.2" + +# After: +envoy_gateway_version="v1.2.0" +``` + +### Helm Chart values.yaml — No Changes + +All `values.yaml` files retain the same `ingress:` block: + +```yaml +ingress: + enabled: false # Set to true to enable the HTTPRoute resource + host: "" + namespace: default + secretname: "" # (used by EKS ALB variant only) +``` + +--- + +## Route Mapping Reference + +### Model Serving (vLLM, TGI, TEI, TEI-Rerank, OVMS) + +| Before (Ingress) | After (HTTPRoute) | +|-------------------|-------------------| +| `ingressClassName: nginx` | `parentRefs: [{name: enterprise-edge-gateway, namespace: envoy-gateway-system}]` | +| `nginx.ingress.kubernetes.io/rewrite-target: /$1` | `filters: [{type: URLRewrite, urlRewrite: {path: {type: ReplacePrefixMatch, replacePrefixMatch: /}}}]` | +| `path: /model-name/(.*)` | `path: {type: PathPrefix, value: /model-name}` | +| `pathType: ImplementationSpecific` | (PathPrefix is the type) | +| `tls: [{hosts: [host], secretName: secret}]` | (TLS handled at Gateway level) | + +**Example — vLLM HTTPRoute:** + +```yaml +apiVersion: gateway.networking.k8s.io/v1 +kind: HTTPRoute +metadata: + name: vllm-model-httproute +spec: + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + 
hostnames: + - api.example.com + rules: + - matches: + - path: + type: PathPrefix + value: /Meta-Llama-3.1-8B-Instruct + filters: + - type: URLRewrite + urlRewrite: + path: + type: ReplacePrefixMatch + replacePrefixMatch: / + backendRefs: + - name: vllm-model-service + port: 80 +``` + +### Infrastructure Services + +| Service | Path | Type | Notes | +|---------|------|------|-------| +| GenAI Gateway (LiteLLM) | `/` | PathPrefix | Full catch-all for LiteLLM API | +| Keycloak (via APISIX) | `/token` | Exact | Token endpoint only | +| Kubernetes Dashboard | `/dashboard` | PathPrefix | URLRewrite strips prefix | +| Grafana | `/observability` | PathPrefix | `serve_from_sub_path: true` in Grafana | +| Flowise | `/` (subdomain) | PathPrefix | Hostname: `flowise-` | +| MCP Server | `/health`, `/sse` | PathPrefix | SSE-optimized (no special annotation needed) | +| Langfuse Trace | `/` (subdomain) | PathPrefix | Hostname: `trace-` | + +--- + +## Platform Matrix + +| Platform | Edge Gateway | Model Routes | Infra Routes | Auth Mode | +|----------|-------------|-------------|-------------|-----------| +| **Vanilla K8s** | Envoy Gateway (HTTPRoutes) | HTTPRoute | HTTPRoute | Keycloak or LiteLLM | +| **EKS** | AWS ALB (Ingress with `ingressClassName: alb`) | ALB Ingress | ALB Ingress | Same | +| **OpenShift** | OpenShift Routes (`route.yaml`) | Route | Route | Same | + +> **Note:** EKS ALB and OpenShift Routes are **not affected** by this migration. Only vanilla Kubernetes deployments use the new Envoy Gateway path. + +--- + +## Key Resources Created by deploy-ingress-controller.yml + +```yaml +# 1. Gateway API CRDs (from upstream) +kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.2.0/standard-install.yaml + +# 2. Envoy Gateway Controller (Helm) +helm upgrade --install eg oci://docker.io/envoyproxy/gateway-helm + --version v1.2.0 + --namespace envoy-gateway-system + +# 3. 
EnvoyProxy — proxy pod configuration +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: EnvoyProxy +metadata: + name: enterprise-proxy-config + namespace: envoy-gateway-system +spec: + provider: + type: Kubernetes + kubernetes: + envoyDeployment: + replicas: + pod: + tolerations: [control-plane, master] + affinity: {ei-infra-eligible nodes, pod anti-affinity} + patch: + spec: + template: + spec: + hostNetwork: true # Binds ports 80/443 to node + dnsPolicy: ClusterFirstWithHostNet + envoyService: + type: ClusterIP + +# 4. GatewayClass +apiVersion: gateway.networking.k8s.io/v1 +kind: GatewayClass +metadata: + name: envoy +spec: + controllerName: gateway.envoyproxy.io/gatewayclass-controller + parametersRef: {EnvoyProxy: enterprise-proxy-config} + +# 5. Gateway +apiVersion: gateway.networking.k8s.io/v1 +kind: Gateway +metadata: + name: enterprise-edge-gateway + namespace: envoy-gateway-system +spec: + gatewayClassName: envoy + listeners: + - name: https + protocol: HTTPS + port: 443 + tls: + mode: Terminate + certificateRefs: [{name: }] + - name: http + protocol: HTTP + port: 80 + allowedRoutes: + namespaces: {from: All} +``` + +--- + +## Rollback Procedure + +If a rollback to NGINX Ingress is needed: + +1. **Revert the code** — `git checkout` the prior commit on the 20 modified files +2. **Remove Envoy Gateway resources:** + ```bash + kubectl delete gateway enterprise-edge-gateway -n envoy-gateway-system + kubectl delete gatewayclass envoy + kubectl delete envoyproxy enterprise-proxy-config -n envoy-gateway-system + helm uninstall eg -n envoy-gateway-system + kubectl delete namespace envoy-gateway-system + ``` +3. **Re-deploy** — run `inference-stack-deploy.sh` which will install NGINX Ingress Controller and create Ingress resources + +--- + +## FAQ + +**Q: Do I need to change `inference-config.cfg`?** +A: No. `deploy_ingress_controller=on` works exactly as before. 
+ +**Q: Will my existing model deployments break?** +A: If upgrading in-place, you need to run the edge gateway deployment step first, then re-deploy models so HTTPRoutes replace the old Ingress resources. + +**Q: What about EKS deployments?** +A: EKS uses the AWS ALB Ingress Controller (`ingressClassName: alb`). This migration does not affect EKS deployments. + +**Q: What about the APISIX integration?** +A: APISIX still works the same way. When `apisix.enabled=true`, the HTTPRoute backend points to `auth-apisix-gateway:80` instead of the model service directly — identical behavior to the old Ingress. + +**Q: Where is TLS configured now?** +A: TLS terminates at the `enterprise-edge-gateway` Gateway listener in `envoy-gateway-system`. Individual HTTPRoutes no longer carry TLS configuration. diff --git a/docs/gaudi-prerequisites.md b/docs/intel-ai-accelerator-prerequisites.md similarity index 86% rename from docs/gaudi-prerequisites.md rename to docs/intel-ai-accelerator-prerequisites.md index f81fef5a..cb77f88c 100644 --- a/docs/gaudi-prerequisites.md +++ b/docs/intel-ai-accelerator-prerequisites.md @@ -1,9 +1,9 @@ -# Gaudi Node Requirements and Setup Guide +# Intel® AI Accelerator Node Requirements and Setup Guide -This guide helps verify and automatically install the latest firmware and driver version for **Habana Gaudi** nodes in your Kubernetes or Standalone Environment. +This guide helps verify and automatically install the latest firmware and driver version for **Intel® AI Accelerator** nodes in your Kubernetes or Standalone Environment. 
# What You Need -- Intel® Gaudi® cards installed in your system +- Intel® AI Accelerator cards installed in your system - Linux operating system - Internet connection - Root/sudo privileges @@ -33,11 +33,11 @@ Firmware [SPI] Version : Preboot version hl-gaudi2-1.20.0-fw-58.0.0-sec-9 (Jan 1 ``` ###### For visual assistance, refer to the following snapshot for Firmware version: -AI Inference Firmware Snapshot +AI Inference Firmware Snapshot #### Step 2: Check Driver Version -Use the following commands to check the required driver version installed on your Gaudi nodes: +Use the following commands to check the required driver version installed on your Intel® AI Accelerator nodes: ```bash hl-smi @@ -52,7 +52,7 @@ You'll see something like: ``` ###### For visual assistance, refer to the following snapshot for Driver version: -AI Inference Driver Snapshot +AI Inference Driver Snapshot #### Step 3: Check Runtime Version @@ -126,7 +126,7 @@ If the numbers don't match, run: ```bash kubectl rollout restart ds habana-ai-device-plugin-ds -n habana-ai-operator ``` -> **For detailed documentation, refer to the official guide:** [Intel® Gaudi® Software Installation Documentation](https://docs.habana.ai/en/latest/Installation_Guide/Driver_Installation.html) +> **For detailed documentation, refer to the official guide:** [Intel® AI Accelerator Software Installation Documentation](https://docs.habana.ai/en/latest/Installation_Guide/Driver_Installation.html) > > **For automation script details:** See [Firmware Update Script Documentation](../core/scripts/README.md) > diff --git a/docs/inventory-design-guide.md b/docs/inventory-design-guide.md index 4fe5cf9b..83c8cf65 100644 --- a/docs/inventory-design-guide.md +++ b/docs/inventory-design-guide.md @@ -5,17 +5,17 @@ - [Control Plane Node Sizing](#control-plane-node-sizing) - [Workload Node Sizing](#workload-node-sizing) - [CPU-based Workloads (Intel Xeon)](#cpu-based-workloads-intel-xeon) - - [HPU-based Workloads (Intel 
Gaudi)](#hpu-based-workloads-intel-gaudi) + - [HPU-based Workloads (Intel® AI Accelerator)](#hpu-based-workloads-intel-ai-accelerator) - [Infrastructure Node Sizing](#infrastructure-node-sizing) - [Setting Dedicated Infra Nodes](#setting-dedicated-inference-infra-nodes) - [Setting Dedicated Intel Xeon Nodes](#setting-dedicated-inference-xeon-nodes) - - [Setting Dedicated Intel Gaudi Nodes](#setting-dedicated-gaudi-nodes) + - [Setting Dedicated Intel® AI Accelerator Nodes](#setting-dedicated-intel-ai-accelerator-nodes) - [Setting Dedicated Intel CPU Nodes](#setting-dedicated-cpu-nodes) - [Node Sizing Guide](#node-sizing-guide) - [Single Node Deployment](#single-node-deployment) - [Single Master Multiple Workload Node Deployment](#single-master-multiple-workload-node-deployment) - [Multi Master Multi Workload Node Deployment](#multi-master-multi-workload-node-deployment) - - [Multi Master Node with Dedicated Intel Xeon, Gaudi and CPU nodes Deployment](#multi-master-multi-workload-node-with-dedicated-intel-xeon-gaudi-and-cpu-nodes-deployment) + - [Multi Master Multi Workload Node with Dedicated Intel Xeon, Intel® AI Accelerator and CPU nodes Deployment](#multi-master-multi-workload-node-with-dedicated-intel-xeon-intel-ai-accelerator-and-cpu-nodes-deployment) ##### Control Plane Node Sizing For an inference model deployment cluster in Kubernetes (K8s), the control plane nodes should have sufficient resources to handle the management and orchestration of the cluster. It's recommended to have at least 8 vCPUs and 32 GB of RAM per control plane node. @@ -27,9 +27,9 @@ ##### CPU-based Workloads (Intel Xeon) For CPU-based inference workloads, the workload nodes should have a sufficient number of vCPUs based on the number of models and the expected concurrency. A general guideline is to allocate 32 vCPUs per model instance, depending on the model complexity and resource requirements. 
- ##### HPU-based Workloads (Intel Gaudi) - For HPU-based inference workloads using Intel Gaudi HPUs, the workload nodes should be equipped with the appropriate number of Gaudi HPUs based on the number of models and the expected concurrency. - Each Gaudi HPU can handle multiple model instances, depending on the model size and resource requirements. + ##### HPU-based Workloads (Intel® AI Accelerator) + For HPU-based inference workloads using Intel® AI Accelerator, the workload nodes should be equipped with the appropriate number of Intel® AI Accelerator HPUs based on the number of models and the expected concurrency. + Each Intel® AI Accelerator can handle multiple model instances, depending on the model size and resource requirements. Additionally, the workload nodes should have sufficient RAM and storage capacity to accommodate the inference models and any associated data. @@ -176,21 +176,21 @@ ``` - ### Setting Dedicated Gaudi Nodes: - To configure a dedicated Gaudi nodes for deploying models, edit the file `inventory/hosts.yml` and add the label `inference-gaudi` to the nodes. - This group will be used to schedule the workloads dedicated to run on nodes with Intel Gaudi attached. + ### Setting Dedicated Intel® AI Accelerator Nodes: + To configure a dedicated Intel® AI Accelerator nodes for deploying models, edit the file `inventory/hosts.yml` and add the label `inference-ai-accelerator` to the nodes. + This group will be used to schedule the workloads dedicated to run on nodes with Intel® AI Accelerator attached. follow these steps: 1. Open the `inventory/hosts.yml` file in a text editor. 2. Locate the section where you define your nodes. This is typically under the `all` group or any other group you've defined for your nodes. - 3. For each node that you want to label as an `inference-gaudi`, add the following line under the node's IP or hostname: + 3. 
For each node that you want to label as an `inference-ai-accelerator`, add the following line under the node's IP or hostname: ```yaml node_labels: - node-role.kubernetes.io/inference-gaudi: "true" + node-role.kubernetes.io/inference-ai-accelerator: "true" ``` - 4.After labeling the desired nodes, list the nodes under the group kube_inference_gaudi to include in this group. + 4.After labeling the desired nodes, list the nodes under the group kube_inference_ai-accelerator to include in this group. - Please find the template for the inventory configuration with 2 dedicated Gaudi nodes for inference cluster + Please find the template for the inventory configuration with 2 dedicated Intel® AI Accelerator nodes for inference cluster ```yaml all: hosts: @@ -198,13 +198,13 @@ ansible_host: "{{ private_ip }}" ansible_user: "{{ ansible_user }}" ansible_ssh_private_key_file: /path/to/your/ssh/key - inference-gaudi-node-01: + inference-ai-accelerator-node-01: ansible_host: "{{ private_ip }}" ansible_user: "{{ ansible_user }}" ansible_ssh_private_key_file: /path/to/your/ssh/key node_labels: - node-role.kubernetes.io/inference-gaudi: "true" - inference-gaudi-node-02: + node-role.kubernetes.io/inference-ai-accelerator: "true" + inference-ai-accelerator-node-02: ansible_host: "{{ private_ip }}" ansible_user: "{{ ansible_user }}" ansible_ssh_private_key_file: /path/to/your/ssh/key @@ -220,12 +220,12 @@ inference-control-plane-01: kube_node: hosts: - inference-gaudi-node-01: - inference-gaudi-node-02: + inference-ai-accelerator-node-01: + inference-ai-accelerator-node-02: inference-infra-node-01: - kube_inference_gaudi: - inference-gaudi-node-01: - inference-gaudi-node-02: + kube_inference_ai-accelerator: + inference-ai-accelerator-node-01: + inference-ai-accelerator-node-02: etcd: hosts: inference-control-plane-01: @@ -233,7 +233,7 @@ children: kube_control_plane: kube_node: - kube_inference_gaudi: + kube_inference_ai-accelerator: ``` ### Setting Dedicated CPU Nodes: @@ -435,9 
+435,9 @@ hosts: {} ``` - ### Multi Master Multi Workload Node with Dedicated Intel Xeon, Gaudi and CPU nodes Deployment: + ### Multi Master Multi Workload Node with Dedicated Intel Xeon, Intel® AI Accelerator and CPU nodes Deployment: For an enterprise-grade deployment with multiple control plane nodes and multiple workload nodes, - This setup uses workload nodes to be mix of Intel Xeon, Intel Gaudi and Intel CPU nodes for deploying models. + This setup uses workload nodes to be mix of Intel Xeon, Intel® AI Accelerator and Intel CPU nodes for deploying models. it is recommended to follow these guidelines: @@ -491,18 +491,18 @@ ansible_ssh_private_key_file: /path/to/your/ssh/key node_labels: node-role.kubernetes.io/inference-xeon: "true" - inference-workload-gaudi-node-01: + inference-workload-ai-accelerator-node-01: ansible_host: "{{ private_ip }}" ansible_user: "{{ ansible_user }}" ansible_ssh_private_key_file: /path/to/your/ssh/key node_labels: - node-role.kubernetes.io/inference-gaudi: "true" - inference-workload-gaudi-node-02: + node-role.kubernetes.io/inference-ai-accelerator: "true" + inference-workload-ai-accelerator-node-02: ansible_host: "{{ private_ip }}" ansible_user: "{{ ansible_user }}" ansible_ssh_private_key_file: /path/to/your/ssh/key node_labels: - node-role.kubernetes.io/inference-gaudi: "true" + node-role.kubernetes.io/inference-ai-accelerator: "true" inference-workload-cpu-node-01: ansible_host: "{{ private_ip }}" ansible_user: "{{ ansible_user }}" @@ -528,8 +528,8 @@ inference-infra-node-03: inference-workload-xeon-node-01: inference-workload-xeon-node-02: - inference-workload-gaudi-node-01: - inference-workload-gaudi-node-02: + inference-workload-ai-accelerator-node-01: + inference-workload-ai-accelerator-node-02: inference-workload-cpu-node-01: inference-workload-cpu-node-02: etcd: @@ -544,9 +544,9 @@ kube_inference_xeon: inference-workload-xeon-node-01: inference-workload-xeon-node-02: - kube_inference_gaudi: - 
inference-workload-gaudi-node-01: - inference-workload-gaudi-node-02: + kube_inference_ai-accelerator: + inference-workload-ai-accelerator-node-01: + inference-workload-ai-accelerator-node-02: kube_inference_cpu: inference-workload-cpu-node-01: inference-workload-cpu-node-02: @@ -556,7 +556,7 @@ kube_node: kube_inference_infra: kube_inference_xeon: - kube_inference_gaudi: + kube_inference_ai-accelerator: kube_inference_cpu: calico_rr: hosts: {} diff --git a/docs/multi-node-deployment.md b/docs/multi-node-deployment.md index 01759e0b..f29d6557 100644 --- a/docs/multi-node-deployment.md +++ b/docs/multi-node-deployment.md @@ -116,7 +116,7 @@ chmod +x inference-stack-deploy.sh ./inference-stack-deploy.sh --models "21" --cpu-or-gpu "cpu" --hugging-face-token $HUGGINGFACE_TOKEN ``` -### For Nodes with Gaudi Accelerators +### For Nodes with Intel® AI Accelerator ```sh ./inference-stack-deploy.sh --models "1" --cpu-or-gpu "gpu" --hugging-face-token $HUGGINGFACE_TOKEN diff --git a/docs/ovms-model-deploy-guide.md b/docs/ovms-model-deploy-guide.md index a298430b..a8fc63c3 100644 --- a/docs/ovms-model-deploy-guide.md +++ b/docs/ovms-model-deploy-guide.md @@ -194,16 +194,22 @@ echo "Access Token: $TOKEN" ```bash # Test chat completions endpoint -For Inferencing with Qwen3-4B-int4-ov: -curl -k ${BASE_URL}/qwen3-4b-ovms/v3/chat/completions -X POST -d '{"messages": [{"role": "system","content": "You are helpful assistant"},{"role": "user","content": "what is photosynthesis"}],"model": "qwen3-4b","max_tokens": 32,"temperature": 0.4}' -H 'Content-Type: application/json' -sS -H "Authorization: Bearer $TOKEN" -For Inferencing with Mistral-7B-Instruct-v0.3-int4-cw-ov: -curl -k ${BASE_URL}/mistral-7b-ovms/v3/chat/completions -X POST -d '{"messages": [{"role": "system","content": "You are helpful assistant"},{"role": "user","content": "what is photosynthesis"}],"model": "mistral-7b","max_tokens": 32,"temperature": 0.4}' -H 'Content-Type: application/json' -sS -H "Authorization: Bearer 
$TOKEN" +# For Inferencing with any deployed models, use below command to get model route +kubectl get apisixroute -A +``` +![alt text](pictures/apisix-route.png) +``` +export MODEL_APISIX_ROUTE="qwen3-4b-ovms" +export MODEL_ID=OpenVINO/Qwen3-4B-int4-ov -For Inferencing with meta-llama/Llama-3.2-3B-Instruct: -curl -k ${BASE_URL}/llama-3.2-3b-instruct/v3/chat/completions -X POST -d '{"messages": [{"role": "system","content": "You are helpful assistant"},{"role": "user","content": "what is api"}],"model": "llama-3.2-3b-instruct","max_tokens": 32,"temperature": 0.4}' -H 'Content-Type: application/json' -sS -H "Authorization: Bearer $TOKEN" +curl -k ${BASE_URL}/${MODEL_APISIX_ROUTE}/v3/chat/completions -X POST \ + -H 'Content-Type: application/json' \ + -H "Authorization: Bearer $TOKEN" \ + -d '{"messages": [{"role": "system","content": "You are helpful assistant"},{"role": "user","content": "what is api"}],"model": "'"$MODEL_ID"'","max_tokens": 32,"temperature": 0.4}' ``` +**NOTE:** export respective MODEL_APISIX_ROUTE and MODEL_ID to test the model endpoints --- ## Undeployment diff --git a/docs/pictures/Enterprise-Inference-Gaudi-Driver-version.png b/docs/pictures/Enterprise-Inference-Intel-AI-Accelerator-Driver-version.png similarity index 100% rename from docs/pictures/Enterprise-Inference-Gaudi-Driver-version.png rename to docs/pictures/Enterprise-Inference-Intel-AI-Accelerator-Driver-version.png diff --git a/docs/pictures/Enterprise-Inference-Gaudi-Firmware-version.png b/docs/pictures/Enterprise-Inference-Intel-AI-Accelerator-Firmware-version.png similarity index 100% rename from docs/pictures/Enterprise-Inference-Gaudi-Firmware-version.png rename to docs/pictures/Enterprise-Inference-Intel-AI-Accelerator-Firmware-version.png diff --git a/docs/pictures/Enterprise-Inference-Gaudi-Habana-version.png b/docs/pictures/Enterprise-Inference-Intel-AI-Accelerator-Habana-version.png similarity index 100% rename from 
docs/pictures/Enterprise-Inference-Gaudi-Habana-version.png rename to docs/pictures/Enterprise-Inference-Intel-AI-Accelerator-Habana-version.png diff --git a/docs/pictures/Enterprise-Inference-Gaudi-Observability.png b/docs/pictures/Enterprise-Inference-Intel-AI-Accelerator-Observability.png similarity index 100% rename from docs/pictures/Enterprise-Inference-Gaudi-Observability.png rename to docs/pictures/Enterprise-Inference-Intel-AI-Accelerator-Observability.png diff --git a/docs/pictures/Enterprise-Inference-Gaudi-Utilization-Cluster-Observability.png b/docs/pictures/Enterprise-Inference-Intel-AI-Accelerator-Utilization-Cluster-Observability.png similarity index 100% rename from docs/pictures/Enterprise-Inference-Gaudi-Utilization-Cluster-Observability.png rename to docs/pictures/Enterprise-Inference-Intel-AI-Accelerator-Utilization-Cluster-Observability.png diff --git a/docs/pictures/apisix-route.png b/docs/pictures/apisix-route.png new file mode 100644 index 0000000000000000000000000000000000000000..dc7074870170a3102a49528a4d45824945744062 GIT binary patch literal 32306 zcmb5W1yq&W+bzE7E@^2|8l*w#kS;;#Zje;EJES|LLqJLz>Fy4ZPNhSdP5swC=l6a0 zyLXKLxZ{p-Jiy+(i}zjYSDuJ`6q#KQgu`Nu$0S!ykr0M+Mo-ITe<8Lr_5S>56)P7-Oikbad#3>?D}Tft~;Pa?%xS%(51ODSkbKl0!;~ zvJF*UbK6&YJ>(~zU?=%&oIdbY=?Q;y;O#pu@WGBuBJx_T``+PhXte8bbQj zbd>EId?$jw_mOlqxEu5$2tMcUEHMQk9h28sN^L^KN0;WWp|eQ+G#rnFCvysYH{(Ay z55`h5<(i}ZC|Q5quy)W{!MM@Z{2)=Fqhl%f2!rq>1ACYz}-oC|CRa2|7ndh9Dot*&%g7)vGiaww zN=iCEjAdkGgd0Cw_Q`JRG37LxuQp?U8U=wAP0xS0Y157!`kt2uZ)RrZlo&fOJlvh} z>eHG7|99hFbWIkb*XUrL1|}wmp5=QGkE4>rT|l<;W8b z6POWier+`84w5KIw{KZW_0ir1-{Mx~Lek%6;T+D2zw5)v;W)7rc(BTTitfbG9m~J@ zJ{A1|+j|KWdg&*Ij~BQV`VdfC#40o5i!y2;@vE*iw3pgernXPc)Q$j_fh5D zyCB6cyqz|@(+v=av$Jz5pFQQrPoLhteJi$lM7;I;x6#?3Q3eKv9D5EZW9NBvr{Osp z!my?xC0vvhYR=qLgLpthwCJz=RTN@;xHh#gj3eS22c>bKu#9Qm}?Tu<6H8 zUO}d^Ulo&V6g{XWhiGF;OyRydHMi_C+0lul%nJ__8(M0=NqiqO%;H1vEF>my?WpTU zd2Ve--kjW>1E2oq@~}D~B&^BqbuC`iPH_W>HXd(J(X~pf9owh8qy2HjTyGoxP&O{4 
z?<{JtE)3ADNiW0cZQ(lT9=qTnfAdKnHMd!)MSK7Ly^Nyb8*6K(Z{NQCf+S3C%V4ny z>Ai`>vWelnlp5q}QAr*i7*7axfOI%8d^r|^KGI{GFJ_+^C*Qj9g9^A(E zs@zXnFAx`zP`9<$uiJgaEvp0Ni=EbV`(m7x6%^ z4UF6oynpdpi89BYi@>3QFaKebY8^ab#y7Vo0mHH&3?@z`I;^_ajx0Na#~-mfm)}VL zu$~038Jr2BvY}{WzlRUlqcFcm|B0-dhWYM(WO!HtYzP}(vSccu`t@+;Bg;`=BD#l zPvN+nUTC+KQ<>7zIRQ${&?wp6+k!Nj=9X`K^q$d+Wped_3z}n~t$@UMxS0EXfb6xY&omGw~s{8*ym6 zKCaZ=UkmZQ2hUvjFg_n|d>=`h^!NK>9~!N`n~cn3 zL(zy-OPi>S9FP0Pay(;!ox2kI{a&c}(5J^u_mv}KqhIA|A4z4!t*lDE+ph{64aUC^ zzG}h`i6VKibiUNz#agO_u{wAge6UjIDnz$*TCLc)OY9kBRFd!Q^9440@)WE~=Mh3( z1%%FBJG;v0e1aGL4#VHnx(br$gUbS(#ityU_4`HI-ZcI;`?4DQS7or*DIq@St?LLL z9m|bEquDIudh+|Cpf5f@G)UYxoj8<+Qf-Vj&I~Z{vmdo(ejJ&vH>I@`(v=JDp6SM; zoZ-LkN^NimTvZg2oe>EPCO`x>!!S#ci!{eW9nf-AP@e?ZPau`ADrjEnq*gut)2j zt~isWPmRRK#}8h&nnO!N+7JHX`+?BTeV%UVPNy{X@1{nu5p9cYq+DzMsy%o__tPeD z?Zad{dbhOfUb=GH%^n3sMdoD~`);9qFxs2PCm}^K7e7Q!`*ypcB1BFT4~KbaJa2r1 zK773wG}IS5zU1pnbc`U8E^lKllpc=8eprA!+tksoM>9Fnqf5;$KD|}osARfG-Ljba z{F=sg%DdWglWO&ug4e?bqXQfbbnv(Yc0TK24u^U%;lDYL;u^fO%IOf9BO*iPdg_f2 za171|>}RdkzZx77s`e>8nWi~rvqi9bD1Se|qDhZQ=3Z41cg{)4ga_U{ZE0^fl+EOq zAnG@Wtz#92l4|yai=A^>X;z)^LB0DdaqLWN2 zLFT2qMU$=qVrA6)B2Oq+czAdK<0g^OUA`~=u$Zifyu5sHtLGIu1`!MHSLJNBgI`q= zn%@oml^&zCN4Ha?|@%cB23i`_2HQu9a3#vsK+E z_LF&UVBZIO-(KF_v%S=+mTYMUSLU{~EYT~0EzWM#g=n<@v)ov4_f{!eSnpTehZGhH ztJpvOKD?R7M~Kf5pg)`z{7++dlE^YcxEThO6?3Kw%@np8Ob4G-EnP7Vu>QK8_V=*^ zJMlo6l~22&;8;pTe%AxGGe69+8T~BNs=j(BmjZ^!LAJ}D>>MOt<&XGlG8%We-MLMBK zA>V(%k&gFw?TN{{9Y4;x0`s$<_cJwH563aIA4hd_eo(MmcJ{hoO6=1Wc+?9!kk@K4 zX1PvTL4+}(87!~eamZ2FqC<~D=O?xemedL#jd(FJ=sizM)&>6};{>ed%1#u#7>HL1 zT<_-_o<4)>u?zB1GzqjxNQe5Il2{&(K%BBTrxg`KeU8c@kSjd0h3(4F%@~1B=cb)( zs3B)sYm~Q_nu^pu`~ZV|7>=lEggcq$lVS~y{kht}vT6Ct~28@UVJ#C$q?4jOmQ|(bo#j@QpRMxNako@XR?8s-}WJ>&P z#eWkw%}0Pg3j4DA(QnvC-9=yz>V?_-=p6LIEbO?9EYXjqL5Ob8PZgvO7v^}%Yuc{J zxuGdZTyG7z@$3Us>jl3?SsdCfZm~;+l1NXTm^!?wupfW>0id(&%EJ};{i;g$nUsR0 zq@3lg`D`K*jo=M(&kFYNKQiz$X;W_JJN4m0}Y0-S03^my~Hy*$B zT%_F0Y!6^#dQ0?@#Cnk+P82%mNWytTiuuC?4}gg**FW47IFx86W0iQG-7ypHdm;{5 
z0?0}qAikrfRzqu*HDJw*mT3ddx8(wn~l3)zkskctkPuv^he?EE0<;p`4qM{c6Mw4v^Hm=@ZfHH5k(;V z`0+!t(SZh_oC6nu&k+%C`oqm@NSQS%f|l)wSBH(O497BF=>sRCTu3*@xU`R^u^S7xASy24X z?V<7UPe`Jqv|dR{OAila+lUN|jt0-}mC$p8`yk`9Lxd=#alUC0c0JMsZ57paqTu*u zNl{k7to54;jUudg87ahpk>M4x2x3|}WqhxE#>P;5dw-gl@V7rXw!?_lHvD{DuCO0kM?TDX>F!W8PjC_B1Xtuo1TbM2>$Qrid z4r@BHtM$^S@wC&Nx_BqZ`ns+B%`(}v<6Vqvr~3mUH!|g<$#GoobPx`Fwh!6Zkq2qN zhO+ye}jvvyCu=iAu%MmBkFMcU!09~uznG}VgIFSfJici37O-}1DF zQ%&AE^XmeF!rv>69ag5vJn2}5s z&=@X)RD=@>9xF}v^&V>;Hb`A$MtvXF!8j5(wEWO`t&vi@SiHAdSHn|SUyo@9$0E{s zApvU|ecpNf6xHKGKpJsCc@ruv!- zW(2HJUPU|c#NH(If>#t5SJ;cZ_6`@~2cE>Jo720V*C7LW^p-F8_nH9+$l;YRHDA`C z(tE+$J3~uaWAN*OApJVHa&K;B`)&>1lD%qlaNt6;XukHDot^!@csMXV-hX#PNt(gG zF3l%Vkdq_IO+Y|QJl*i#gq`HO zSZy=GYc@o{&BHUOq)}X2+8zHwo~>D}e`G{~pB($0h6cjQ)k2L0j$2mBtLEXsK{!a6 zPJ>cQy-_zZ>>A|GRz*k0s*wQ|Ev;}U)B|KRDhn^(FN%pAgVMUXx&sLE34@4P+eS6> zrh&hEBZyu1XTpbuxk1fOwjT3ce%pFG% z;#`!uDAJFO%MNxAd>OvWC|84YsN0J>B&{Llnk@X8L`R=o9KLqq+7Pc@Zz^?6BIOPq zrR5Ss8U-mR*Uypo8z>Q%vXsI;Tp6LVQa}(up`qYx9LkvOrB+_GKo(c|6x zgx|5FM1lZh*MUX)yl?xO-9a0R+;?T4L)0zm$)R#9t1J&Us6B+NC(vI6J*!oy3&pvR zelZcZ=ON3FuZ{c0gF5-2mIOaao@Th_qD3U%f910UCBm`uT3K3>>O=hKyANF^5T%H(tsmB0YID~!k+PIrZfN7* z!EN7oWUQRBWNhzG&fc*u#LPh;=Yi5g$ec~Zl1_D5QfKX};u@t`^bhvPu=G|#)yg2t z%AZ-(27v_O42z#;e3bk`6$Ey1#=>0?KlSt*92oWhpvMs(BldDexIm~52YlOMmbjzy{X;4`E`zZ;##xPU|i-gh7e(6c{Gx}L_{kv0D45urBDaU$qV zXRJdMORJwpzhjs}E7BT&ZT7FwPtEc)jpVqx*xcUQAvIK<=7Qq00*H>}#966udhebg zTA++Qm=KcJR|2ASQG@^e--0(KXTz z9>X;bb(w*iBX}LZ_w#85=Oed5d7mAY_|k%Bcya_a5Y0y^!a^u&y}tK%ome?_M|w?$ z-T5#o;sCO~e!jeAKXs{U3Fj>-Bs|Lp4U=J1+RyBX^+uVk?pdG_15ix~V)aLb$ zQ%2rYowPOapYWPIWpnx=NaP)bobKShElH?nhw8k&Q|cXHK0poff>O6X#$31e*V*aa zX{(MpflktVAui~seqbu7NYjJ3$g z2@!t0I5KqHx2LoY5-+hcW^+iNB9MqLo~}c$98oX4k1upY+z`3fIV^9Cnae6y0_th0 z0;hv%eD+?(aTbxMhmr8x7<3(G%DzEW8YPcf?a)45j^MJ2_s>gITAUMd$tjGWL{_3w zGQbYIKamEj{l*}fhF@UKMV~ti&2GFNM~MtsCYc!Hhz$S?xX$0WmCm7O+2s<$#5y#M z%{eh#G(M-(BtK4QF|+HGGIFQ;3q|u|a(q2@9h}{fC4_=lD%ItwNk2R@&WSi5kY@Ei zSHh>nvh6X1oM3*u@4=aWN38?|jM 
z?@zs<9|wx9)KELm)$osQE??=8nH8xvX4>NFvN=2z;x-b@*jCOtye_IPM>WL=82^bZ z25C8DdTv{m2x58dpklRic^+O-lOV6=NO~yaW2BoY$}_1I1!-wVM~*V}(#8WJ5s~)A zK(1E(7{|>%IKOexZ+z5&1mdcyPtHAr{)}hGeEY`4F_JA}P+$J+#S0|lz+B~(624zm zCMZDVX#Cg#DF3MfSxysKFE2s2EMn!dSmbKw>dM%xbTM%k7ha0;IYt&1NkzpdmT`d6 zooZt>8|)}R4{+epY!fV1%s@(jDmYFnES%N0WHPho$qxMu zQB-)IGudH#97IQheuQy|tL=?~BWI}~kwbc@wKosSBY@ij^$Ie0Y_<6&yXLiz%8aiK z9v~Gj+mR4OKiNL(`s3z>Qrx?|gq%AJ%3}0oFmVH8?1ZpAYfw#=lCT)Rx1+;KO6>2y z_20JFj%+$hKEJW7w>f89cg}A;;>uXQ5a4J7at&zIvBVs5FGO^pQGC9xx9%N>L8ZUB z_f`htNExE9Usa5|x9MNHs_ zM%NdCekX}!xO}@75J<{B3!(=761G5__id4|yAZ%`4!jgs1@vjVICL@cQHw`fafTvy zc+q7er|#0#y^_!UB9WxYmavs6UO&!zWYPy?ine1a-^>^@z=~%Z+4XdS{VxDS#jH)#9$sD2q@I5NM*S5=naYb#S>m zj`p9{I|PKs^Uq4Yv4|T^I8S$4T{v)4O2Ht!WHnR@O+y+(_X(}7>)YwCxB+`b4Q0-8 z5?jo5=W%Pb+Nc#uleiPmSrpJYppv~N3JH3PN|s#GSSLmp;D>=SWORRD4p6s`ZZT`< z;w18N4q}h5ebx9L1?U5NN7A@J)@*JqnBR3W9!=-jIIvaLV4R38+y(6|NP(7?mblrn zmu~V1X*|M?8;IoCDDm-4Rhj2cVq;?=An*?kY~HwWG-U9-{} zHE+{bGcJtLyvzpmpuXLIiDX(x|93MsGjrKC(0|ahD?dgUy=WS(Xqxe?aN^6aLl+=# zIT9M#=c=ztd%wa9B zE0>gL-?8cXJag@tcNFDOvJ0vbaqS#dpigrDX=%F!d1DX*>Q63Q?IZymVFyURqaU>> zzc*aKFw%W1pQu1*11-rzsbJ9kfv2bwf0HmjV*fBzhwYiu5`TVn5|Ytt%L-CCgWy{- zvWY*4#AImQ-3@{Amr2qFYC!X1BD7G%U_%-6YJ`_IXhV}$kM3@;Cmt~Of2P1ECh{Ch zZwJ3TM__?IR0pbk-tL2 zMXaTItX&K&5GDEK6WAy#5=bYA4&#VG3$}4;>utjc>2P1yajN4MH(BG@x}-N`hrS*l zM|H{&grLGdlU6!q5h_YH3JqBM=wV(3Cuz1t!?cOA#=Xqv)w zz)XQZfZ>OG5k0-fVR<05#?PwV=7nQ-%miVipvUR36~O@f7ogrGG*yv19N9_Q19N}8 z6s7tC@4Ah|KTeJeIhpgJ!lSiEE}Ar&*s%&Dca|Q2l72hzD8<@2_9*+xBHqP3&33ER zxG>&rc!!O=dMYEstag_-OT)6RJ+3Z*&SQ)IZqH(S5W#N|?S2hU-y#ip$0R}MLM%UY zbIEbCW7_c)lA+k26Qd|Kb8#h3l^HfRZ2+~gRgnNCA~v-FJtXMEy5-%=F&kgF%SSsD zh~+br+$5WXKvRg4`SLyMOPt^&W{ocMnj3!Fb7^z)uiw%p|8Dlvvau<7N!JZm4vOj= zR_}&{AOlvVpdhHYVcD+>dqU5V_TinnwQU>R^7F)dcZxKlSqKNeVy^dW1x;G^9L(43T8dR^vN-UQ3wvJ9+PVHMI_^AIDsQ`@ zd#(%!D`6?5(%El24FR1b43-d4ev8oC)c&TT+ZB=%oh(1QQ<5_wSwXo}M zaezs?;Z}k)r6XOW-46E%XDk{h>gfz}K{Wxwp{l~=fJ~z{= 
z@S=Pr2P%<$$9l^NnuBnwtQMaWg0AcfH!Bw~o-R+8+2VpDo#aEd2IY!jD$m}2(kp0ba1UWjila|AH9lJotR_yL78=(kC+G2Cv zPYFFK42T{AO*`R+1niMcF*xj=QXVqfjXz{FIH{4Q1@?0uZ!4%YsxT z%3hzrb?$@K{~kz8IkCfpvaO z_B=od4=YIfuv6fPRA-i(JQgzE+E*1Wa`&~qHNX^M-Qv7q1Z&*BhBkD?I$(j?JLap6+~E2qqPd2U)u0VUi8y3u z|BTu%@V@lQySI-YiTLNt(GKA7s*hShq>8Os5%MT)!By=l$~5 zz@T#HT#*&jD!0SBIpB{tSqsD&LdFLK!k?4qjC5Sggu$Se7C}HY7*vZ0e;Aa|Ng@*_lrYBU;vx9ZjD*U@r;D<`uGviH zAai+N=hET5^L)udhXxnLkBx5$YkQqx93RRA9Tw2XSDVh$DCyk6YXu-H$ zuFScQ6Un`=YVYv58n%3SCoeO6vCMjS+T!wK9on+8DKo)%{QZ@jNYK84Ix0oh{HwPv zr6K_};t&1~R8^gSp-PR)JhvnQsF^Q)A=`b*G#MW*$cie9F+M&G(1v#(Jo*3-_S*Sv zd%)Qe#~Yj#h{K=I({q{^0)r} z5z)kk>QJ}f@G#Bekaab3;I!}%5#maebKfIeb+?QypxyTCbBHS?wwB23P({O{-$O;e z^^iF0_J^N&zlW~`ZwQuvj1kgjJKjC-6dC>PsjCIEl0B=qMfbMP@1lCexB1xt!~WAc zSz=&+SB;^w(Z5!{N5MLNAiuQV>hs*#QNbsg%}-(|H?Zsd;oF9+Q962dNQBt9Ft&bY zL1yS#DZ?$An3$OJ?Hl@V%1e~-vyBo|z`O+q2Rl{MnEa*GuwW-K?H?p0CeCkb)7PEK zNKgO#zEpkC3s1C~-UGn0jVZ6@$&!a z-&`0h7a7VMb_=z#3`zwAp=P3;kgYYdS!8a>j%;MHmg7Ck$+CA(NJvBsfn3 zu^=o4ZALt~g+?^{x4uW@HIg#b)TK~{( zdY+&HtXd^%MaI3K32DY3I6b0g=>2A-;aZS0x3)}yfvZiLS%?kkITc6*dw&^Wwlzma zRNK)_LcjeEv_@q<2`&~QBjwc>S%x{S!lXv+1G_x!=#guPXYr$k?05N>=Y9{E9}p^Z zD-iD!;}a8SEU@QEI%(*67!E%jkA3unx!{U7zv*Z}AW%5}%xu+c)FDw=HAD^A$E&v{yRmQ4abu7u@p}mV zf7$cEdk;o^|N9G^sa1aeW#5Bc8-i*6?_K_vsgEX*6ytvl3A3;n{+~lM_Vh>sSJ>vf zMK;p-S#fc(sFqe5Np3*_qTsj*u*g+&*y7Afliw&MaH$6|0?Fgs0xA77{OPtc>?j4agt%GK0eOEbDOymPQwb&<0s+z(n8QMT zv4i_A(`zOIq9zb32{(bXm{(jJri4vMNNAh^lt=1UuSmb}+6K(qB=^$#`riCb0dw?z z#1)vd0C|A}9>cRej#e64K<~|lmNKm(>hL-f5Mt9`74z+H*nRHYP6T5RP_R2JxuIKe z)JZAxHtO#@+ypFZ6*NO(!Bycka8=XPq;2zP%S#5#Ua=`D^bsl9*)MNHDm1I$KW&c% z51NYW=}`dV8vP1j?oW34VvpsD?~#x<>bA7n zY;6^lwdrl|yTAP$KzeAu7XwoW=oA_t2pIU>yr3qPkYi+G>N@>B3}k~IXu_8-7;gK? 
z1Ybr&f-#7PHb%ejRj!hvi;`ggb`f&y$AN3=&`z0Aa!o@1Z11Yin_ReM$wo1d?&Jz*23 z-PE`&0#-J*BjSYqzbe{A4#f!<5ca#4DrDiEi%DTF`U+umkfUFuvM?1ah3`8-T zLwVL8O(CJHn_pBF;@>LVg{C_=;1M~o00s-tNu zmEn5ZMX&-O@S;GJppc(1#r?~4fPU|9`plZx>zAs`ETsH=08au5Y2z4~DJm-J8oFOe zI2b?KT{KQ(Se7_8k=>*qk&(MjwbKS67qD|FK>W{Lrw%niL>o- z>G$uGND73J2Yh-ze@55R<;naEYO>?|Kqs`YnKd~3?cl6k)Fj|O(4vIuNEbFWJuN9I z`5fr0qR!=p1uF?fux|F9x_Tb)Y`QKvq9|nv?#_8+rl&I{gfDyq$|1;%!4E6t`mL`E zlFa^fXZX2>|CuV32M*vkFui~r1?j0?hZ)Ob%D|8vnQ00tbLZ@2G5-nV`<*$z^VLKz z9}W@?{BzU2G%gFnhQ;35IS^}WjG;$>4z0F-epus2GEnFUE%hJk*MuCGam)*gngA() zon+$IF9v;Dt_eBp^8rJz)i&=9zbQj@>K8BGL`n!S4ULY%m@n^3X#i&i$!kq5ESOyw z3Zm#7E1oZlh+|St30(q^g&j(}Llh}GfsI|L^z;@1yyDWVH;F4*W07iWZo_(&bHm9?6Y{c2ZSqGSnTyQ*M_-OERcvc1Pjy*8WA#{i0Tvho+QHfAGIyzP-K}^36 zWMd8u4G~1WhAnqvQ`NkU&y_I!ix-{i4q+58(?L>(m79|zghRAWC~PbCgkX`uwoY7J z?ypdsBp|Oi&c?Kd(}HXsqVV-eJ}D`!nT3T!x1&1=I)9@s1{W-+Q;hK)3EK-3jiR)&JQYx_d8#f<}4ahja>A(aNXfOnGf+4V1D zfc2mt5wy;Ooxw&d;Bs)nQ6HB-k4^DDC>=JYqe(+(~u$;7Iggad0vRhnQ;dkCLER5UfiK(3fi4hAtqF5ogE z?PwXQ{2m>f6^gLcdf_2NU6s!0K9-~&a(?avbx}zqC%@D{p@c=xj?lpT!PKz;5cpc$ zs;()%m6i3(8TcNa!SFZtkKm~fm5L!azKbz)qL z52l_awf0ib60i%wY;=@XwlS58kdl%vyQB^nVgga_Ygcms_LFR6(-E25F*QzJKm{FC z`Z&YTq<`DM{{*s|d%eg12awgEAJP2Zz^()`7l6>Pd;iasv7M1f|KmqPc2jl}xc`0^ z_DiJy??X5MODnv08~{fDfLD-Z!D$a*fg~UxpcWA^xLdSqFaZt7cTJXRw^lJ(Su}8( zq_>@!iOJ*q2!R3+8)n1FopufR-@nJUdC$`vTW~*xgD4f@@6T0Ao0+|!sM7!_NL)%v zDtp(+o;pdv* zcv*BCaluu>A@J${s?y!-4r?G)pWBjx^9ul_b-^;l+^9JhukAt*u=UMf_{#yH3$(io zCHkjiWM)?&&N)o_BAN4Eb{L)$dfTxK3=IJ+BMs)qJEGGNQS^1rAy%o#4Z~$&6*&6B zj%vDQ+_32QXM_~$k}ZK8pmF;xpcIRf{~ecM+Y@Rj{?p2*|1sx)bcD&E0pBA!V}Kw)UgC6f@$y_ScBM%3>GdD8TxA( zV7V0Y@p&c&_jtyENnC%(~gJ@tQR^@-# zv&V&=B(rEe^IWOWYYy6-F1l^efLW(q3kwT{9c5uA@9(b1`q{4ZbGw4^xCeSD+SF$6 z!S(tH$y}^zO)LfNv{sCR83Rd5F+Uj#SLL!PWKwo&?|KV|H5s$wj}lW-u4GUKj+0DN z7~CHLJVyls``-lDTxAjcwGi|n00|>Vuh|6+IMZWWne{HM1K+t! 
zohNQd?c=kaNs6q}sfI|pJ$CRjJ-(o`TeKn^9ZKf~i?Td^G(a7eN%iNNi7((k zBWq&K+5cE9R&u|OBUX~RXLXXIq20fmGt(#UtW=^av5SH@-K?_UcZ9gh7t+=bkKx1s zHpVwkNsHa4gq@+UnoJ;!;sLI+oy<$q@u^O@ZG1Z>bm-OHRJ>L}vknW&i>e}h4 zleQ`awTINcg}eKYGKFE-QxrOfdbR9$m0*EIs9t??pMTaiyh!@YLwcw`n-V!*HZQwx zZTH8m^-k#Jedh}O zfA(f=Ia4F@aK0rY3Yx${ExbP{cf|zUZC$qY0{WB^>;Qq=^?GP9Y<)o`!j+SkzppQK zY8Dqs@lp2W%=KMV6Qa9_vwK;158-ik@t@QDZqko!1Uq8zvs@>bLcj$TFl@qy)~@i& z4;LvpGfEfSh|_SYfBf6zoo=2p?Qnz_6;z(r%=X*r;yG)*#D^#uzkS$*V8^rttkIZ zfQ;A14-|s}*#t@Yq<=ZfY9Ti;WssIJuSDQ81|xinNm)_>gkti2eRcH{&~wxR0g4@Ul|G<*xufNg9b?2 zwxH8cR8Hcx=scl6}rr$z%Q5VAD_12 zS9sHM&J}l$E!WD6rSdt7^YN!%6`{~OuL!9M?Ly>eZl992hU!=Lg8~jKzS~rAmz8+q zRa>+QuWzTsPOkrM;t>;vE-YyOb(NcI!jO$vu`j-)6}x0+;`s2B1+-DOUM(I7P;WFR zp?tG4BGqHCEZ*ehPC&^2kJT!oAN%4Ro4@HntMEoiiy;*^WuC@8FNL6Ru>9rZuRO4hz8MZAR?r(eBcQ4gg zTg&_9Y`GPSoz>HmRToE4F-cRWFBk|wA$7~TvWggV)wW{6Eu+HyymGO%Od7djczH1I zbDORz9R|XoI{Yc(_1AL?@sAE9R8%C2P> zIFr&=Y9%sTPneRv@^e!K3YXF}re7X=Y@w{oaCW5_de!`r=LB&d<-C4{gTz}FFpJ=Z zj0)8y@CFzK0fN$O_YwLtk~aMT-yIY=u%590tx>RjLyKs187jNWfMO zv?wr*GO#zr0NV-x9JnA50s6gh>LPpk6d*ve-_%&D@+V+vVFW4Yg&i-G>ML$)P_K}t zwA56?2j^E|f3L}j5rD9JE&%DuwCm6n6%|vtETYF%`L6%k$qje^OFIWI+ojkx0Ky}# za8@JMR%9z<2l!li{hOaEto+^v_!|5dBMdE$sqnMLlvT0j4jfD!DW08~xh_bN&txoz zP4u~qyMfs2#@ro>5(`sg)j>#()UgMKhtx|j!8P-M%&2nORn5No0)S^jeHn~UESSwa zmSs;=*E~ngtqjJ{7CPKC-ox`1Bnl zmwk%BNsZXU{Hbzwe`=kdiS^HMo^zvqH?*L!KtM=c++E7WgI9T=P?nk;|O zTRroDI?|LToVARfa&THE9B~}8O^M|ZbLy) zW8QMP@B#3YZYP~6&F|9&<6DG0FP%O(OzX(U|Ht6q_`}CFRmhVUsHXtUMFZ)x3-DZE z(*W-p>T?rc+n6WT=g|cz>FEobyq{SD-&=1Ln*F2z=Kg;T7c}(r{Y7U4=$)9)0ekq_ zv+17oUNW{`-0H^JSIkytJq8fpy=i5JoY;l;rUSdOB9PVzpY-RYZi$$#xjUZUFKe1@ z-xL>XJyc2-dyp%*SVQ52%4;w$if1DXSIr`N;X?zIF;!EnXoUmL5r9<}dGjMo=K)dv z>$%UTrobt_;A@VZN=xcp0(pvX0zyJS(?K;aT@QZwgC-{%+tf9dSEgVc)SMG74@n$| z0v&!btInSW8Z~*)IslO@GgS`@tY!G{&%&DVB>hk?YZSJvO+_Z`<-r=@8h6@>9sdNf zOP240yVjCnteEk)IY$D2r;)Hv48|kz*pYzb0q5cg9Kwr0E$xdU15_4)ML>YqQ z0U%<0tET}6<6-Iu(3WXj!@yWzhnODk+dmdP4eOoRaORY}Tm|RH-@gX_4bJ4`^7L6; 
zU?j0tiLHN2mw+}d^5`{SCZBBmgq}x*q2_2$Pfss%bxe&R_VVrE4{8XE1_65l$VvET zKwal>-kb45;P$melC7jaUeYMGWnSYUk{^y7xn{ys=*z9oj*KWiL}3t{>pc=(P;k5V zoG6}kcI?{@{*d!EU}D7!uG3)T>Cle4j}c!)R3i)r>;rg?s7qy=mm-R6gE79Nm`YlaE^n-aX;x z*H5f+HsiPvD3HX8>OeCA!&t)oO%s3RTBT=YW3qBKWe~h2kwNGB$B=ylqD^K=wPPWtUPS4FELYEVx z8&8X}YwyJ4!c}!&3l(y{@E-^&kl4;_`Q#dnpLdo`BHAZ+b8@A?;~`xnUG!!(1zEdK z?zfbPPWcKW#i-;;a2ur@>Sgr#?V%cf1;}lv$MDqLZ_5tmpPN7Q5j1w1e zKlz>%xc_Q$E@cPB)F`Ek@8#RzNc{Vmx+54O_5Hn=DLSlPLM7V&R|ENBh{anc`Mt zcSyd6tG}_AZC~z+I{DdM-6Lr$Uom=dSqi=N7zat*$-^oA;WXzw@(9tYHUGHE;R3m^ zi5jb5RK?ojez&j|N6x9Dw)R~W_C;t1>|;A>;ru&QBtV}TDV3ks?-cUVywW}pklc(% z#}>tbyA*+rH-rCveuavjp115JLCvLF8aTXmB1ko5L)=YVZ9EH%-Cn;`N_R6DU1+969e@tQ6PW<$@Z}2 z{qpyqW*i;@Bu;;z{{XbtZC$4yZ?Yuw<*k}pAb?=d%lEyrGd}UW^Z>iVI4Gc`ot~Y6 zdgs-4Q>)9CWcTgO=q=!5Xaoev0B{9z)juB5z|J)_Dytn~;WMqA5xx02!=GE2Hmj}# zzAO6jt8IBF+cnfG*+0O2C(tRXwuIuIC-l%V%xd)g&K=hv!j%wHRKx;jFk$B@flhF# zg;M$bJB*&utTD#|bc?r+6bQ|q6?zy6gOaO2jk%9h+2akI%!Vn1|6g-w85ZUH?faoo zIwYi91T2v57L+hhS~{dfx*JJFP?1JNML_B96ony_Zk3^HU}(-7fB*m9``Y`wIoEk{ z&O2eQ8F}WppZi|xyT0qQ-rafwCB8*NJfKmW#TFSM+QK2?MIez#gqKustY*)Ti2zYi z(UwK`7N^QXQt2oo=m{Nh-WjkiHG_gANKiXhX_4SvC4^E7l#=+g(z2X;y1so=hQth1 zAXWGum0_EP+98~STtC@hODlgg+ydI|moHQ{eHkjC3&9=^+rBa%2erCfOiWC>Yt9e* zMfM9vrqOLnjF;)_*d(5AI5#a>O#@jry4yHaYIrI50Hmb$^}w z@?}pz>BDB+zyCK@V%pkr%d{Ej%L-&d`48bg@A!DHfc8lw>UWMxjO76`$!6f%JFD9! 
zCTm=s%FzMfkcfFw0NeHpLy*!gIOo;yCSQ)$ZZ!NX|Md}EL<>l5Z3~HIi*C8&XY(+e zivv5i_%EerJWoT3_}PzbVgyk=~Y zjc>qP##&G~p6$o?B15w$`X|qUcQp0u{=VBatWQ@^HiK5#f0$HW^0A;qy4Uf#f4N0W zeTs}eJ56iAsvMx1BKWL3JpSeTEY14O_~5UKHyUw`^LL^fw8zW_ijY1R3zI-O!G5RO za6El_H-Re28V}OJD?I$S^u{ay#puGlAac#8b@#RFtEVW4-t=(@!0SlvR{A>Fao z-X6_#z}D9A6g4mXk*e2eYeIOVUf_j}ml2%-+kq`p+K%<~g$jf;soZC2-ts&Au*mngz9az_td8Er6aHi&2k7M|$%$w``A2u|t7 zeh&5n=Z@jic)q5cteO_c(OQ)rl~*c{t{#`E?(HSjx76_jQcj(2SeQY9Qua4@a`Aw_ z!Tq=XOb4uXG$8Va!Ip%**Y)oT{IfCjEMJe&i+RCikKBzl@9w?3(7t$CL4+=mA_|T7 z9<8hUReZ8Pk3gR2fYG^O|J|OLUE5oNtan_z^;!dY)9K`R3XHy77x>FW)=z$EVu>&} zwcNc07Q@Qs6$kD;&8HoDVVuQ&OnK15^sM{RdKRB9hpD|1dwTNMIQJ%$-wh^<=Rd*8 z6*e$%TelP9VGik|jaFH2-HYR(1`7X@PK+PHzN`{M7rhRl|JwE!_Wi^Lq7T~xb^O0y zYQ;#uaq@k!rP7)Io55bkC*t`E%1dKiVAY7`nAhrtd(g?7BfSsv@6e`g5nj#r;P%>c@FI zBhydpvaE96(!3N#>&bB2eOM)0Ng^dZ{=sio11k!AdxcZCK##tFIUjM{&b4EOMS9d~ z2hw!azVIdL8Gy)bFNGlK%&}pfB$WocV=fvqn6FI_(xTs-`tKXVwFDe~xivH|#%50_ zam$}flAXBE7<4y1klcGmhS0#Xx0}GO8gjqnfh2N|;yl7$%MA^}jgt%|MP~+*sgWub zKeoKOXVU&h-6osIU5^)=D^lf2aRxkjL%ldH0v8AOc~nENE6QtS*s7&7bLE`uM7g1t z#XOyULzl2^ZW|L*Wfv)RS<&8LBWU6zhLuj|u-p?t;FWw5gGZx&D{c!jaYBn)54jB{ z`+-%#YSrQH6YBq=6e{c%{ns8`Ge{JJLXysYcuePW`VbBey2~blC&o}hfebHbVPBwS zfABZ&e~Mt!c?y=M{MlJq@_)p@{qr_U>IB@YBaC6fZl!@q(Y4u&svLUbfx ziUB!g`R)K1bl^i|Tx*2E0ly)Ac~F-JBnnZdi3@*nLCqtNR2s4<2hr`{GTa3VPMUu= zFhSN{Qu3?kH?)L+%Ps8q5EC&RQg9fgAx%4mN)c@HY#vL-;hC25z=k>yq%;%+xWvOM<37XE$3W4 z4~?CCkR(C3gd(G?<6Y>&LIr&U8;xGUxxYG=&#t#dHGw%>Ndz6 zw`I1WP3+63I*75rqfC+8k4n1#;l9&9?hpPBl8CVq9ZmSEp=%j*qo*MuxY(i}_?Mt5 z`{^RVa~8Y*Xk0d~96MVc_R$kB9b)s_6rL0SWkTSfJ)qd;pP&aWBWt%JhOIEXeiHC#l4%pmUGjrBqb znbS-UiyAL7M6XC?w)@ABS~sOYj8v=>`)i)_v@#7{9*iBS zK4lwk3JR(d+ZwC)QF!eyh2-|xv0X#^Q#SZzshcJ3jM1jl)r4${ue!S7yn zgM4#4@CPy5q>f)P`{u&Ur#wIN><;$bVI%66@A#f>UK@;1EdDWr{1RhmkCqf#(h{2= zF9qg|%}c`+^WMF?C~2@rKCpG$9>2RO#c2-}{AB>~;7s5?pOu1pNu{oURNziN2&DO~T1A@R zt4&#*y|dId*hhSiSK~kzWJ9>*Ck5pFNUK{}JwG#VhGbU=Dnic{DXeXXbD)Zgc~o|m zSE_WB6r5~de+=M-#-6uLXLrBffkzd1jHof$JKROnWxohFsL$o_UCM(5Vbn8)VI_G3 
zBa}P-^x@ximbxA^=6sf8MC>UQ!+=`OGF2VkDcrN6bSG8+d4f?H{`}tZr=fdho4UwH z*RPPpDYz+$P!my5&+lulPlSt>JrTdkt?GoIHNPJ!&N=bGoejG}JU#uXT(a1R&A*L! zvsYn`Papi2^jR_zKuZ?dvg<0Tj_I^*m|Y7cIbu-r_`w=#QlMZxrgL>_ zrhg+#E9P$eKYo@(Mv@s8WsgYcO&%H=>RRF!qIv(q_yRJN4|md=obXGHwT(^Blp`3* zI6U3Yczb(~)OnW*%H&xr4R!U}ii9k9LxgUWVggnW!3={gc463Oc-POy4h`LX?Yivt zwe8bP_kEu=SJQ0hzOtIEdGgtpg`dFCik6rs6gQZ)3r$yF=!u6@6SveYmM3u<&=!%bwV+moZZ(4K(v1jR#zLl@0u{P3W>d9E8m!FT$GEx!lC715pG*f5OdrR)jC^juuH> zO=ozsw7*6(-(zE%vGVhAWFn7aD{mgZ>No(?A|0vyCeoFT4MS`d0hR0X_~2GiP*k` zF~5}MM$^X(rKTf-z9!8v$#ZAJzM+*Tt_E<+%GtLbUXSGA{}}n{0(%FIP6R3S+i|jx zS@-I8A!f^RoNgw4Gx=YWEGKpKdVa<8`ERb8+s5WV4p*FK(^l*O3JLZug98|(=7CBT zh!9BOP%aJE)qvXcNX7H3+JV;@Y$+dhk3rSc@~7ps?J+~m%iV6{2L}gS5f8i)C3npYc zNl_wHlH5zmDSKPmTif~5R%DJyZVx9`)uM?fH5+Tnfu@MFKooDWfT|qA$ zn%P~!T*!Tx-dM44dtMtGkL3Sbo&|Nm+M0L8!?#$FV~yXkyMEwA`TB+ZDHQq1S58{% zzKl3ncwlm{Sm;fMiYiu;XA!zU7t?MtPK@ht?in?T>DwDZq9m4ma^y4#zkMTrYzEEE%SVnBRWgsbVb9$Y zyNcVU8qRg;B5vezU-p`NsW_fi%E+N+v0UP7QQBu_KAHC@xFR~Dgx5hk@M-#eU~wm| zq`Ce)>IZHZ2o0|yIr&^-x@WX-WG_4_y|A^ zt-PCWa+Uw_L4oF!>vOy4^Jkq2j~`6)%Tua&-BVXS;JpF64VEF(a{>toREhW=s}32z zO*bzQ`Iwc}WYf(Q&1YPrnc|5K14r+MbTR76ibk-zM>N-^f5c{_4INgQfyl9PIzY<) z(Ib`gs^i`9?BQjVo`AgJS$!=nt*+kqq1({Q3!8Bwk&s5|f@MdA+7nM5SYw;z{ zp4B|^ChW@0Sg`VaoVQ4D?H5|q-b(H3ZZi=ZTEAr4Sbg$5tT}LDujVei)a12Hs45#Y zxqUmwo(b3WkLrR7nSGC zXLb%*Z+g5vK{~#M_U`RIIf}52TV`nHIq~64lf>5!9|jxpZKgutlpl5THOGG?B1*7J z&Id7HxKgJBgQz8?Drp?m%aLI*lP1`f+oH0sKGodx4Q12*94t^-$YI(pnK_ zxye}J?~Y!7b{xG{FowsMFsGYCg0T$eX@;(H#@*$W%kcTqd*rgU6E)lVxnTE|pscIA zIE067yg+-N)rX-!iSkR&secZmyXJIF$f-Nw>rc$3f-VTFJ=A=fAM&VSVN8PdELJ6=^8$~ zp6Zuah4y{s`^y+LPq&y$o%~C!M_TuHe}g}ZkLJ2B7t&Fb1+|Xw&&0Pod^^NT^zqG0 zslV<@NBM_F`yfC<8XH7+4P0(n|5=Jy^}NhF|8?Q^a`5Q#9c)D{z{a6Sb)>S+lf57q zcGt?I7WBFLdkvslzLI=$5rpqq&vRPtyUYjFGf(KmAGr}$tV=CnUdePMD>$^OJ*aSC z#FbdDrA4Pq3HwAQGDF4UUzX9>;NTDREKJvm99~%{MNt12r>Ifzkhk`eC2av-hT|j< z^E2<5F#Jc;RKF0&=eqaqg+qy&Du!SDD7b(4MBS(QDtTh*2xDvp$j96n zmVef{IXZp^|Ca(pk@*q)WF%wsUSHeP{K|i{XXO@u`1nx~G*|!E*fL6N0Lgg7|EU2s 
z>{n?J(>3B*0k$A(FXl90SIrk{FJ8bqd#i@ZOfHjjP3p#sIQiP0?=CL_h=&_2-0ybq zaJRVAP_G}$g!SsB`8{HV{!pYY!~LtNpB17qk4X3W?*M!Vfl!+l6W`x8sv3W@h`LYD z9NBhszsXdXaY4>7p;XHBodqc|O0?;RH%@C{6*=%OY$tZ5p;0>Wunn`JD;3a#(<(W}8d=dv=Pi2xd&+W&E@s!fvS1O2Qwl5|v${;(gl6QK+_a=RJgH>Qs$L@Xjh2kjALM9?)KCe*+LuyuE|X1ukf zG<3>SP%)j&V*DYOu)Qddb1eW77pykEseL%4lNG}l(Z*QZa(BZM+w2X5bind(%c1G9 zz31j90?^$p`Sl!L{*TsIhqyUKee`4mu`vwPs=#p~y?^Lk=JzGJLFQrAT}MPK{jCvp z?unhoD?(lrR3DO1<1%v^NbTHyV#ps#vj4Ws#0GaYU88%YCU0>b7gdffOq|K#qTEGc zn%k9|o@9|bQWPqTf0Aq2S+@X+qS-4~yyGOzcwQl#vDjJUh8en0HaO;ryqFzr%ga7_ z%l*FQ#zx|!TDitw5oe)al2h$w*tnTmPj+L>aBine5Oj-QjM)3o_#!Wxt~F6aIlhiD zb%Wu6E;kZLcZt_9&azfN=C8%Dg-6ZKvscw(+iZcu$EarVtu6WqrsN(00x+oLj>>`n z?1SdaThpwRdynvZ-bCEmcKy|UbEvSIsHlP$3QIAfZp3@Kp7Qh(_{_t##$mx9GykdJ zQTLr{0VN&MY6$UA@Bcn-#N9K*A3a~~ z2s~hNvAU+SBMjOXEIIJ8{mX@N@#4iqs9C{7FDd!geS4qQ2w=lAee?=@rGwe&(u75q z8&o=zoV)$#0J9myl!X8Nx)KgzZHxwGv@aX>xDXk8jdQLNgJ&k`-cUkj%;vv%V?9|P zX(C2il+d8u`LvLJ9>>Muh_Pc{EE~ov)J(&7(P(Ron%Q_xn6#8v zQEq{*N?SDBVpEE;_hMZSUnJG$z)joRG)VmtJYzQInpY_i+jSOO55-R^C?eRbb9>yF zQ_a30-;zcY?Ba(F(q|o>Jdw|*NiX)l;9C?0xRmLLqC%qCy={}G)m2(Manw5-#lX{B zNQPV-a>~>dmxg1;G(&QPlfsKn3njx`kmI0?R3pS;1h zFo0mT^k+-rmGd@oCoH|gpJ>)q5_}P722=1S-V{YgHDiZ6$ci%Bc-Ds~Sga}TeHJ8k zqF*s}4qdG`RdA8;+r2m_vOXOK0M6C@(D^I2=yV3ZWkuBdO&GHwApj^&RkeBoL{^JW5$J(eP4OXHm|f3^GThF`IRIha$})@!Z`6t-|Gglr*Ed~paXs^VR&Yx_nu3d1a4T?y2fIE zN$%z_AtfUdwd(A|+K;F!lTjP9^Elx)oXRr1`~4^`KR-I`zWzDvQNJ5%J*s+Ef!LTu zymt9s+ti)~+>Xz{n8-gpiCr1BC)T5Dvr+J!v_VPl5ae`DQpCwp$3Iqshn8QwkClLX zy(li0W)Kis*!S3Y9MP?jYa4SdUWeOk-D9Ypx+w*4X)I<+tFe-HwR88W>k-EzLqGIj z;2TBax0yf+e8m(HHu8owBt;v+-ZXj_7=ZTyRzN}d%#90su)+1_pEfW6e1|en$`&m` zTE9S0CAs>!9wWf8Za|3T;7J%X=TAmxjlTp&nM~U)AN(}c7&z8cfe?TQ;*e!gTJt(f z#Z>|jsHX?|ckkjuUIJ~-^Uz=Lpv;yEjNc#?@Hd1Syz2Ygd`&7+Q(q$gQjC0TojZ8Y zzRB)7GF6RzrFv2RReb#UGzGIO-x^s`+Zg&{D-=j3%Sci0;`1za8NNq(xkP_)5lf47 zd3TO#IbccO?G)2VRc(C zNlA24r@XHXVb=DG*G}e==*P|kh^mAUq->xJbKu08qCu6@AtKOVK$b$t>!q+sgsK1U z;oz}0SDNhrPs<=;Eots_M8$Ib!WXe4;*K;^C16}O*&(#>S`*#5C+~l(%Jf8ft|_m? 
zQ3MYzwJ&r`3YvaApW%l`IK3+_VwUkk0LYl?RgXWI`+OPQro8^gNm~{;)Afsd8I4Np z))RnB6G!DFQOfNzqAXipBKihEq1v&Gf^H`tIu49PkpAJ zZU5Qf*vMT=bhN|bQ^ftnfu(Zg`!uK1qo~~mw*2c4Q8Je~Jz5-WIi<%nVZ9ucPj`MM zI=NtyM6KRo5J3E#)eBE|_!hlCH;k$=|5p0&o4>*zHpRFpJ2+C^P5FUs5FzUJnXlMD zhI6nXrs~b*^WI=)b7AHjbSBF0Zw}aFlZ^;@zr1~QOM0G#KI;5nluV%V}6;y60Y}lcyqimvBZP;qr!RD)*NMi0_*%w2cQI(01&Ep z^kSjSEt=sf6%{}IxYzaf{3MEATVzr7b`@#?%tYAl`leyd1t~zva4pp^!$SX4s(3S( zq7%8;o`Jzs$aF{r5*Xjj5j43nRKd>~j82-rV!0*E;MU1$OT;L%{>?6nO_~~R9~Thb zv$ux=XQVwz(|#gOQo9FUG{Za*`CK*FY5@d-kRRD_of&k(6DK$8xXR!+PJ?9NY+h`k zOx~9RNT(72v|85!Szvp8y2X7UVf)Q}R2XU^)YR08&!6MgPc^P!JFA1*;z7kxwRb>oTmkFzRd_`KE3Qoq1-W#vT*M5={?`rA$DIn6A zBt{P0Rn5}O&i+1|{%c0z6-U%nCssQQx=v{KuLyb1R&{{7 z0s;2(^x^pxiQ+7a{ps0*KO!^fQHx$%LL=)&;l1Y!anxSgc$_XcKebD`M=*i3MZJq9 zSa56ovYYvNp)RCCKTvsi%DE|gIr|N-SQ+to^3-TM7yqN{E12Y#tSo=;Yatnbx_L>wFEa?1 zmCuSyiC5}|!pxv^>!|VN`Mo86moyn%t%697t9S^chr3vO!!;a;E0woTZk0@HL^D4j zf@cd$Bs5*0LmH`FfH$bH>F_q?Uo!)K8ey`zLj{*k}9+D6E0v`s)!R zRfCxkhZE*=edXQf_d_38+ehE5C7|Ns#WJh;e#*^k+I&925q}e}#Fn|nD6dY;JO4hG zja_i|ynEV2!hGPX2rhwAq|OPkk>jl>by2QwOccaLo(Rfx@5?QbaiKGAIdlZAbT56D+JHO^XC0 zAtgTekhp@x<;az91{H#|#C$`NAdE7|6I>2((+nw|8lEw#rBJe6M6k=x)ZyW$6Gli> za&8q8%uE}Gbv;$Ib>8|`!_~qrJ8tEOu2V9=FpxaO>tJ74WNE1Od{G|e{l zrM04F^%x#^9nv7`-VWOPZ!PKT6j>5bRN`n{)i3lIKH& zD%6wb8g4-nHn;u&-*HGMXdb(3Vz;}i^ENFHnd6390y({VYms5z6LQ8WA7tgw%n772Fg0Vc4Nw{wtXb?#AQ@`Tu{I&fAwLcGv5!SglRd z+N}M6tPxqZ@C4|y!9{vZH3ofAn2FWz;$POLf(TjNUYN9X-2nI-q*~aiD*(XCiqTT6s$>O336Z}$XU!#%toiX!^VFhvYmxOpCa2=M zvG}!!`H8FYlufP%L4!>I+nR{igOO;7?x)LGKzJGlbsU?ZQcqw-lpSzMu(X5g*E2j) z|JEMUB*ZMijnHvVp)!s@aa*OQ7WsSY0s8uR+m_$yH22)AIflG2Lf((4zMS~>jmWzlkJ2&WD9gfGn$y>hF; z1%EE!S0Z{D$c{s4sH(FOR6KIHV(7UheX`W-t3e!PLTp8WY>_(C7?Z}GP&IiP)=-#N z*K)8VWGl6G6{Z=$v^Q%0$6vFfqk>y^c_yY_-wm4?`UaB%Kk-c0izo3iUeH)9^DnG& z7NrI3zvT&H2n&gY@3a4<6`=jA(&@>}L&i&2LkA;pp8swKw6{>^{jfT=)IUcy%gC*E zJ)prtwgN(zi0Y*y;#QkN8XFYal^==XLS>pr8X3=J>_L0R{Q{T~bDiocQpM+2Zi>85 
z{+WabQS&PZFT(18IpEATGk4nMMuZH8H?KNGmUerx6Csd3gf9_D?=j&Zo~kfZ!_$PWri7(NM$FZhfVFkz_(&OmPs2+X^L9@-HPp=4j<`q2KuaDuf(vnb>F2MuN=eyk8swxJs766jZ$WG_>+|`Y% zSY6aLZ*KZQ)+{2qbJw-Qme@xq>=ONiW#X-76^5oKotX+0v*w%}w#O7RIPx?!XKY1W z#-*}vM1>qGN`>i7H-}Q6AEdNZ^-b~dSUo!HxX)7}L~}cgWEJ(GhH>@u&ePtPazV>I z&Nw(oqc>5XC0M6%>|Mnw6;N&pwk5g#v+a&&xy$+tNoGGy1ix}Qn477)%+WLNc&`t* z!6A3f#ULF)ZCC9K1| z`Pe9P+pm6QJM`lR@IirlrhMR87j|J<;GT!Kr{&njxMaBYR1LBpRlT^w7<1_uza3gO?+ zwOrHuR z8OHm*>cS$8LtG_p=zN~^k|eHK&vxzaHtjQ)M5!k>Vx}_Bg!GiQ1jn3>w0`iZb8&9a zGb9Fm-KWZMj6>WQE0-PUv-a&B_;v(qMuj|-!OOl^UHTi3$c34R7#FlH_9`Rfv@c#6 zjh|G=Y~xp@k-i2Z<;dLSP$ieTBb*6Q?asLE4C@)CS6b4pickNy*x$EHpX{1{H@&8~ zKZEKKo9+7Qy0^QEJ1VFXw#bW$e4Aac&%L51n{vE+8%B;aWzEzTmTq1#Vi5uv(!A?e zyX;|LElK_lgB+ypJJwwc;)9WaW_>cTQ1a#$BS_tJUU zA_)i4OER5PkVkbMFM*>{4kxsKSmBR6#K1T(MMT2wxy_wq(7&7*YTok&D za)|uJhUURlTN)zG+13(iEAuG){Ojx(me)~SEzjACHBgiKTx%nJ!Lo6K2 zHt$Q83q`XUq0zO3_)PK5+7%lZAxaUtDaGK|!^Kf}b2COS5+olP!_^D0Dd~L3Oy{FO zIPt+O39odk^Fa6A9U_Wjg&6}Xc-o!actG&Rig~)j(7^r+43oL~J!zOCLvgQ^_40T4 zypV#9QU^n+-mtFI?XRs&ApL$FAc+8i@P;8P5zvF_6M|#Uswpojl`K&Etcu{rE8SIP z6)$Y|AZCm^Ad4LAFU|6C(v ze(oPw$l}5#c?L78N8E;!)kEgl>MgF}(X`2zz$NJJ(X4y$Y~e1R0)qKcl?PkI_GDm; z-4i04aDrZ;a>NsJoaQGR_{?wY*~`4wNMIZPG){(@9W-agR4z0xmd)HI%$=uSZwbH% z*akxLcHqk8&(p`(#z;=72i*KUHEX4djh(7wi!tKV?q8)G;s+>Z4_=;AjI{C4dNH|_ zF!L!fGkp!_fX(lu1Nw6Jss|-@QbBKArlR=pl9-k$)rtsyIsVr3$zkk$Uc>|vEL2-O zWv#=R`KESxZbeQnXIU|HT0EPxs+PSz|6Z)6P{anSN#V#>&Zq5KDOXe)J)%Zk%;;?&z*0L2jv(f8$dHF0IVdBWq@ zq0Pi8V;Gg=y45GA3NtmVe)ofvk&3D3@3>Ro`B94k;o{`UujTB$mQzH63t?K?sXJNP zf{buWdc|oDOx>}S`=TuvSlJp6u`K)?sq|C3ykvRxUmL4V&wgsT?f#aGwk&}dAhEMu z$G2gl%+3ASF**8l&@cu$x>0Sq|H1Lj5GsGhZ3dlUgLLrL{8Sp!6Wx2~RH3z2F7lYE zdNV^@awtq?$CgK9`K6)jT4r(c5BqJuS^}J+ipS!oql6<<0@P%bV64El&i&K8?sYMw z-CV)Uj3Zs}KZ{#)c`Q~Y*6x+YF`x6-BMxtpNYXXVmlkKb+A)F_2yA)0!`TFvk8|Bl z-zmvxbD+QoPx*}P5h34nhVLi_BTOg?6OCZh2%XUgJ17zsPY;`kaPQw?a=GrBhvEZE zI^s^g79f)Z9)9VHJGB3AMQ%;oN36vNBO4<#~E^XrDt?qOaAF5~-@85ff z&-d!315m@*%JYfPHc+|y?Z$)3fARJ$H*}0cm)GWl`+uK?BKM-2WT^c#VhB+rPI&ow 
z4m>bRF0g1~UOKTEWi|4bFIDAZCh(VbdKFo0Qx86hk#cez*e?+(JM7oU^=Vy75vT`W4|t^OM;mD9oMzcHvz*@}LBRUF3X S_`sZZ#0?cK5th Gen Intel® Xeon® Scalable processors
6th Gen Intel® Xeon® Scalable processors
3rd Gen Intel® Xeon® Scalable processors and Intel® Gaudi® 2 AI Accelerator
4th Gen Intel® Xeon® Scalable processors and Intel® Gaudi® 2 AI Accelerator
6th Gen Intel® Xeon® Scalable processors and Intel® Gaudi® 3 AI Accelerator| -| Gaudi Firmware Version | 1.20.0 or newer +| Hardware Platforms | 4th Gen Intel® Xeon® Scalable processors
5th Gen Intel® Xeon® Scalable processors
6th Gen Intel® Xeon® Scalable processors
3rd Gen Intel® Xeon® Scalable processors and Intel® AI Accelerator
4th Gen Intel® Xeon® Scalable processors and Intel® AI Accelerator
6th Gen Intel® Xeon® Scalable processors and Intel® AI Accelerator| +| Intel® AI Accelerator Firmware Version | 1.20.0 or newer ->**Note**: For Intel® Gaudi AI Accelerators, there are additional steps to ensure the node(s) meet the requirements. Follow the [Gaudi prerequisites guide](./gaudi-prerequisites.md) before proceeding. For Intel® Xeon® Scalable processors, no additional setup is needed. +>**Note**: For Intel® AI Accelerators, there are additional steps to ensure the node(s) meet the requirements. Follow the [Intel® AI Accelerator - prerequisites guide](./gaudi-prerequisites.md) before proceeding. For Intel® Xeon® Scalable processors, no additional setup is needed. All steps need to be completed before deploying Enterprise Inference. By the end of the prerequisites, the following artifacts should be ready: 1. SSH key pair @@ -233,4 +233,4 @@ To uninstall the Ceph storage cluster: **Note:** Adjust these values based on your system requirements. ## Next Steps -After completing the prerequisites, proceed to the [Deployment Options](./README.md#deployment-options) section of the guide to set up Enterprise Inference. \ No newline at end of file +After completing the prerequisites, proceed to the [Deployment Options](./README.md#deployment-options) section of the guide to set up Enterprise Inference. diff --git a/docs/single-node-deployment.md b/docs/single-node-deployment.md index 65e0bd76..823b7dbf 100644 --- a/docs/single-node-deployment.md +++ b/docs/single-node-deployment.md @@ -12,7 +12,7 @@ Before running the automation, it is recommended to complete all [prerequisites] For single-node Xeon clusters, **Keycloak** and **APISIX** are recommended. -For Gaudi or large multi-node Xeon clusters, the GenAI Gateway is well-suited. +For Intel® AI Accelerator or large multi-node Xeon clusters, the GenAI Gateway is well-suited. ## Deployment @@ -55,11 +55,11 @@ Run the command below to deploy the Llama 3.1 8B parameter model on CPU. 
```bash ./inference-stack-deploy.sh --models "21" --cpu-or-gpu "cpu" --hugging-face-token $HUGGINGFACE_TOKEN ``` -#### Intel® Gaudi® AI Accelerators +#### Intel® AI Accelerators -> **📝 Note**: If running on Intel® Gaudi® AI Accelerators, ensure firmware and drivers are up to date using the [automated setup scripts](./gaudi-prerequisites.md#automated-installationupgrade-process) before deployment. +> **📝 Note**: If running on Intel® AI Accelerators, ensure firmware and drivers are up to date using the [automated setup scripts](./intel-ai-accelerator-prerequisites.md#automated-installationupgrade-process) before deployment. -Run the command below to deploy the Llama 3.1 8B parameter model on Intel® Gaudi®. For Gaudi 3, set `cpu-or-gpu` to `gaudi3` instead. +Run the command below to deploy the Llama 3.1 8B parameter model on Intel® AI Accelerator. For Gaudi 3, set `cpu-or-gpu` to `gaudi3` instead. ```bash ./inference-stack-deploy.sh --models "1" --cpu-or-gpu "gpu" --hugging-face-token $HUGGINGFACE_TOKEN ``` @@ -88,7 +88,7 @@ To test on CPU only. Note `vllmcpu` is appended to the URL. 
curl -k https://${BASE_URL}/Llama-3.1-8B-Instruct-vllmcpu/v1/completions -X POST -d '{"model": "meta-llama/Llama-3.1-8B-Instruct", "prompt": "What is Deep Learning?", "max_tokens": 50, "temperature": 0}' -H 'Content-Type: application/json' -H "Authorization: Bearer $TOKEN" ``` -To test on Intel® Gaudi® AI Accelerators: +To test on Intel® AI Accelerators: ```bash curl -k https://${BASE_URL}/Llama-3.1-8B-Instruct/v1/completions -X POST -d '{"model": "meta-llama/Llama-3.1-8B-Instruct", "prompt": "What is Deep Learning?", "max_tokens": 50, "temperature": 0}' -H 'Content-Type: application/json' -H "Authorization: Bearer $TOKEN" ``` diff --git a/plugins/agenticai/playbooks/deploy-agenticai-plugin.yml b/plugins/agenticai/playbooks/deploy-agenticai-plugin.yml index fd3d8d26..e4ba638a 100644 --- a/plugins/agenticai/playbooks/deploy-agenticai-plugin.yml +++ b/plugins/agenticai/playbooks/deploy-agenticai-plugin.yml @@ -145,34 +145,31 @@ run_once: true ########################################################################### - # Root Ingress (SUBDOMAIN MODE) + # Root HTTPRoute (SUBDOMAIN MODE) ########################################################################### - - name: Create Flowise Root Ingress (Subdomain) + - name: Create Flowise Root HTTPRoute (Subdomain) kubernetes.core.k8s: state: present definition: - apiVersion: networking.k8s.io/v1 - kind: Ingress + apiVersion: gateway.networking.k8s.io/v1 + kind: HTTPRoute metadata: name: flowise-root namespace: "{{ agenticai_namespace }}" spec: - ingressClassName: "{{ agenticai_ingress_class }}" - tls: - - secretName: "flowise-{{ cluster_url }}" - hosts: - - "flowise-{{ cluster_url }}" + parentRefs: + - name: enterprise-edge-gateway + namespace: envoy-gateway-system + hostnames: + - "flowise-{{ cluster_url }}" rules: - - host: "flowise-{{ cluster_url }}" - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: flowise - port: - number: 3000 + - matches: + - path: + type: PathPrefix + value: / + 
backendRefs: + - name: flowise + port: 3000 when: kubernetes_platform != 'openshift' and agenticai_ingress_enabled | bool run_once: true