From c4cdc1066133d99d5ff8b71524a83074ea7d32f2 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Mon, 19 Jan 2026 13:20:59 -0500 Subject: [PATCH] update deployment doc Signed-off-by: Jordan Dubrick --- docs/DEPLOYMENT.md | 169 +++++++++++++++++++++++++++++++-------------- 1 file changed, 118 insertions(+), 51 deletions(-) diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 0537513..c4295a4 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -6,9 +6,80 @@ Below you can find documentation related to deploying `Lightspeed Core` and `Lla - Red Hat Developer Hub -## Adding Containers +## Secrets -We need to add `Lightspeed Core` and `Llama Stack` to our `RHDH` Pod by adding the following to the `Backstage` CR: +You will need a Secret (can be multiple) that can contain the following variables; which of these are set is dependent on what you want enabled. See [README.md](../README.md) for more. + +> [!IMPORTANT] +> You only need to set the variables for the inference providers you want to enable. Leave unused provider variables empty. + +**Note:** `SAFETY_MODEL` and `SAFETY_URL` are preset based on the Ollama usage in the Deployment [below](#updating-rhdh-backstage-deployment). 
+ +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: llama-stack-secrets +type: Opaque +stringData: + ENABLE_VLLM: "" + ENABLE_VERTEX_AI: "" + ENABLE_OPENAI: "" + ENABLE_OLLAMA: "" + VLLM_URL: "" + VLLM_API_KEY: "" + VLLM_MAX_TOKENS: "" + VLLM_TLS_VERIFY: "" + OPENAI_API_KEY: "" + VERTEX_AI_PROJECT: "" + VERTEX_AI_LOCATION: "" + GOOGLE_APPLICATION_CREDENTIALS: "" + OLLAMA_URL: "" + SAFETY_MODEL: "llama-guard3:8b" + SAFETY_URL: "http://localhost:11434/v1" + SAFETY_API_KEY: "" +``` + +## Config Maps + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: lightspeed-stack +data: + lightspeed-stack.yaml: | + name: Lightspeed Core Service (LCS) + service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true + llama_stack: + use_as_library_client: false + url: http://localhost:8321 + user_data_collection: + feedback_enabled: false + feedback_storage: '/tmp/data/feedback' + authentication: + module: 'noop' + conversation_cache: + type: 'sqlite' + sqlite: + db_path: '/tmp/cache.db' +``` + +## Updating RHDH (Backstage) Deployment + +The deployment includes: +- **Init Containers**: + - Copies RAG content (embeddings model and vector database) to a shared volume + - Pulls the Llama Guard model for safety guards +- **Ollama Container**: Runs the Llama Guard model for safety +- **Llama Stack Container**: Runs the Llama Stack server with RAG capabilities +- **Lightspeed Core Container**: Provides the Lightspeed Core service ```yaml spec: @@ -17,19 +88,46 @@ spec: spec: template: spec: + initContainers: + - name: rag-content-init + image: 'quay.io/redhat-ai-dev/rag-content:experimental-release-1.8-lcs' + command: + - /bin/sh + - -c + - | + cp -r /rag/embeddings_model /rag-content/ + cp -r /rag/vector_db /rag-content/ + volumeMounts: + - mountPath: /rag-content + name: rag-content + - name: ollama-model-init + image: 'docker.io/ollama/ollama:latest' + command: + - /bin/sh + - -c + - | + ollama serve & + sleep 5 + ollama 
pull llama-guard3:8b + volumeMounts: + - mountPath: /root/.ollama + name: ollama-models containers: - - env: - - name: VLLM_URL - value: - - name: VLLM_API_KEY - value: + - name: ollama + image: 'docker.io/ollama/ollama:latest' + volumeMounts: + - mountPath: /root/.ollama + name: ollama-models + - name: llama-stack image: 'quay.io/redhat-ai-dev/llama-stack:latest' - name: llama-stack + envFrom: + - secretRef: + name: llama-stack-secrets volumeMounts: - - mountPath: /app-root/.llama - name: shared-storage - - image: 'quay.io/lightspeed-core/lightspeed-stack:dev-latest' - name: lightspeed-core + - mountPath: /rag-content + name: rag-content + - name: lightspeed-core + image: 'quay.io/lightspeed-core/lightspeed-stack:dev-latest' volumeMounts: - mountPath: /app-root/lightspeed-stack.yaml name: lightspeed-stack @@ -37,44 +135,13 @@ spec: - mountPath: /tmp/data/feedback name: shared-storage volumes: - - configMap: + - name: rag-content + emptyDir: {} + - name: ollama-models + emptyDir: {} + - name: lightspeed-stack + configMap: name: lightspeed-stack - name: lightspeed-stack - - emptyDir: {} - name: shared-storage -``` - -Also ensure that `lightspeed-stack` is created as a `Config Map` in the namespace: - -```yaml -name: Lightspeed Core Service (LCS) -service: - host: 0.0.0.0 - port: 8080 - auth_enabled: false - workers: 1 - color_log: true - access_log: true -llama_stack: - use_as_library_client: false - url: http://localhost:8321 -user_data_collection: - feedback_enabled: false - feedback_storage: "/tmp/data/feedback" -authentication: - module: "noop" -``` - -## Troubleshooting -> [!WARNING] -> Currently there is not full support for RHDH + Lightspeed Core as some endpoints are missing and/or been changed. Please be aware that some functionality may differ than what you are used to with Road Core + RHDH while these changes are being made. 
- -In the current state you need to add the following to your `RHDH` configuration file to enable Lightspeed: - -```yaml -lightspeed: - servers: - - id: vllm - url: - token: + - name: shared-storage + emptyDir: {} ```