From c4cdc1066133d99d5ff8b71524a83074ea7d32f2 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Mon, 19 Jan 2026 13:20:59 -0500 Subject: [PATCH] update deployment doc Signed-off-by: Jordan Dubrick --- docs/DEPLOYMENT.md | 169 +++++++++++++++++++++++++++++++-------------- 1 file changed, 118 insertions(+), 51 deletions(-) diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 0537513..c4295a4 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -6,9 +6,80 @@ Below you can find documentation related to deploying `Lightspeed Core` and `Lla - Red Hat Developer Hub -## Adding Containers +## Secrets -We need to add `Lightspeed Core` and `Llama Stack` to our `RHDH` Pod by adding the following to the `Backstage` CR: +You will need a Secret (can be multiple) that can contain the following variables; which of these are set is dependent on what you want enabled. See [README.md](../README.md) for more. + +> [!IMPORTANT] +> You only need to set the variables for the inference providers you want to enable. Leave unused provider variables empty. + +**Note:** `SAFETY_MODEL` and `SAFETY_URL` are preset based on the Ollama usage in the Deployment [below](#updating-rhdh-backstage-deployment). 
+ +```yaml +apiVersion: v1 +kind: Secret +metadata: + name: llama-stack-secrets +type: Opaque +stringData: + ENABLE_VLLM: "" + ENABLE_VERTEX_AI: "" + ENABLE_OPENAI: "" + ENABLE_OLLAMA: "" + VLLM_URL: "" + VLLM_API_KEY: "" + VLLM_MAX_TOKENS: "" + VLLM_TLS_VERIFY: "" + OPENAI_API_KEY: "" + VERTEX_AI_PROJECT: "" + VERTEX_AI_LOCATION: "" + GOOGLE_APPLICATION_CREDENTIALS: "" + OLLAMA_URL: "" + SAFETY_MODEL: "llama-guard3:8b" + SAFETY_URL: "http://localhost:11434/v1" + SAFETY_API_KEY: "" +``` + +## Config Maps + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: lightspeed-stack +data: + lightspeed-stack.yaml: | + name: Lightspeed Core Service (LCS) + service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true + llama_stack: + use_as_library_client: false + url: http://localhost:8321 + user_data_collection: + feedback_enabled: false + feedback_storage: '/tmp/data/feedback' + authentication: + module: 'noop' + conversation_cache: + type: 'sqlite' + sqlite: + db_path: '/tmp/cache.db' +``` + +## Updating RHDH (Backstage) Deployment + +The deployment includes: +- **Init Containers**: + - Copies RAG content (embeddings model and vector database) to a shared volume + - Pulls the Llama Guard model for safety guards +- **Ollama Container**: Runs the Llama Guard model for safety +- **Llama Stack Container**: Runs the Llama Stack server with RAG capabilities +- **Lightspeed Core Container**: Provides the Lightspeed Core service ```yaml spec: @@ -17,19 +88,46 @@ spec: spec: template: spec: + initContainers: + - name: rag-content-init + image: 'quay.io/redhat-ai-dev/rag-content:experimental-release-1.8-lcs' + command: + - /bin/sh + - -c + - | + cp -r /rag/embeddings_model /rag-content/ + cp -r /rag/vector_db /rag-content/ + volumeMounts: + - mountPath: /rag-content + name: rag-content + - name: ollama-model-init + image: 'docker.io/ollama/ollama:latest' + command: + - /bin/sh + - -c + - | + ollama serve & + sleep 5 + ollama 
pull llama-guard3:8b + volumeMounts: + - mountPath: /root/.ollama + name: ollama-models containers: - - env: - - name: VLLM_URL - value: - - name: VLLM_API_KEY - value: + - name: ollama + image: 'docker.io/ollama/ollama:latest' + volumeMounts: + - mountPath: /root/.ollama + name: ollama-models + - name: llama-stack image: 'quay.io/redhat-ai-dev/llama-stack:latest' - name: llama-stack + envFrom: + - secretRef: + name: llama-stack-secrets volumeMounts: - - mountPath: /app-root/.llama - name: shared-storage - - image: 'quay.io/lightspeed-core/lightspeed-stack:dev-latest' - name: lightspeed-core + - mountPath: /rag-content + name: rag-content + - name: lightspeed-core + image: 'quay.io/lightspeed-core/lightspeed-stack:dev-latest' volumeMounts: - mountPath: /app-root/lightspeed-stack.yaml name: lightspeed-stack @@ -37,44 +135,13 @@ spec: - mountPath: /tmp/data/feedback name: shared-storage volumes: - - configMap: + - name: rag-content + emptyDir: {} + - name: ollama-models + emptyDir: {} + - name: lightspeed-stack + configMap: name: lightspeed-stack - name: lightspeed-stack - - emptyDir: {} - name: shared-storage -``` - -Also ensure that `lightspeed-stack` is created as a `Config Map` in the namespace: - -```yaml -name: Lightspeed Core Service (LCS) -service: - host: 0.0.0.0 - port: 8080 - auth_enabled: false - workers: 1 - color_log: true - access_log: true -llama_stack: - use_as_library_client: false - url: http://localhost:8321 -user_data_collection: - feedback_enabled: false - feedback_storage: "/tmp/data/feedback" -authentication: - module: "noop" -``` - -## Troubleshooting -> [!WARNING] -> Currently there is not full support for RHDH + Lightspeed Core as some endpoints are missing and/or been changed. Please be aware that some functionality may differ than what you are used to with Road Core + RHDH while these changes are being made. 
- -In the current state you need to add the following to your `RHDH` configuration file to enable Lightspeed: - -```yaml -lightspeed: - servers: - - id: vllm - url: - token: + - name: shared-storage + emptyDir: {} ```