Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ env:
IMAGE_REGISTRY: quay.io
REGISTRY_ORG: redhat-ai-dev
CONTAINER_FILE: Containerfile
TAG_BASE: library-0.4.3
TAG_BASE: library-0.5.2

jobs:
build-and-push:
Expand Down
13 changes: 4 additions & 9 deletions .github/workflows/validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,18 @@ name: Validation Checks

on:
pull_request:
branches: [ main ]
branches: [ main, dev ]

jobs:
validate-prompt:
validate-upstream-config:
runs-on: ubuntu-latest
permissions:
contents: read
defaults:
run:
working-directory: ./scripts/python-scripts
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Setup Environment
run: pip3 install -r requirements.txt
- name: Validate prompt
run: python3 sync.py -t validate
- name: Validate upstream synced content
run: make validate-upstream-config
validate-yaml:
runs-on: ubuntu-latest
permissions:
Expand Down
9 changes: 4 additions & 5 deletions Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG TAG="dev-20260226-ca21850"
FROM quay.io/lightspeed-core/lightspeed-stack:${TAG} AS builder
FROM quay.io/lightspeed-core/lightspeed-stack:dev-20260316-b2f54cf AS builder

USER root

Expand Down Expand Up @@ -58,7 +57,7 @@ COPY --from=builder --chown=1001:1001 /app-root /app-root
# checked by konflux
COPY --from=builder --chown=1001:1001 /app-root/LICENSE /licenses/

COPY --chown=1001:1001 ./run.yaml ./lightspeed-stack.yaml ./
COPY --chown=1001:1001 ./config.yaml ./lightspeed-stack.yaml ./
COPY --chown=1001:1001 ./config/ ./config/
COPY --chown=1001:1001 --chmod=755 ./scripts/entrypoint.sh ./

Expand All @@ -78,8 +77,8 @@ LABEL io.k8s.description="Red Hat Developer Hub Lightspeed Llama Stack"
LABEL io.k8s.display-name="Red Hat Developer Hub Lightspeed Llama Stack"
LABEL io.openshift.tags="developerhub,rhdh,lightspeed,ai,assistant,llama"
LABEL name=rhdh-lightspeed-llama-stack
LABEL release=1.8
LABEL release=1.10
LABEL url="https://github.com/redhat-ai-dev/llama-stack"
LABEL vendor="Red Hat, Inc."
LABEL version=0.1.1
LABEL summary="Red Hat Developer Hub Lightspeed Llama Stack"
LABEL summary="Red Hat Developer Hub Lightspeed Llama Stack"
9 changes: 8 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
RAG_CONTENT_IMAGE ?= quay.io/redhat-ai-dev/rag-content:release-1.9-lls-0.4.3
RAG_CONTENT_IMAGE ?= quay.io/redhat-ai-dev/rag-content:release-1.9-lls-0.4.3-d0444cd9b57222ec9bdbaa36354337480a2ecf97
VENV := $(CURDIR)/scripts/python-scripts/.venv
PYTHON := $(VENV)/bin/python3
PIP := $(VENV)/bin/pip3
Expand Down Expand Up @@ -55,3 +55,10 @@ validate-prompt-templates: $(VENV)/bin/activate

update-prompt-templates: $(VENV)/bin/activate
$(call run_sync,update)

.PHONY: sync-upstream-config validate-upstream-config
sync-upstream-config: ## Sync upstream config and image pins from lightspeed-configs
bash ./scripts/sync/upstream-config.sh update

validate-upstream-config: ## Validate synced upstream config and image pins have not drifted
bash ./scripts/sync/upstream-config.sh validate
47 changes: 24 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Redhat-AI-Dev Llama Stack

[![Apache2.0 License](https://img.shields.io/badge/license-Apache2.0-brightgreen.svg)](LICENSE)
[![Llama Stack Version](https://img.shields.io/badge/llama_stack-v0.4.3-blue)](https://llamastack.github.io/docs/v0.4.3)
[![Llama Stack Version](https://img.shields.io/badge/llama_stack-v0.5.2-blue)](https://llamastack.github.io/docs)
[![Python Version](https://img.shields.io/badge/python-3.12-blue)](https://www.python.org/downloads/release/python-3120/)

- [Image Availability](#image-availability)
Expand All @@ -28,7 +28,7 @@
## Developer Release (Library Mode)

```
quay.io/redhat-ai-dev/llama-stack:library-0.4.3
quay.io/redhat-ai-dev/llama-stack:library-0.5.2
```

# Usage
Expand Down Expand Up @@ -97,7 +97,7 @@ For information about these variables see: https://llamastack.github.io/v0.2.18/

## Configuring RAG

The `run.yaml` file that is included in the container image has a RAG tool enabled. In order for this tool to have the necessary reference content, you need to run:
The `config.yaml` file that is included in the container image has a RAG tool enabled. In order for this tool to have the necessary reference content, you need to run:

```
make get-rag
Expand All @@ -107,23 +107,29 @@ This will fetch the necessary reference content and add it to your local project

## Configuring Safety Guards

> [!IMPORTANT]
> If you want to omit the safety guards for development purposes, you can use [run-no-guard.yaml](./run-no-guard.yaml) instead.
Safety guards are configured through environment variables in `env/values.env`. To disable safety guards, leave `ENABLE_SAFETY=` empty.

To enable safety guards, set the following:

```env
ENABLE_SAFETY=true
SAFETY_MODEL=<llama-guard-model-name>
SAFETY_URL=<url-of-safety-model-server>/v1
SAFETY_API_KEY=<api-key-if-required>
```

In the main [run.yaml](./run.yaml) file, Llama Guard is enabled by default. In order to avoid issues during startup you will need to ensure you have an instance of Llama Guard running.
- `SAFETY_MODEL`: The name of the Llama Guard model being used. Defaults to `llama-guard3:8b`.
- `SAFETY_URL`: The URL where the safety model is available. For local container runs, use `http://host.containers.internal:11434/v1`.
- `SAFETY_API_KEY`: The API key required for access to the safety model. Not required for local deployments.

You can do so by running the following to start an Ollama container with Llama Guard:
You will also need an instance of Llama Guard running. You can start one locally with Ollama:

```sh
podman run -d --name ollama -p 11434:11434 docker.io/ollama/ollama:latest
podman exec ollama ollama pull llama-guard3:8b
```
**Note:** Ensure the Ollama container is started and the model is ready before trying to query if deploying the containers manually.

You will need to set the following environment variables to ensure functionality:
- `SAFETY_MODEL`: The name of the Llama Guard model being used. Defaults to `llama-guard3:8b`
- `SAFETY_URL`: The URL where the container is available. Defaults to `http://host.docker.internal:11434/v1`
- `SAFETY_API_KEY`: The API key required for access to the safety model. Not required for local.
**Note:** Ensure the Ollama container is started and the model is ready before trying to query if deploying the containers manually.

# Running Locally

Expand All @@ -139,19 +145,14 @@ vector_stores:
vector_store_id: vs_3d47e06c-ac95-49b6-9833-d5e6dd7252dd
```

You will need the `vector_store_id` value. After copying that value you will need to update `run.yaml` and `run-no-guard.yaml`. The `vector_store_id` you copied will replace the `vector_store_id` in those files.

## Running With Safety Guard
You will need the `vector_store_id` value. After copying that value you will need to update `config.yaml`. The `vector_store_id` you copied will replace the `vector_store_id` in that file.

```
podman run -it -p 8080:8080 --env-file ./env/values.env -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:library-0.4.3
```
## Running the Container

## Running Without Safety Guard
If you want to enable safety guards, see [Configuring Safety Guards](#configuring-safety-guards) before running.

You can override the built-in `run.yaml` file by mounting the `run-no-guard.yaml` file to the same path.
```
podman run -it -p 8080:8080 --env-file ./env/values.env -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z -v ./run-no-guard.yaml:/app-root/run.yaml:Z quay.io/redhat-ai-dev/llama-stack:library-0.4.3
podman run -it -p 8080:8080 --env-file ./env/values.env -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:library-0.4.3
```

## Running With Host Network
Expand All @@ -167,9 +168,9 @@ To deploy on a cluster see [DEPLOYMENT.md](./docs/DEPLOYMENT.md).
| Command | Description |
| ---- | ----|
| **get-rag** | Gets the RAG data and the embeddings model from the rag-content image registry to your local project directory |
| **sync-upstream-config** | Syncs `config.yaml`, `env/default-values.env`, `lightspeed-stack.yaml`, and image pins from upstream |
| **validate-upstream-config** | Validates that synced upstream files and image pins have not drifted |
| **update-question-validation** | Updates the question validation content in `providers.d` |
| **validate-prompt-templates** | Validates prompt values in run.yaml. |
| **update-prompt-templates** | Updates the prompt values in run.yaml. |

# Contributing

Expand Down
44 changes: 26 additions & 18 deletions run.yaml → config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
version: 2
image_name: redhat-ai-dev-llama-stack
version: 3
distro_name: developer-lightspeed-lls-0.5.x
apis:
- agents
- inference
Expand Down Expand Up @@ -43,7 +43,9 @@ providers:
base_url: ${env.VLLM_URL:=}
api_token: ${env.VLLM_API_KEY:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
network:
tls:
verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.ENABLE_OLLAMA:+ollama}
provider_type: remote::ollama
config:
Expand All @@ -56,20 +58,15 @@ providers:
provider_type: remote::vertexai
config:
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: safety-guard
provider_type: remote::vllm
config:
base_url: ${env.SAFETY_URL:=http://host.docker.internal:11434/v1}
api_token: ${env.SAFETY_API_KEY:=token}
location: ${env.VERTEX_AI_LOCATION:=global}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
- provider_id: ${env.ENABLE_SAFETY:+safety-guard}
provider_type: remote::vllm
config:
excluded_categories: []
base_url: ${env.SAFETY_URL:=http://ollama:11434/v1}
api_token: ${env.SAFETY_API_KEY:=}
tool_runtime:
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
Expand All @@ -92,6 +89,11 @@ providers:
metadata_store:
table_name: files_metadata
backend: sql_default
safety:
- provider_id: ${env.ENABLE_SAFETY:+llama-guard}
provider_type: inline::llama-guard
config:
excluded_categories: []
storage:
backends:
kv_default:
Expand Down Expand Up @@ -124,14 +126,10 @@ registered_resources:
provider_id: sentence-transformers
provider_model_id: /rag-content/embeddings_model
- model_id: ${env.SAFETY_MODEL:=llama-guard3:8b}
provider_id: safety-guard
provider_id: ${env.ENABLE_SAFETY:+safety-guard}
provider_model_id: ${env.SAFETY_MODEL:=llama-guard3:8b}
model_type: llm
metadata: {}
shields:
- shield_id: llama-guard-shield
provider_id: llama-guard
provider_shield_id: safety-guard/${env.SAFETY_MODEL:=llama-guard3:8b}
tool_groups:
- provider_id: rag-runtime
toolgroup_id: builtin::rag
Expand All @@ -140,7 +138,17 @@ registered_resources:
embedding_model: sentence-transformers//rag-content/embeddings_model
embedding_dimension: 768
provider_id: rhdh-docs
shields:
- shield_id: llama-guard-shield
provider_id: ${env.ENABLE_SAFETY:+llama-guard}
provider_shield_id: safety-guard/${env.SAFETY_MODEL:=llama-guard3:8b}
vector_stores:
annotation_prompt_params:
enable_annotations: true
annotation_instruction_template: >
When appropriate, cite sources at the end of sentences using doc_url and doc_title format.
Citing sources is not always required because citations are handled externally.
Never include any citation that is in the form '<| file-id |>'.
default_provider_id: rhdh-docs
default_embedding_model:
provider_id: sentence-transformers
Expand Down
16 changes: 11 additions & 5 deletions env/default-values.env
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# Note: You only need to set the variables you normally would with '-e' flags.
# You do not need to set them all if they will go unused.

# Service Images
LIGHTSPEED_CORE_IMAGE=quay.io/lightspeed-core/lightspeed-stack:dev-20260316-b2f54cf
LLAMA_STACK_IMAGE=quay.io/opendatahub/llama-stack:07a97331b3a8831e76ec15c833d2dcf6fa0a34c9
RAG_CONTENT_IMAGE=quay.io/redhat-ai-dev/rag-content:release-1.9-lls-0.4.3-d0444cd9b57222ec9bdbaa36354337480a2ecf97
OLLAMA_IMAGE=docker.io/ollama/ollama:0.18.2

# Enable Inference Providers
## Set any providers you want enabled to 'true'
## E.g. ENABLE_VLLM=true
Expand All @@ -10,6 +16,7 @@ ENABLE_VLLM=
ENABLE_VERTEX_AI=
ENABLE_OPENAI=
ENABLE_OLLAMA=
ENABLE_SAFETY=

# vLLM Inference Settings
VLLM_URL=
Expand All @@ -31,17 +38,16 @@ OLLAMA_URL=

# Question Validation Safety Shield Settings
## Ensure VALIDATION_PROVIDER is one of your enabled Inference Providers
## Only required for Llama Stack configs that use the Lightspeed Core provider
## E.g. VALIDATION_PROVIDER=vllm if ENABLE_VLLM=true
VALIDATION_PROVIDER=
VALIDATION_MODEL_NAME=

# Llama Guard Settings
## Defaults to llama-guard3:8b if not set
SAFETY_MODEL=
## Defaults to http://host.docker.internal:11434/v1 if not set
SAFETY_URL=
SAFETY_MODEL=llama-guard3:8b
SAFETY_URL=http://ollama:11434/v1
## Only required for non-local environments with an API key
SAFETY_API_KEY=

# Other
LLAMA_STACK_LOGGING=
LLAMA_STACK_LOGGING=
2 changes: 1 addition & 1 deletion lightspeed-stack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ service:
access_log: true
llama_stack:
use_as_library_client: true
library_client_config_path: /app-root/run.yaml
library_client_config_path: /app-root/config.yaml
user_data_collection:
feedback_enabled: false
feedback_storage: '/tmp/data/feedback'
Expand Down
Loading
Loading