From 0b602a1ff171006eb0a09053db6b705d7b2320e3 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Wed, 18 Mar 2026 15:04:01 -0400 Subject: [PATCH 1/9] remove old config files for lls Signed-off-by: Jordan Dubrick --- run-no-guard.yaml | 136 ---------------------------------------- run.yaml | 155 ---------------------------------------------- 2 files changed, 291 deletions(-) delete mode 100644 run-no-guard.yaml delete mode 100644 run.yaml diff --git a/run-no-guard.yaml b/run-no-guard.yaml deleted file mode 100644 index c07c8a8..0000000 --- a/run-no-guard.yaml +++ /dev/null @@ -1,136 +0,0 @@ -# -# -# Copyright Red Hat -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-version: 2 -image_name: redhat-ai-dev-llama-stack-no-guard -apis: - - agents - - inference - - safety - - tool_runtime - - vector_io - - files -container_image: -external_providers_dir: -providers: - agents: - - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - provider_id: meta-reference - provider_type: inline::meta-reference - inference: - - provider_id: ${env.ENABLE_VLLM:+vllm} - provider_type: remote::vllm - config: - base_url: ${env.VLLM_URL:=} - api_token: ${env.VLLM_API_KEY:=} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: ${env.ENABLE_OLLAMA:+ollama} - provider_type: remote::ollama - config: - base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} - - provider_id: ${env.ENABLE_OPENAI:+openai} - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY:=} - - provider_id: ${env.ENABLE_VERTEX_AI:+vertexai} - provider_type: remote::vertexai - config: - project: ${env.VERTEX_AI_PROJECT:=} - location: ${env.VERTEX_AI_LOCATION:=us-central1} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - tool_runtime: - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - vector_io: - - provider_id: rhdh-docs - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_rag - files: - - provider_id: localfs - provider_type: inline::localfs - config: - storage_dir: /tmp/llama-stack-files - metadata_store: - table_name: files_metadata - backend: sql_default -storage: - backends: - kv_default: - type: kv_sqlite - db_path: /tmp/kvstore.db - sql_default: - type: sql_sqlite - db_path: /tmp/sql_store.db - kv_rag: - type: kv_sqlite - db_path: /rag-content/vector_db/rhdh_product_docs/1.9/faiss_store.db - stores: - 
metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: - - model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: /rag-content/embeddings_model - tool_groups: - - provider_id: rag-runtime - toolgroup_id: builtin::rag - vector_stores: - - vector_store_id: vs_3d4808b2-5f00-4de6-baa3-c86752cf827c # see readme for this value - embedding_model: sentence-transformers//rag-content/embeddings_model - embedding_dimension: 768 - provider_id: rhdh-docs -vector_stores: - default_provider_id: rhdh-docs - default_embedding_model: - provider_id: sentence-transformers - model_id: /rag-content/embeddings_model -server: - auth: - host: - port: 8321 - quota: - tls_cafile: - tls_certfile: - tls_keyfile: diff --git a/run.yaml b/run.yaml deleted file mode 100644 index 160caa9..0000000 --- a/run.yaml +++ /dev/null @@ -1,155 +0,0 @@ -# -# -# Copyright Red Hat -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-version: 2 -image_name: redhat-ai-dev-llama-stack -apis: - - agents - - inference - - safety - - tool_runtime - - vector_io - - files -container_image: -external_providers_dir: -providers: - agents: - - config: - persistence: - agent_state: - namespace: agents - backend: kv_default - responses: - table_name: responses - backend: sql_default - provider_id: meta-reference - provider_type: inline::meta-reference - inference: - - provider_id: ${env.ENABLE_VLLM:+vllm} - provider_type: remote::vllm - config: - base_url: ${env.VLLM_URL:=} - api_token: ${env.VLLM_API_KEY:=} - max_tokens: ${env.VLLM_MAX_TOKENS:=4096} - tls_verify: ${env.VLLM_TLS_VERIFY:=true} - - provider_id: ${env.ENABLE_OLLAMA:+ollama} - provider_type: remote::ollama - config: - base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} - - provider_id: ${env.ENABLE_OPENAI:+openai} - provider_type: remote::openai - config: - api_key: ${env.OPENAI_API_KEY:=} - - provider_id: ${env.ENABLE_VERTEX_AI:+vertexai} - provider_type: remote::vertexai - config: - project: ${env.VERTEX_AI_PROJECT:=} - location: ${env.VERTEX_AI_LOCATION:=us-central1} - - provider_id: safety-guard - provider_type: remote::vllm - config: - base_url: ${env.SAFETY_URL:=http://host.docker.internal:11434/v1} - api_token: ${env.SAFETY_API_KEY:=token} - - provider_id: sentence-transformers - provider_type: inline::sentence-transformers - config: {} - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: - excluded_categories: [] - tool_runtime: - - provider_id: model-context-protocol - provider_type: remote::model-context-protocol - config: {} - - provider_id: rag-runtime - provider_type: inline::rag-runtime - config: {} - vector_io: - - provider_id: rhdh-docs - provider_type: inline::faiss - config: - persistence: - namespace: vector_io::faiss - backend: kv_rag - files: - - provider_id: localfs - provider_type: inline::localfs - config: - storage_dir: /tmp/llama-stack-files - metadata_store: - table_name: 
files_metadata - backend: sql_default -storage: - backends: - kv_default: - type: kv_sqlite - db_path: /tmp/kvstore.db - sql_default: - type: sql_sqlite - db_path: /tmp/sql_store.db - kv_rag: - type: kv_sqlite - db_path: /rag-content/vector_db/rhdh_product_docs/1.9/faiss_store.db - stores: - metadata: - namespace: registry - backend: kv_default - inference: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 - conversations: - table_name: openai_conversations - backend: sql_default -registered_resources: - models: - - model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: /rag-content/embeddings_model - - model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} - provider_id: safety-guard - provider_model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} - model_type: llm - metadata: {} - shields: - - shield_id: llama-guard-shield - provider_id: llama-guard - provider_shield_id: safety-guard/${env.SAFETY_MODEL:=llama-guard3:8b} - tool_groups: - - provider_id: rag-runtime - toolgroup_id: builtin::rag - vector_stores: - - vector_store_id: vs_3d4808b2-5f00-4de6-baa3-c86752cf827c # see readme for this value - embedding_model: sentence-transformers//rag-content/embeddings_model - embedding_dimension: 768 - provider_id: rhdh-docs -vector_stores: - default_provider_id: rhdh-docs - default_embedding_model: - provider_id: sentence-transformers - model_id: /rag-content/embeddings_model -server: - auth: - host: - port: 8321 - quota: - tls_cafile: - tls_certfile: - tls_keyfile: From 8299882d48d687c8fa805990ba9dc1cc553e43a3 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Wed, 18 Mar 2026 15:04:29 -0400 Subject: [PATCH 2/9] add new sync functionality for config files Signed-off-by: Jordan Dubrick --- Makefile | 7 ++ scripts/sync/upstream-config.sh | 144 ++++++++++++++++++++++++++++++++ 2 files changed, 151 insertions(+) create mode 
100755 scripts/sync/upstream-config.sh diff --git a/Makefile b/Makefile index 120fca0..9d6ea84 100644 --- a/Makefile +++ b/Makefile @@ -55,3 +55,10 @@ validate-prompt-templates: $(VENV)/bin/activate update-prompt-templates: $(VENV)/bin/activate $(call run_sync,update) + +.PHONY: sync-upstream-config validate-upstream-config +sync-upstream-config: ## Sync upstream config and image pins from lightspeed-configs + bash ./scripts/sync/upstream-config.sh update + +validate-upstream-config: ## Validate synced upstream config and image pins have not drifted + bash ./scripts/sync/upstream-config.sh validate diff --git a/scripts/sync/upstream-config.sh b/scripts/sync/upstream-config.sh new file mode 100755 index 0000000..c9440c9 --- /dev/null +++ b/scripts/sync/upstream-config.sh @@ -0,0 +1,144 @@ +#!/usr/bin/env bash + +set -euo pipefail + +MODE="${1:-}" + +if [[ "${MODE}" != "update" && "${MODE}" != "validate" ]]; then + echo "Usage: $0 <update|validate>" >&2 + exit 2 +fi + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" + +UPSTREAM_BASE="https://raw.githubusercontent.com/redhat-ai-dev/lightspeed-configs/main" +CONFIG_URL="${UPSTREAM_BASE}/llama-stack-configs/config.yaml" +DEFAULT_ENV_URL="${UPSTREAM_BASE}/env/default-values.env" +LIGHTSPEED_STACK_URL="${UPSTREAM_BASE}/lightspeed-core-configs/lightspeed-stack.yaml" +IMAGES_URL="${UPSTREAM_BASE}/images.yaml" + +BUILDER_IMAGE_PATTERN='/^FROM / && $3=="AS" && $4=="builder"' +RAG_IMAGE_PATTERN='/^RAG_CONTENT_IMAGE[[:space:]]*\?=/' + +TMP_DIR="$(mktemp -d)" +trap 'rm -rf "${TMP_DIR}"' EXIT + +fetch() { + curl -fsSL "$1" -o "$2" +} + +# Extracts the image value from a section in images.yaml (e.g. 
"lightspeed-core" -> "quay.io/...:tag") +extract_image_from_images_yaml() { + local section="$1" + local file_path="$2" + awk -v section="${section}" ' + $0 ~ "^" section ":" { in_section=1; next } + in_section && $0 ~ "^[^[:space:]]" { in_section=0 } + in_section && $0 ~ "^[[:space:]]*image:[[:space:]]" { + line=$0 + sub(/^[[:space:]]*image:[[:space:]]*/, "", line) + print line + exit + } + ' "${file_path}" +} + +# Replaces the first line matching a pattern with a replacement string, fails if no match found +replace_line() { + local file_path="$1" + local pattern="$2" + local replacement="$3" + local tmp_file + tmp_file="$(mktemp)" + awk -v replacement="${replacement}" " + BEGIN { replaced=0 } + ${pattern} && replaced==0 { + print replacement + replaced=1 + next + } + { print } + END { + if (replaced==0) { + exit 2 + } + } + " "${file_path}" > "${tmp_file}" + mv "${tmp_file}" "${file_path}" +} + +extract_builder_image() { + awk "${BUILDER_IMAGE_PATTERN}"'{print $2; exit}' "$1" +} + +extract_rag_image() { + awk "${RAG_IMAGE_PATTERN}"'{line=$0; sub(/^RAG_CONTENT_IMAGE[[:space:]]*\?=[[:space:]]*/, "", line); print line; exit}' "$1" +} + +compare_or_update_file() { + local src="$1" + local dest="$2" + local label="$3" + + if [[ "${MODE}" == "update" ]]; then + cp "${src}" "${dest}" + echo "Updated ${label}" + return 0 + fi + + if ! 
cmp -s "${src}" "${dest}"; then + echo "Drift detected: ${label}" >&2 + return 1 + fi +} + +compare_or_update_value() { + local current="$1" + local expected="$2" + local label="$3" + + if [[ "${current}" != "${expected}" ]]; then + echo "Drift detected: ${label} (${current}) != ${expected}" >&2 + return 1 + fi +} + +fetch "${CONFIG_URL}" "${TMP_DIR}/config.yaml" +fetch "${DEFAULT_ENV_URL}" "${TMP_DIR}/default-values.env" +fetch "${LIGHTSPEED_STACK_URL}" "${TMP_DIR}/lightspeed-stack.yaml" +fetch "${IMAGES_URL}" "${TMP_DIR}/images.yaml" + +lightspeed_core_image="$(extract_image_from_images_yaml "lightspeed-core" "${TMP_DIR}/images.yaml")" +rag_content_image="$(extract_image_from_images_yaml "rag-content" "${TMP_DIR}/images.yaml")" + +config_path="${REPO_ROOT}/config.yaml" +default_env_path="${REPO_ROOT}/env/default-values.env" +lightspeed_stack_path="${REPO_ROOT}/lightspeed-stack.yaml" +containerfile_path="${REPO_ROOT}/Containerfile" +makefile_path="${REPO_ROOT}/Makefile" + +status=0 + +compare_or_update_file "${TMP_DIR}/config.yaml" "${config_path}" "config.yaml" || status=1 +compare_or_update_file "${TMP_DIR}/default-values.env" "${default_env_path}" "env/default-values.env" || status=1 +compare_or_update_file "${TMP_DIR}/lightspeed-stack.yaml" "${lightspeed_stack_path}" "lightspeed-stack.yaml" || status=1 + +if [[ "${MODE}" == "update" ]]; then + replace_line "${containerfile_path}" "${BUILDER_IMAGE_PATTERN}" "FROM ${lightspeed_core_image} AS builder" + echo "Updated Containerfile builder image to ${lightspeed_core_image}" + replace_line "${makefile_path}" "${RAG_IMAGE_PATTERN}" "RAG_CONTENT_IMAGE ?= ${rag_content_image}" + echo "Updated Makefile RAG_CONTENT_IMAGE to ${rag_content_image}" +else + current_builder_image="$(extract_builder_image "${containerfile_path}")" + current_rag_image="$(extract_rag_image "${makefile_path}")" + + compare_or_update_value "${current_builder_image}" "${lightspeed_core_image}" "Containerfile builder image" || status=1 + 
compare_or_update_value "${current_rag_image}" "${rag_content_image}" "Makefile RAG_CONTENT_IMAGE" || status=1 +fi + +if [[ "${MODE}" == "validate" && "${status}" -eq 0 ]]; then + echo "Upstream synced content is up to date." +fi + +exit "${status}" From d041c7956fd190b41e23d817cc07f19bec4f9140 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Wed, 18 Mar 2026 15:07:59 -0400 Subject: [PATCH 3/9] add library mode override to LCORE config file Signed-off-by: Jordan Dubrick --- scripts/sync/upstream-config.sh | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/scripts/sync/upstream-config.sh b/scripts/sync/upstream-config.sh index c9440c9..1370d68 100755 --- a/scripts/sync/upstream-config.sh +++ b/scripts/sync/upstream-config.sh @@ -28,6 +28,36 @@ fetch() { curl -fsSL "$1" -o "$2" } +# Keeps the repository-specific llama_stack library-client config, +# even though upstream lightspeed-stack.yaml uses direct URL mode. +apply_lightspeed_stack_override() { + local file_path="$1" + local tmp_file + tmp_file="$(mktemp)" + awk ' + BEGIN { in_block=0; replaced=0 } + /^llama_stack:[[:space:]]*$/ { + print "llama_stack:" + print " use_as_library_client: true" + print " library_client_config_path: /app-root/config.yaml" + in_block=1 + replaced=1 + next + } + in_block && $0 ~ /^[^[:space:]]/ { in_block=0 } + in_block { next } + { print } + END { + if (replaced==0) { + print "llama_stack:" + print " use_as_library_client: true" + print " library_client_config_path: /app-root/config.yaml" + } + } + ' "${file_path}" > "${tmp_file}" + mv "${tmp_file}" "${file_path}" +} + # Extracts the image value from a section in images.yaml (e.g. 
"lightspeed-core" -> "quay.io/...:tag") extract_image_from_images_yaml() { local section="$1" @@ -108,6 +138,7 @@ fetch "${CONFIG_URL}" "${TMP_DIR}/config.yaml" fetch "${DEFAULT_ENV_URL}" "${TMP_DIR}/default-values.env" fetch "${LIGHTSPEED_STACK_URL}" "${TMP_DIR}/lightspeed-stack.yaml" fetch "${IMAGES_URL}" "${TMP_DIR}/images.yaml" +apply_lightspeed_stack_override "${TMP_DIR}/lightspeed-stack.yaml" lightspeed_core_image="$(extract_image_from_images_yaml "lightspeed-core" "${TMP_DIR}/images.yaml")" rag_content_image="$(extract_image_from_images_yaml "rag-content" "${TMP_DIR}/images.yaml")" From 403a6330a3b3237f98405628283484c4586f91dd Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Wed, 18 Mar 2026 15:08:25 -0400 Subject: [PATCH 4/9] run sync scripts Signed-off-by: Jordan Dubrick --- Containerfile | 7 +- config.yaml | 163 +++++++++++++++++++++++++++++++++++++++++ env/default-values.env | 16 ++-- lightspeed-stack.yaml | 2 +- 4 files changed, 178 insertions(+), 10 deletions(-) create mode 100644 config.yaml diff --git a/Containerfile b/Containerfile index 3faa5f3..cd19bb8 100644 --- a/Containerfile +++ b/Containerfile @@ -13,8 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-ARG TAG="dev-20260226-ca21850" -FROM quay.io/lightspeed-core/lightspeed-stack:${TAG} AS builder +FROM quay.io/lightspeed-core/lightspeed-stack:dev-20260316-b2f54cf AS builder USER root @@ -58,7 +57,7 @@ COPY --from=builder --chown=1001:1001 /app-root /app-root # checked by konflux COPY --from=builder --chown=1001:1001 /app-root/LICENSE /licenses/ -COPY --chown=1001:1001 ./run.yaml ./lightspeed-stack.yaml ./ +COPY --chown=1001:1001 ./config.yaml ./lightspeed-stack.yaml ./ COPY --chown=1001:1001 ./config/ ./config/ COPY --chown=1001:1001 --chmod=755 ./scripts/entrypoint.sh ./ @@ -82,4 +81,4 @@ LABEL release=1.8 LABEL url="https://github.com/redhat-ai-dev/llama-stack" LABEL vendor="Red Hat, Inc." LABEL version=0.1.1 -LABEL summary="Red Hat Developer Hub Lightspeed Llama Stack" \ No newline at end of file +LABEL summary="Red Hat Developer Hub Lightspeed Llama Stack" diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..45a9bf9 --- /dev/null +++ b/config.yaml @@ -0,0 +1,163 @@ +# +# +# Copyright Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+version: 3 +distro_name: developer-lightspeed-lls-0.5.x +apis: + - agents + - inference + - safety + - tool_runtime + - vector_io + - files +container_image: +external_providers_dir: +providers: + agents: + - config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference + inference: + - provider_id: ${env.ENABLE_VLLM:+vllm} + provider_type: remote::vllm + config: + base_url: ${env.VLLM_URL:=} + api_token: ${env.VLLM_API_KEY:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + network: + tls: + verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.ENABLE_OLLAMA:+ollama} + provider_type: remote::ollama + config: + base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1} + - provider_id: ${env.ENABLE_OPENAI:+openai} + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + - provider_id: ${env.ENABLE_VERTEX_AI:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=global} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + - provider_id: ${env.ENABLE_SAFETY:+safety-guard} + provider_type: remote::vllm + config: + base_url: ${env.SAFETY_URL:=http://ollama:11434/v1} + api_token: ${env.SAFETY_API_KEY:=} + tool_runtime: + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + vector_io: + - provider_id: rhdh-docs + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: kv_rag + files: + - provider_id: localfs + provider_type: inline::localfs + config: + storage_dir: /tmp/llama-stack-files + metadata_store: + table_name: files_metadata + backend: sql_default + safety: + - provider_id: ${env.ENABLE_SAFETY:+llama-guard} + 
provider_type: inline::llama-guard + config: + excluded_categories: [] +storage: + backends: + kv_default: + type: kv_sqlite + db_path: /tmp/kvstore.db + sql_default: + type: sql_sqlite + db_path: /tmp/sql_store.db + kv_rag: + type: kv_sqlite + db_path: /rag-content/vector_db/rhdh_product_docs/1.9/faiss_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: + - model_id: sentence-transformers/all-mpnet-base-v2 + metadata: + embedding_dimension: 768 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: /rag-content/embeddings_model + - model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} + provider_id: ${env.ENABLE_SAFETY:+safety-guard} + provider_model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} + model_type: llm + metadata: {} + tool_groups: + - provider_id: rag-runtime + toolgroup_id: builtin::rag + vector_stores: + - vector_store_id: vs_3d4808b2-5f00-4de6-baa3-c86752cf827c # see readme for this value + embedding_model: sentence-transformers//rag-content/embeddings_model + embedding_dimension: 768 + provider_id: rhdh-docs + shields: + - shield_id: llama-guard-shield + provider_id: ${env.ENABLE_SAFETY:+llama-guard} + provider_shield_id: safety-guard/${env.SAFETY_MODEL:=llama-guard3:8b} +vector_stores: + annotation_prompt_params: + enable_annotations: true + annotation_instruction_template: > + When appropriate, cite sources at the end of sentences using doc_url and doc_title format. + Citing sources is not always required because citations are handled externally. + Never include any citation that is in the form '<| file-id |>'. 
+ default_provider_id: rhdh-docs + default_embedding_model: + provider_id: sentence-transformers + model_id: /rag-content/embeddings_model +server: + auth: + host: + port: 8321 + quota: + tls_cafile: + tls_certfile: + tls_keyfile: diff --git a/env/default-values.env b/env/default-values.env index 5d1d2e3..f88e440 100644 --- a/env/default-values.env +++ b/env/default-values.env @@ -1,6 +1,12 @@ # Note: You only need to set the variables you normally would with '-e' flags. # You do not need to set them all if they will go unused. +# Service Images +LIGHTSPEED_CORE_IMAGE=quay.io/lightspeed-core/lightspeed-stack:dev-20260316-b2f54cf +LLAMA_STACK_IMAGE=quay.io/opendatahub/llama-stack:07a97331b3a8831e76ec15c833d2dcf6fa0a34c9 +RAG_CONTENT_IMAGE=quay.io/redhat-ai-dev/rag-content:release-1.9-lls-0.4.3 +OLLAMA_IMAGE=docker.io/ollama/ollama:latest + # Enable Inference Providers ## Set any providers you want enabled to 'true' ## E.g. ENABLE_VLLM=true @@ -10,6 +16,7 @@ ENABLE_VLLM= ENABLE_VERTEX_AI= ENABLE_OPENAI= ENABLE_OLLAMA= +ENABLE_SAFETY= # vLLM Inference Settings VLLM_URL= @@ -31,17 +38,16 @@ OLLAMA_URL= # Question Validation Safety Shield Settings ## Ensure VALIDATION_PROVIDER is one of your enabled Inference Providers +## Only required for Llama Stack configs that use the Lightspeed Core provider ## E.g. 
VALIDATION_PROVIDER=vllm if ENABLE_VLLM=true VALIDATION_PROVIDER= VALIDATION_MODEL_NAME= # Llama Guard Settings -## Defaults to llama-guard3:8b if not set -SAFETY_MODEL= -## Defaults to http://host.docker.internal:11434/v1 if not set -SAFETY_URL= +SAFETY_MODEL=llama-guard3:8b +SAFETY_URL=http://ollama:11434/v1 ## Only required for non-local environments with a api key SAFETY_API_KEY= # Other -LLAMA_STACK_LOGGING= \ No newline at end of file +LLAMA_STACK_LOGGING= diff --git a/lightspeed-stack.yaml b/lightspeed-stack.yaml index e9f4d68..80d4153 100644 --- a/lightspeed-stack.yaml +++ b/lightspeed-stack.yaml @@ -23,7 +23,7 @@ service: access_log: true llama_stack: use_as_library_client: true - library_client_config_path: /app-root/run.yaml + library_client_config_path: /app-root/config.yaml user_data_collection: feedback_enabled: false feedback_storage: '/tmp/data/feedback' From bb8601ac01d8c4b80a9fabf4c8ff3b19808d915d Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Wed, 18 Mar 2026 15:08:44 -0400 Subject: [PATCH 5/9] streamline documentation for running locally Signed-off-by: Jordan Dubrick --- README.md | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index dbf0a48..22d4490 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ For information about these variables see: https://llamastack.github.io/v0.2.18/ ## Configuring RAG -The `run.yaml` file that is included in the container image has a RAG tool enabled. In order for this tool to have the necessary reference content, you need to run: +The `config.yaml` file that is included in the container image has a RAG tool enabled. 
In order for this tool to have the necessary reference content, you need to run: ``` make get-rag @@ -107,23 +107,29 @@ This will fetch the necessary reference content and add it to your local project ## Configuring Safety Guards -> [!IMPORTANT] -> If you want to omit the safety guards for development purposes, you can use [run-no-guard.yaml](./run-no-guard.yaml) instead. +Safety guards are configured through environment variables in `env/values.env`. To disable safety guards, leave `ENABLE_SAFETY=` empty. + +To enable safety guards, set the following: + +```env +ENABLE_SAFETY=true +SAFETY_MODEL= +SAFETY_URL=<safety-model-url>/v1 +SAFETY_API_KEY= +``` -In the main [run.yaml](./run.yaml) file, Llama Guard is enabled by default. In order to avoid issues during startup you will need to ensure you have an instance of Llama Guard running. +- `SAFETY_MODEL`: The name of the Llama Guard model being used. Defaults to `llama-guard3:8b`. +- `SAFETY_URL`: The URL where the safety model is available. For local container runs, use `http://host.containers.internal:11434/v1`. +- `SAFETY_API_KEY`: The API key required for access to the safety model. Not required for local deployments. -You can do so by running the following to start an Ollama container with Llama Guard: +You will also need an instance of Llama Guard running. You can start one locally with Ollama: ```sh podman run -d --name ollama -p 11434:11434 docker.io/ollama/ollama:latest podman exec ollama ollama pull llama-guard3:8b ``` -**Note:** Ensure the Ollama container is started and the model is ready before trying to query if deploying the containers manually. -You will need to set the following environment variables to ensure functionality: -- `SAFETY_MODEL`: The name of the Llama Guard model being used. Defaults to `llama-gaurd3:8b` -- `SAFETY_URL`: The URL where the container is available. Defaults to `http://host.docker.internal:11434/v1` -- `SAFETY_API_KEY`: The API key required for access to the safety model. 
Not required for local. +**Note:** Ensure the Ollama container is started and the model is ready before trying to query if deploying the containers manually. # Running Locally @@ -139,19 +145,14 @@ vector_stores: vector_store_id: vs_3d47e06c-ac95-49b6-9833-d5e6dd7252dd ``` -You will need the `vector_store_id` value. After copying that value you will need to update `run.yaml` and `run-no-guard.yaml`. The `vector_store_id` you copied will replace the `vector_store_id` in those files. - -## Running With Safety Guard +You will need the `vector_store_id` value. After copying that value you will need to update `config.yaml`. The `vector_store_id` you copied will replace the `vector_store_id` in that file. -``` -podman run -it -p 8080:8080 --env-file ./env/values.env -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:library-0.4.3 -``` +## Running the Container -## Running Without Safety Guard +If you want to enable safety guards, see [Configuring Safety Guards](#configuring-safety-guards) before running. -You can override the built-in `run.yaml` file by mounting the `run-no-guard.yaml` file to the same path. ``` -podman run -it -p 8080:8080 --env-file ./env/values.env -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z -v ./run-no-guard.yaml:/app-root/run.yaml:Z quay.io/redhat-ai-dev/llama-stack:library-0.4.3 +podman run -it -p 8080:8080 --env-file ./env/values.env -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:library-0.4.3 ``` ## Running With Host Network @@ -167,9 +168,9 @@ To deploy on a cluster see [DEPLOYMENT.md](./docs/DEPLOYMENT.md). 
| Command | Description | | ---- | ----| | **get-rag** | Gets the RAG data and the embeddings model from the rag-content image registry to your local project directory | +| **sync-upstream-config** | Syncs `config.yaml`, `env/default-values.env`, `lightspeed-stack.yaml`, and image pins from upstream | +| **validate-upstream-config** | Validates that synced upstream files and image pins have not drifted | | **update-question-validation** | Updates the question validation content in `providers.d` | -| **validate-prompt-templates** | Validates prompt values in run.yaml. | -| **update-prompt-templates** | Updates the prompt values in run.yaml. | # Contributing From 7bfca06ad33675da95da947061dc65a113226c6d Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Wed, 18 Mar 2026 15:08:57 -0400 Subject: [PATCH 6/9] remove unused CI, add new sync validation Signed-off-by: Jordan Dubrick --- .github/workflows/validation.yml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/workflows/validation.yml b/.github/workflows/validation.yml index 0544b8e..a9bc194 100644 --- a/.github/workflows/validation.yml +++ b/.github/workflows/validation.yml @@ -20,20 +20,15 @@ on: branches: [ main ] jobs: - validate-prompt: + validate-upstream-config: runs-on: ubuntu-latest permissions: contents: read - defaults: - run: - working-directory: ./scripts/python-scripts steps: - name: Checkout code uses: actions/checkout@v4 - - name: Setup Environment - run: pip3 install -r requirements.txt - - name: Validate prompt - run: python3 sync.py -t validate + - name: Validate upstream synced content + run: make validate-upstream-config validate-yaml: runs-on: ubuntu-latest permissions: From d761ac9eb5670c3cc127813b03cd7842af98dbc4 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Wed, 18 Mar 2026 15:32:15 -0400 Subject: [PATCH 7/9] bump llama stack version refs Signed-off-by: Jordan Dubrick --- .github/workflows/dev.yml | 2 +- Containerfile | 2 +- README.md | 4 ++-- 3 
files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 45e4594..3efb902 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -24,7 +24,7 @@ env: IMAGE_REGISTRY: quay.io REGISTRY_ORG: redhat-ai-dev CONTAINER_FILE: Containerfile - TAG_BASE: library-0.4.3 + TAG_BASE: library-0.5.2 jobs: build-and-push: diff --git a/Containerfile b/Containerfile index cd19bb8..6718f1b 100644 --- a/Containerfile +++ b/Containerfile @@ -77,7 +77,7 @@ LABEL io.k8s.description="Red Hat Developer Hub Lightspeed Llama Stack" LABEL io.k8s.display-name="Red Hat Developer Hub Lightspeed Llama Stack" LABEL io.openshift.tags="developerhub,rhdh,lightspeed,ai,assistant,llama" LABEL name=rhdh-lightspeed-llama-stack -LABEL release=1.8 +LABEL release=1.10 LABEL url="https://github.com/redhat-ai-dev/llama-stack" LABEL vendor="Red Hat, Inc." LABEL version=0.1.1 diff --git a/README.md b/README.md index 22d4490..646d844 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Redhat-AI-Dev Llama Stack [![Apache2.0 License](https://img.shields.io/badge/license-Apache2.0-brightgreen.svg)](LICENSE) -[![Llama Stack Version](https://img.shields.io/badge/llama_stack-v0.4.3-blue)](https://llamastack.github.io/docs/v0.4.3) +[![Llama Stack Version](https://img.shields.io/badge/llama_stack-v0.5.2-blue)](https://llamastack.github.io/docs) [![Python Version](https://img.shields.io/badge/python-3.12-blue)](https://www.python.org/downloads/release/python-3120/) - [Image Availability](#image-availability) @@ -28,7 +28,7 @@ ## Developer Release (Library Mode) ``` -quay.io/redhat-ai-dev/llama-stack:library-0.4.3 +quay.io/redhat-ai-dev/llama-stack:library-0.5.2 ``` # Usage From b89df9b71667621cf57c5bdcc2113c1d8ba54db9 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Wed, 18 Mar 2026 15:37:22 -0400 Subject: [PATCH 8/9] add dev branch to validation CI Signed-off-by: Jordan Dubrick --- .github/workflows/validation.yml | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/validation.yml b/.github/workflows/validation.yml index a9bc194..b3b7e42 100644 --- a/.github/workflows/validation.yml +++ b/.github/workflows/validation.yml @@ -17,7 +17,7 @@ name: Validation Checks on: pull_request: - branches: [ main ] + branches: [ main, dev ] jobs: validate-upstream-config: From a5828d029dac5fabaefa04f28c2f27037e16b819 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Thu, 19 Mar 2026 11:03:07 -0400 Subject: [PATCH 9/9] run sync for stable images Signed-off-by: Jordan Dubrick --- Makefile | 2 +- env/default-values.env | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 9d6ea84..f53fc00 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -RAG_CONTENT_IMAGE ?= quay.io/redhat-ai-dev/rag-content:release-1.9-lls-0.4.3 +RAG_CONTENT_IMAGE ?= quay.io/redhat-ai-dev/rag-content:release-1.9-lls-0.4.3-d0444cd9b57222ec9bdbaa36354337480a2ecf97 VENV := $(CURDIR)/scripts/python-scripts/.venv PYTHON := $(VENV)/bin/python3 PIP := $(VENV)/bin/pip3 diff --git a/env/default-values.env b/env/default-values.env index f88e440..e4fbcce 100644 --- a/env/default-values.env +++ b/env/default-values.env @@ -4,8 +4,8 @@ # Service Images LIGHTSPEED_CORE_IMAGE=quay.io/lightspeed-core/lightspeed-stack:dev-20260316-b2f54cf LLAMA_STACK_IMAGE=quay.io/opendatahub/llama-stack:07a97331b3a8831e76ec15c833d2dcf6fa0a34c9 -RAG_CONTENT_IMAGE=quay.io/redhat-ai-dev/rag-content:release-1.9-lls-0.4.3 -OLLAMA_IMAGE=docker.io/ollama/ollama:latest +RAG_CONTENT_IMAGE=quay.io/redhat-ai-dev/rag-content:release-1.9-lls-0.4.3-d0444cd9b57222ec9bdbaa36354337480a2ecf97 +OLLAMA_IMAGE=docker.io/ollama/ollama:0.18.2 # Enable Inference Providers ## Set any providers you want enabled to 'true'