Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ env:
IMAGE_REGISTRY: quay.io
REGISTRY_ORG: redhat-ai-dev
CONTAINER_FILE: Containerfile
TAG_BASE: library-0.4.3
TAG_BASE: library-0.5.2

jobs:
build-and-push:
Expand Down
13 changes: 4 additions & 9 deletions .github/workflows/validation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,18 @@ name: Validation Checks

on:
pull_request:
branches: [ main ]
branches: [ main, dev ]

jobs:
validate-prompt:
validate-upstream-config:
runs-on: ubuntu-latest
permissions:
contents: read
defaults:
run:
working-directory: ./scripts/python-scripts
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Setup Environment
run: pip3 install -r requirements.txt
- name: Validate prompt
run: python3 sync.py -t validate
- name: Validate upstream synced content
run: make validate-upstream-config
validate-yaml:
runs-on: ubuntu-latest
permissions:
Expand Down
9 changes: 4 additions & 5 deletions Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG TAG="dev-20260226-ca21850"
FROM quay.io/lightspeed-core/lightspeed-stack:${TAG} AS builder
FROM quay.io/lightspeed-core/lightspeed-stack:dev-20260316-b2f54cf AS builder

USER root

Expand Down Expand Up @@ -58,7 +57,7 @@ COPY --from=builder --chown=1001:1001 /app-root /app-root
# checked by konflux
COPY --from=builder --chown=1001:1001 /app-root/LICENSE /licenses/

COPY --chown=1001:1001 ./run.yaml ./lightspeed-stack.yaml ./
COPY --chown=1001:1001 ./config.yaml ./lightspeed-stack.yaml ./
COPY --chown=1001:1001 ./config/ ./config/
COPY --chown=1001:1001 --chmod=755 ./scripts/entrypoint.sh ./

Expand All @@ -78,8 +77,8 @@ LABEL io.k8s.description="Red Hat Developer Hub Lightspeed Llama Stack"
LABEL io.k8s.display-name="Red Hat Developer Hub Lightspeed Llama Stack"
LABEL io.openshift.tags="developerhub,rhdh,lightspeed,ai,assistant,llama"
LABEL name=rhdh-lightspeed-llama-stack
LABEL release=1.8
LABEL release=1.10
LABEL url="https://github.com/redhat-ai-dev/llama-stack"
LABEL vendor="Red Hat, Inc."
LABEL version=0.1.1
LABEL summary="Red Hat Developer Hub Lightspeed Llama Stack"
LABEL summary="Red Hat Developer Hub Lightspeed Llama Stack"
9 changes: 8 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
RAG_CONTENT_IMAGE ?= quay.io/redhat-ai-dev/rag-content:release-1.9-lls-0.4.3
RAG_CONTENT_IMAGE ?= quay.io/redhat-ai-dev/rag-content:release-1.9-lls-0.4.3-d0444cd9b57222ec9bdbaa36354337480a2ecf97
VENV := $(CURDIR)/scripts/python-scripts/.venv
PYTHON := $(VENV)/bin/python3
PIP := $(VENV)/bin/pip3
Expand Down Expand Up @@ -55,3 +55,10 @@ validate-prompt-templates: $(VENV)/bin/activate

update-prompt-templates: $(VENV)/bin/activate
$(call run_sync,update)

.PHONY: sync-upstream-config validate-upstream-config
sync-upstream-config: ## Sync upstream config and image pins from lightspeed-configs
bash ./scripts/sync/upstream-config.sh update

validate-upstream-config: ## Validate synced upstream config and image pins have not drifted
bash ./scripts/sync/upstream-config.sh validate
47 changes: 24 additions & 23 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Redhat-AI-Dev Llama Stack

[![Apache2.0 License](https://img.shields.io/badge/license-Apache2.0-brightgreen.svg)](LICENSE)
[![Llama Stack Version](https://img.shields.io/badge/llama_stack-v0.4.3-blue)](https://llamastack.github.io/docs/v0.4.3)
[![Llama Stack Version](https://img.shields.io/badge/llama_stack-v0.5.2-blue)](https://llamastack.github.io/docs)
[![Python Version](https://img.shields.io/badge/python-3.12-blue)](https://www.python.org/downloads/release/python-3120/)

- [Image Availability](#image-availability)
Expand All @@ -28,7 +28,7 @@
## Developer Release (Library Mode)

```
quay.io/redhat-ai-dev/llama-stack:library-0.4.3
quay.io/redhat-ai-dev/llama-stack:library-0.5.2
```

# Usage
Expand Down Expand Up @@ -97,7 +97,7 @@ For information about these variables see: https://llamastack.github.io/v0.2.18/

## Configuring RAG

The `run.yaml` file that is included in the container image has a RAG tool enabled. In order for this tool to have the necessary reference content, you need to run:
The `config.yaml` file that is included in the container image has a RAG tool enabled. In order for this tool to have the necessary reference content, you need to run:

```
make get-rag
Expand All @@ -107,23 +107,29 @@ This will fetch the necessary reference content and add it to your local project

## Configuring Safety Guards

> [!IMPORTANT]
> If you want to omit the safety guards for development purposes, you can use [run-no-guard.yaml](./run-no-guard.yaml) instead.
Safety guards are configured through environment variables in `env/values.env`. To disable safety guards, leave `ENABLE_SAFETY=` empty.

To enable safety guards, set the following:

```env
ENABLE_SAFETY=true
SAFETY_MODEL=<llama-guard-model-name>
SAFETY_URL=<url-of-safety-model-server>/v1
SAFETY_API_KEY=<api-key-if-required>
```

In the main [run.yaml](./run.yaml) file, Llama Guard is enabled by default. In order to avoid issues during startup you will need to ensure you have an instance of Llama Guard running.
- `SAFETY_MODEL`: The name of the Llama Guard model being used. Defaults to `llama-guard3:8b`.
- `SAFETY_URL`: The URL where the safety model is available. For local container runs, use `http://host.containers.internal:11434/v1`.
- `SAFETY_API_KEY`: The API key required for access to the safety model. Not required for local deployments.

You can do so by running the following to start an Ollama container with Llama Guard:
You will also need an instance of Llama Guard running. You can start one locally with Ollama:

```sh
podman run -d --name ollama -p 11434:11434 docker.io/ollama/ollama:latest
podman exec ollama ollama pull llama-guard3:8b
```
**Note:** Ensure the Ollama container is started and the model is ready before trying to query if deploying the containers manually.

You will need to set the following environment variables to ensure functionality:
- `SAFETY_MODEL`: The name of the Llama Guard model being used. Defaults to `llama-guard3:8b`
- `SAFETY_URL`: The URL where the container is available. Defaults to `http://host.docker.internal:11434/v1`
- `SAFETY_API_KEY`: The API key required for access to the safety model. Not required for local.
**Note:** Ensure the Ollama container is started and the model is ready before trying to query if deploying the containers manually.

# Running Locally

Expand All @@ -139,19 +145,14 @@ vector_stores:
vector_store_id: vs_3d47e06c-ac95-49b6-9833-d5e6dd7252dd
```

You will need the `vector_store_id` value. After copying that value you will need to update `run.yaml` and `run-no-guard.yaml`. The `vector_store_id` you copied will replace the `vector_store_id` in those files.

## Running With Safety Guard
You will need the `vector_store_id` value. After copying that value you will need to update `config.yaml`. The `vector_store_id` you copied will replace the `vector_store_id` in that file.

```
podman run -it -p 8080:8080 --env-file ./env/values.env -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:library-0.4.3
```
## Running the Container

## Running Without Safety Guard
If you want to enable safety guards, see [Configuring Safety Guards](#configuring-safety-guards) before running.

You can override the built-in `run.yaml` file by mounting the `run-no-guard.yaml` file to the same path.
```
podman run -it -p 8080:8080 --env-file ./env/values.env -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z -v ./run-no-guard.yaml:/app-root/run.yaml:Z quay.io/redhat-ai-dev/llama-stack:library-0.4.3
podman run -it -p 8080:8080 --env-file ./env/values.env -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:library-0.4.3
```

## Running With Host Network
Expand All @@ -167,9 +168,9 @@ To deploy on a cluster see [DEPLOYMENT.md](./docs/DEPLOYMENT.md).
| Command | Description |
| ---- | ----|
| **get-rag** | Gets the RAG data and the embeddings model from the rag-content image registry to your local project directory |
| **sync-upstream-config** | Syncs `config.yaml`, `env/default-values.env`, `lightspeed-stack.yaml`, and image pins from upstream |
| **validate-upstream-config** | Validates that synced upstream files and image pins have not drifted |
| **update-question-validation** | Updates the question validation content in `providers.d` |
| **validate-prompt-templates** | Validates prompt values in run.yaml. |
| **update-prompt-templates** | Updates the prompt values in run.yaml. |

# Contributing

Expand Down
44 changes: 26 additions & 18 deletions run.yaml → config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
version: 2
image_name: redhat-ai-dev-llama-stack
version: 3
distro_name: developer-lightspeed-lls-0.5.x
apis:
- agents
- inference
Expand Down Expand Up @@ -43,7 +43,9 @@ providers:
base_url: ${env.VLLM_URL:=}
api_token: ${env.VLLM_API_KEY:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
network:
tls:
verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.ENABLE_OLLAMA:+ollama}
provider_type: remote::ollama
config:
Expand All @@ -56,20 +58,15 @@ providers:
provider_type: remote::vertexai
config:
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: safety-guard
provider_type: remote::vllm
config:
base_url: ${env.SAFETY_URL:=http://host.docker.internal:11434/v1}
api_token: ${env.SAFETY_API_KEY:=token}
location: ${env.VERTEX_AI_LOCATION:=global}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
- provider_id: ${env.ENABLE_SAFETY:+safety-guard}
provider_type: remote::vllm
config:
excluded_categories: []
base_url: ${env.SAFETY_URL:=http://ollama:11434/v1}
api_token: ${env.SAFETY_API_KEY:=}
tool_runtime:
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
Expand All @@ -92,6 +89,11 @@ providers:
metadata_store:
table_name: files_metadata
backend: sql_default
safety:
- provider_id: ${env.ENABLE_SAFETY:+llama-guard}
provider_type: inline::llama-guard
config:
excluded_categories: []
storage:
backends:
kv_default:
Expand Down Expand Up @@ -124,14 +126,10 @@ registered_resources:
provider_id: sentence-transformers
provider_model_id: /rag-content/embeddings_model
- model_id: ${env.SAFETY_MODEL:=llama-guard3:8b}
provider_id: safety-guard
provider_id: ${env.ENABLE_SAFETY:+safety-guard}
provider_model_id: ${env.SAFETY_MODEL:=llama-guard3:8b}
model_type: llm
metadata: {}
shields:
- shield_id: llama-guard-shield
provider_id: llama-guard
provider_shield_id: safety-guard/${env.SAFETY_MODEL:=llama-guard3:8b}
tool_groups:
- provider_id: rag-runtime
toolgroup_id: builtin::rag
Expand All @@ -140,7 +138,17 @@ registered_resources:
embedding_model: sentence-transformers//rag-content/embeddings_model
embedding_dimension: 768
provider_id: rhdh-docs
shields:
- shield_id: llama-guard-shield
provider_id: ${env.ENABLE_SAFETY:+llama-guard}
provider_shield_id: safety-guard/${env.SAFETY_MODEL:=llama-guard3:8b}
vector_stores:
annotation_prompt_params:
enable_annotations: true
annotation_instruction_template: >
When appropriate, cite sources at the end of sentences using doc_url and doc_title format.
Citing sources is not always required because citations are handled externally.
Never include any citation that is in the form '<| file-id |>'.
default_provider_id: rhdh-docs
default_embedding_model:
provider_id: sentence-transformers
Expand Down
16 changes: 11 additions & 5 deletions env/default-values.env
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
# Note: You only need to set the variables you normally would with '-e' flags.
# You do not need to set them all if they will go unused.

# Service Images
LIGHTSPEED_CORE_IMAGE=quay.io/lightspeed-core/lightspeed-stack:dev-20260316-b2f54cf
LLAMA_STACK_IMAGE=quay.io/opendatahub/llama-stack:07a97331b3a8831e76ec15c833d2dcf6fa0a34c9
RAG_CONTENT_IMAGE=quay.io/redhat-ai-dev/rag-content:release-1.9-lls-0.4.3-d0444cd9b57222ec9bdbaa36354337480a2ecf97
OLLAMA_IMAGE=docker.io/ollama/ollama:0.18.2

# Enable Inference Providers
## Set any providers you want enabled to 'true'
## E.g. ENABLE_VLLM=true
Expand All @@ -10,6 +16,7 @@ ENABLE_VLLM=
ENABLE_VERTEX_AI=
ENABLE_OPENAI=
ENABLE_OLLAMA=
ENABLE_SAFETY=

# vLLM Inference Settings
VLLM_URL=
Expand All @@ -31,17 +38,16 @@ OLLAMA_URL=

# Question Validation Safety Shield Settings
## Ensure VALIDATION_PROVIDER is one of your enabled Inference Providers
## Only required for Llama Stack configs that use the Lightspeed Core provider
## E.g. VALIDATION_PROVIDER=vllm if ENABLE_VLLM=true
VALIDATION_PROVIDER=
VALIDATION_MODEL_NAME=

# Llama Guard Settings
## Defaults to llama-guard3:8b if not set
SAFETY_MODEL=
## Defaults to http://host.docker.internal:11434/v1 if not set
SAFETY_URL=
SAFETY_MODEL=llama-guard3:8b
SAFETY_URL=http://ollama:11434/v1
## Only required for non-local environments with an API key
SAFETY_API_KEY=

# Other
LLAMA_STACK_LOGGING=
LLAMA_STACK_LOGGING=
2 changes: 1 addition & 1 deletion lightspeed-stack.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ service:
access_log: true
llama_stack:
use_as_library_client: true
library_client_config_path: /app-root/run.yaml
library_client_config_path: /app-root/config.yaml
user_data_collection:
feedback_enabled: false
feedback_storage: '/tmp/data/feedback'
Expand Down
Loading
Loading