From ea5a27c77d290ec653cec883e3d4716fc8ef6f6e Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 6 Jan 2026 14:16:45 -0500 Subject: [PATCH 01/10] move to llama stack 0.3.4 and remove safety shield Signed-off-by: Jordan Dubrick --- pyproject.toml | 7 +- run.yaml | 242 +++++++++++++------------------------------------ uv.lock | 134 +++++++++++---------------- 3 files changed, 117 insertions(+), 266 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a411c67..7c167a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,10 +7,10 @@ dependencies = [ "fastapi>=0.115.6", "uvicorn>=0.34.3", "kubernetes>=30.1.0", - "llama-stack==0.2.18", - "llama-stack-client==0.2.18", + "llama-stack==0.3.4", + "llama-stack-client==0.3.4", "ollama>=0.2.0", - "openai==1.99.9", + "openai>=1.100.0", "rich>=14.0.0", "cachetools>=6.1.0", "prometheus-client>=0.22.1", @@ -34,7 +34,6 @@ dependencies = [ "greenlet", "torch", "sentence-transformers>=5.0.0", - "lightspeed_stack_providers @ git+https://github.com/Jdubrick/lightspeed-providers.git@devai", "pydantic>=2.10.6", "httpx", ] diff --git a/run.yaml b/run.yaml index 8e869d4..c904f53 100644 --- a/run.yaml +++ b/run.yaml @@ -13,72 +13,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-version: '2' +version: 2 image_name: redhat-ai-dev-llama-stack - apis: - agents - - datasetio - - eval - inference - - post_training - safety - - scoring - - telemetry - tool_runtime - vector_io -benchmarks: [] + - files container_image: -datasets: [] -external_providers_dir: '/app-root/config/providers.d' -inference_store: - db_path: .llama/distributions/ollama/inference_store.db - type: sqlite -logging: -metadata_store: - db_path: .llama/distributions/ollama/registry.db - namespace: - type: sqlite -models: - - model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: '/app-root/embeddings_model' +external_providers_dir: providers: agents: - config: - persistence_store: - db_path: .llama/distributions/ollama/agents_store.db - namespace: - type: sqlite - responses_store: - db_path: .llama/distributions/ollama/responses_store.db - type: sqlite - provider_id: meta-reference - provider_type: inline::meta-reference - datasetio: - - config: - kvstore: - db_path: .llama/distributions/ollama/huggingface_datasetio.db - namespace: - type: sqlite - provider_id: huggingface - provider_type: remote::huggingface - - config: - kvstore: - db_path: .llama/distributions/ollama/localfs_datasetio.db - namespace: - type: sqlite - provider_id: localfs - provider_type: inline::localfs - eval: - - config: - kvstore: - db_path: .llama/distributions/ollama/meta_reference_eval.db - namespace: - type: sqlite + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default provider_id: meta-reference provider_type: inline::meta-reference inference: @@ -105,106 +60,6 @@ providers: - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} - post_training: - - config: - checkpoint_format: huggingface - device: cpu - distributed_backend: - dpo_output_dir: '.' 
- provider_id: huggingface - provider_type: inline::huggingface - safety: - - config: - excluded_categories: [] - provider_id: llama-guard - provider_type: inline::llama-guard - - provider_id: lightspeed_question_validity - provider_type: inline::lightspeed_question_validity - config: - model_id: ${env.VALIDATION_PROVIDER:=vllm}/${env.VALIDATION_MODEL_NAME} - model_prompt: |- - Instructions: - - You area question classification tool. You are an expert in the following categories: - - Backstage - - Red Hat Developer Hub (RHDH) - - Developer Lightspeed - - Lightspeed - - Artificial Intelligence (AI) Models - - Large Language Models (LLMs) - - Kubernetes - - Openshift - - CI/CD - - GitOps - - Pipelines - - Developer Portals - - Deployments - - Software Catalogs - - Software Templates - - Tech Docs - - Your job is to determine if a user's question is related to the categories you are an expert in. If the question is related to those categories, \ - or any features that may be related to those categories, you will answer with ${allowed}. - - If a question is not related to your expert categories, answer with ${rejected}. - - You do not need to explain your answer. - - Below are some example questions: - Example Question: - Why is the sky blue? - Example Response: - ${rejected} - - Example Question: - Can you help configure my cluster to automatically scale? - Example Response: - ${allowed} - - Example Question: - How do I create import an existing software template in Backstage? - Example Response: - ${allowed} - - Example Question: - How do I accomplish a task in RHDH? - Example Response: - ${allowed} - - Example Question: - How do I explore a component in RHDH catalog? - Example Response: - ${allowed} - - Example Question: - How can I integrate GitOps into my pipeline? 
- Example Response: - ${allowed} - - Question: - ${message} - Response: - invalid_question_response: |- - Hi, I'm the Red Hat Developer Hub Lightspeed assistant, I can help you with questions about Red Hat Developer Hub or Backstage. - Please ensure your question is about these topics, and feel free to ask again! - scoring: - - config: {} - provider_id: basic - provider_type: inline::basic - - config: {} - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - config: - openai_api_key: '********' - provider_id: braintrust - provider_type: inline::braintrust - telemetry: - - config: - service_name: 'lightspeed-stack-telemetry' - sinks: sqlite - sqlite_db_path: .llama/distributions/ollama/trace_store.db - provider_id: meta-reference - provider_type: inline::meta-reference tool_runtime: - provider_id: model-context-protocol provider_type: remote::model-context-protocol @@ -213,21 +68,60 @@ providers: provider_type: inline::rag-runtime config: {} vector_io: - - config: - kvstore: - db_path: .llama/distributions/ollama/faiss_store.db - namespace: - type: sqlite - provider_id: faiss - provider_type: inline::faiss - - provider_id: rhdh-docs + - provider_id: rhdh-product-docs-1_8 provider_type: inline::faiss config: - kvstore: - type: sqlite - namespace: - db_path: /app-root/vector_db/rhdh_product_docs/1.8/faiss_store.db -scoring_fns: [] + persistence: + namespace: vector_io::faiss + backend: faiss_kv + files: + - provider_id: localfs + provider_type: inline::localfs + config: + storage_dir: /tmp/llama-stack-files + metadata_store: + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: .llama/distributions/ollama/kvstore.db + sql_default: + type: sql_sqlite + db_path: .llama/distributions/ollama/sql_store.db + faiss_kv: + type: kv_sqlite + db_path: /app-root/vector_db/rhdh_product_docs/1.8/faiss_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: 
inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: + - model_id: sentence-transformers/all-mpnet-base-v2 + metadata: + embedding_dimension: 768 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: '/app-root/embeddings_model' + tool_groups: + - provider_id: rag-runtime + toolgroup_id: builtin::rag + vector_stores: + - vector_store_id: rhdh-product-docs-1_8 + embedding_dimension: 768 + embedding_model: sentence-transformers/all-mpnet-base-v2 + provider_id: rhdh-product-docs-1_8 + provider_vector_store_id: rhdh-product-docs-1_8 server: auth: host: @@ -236,15 +130,3 @@ server: tls_cafile: tls_certfile: tls_keyfile: -shields: - - shield_id: lightspeed_question_validity-shield - provider_id: lightspeed_question_validity -tool_groups: - - provider_id: rag-runtime - toolgroup_id: builtin::rag - description: 'Only use for questions specifically about Red Hat Developer Hub (RHDH). Searches technical documentation for RHDH installation, discovery, configuration, release, upgrade, control access, integration, observability, and extending with plugins. Do not use for any other topic outside RHDH.' 
-vector_dbs: - - embedding_dimension: 768 - embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: rhdh-docs - vector_db_id: rhdh-product-docs-1_8 diff --git a/uv.lock b/uv.lock index b6413d1..4a2213d 100644 --- a/uv.lock +++ b/uv.lock @@ -112,6 +112,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f5/10/6c25ed6de94c49f88a91fa5018cb4c0f3625f31d5be9f771ebe5cc7cd506/aiosqlite-0.21.0-py3-none-any.whl", hash = "sha256:2549cf4057f95f53dcba16f2b64e8e2791d7e1adedb13197dd8ed77bb226d7d0", size = 15792, upload-time = "2025-02-03T07:30:13.6Z" }, ] +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -385,18 +394,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b0/0d/9feae160378a3553fa9a339b0e9c1a048e147a4127210e286ef18b730f03/durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286", size = 3922, upload-time = "2025-05-17T13:52:36.463Z" }, ] -[[package]] -name = "ecdsa" -version = "0.19.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "six" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c0/1f/924e3caae75f471eae4b26bd13b698f6af2c44279f67af317439c2f4c46a/ecdsa-0.19.1.tar.gz", hash = 
"sha256:478cba7b62555866fcb3bb3fe985e06decbdb68ef55713c4e5ab98c57d508e61", size = 201793, upload-time = "2025-03-13T11:52:43.25Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/a3/460c57f094a4a165c84a1341c373b0a4f5ec6ac244b998d5021aade89b77/ecdsa-0.19.1-py2.py3-none-any.whl", hash = "sha256:30638e27cf77b7e15c4c4cc1973720149e1033827cfd00661ca5c8cc0cdb24c3", size = 150607, upload-time = "2025-03-13T11:52:41.757Z" }, -] - [[package]] name = "faiss-cpu" version = "1.12.0" @@ -419,16 +416,17 @@ wheels = [ [[package]] name = "fastapi" -version = "0.117.1" +version = "0.124.4" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "annotated-doc" }, { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7e/7e/d9788300deaf416178f61fb3c2ceb16b7d0dc9f82a08fdb87a5e64ee3cc7/fastapi-0.117.1.tar.gz", hash = "sha256:fb2d42082d22b185f904ca0ecad2e195b851030bd6c5e4c032d1c981240c631a", size = 307155, upload-time = "2025-09-20T20:16:56.663Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/21/ade3ff6745a82ea8ad88552b4139d27941549e4f19125879f848ac8f3c3d/fastapi-0.124.4.tar.gz", hash = "sha256:0e9422e8d6b797515f33f500309f6e1c98ee4e85563ba0f2debb282df6343763", size = 378460, upload-time = "2025-12-12T15:00:43.891Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6d/45/d9d3e8eeefbe93be1c50060a9d9a9f366dba66f288bb518a9566a23a8631/fastapi-0.117.1-py3-none-any.whl", hash = "sha256:33c51a0d21cab2b9722d4e56dbb9316f3687155be6b276191790d8da03507552", size = 95959, upload-time = "2025-09-20T20:16:53.661Z" }, + { url = "https://files.pythonhosted.org/packages/3e/57/aa70121b5008f44031be645a61a7c4abc24e0e888ad3fc8fda916f4d188e/fastapi-0.124.4-py3-none-any.whl", hash = "sha256:6d1e703698443ccb89e50abe4893f3c84d9d6689c0cf1ca4fad6d3c15cf69f15", size = 113281, upload-time = "2025-12-12T15:00:42.44Z" }, ] [[package]] @@ -545,6 +543,8 @@ wheels = [ { 
url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, { url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" }, + { url = "https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846, upload-time = "2025-11-04T12:42:15.191Z" }, + { url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814, upload-time = "2025-11-04T12:42:17.175Z" }, { url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" }, ] @@ -760,16 +760,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/89/43/d9bebfc3db7dea6ec80df5cb2aad8d274dd18ec2edd6c4f21f32c237cbbb/kubernetes-33.1.0-py2.py3-none-any.whl", hash = "sha256:544de42b24b64287f7e0aa9513c93cb503f7f40eea39b20f66810011a86eabc5", size = 1941335, upload-time = "2025-06-09T21:57:56.327Z" }, ] -[[package]] -name = "lightspeed-stack-providers" -version = "0.1.15" -source = { git = "https://github.com/Jdubrick/lightspeed-providers.git?rev=devai#6ac0937c526ca285ef4f6bcc3775d05613ee26e7" } -dependencies = [ - { name = "httpx" }, - { name = "llama-stack" }, - { name = "pydantic" }, -] - [[package]] name = "litellm" version = "1.77.3" @@ -794,26 +784,9 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/b2/122602255b582fdcf630f8e44b5c9175391abe10be5e2f4db6a7d4173df1/litellm-1.77.3-py3-none-any.whl", hash = "sha256:f0c8c6bcfa2c9cd9e9fa0304f9a94894d252e7c74f118c37a8f2e4e525b2592b", size = 9118886, upload-time = "2025-09-21T00:59:06.178Z" }, ] -[[package]] -name = "llama-api-client" -version = "0.4.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anyio" }, - { name = "distro" }, - { name = "httpx" }, - { name = "pydantic" }, - { name = "sniffio" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f6/fe/937935f9f8a869efbda9b563f64cd8c3d433981f9dada40521ad8eadc9dd/llama_api_client-0.4.0.tar.gz", hash = "sha256:45d37086bd7004846d90746347449ea56cc20109c06cc8d908bbaf7f36fbb931", size = 120975, upload-time = "2025-09-17T21:04:00.558Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/43/ac/0152123825a2674e06fbf1094d8f19fc2b931e84b70007c4340cc0775ce5/llama_api_client-0.4.0-py3-none-any.whl", hash = "sha256:adafdc22faaeefe944d59ff9de65f205efc79acee52d80a3f18fd8a940597368", size = 87986, upload-time = "2025-09-17T21:03:59.686Z" }, -] - [[package]] name = "llama-stack" -version = "0.2.18" +version = "0.3.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { 
name = "aiohttp" }, @@ -823,10 +796,8 @@ dependencies = [ { name = "fire" }, { name = "h11" }, { name = "httpx" }, - { name = "huggingface-hub" }, { name = "jinja2" }, { name = "jsonschema" }, - { name = "llama-api-client" }, { name = "llama-stack-client" }, { name = "openai" }, { name = "opentelemetry-exporter-otlp-proto-http" }, @@ -834,23 +805,24 @@ dependencies = [ { name = "pillow" }, { name = "prompt-toolkit" }, { name = "pydantic" }, + { name = "pyjwt", extra = ["crypto"] }, { name = "python-dotenv" }, - { name = "python-jose", extra = ["cryptography"] }, { name = "python-multipart" }, { name = "rich" }, + { name = "sqlalchemy", extra = ["asyncio"] }, { name = "starlette" }, { name = "termcolor" }, { name = "tiktoken" }, { name = "uvicorn" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ed/e1/16c52856746e1412274c085a6e6a21829133f9db3d4932a009700594f4a2/llama_stack-0.2.18.tar.gz", hash = "sha256:0ea6e150140047568e45f98100027a79e20340711e5feff083d9b9dfe42d2605", size = 3321726, upload-time = "2025-08-19T22:12:17.257Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8f/c5/ade666e8ce894066c0358988e831b31c81840e7b285aa8b5f70236e33681/llama_stack-0.3.4.tar.gz", hash = "sha256:bdb489e4341559465d604c9eba554460ab0d17c5dc005ee2d40aa892b94e2e9b", size = 3322494, upload-time = "2025-12-03T19:00:18.397Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/65/72/c68c50be2d2808fe162c3f344f976bc29839f0cee7a6d951cc3805f8482d/llama_stack-0.2.18-py3-none-any.whl", hash = "sha256:3383fb4da1cc6e77a58ae425ef49ce470bca784ca85051dd6b2b70966f936bea", size = 3650850, upload-time = "2025-08-19T22:12:15.857Z" }, + { url = "https://files.pythonhosted.org/packages/49/14/c98e5b564b425e4fc7aabf33f4bf9f40c43057424a555f023bcd8e334874/llama_stack-0.3.4-py3-none-any.whl", hash = "sha256:3e302db1efb2ed6c974526b8c6b04b9e54891f3959d0d83c004f77e1c21f6147", size = 3637817, upload-time = "2025-12-03T19:00:16.581Z" }, ] [[package]] name = "llama-stack-client" 
-version = "0.2.18" +version = "0.3.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -869,14 +841,14 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/69/da/5e5a745495f8a2b8ef24fc4d01fe9031aa2277c36447cb22192ec8c8cc1e/llama_stack_client-0.2.18.tar.gz", hash = "sha256:860c885c9e549445178ac55cc9422e6e2a91215ac7aff5aaccfb42f3ce07e79e", size = 277284, upload-time = "2025-08-19T22:12:09.106Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/10/9c198c62e720c647a01506f40ba4e058a5b2a23c947fab1827eb096a94f2/llama_stack_client-0.3.4.tar.gz", hash = "sha256:6afbd10b152911a044e8d038e58981425ce0a34510da3e31cdd3103516e27688", size = 335668, upload-time = "2025-12-03T18:59:25.48Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/e4/e97f8fdd8a07aa1efc7f7e37b5657d84357b664bf70dd1885a437edc0699/llama_stack_client-0.2.18-py3-none-any.whl", hash = "sha256:90f827d5476f7fc15fd993f1863af6a6e72bd064646bf6a99435eb43a1327f70", size = 367586, upload-time = "2025-08-19T22:12:07.899Z" }, + { url = "https://files.pythonhosted.org/packages/ae/b9/bcc815cee68ef87635edf72f9454dd35cef8492d2670f5a6b229b5913f0b/llama_stack_client-0.3.4-py3-none-any.whl", hash = "sha256:949c0a6c9a1c925a2b0d930d85b6485bb8d264ba68d02f36aca3c2539cb7b893", size = 425244, upload-time = "2025-12-03T18:59:24.293Z" }, ] [[package]] name = "llama-stack-runner" -version = "0.1.0" +version = "0.1.1" source = { virtual = "." 
} dependencies = [ { name = "aiohttp" }, @@ -891,7 +863,6 @@ dependencies = [ { name = "greenlet" }, { name = "httpx" }, { name = "kubernetes" }, - { name = "lightspeed-stack-providers" }, { name = "litellm" }, { name = "llama-stack" }, { name = "llama-stack-client" }, @@ -929,13 +900,12 @@ requires-dist = [ { name = "greenlet" }, { name = "httpx" }, { name = "kubernetes", specifier = ">=30.1.0" }, - { name = "lightspeed-stack-providers", git = "https://github.com/Jdubrick/lightspeed-providers.git?rev=devai" }, { name = "litellm", specifier = ">=1.72.1" }, - { name = "llama-stack", specifier = "==0.2.18" }, - { name = "llama-stack-client", specifier = "==0.2.18" }, + { name = "llama-stack", specifier = "==0.3.4" }, + { name = "llama-stack-client", specifier = "==0.3.4" }, { name = "mcp", specifier = ">=1.9.4" }, { name = "ollama", specifier = ">=0.2.0" }, - { name = "openai", specifier = "==1.99.9" }, + { name = "openai", specifier = ">=1.100.0" }, { name = "opentelemetry-exporter-otlp", specifier = ">=1.34.0" }, { name = "opentelemetry-instrumentation", specifier = ">=0.55b0" }, { name = "opentelemetry-sdk", specifier = ">=1.34.0" }, @@ -1149,7 +1119,7 @@ wheels = [ [[package]] name = "openai" -version = "1.99.9" +version = "2.9.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1161,9 +1131,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8a/d2/ef89c6f3f36b13b06e271d3cc984ddd2f62508a0972c1cbcc8485a6644ff/openai-1.99.9.tar.gz", hash = "sha256:f2082d155b1ad22e83247c3de3958eb4255b20ccf4a1de2e6681b6957b554e92", size = 506992, upload-time = "2025-08-12T02:31:10.054Z" } +sdist = { url = "https://files.pythonhosted.org/packages/09/48/516290f38745cc1e72856f50e8afed4a7f9ac396a5a18f39e892ab89dfc2/openai-2.9.0.tar.gz", hash = "sha256:b52ec65727fc8f1eed2fbc86c8eac0998900c7ef63aa2eb5c24b69717c56fa5f", size = 608202, upload-time = 
"2025-12-04T18:15:09.01Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e8/fb/df274ca10698ee77b07bff952f302ea627cc12dac6b85289485dd77db6de/openai-1.99.9-py3-none-any.whl", hash = "sha256:9dbcdb425553bae1ac5d947147bebbd630d91bbfc7788394d4c4f3a35682ab3a", size = 786816, upload-time = "2025-08-12T02:31:08.34Z" }, + { url = "https://files.pythonhosted.org/packages/59/fd/ae2da789cd923dd033c99b8d544071a827c92046b150db01cfa5cea5b3fd/openai-2.9.0-py3-none-any.whl", hash = "sha256:0d168a490fbb45630ad508a6f3022013c155a68fd708069b6a1a01a5e8f0ffad", size = 1030836, upload-time = "2025-12-04T18:15:07.063Z" }, ] [[package]] @@ -1607,6 +1577,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload-time = "2024-11-28T03:43:29.933Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1628,25 +1612,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/ed/539768cf28c661b5b068d66d96a2f155c4971a5d55684a514c1a0e0dec2f/python_dotenv-1.1.1-py3-none-any.whl", hash = 
"sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc", size = 20556, upload-time = "2025-06-24T04:21:06.073Z" }, ] -[[package]] -name = "python-jose" -version = "3.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "ecdsa" }, - { name = "pyasn1" }, - { name = "rsa" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c6/77/3a1c9039db7124eb039772b935f2244fbb73fc8ee65b9acf2375da1c07bf/python_jose-3.5.0.tar.gz", hash = "sha256:fb4eaa44dbeb1c26dcc69e4bd7ec54a1cb8dd64d3b4d81ef08d90ff453f2b01b", size = 92726, upload-time = "2025-05-28T17:31:54.288Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/c3/0bd11992072e6a1c513b16500a5d07f91a24017c5909b02c72c62d7ad024/python_jose-3.5.0-py2.py3-none-any.whl", hash = "sha256:abd1202f23d34dfad2c3d28cb8617b90acf34132c7afd60abd0b0b7d3cb55771", size = 34624, upload-time = "2025-05-28T17:31:52.802Z" }, -] - -[package.optional-dependencies] -cryptography = [ - { name = "cryptography" }, -] - [[package]] name = "python-multipart" version = "0.0.20" @@ -1934,6 +1899,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b8/d9/13bdde6521f322861fab67473cec4b1cc8999f3871953531cf61945fad92/sqlalchemy-2.0.43-py3-none-any.whl", hash = "sha256:1681c21dd2ccee222c2fe0bef671d1aef7c504087c9c4e800371cfcc8ac966fc", size = 1924759, upload-time = "2025-08-11T15:39:53.024Z" }, ] +[package.optional-dependencies] +asyncio = [ + { name = "greenlet" }, +] + [[package]] name = "sse-starlette" version = "3.0.2" @@ -1948,15 +1918,15 @@ wheels = [ [[package]] name = "starlette" -version = "0.48.0" +version = "0.50.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a7/a5/d6f429d43394057b67a6b5bbe6eae2f77a6bf7459d961fdb224bf206eee6/starlette-0.48.0.tar.gz", hash = "sha256:7e8cee469a8ab2352911528110ce9088fdc6a37d9876926e73da7ce4aa4c7a46", size 
= 2652949, upload-time = "2025-09-13T08:41:05.699Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = "2025-11-01T15:25:27.516Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/be/72/2db2f49247d0a18b4f1bb9a5a39a0162869acf235f3a96418363947b3d46/starlette-0.48.0-py3-none-any.whl", hash = "sha256:0764ca97b097582558ecb498132ed0c7d942f233f365b86ba37770e026510659", size = 73736, upload-time = "2025-09-13T08:41:03.869Z" }, + { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" }, ] [[package]] From ad5997453c5b9d983628861158c5d2e935a5e843 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 6 Jan 2026 14:24:17 -0500 Subject: [PATCH 02/10] update readme with 0.3 info Signed-off-by: Jordan Dubrick --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 95c7deb..5f1e481 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,11 @@ This will fetch the necessary reference content and add it to your local project ### Configuring Question Validation -By default this Llama Stack has a Safety Shield for question validation enabled. You will need to set the following environment variables to ensure functionality: +> [!IMPORTANT] +> Currently question validation is removed from the default run.yaml file. +> This is due to the way Llama Stack Safety Shields are intended to be used in v0.3 and above. + +You will need to set the following environment variables to ensure functionality: - `VALIDATION_PROVIDER`: The provider you want to use for question validation. 
This should match what the provider value you are using under `inference`, such as `vllm`, `ollama`, `openai`. Defaults to `vllm` - `VALIDATION_MODEL_NAME`: The name of the LLM you want to use for question validation @@ -127,7 +131,7 @@ Or if using the host network: podman run -it -p 8321:8321 --env-file ./env/values.env --network host -v ./embeddings_model:/app-root/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/app-root/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:latest ``` -Latest Lightspeed Core developer image: +Latest Lightspeed Core Developer Image: ``` quay.io/lightspeed-core/lightspeed-stack:dev-latest ``` From f3114dfe559bf21ff8f78870401413ceadb12287 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 6 Jan 2026 14:24:44 -0500 Subject: [PATCH 03/10] update lightspeed provider tag (could become redundant) Signed-off-by: Jordan Dubrick --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 33600cf..e8e8c16 100644 --- a/Makefile +++ b/Makefile @@ -36,9 +36,8 @@ help: ## Show this help screen awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-33s\033[0m %s\n", $$1, $$2}' @echo '' -# TODO (Jdubrick): Replace reference to lightspeed-core/lightspeed-providers once bug is addressed. 
update-question-validation: - curl -o ./config/providers.d/inline/safety/lightspeed_question_validity.yaml https://raw.githubusercontent.com/Jdubrick/lightspeed-providers/refs/heads/devai/resources/external_providers/inline/safety/lightspeed_question_validity.yaml + curl -o ./config/providers.d/inline/safety/lightspeed_question_validity.yaml https://raw.githubusercontent.com/lightspeed-core/lightspeed-providers/refs/tags/0.1.17/resources/external_providers/inline/safety/lightspeed_question_validity.yaml $(VENV)/bin/activate: ./scripts/python-scripts/requirements.txt python3 -m venv $(VENV) From ecf88c62721c23bfec689fdc616189328112212a Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 13 Jan 2026 12:02:26 -0500 Subject: [PATCH 04/10] update llama stack to 0.3.5 Signed-off-by: Jordan Dubrick --- pyproject.toml | 5 +++-- uv.lock | 27 +++++++++++++++++++-------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7c167a4..0bcab6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,8 +7,8 @@ dependencies = [ "fastapi>=0.115.6", "uvicorn>=0.34.3", "kubernetes>=30.1.0", - "llama-stack==0.3.4", - "llama-stack-client==0.3.4", + "llama-stack==0.3.5", + "llama-stack-client==0.3.5", "ollama>=0.2.0", "openai>=1.100.0", "rich>=14.0.0", @@ -36,6 +36,7 @@ dependencies = [ "sentence-transformers>=5.0.0", "pydantic>=2.10.6", "httpx", + "chardet", ] requires-python = "==3.12.*" readme = "README.md" diff --git a/uv.lock b/uv.lock index 4a2213d..326006a 100644 --- a/uv.lock +++ b/uv.lock @@ -252,6 +252,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" }, ] +[[package]] +name = "chardet" +version = "5.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618, upload-time = "2023-08-01T19:23:02.662Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385, upload-time = "2023-08-01T19:23:00.661Z" }, +] + [[package]] name = "charset-normalizer" version = "3.4.3" @@ -786,7 +795,7 @@ wheels = [ [[package]] name = "llama-stack" -version = "0.3.4" +version = "0.3.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -815,14 +824,14 @@ dependencies = [ { name = "tiktoken" }, { name = "uvicorn" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8f/c5/ade666e8ce894066c0358988e831b31c81840e7b285aa8b5f70236e33681/llama_stack-0.3.4.tar.gz", hash = "sha256:bdb489e4341559465d604c9eba554460ab0d17c5dc005ee2d40aa892b94e2e9b", size = 3322494, upload-time = "2025-12-03T19:00:18.397Z" } +sdist = { url = "https://files.pythonhosted.org/packages/af/68/967f95e5fe3a650b9bb6a18c4beeb39e734695d92f1ab1525c5b9bfadb1b/llama_stack-0.3.5.tar.gz", hash = "sha256:4a0ce8014b17d14a06858251736f1170f12580fafc519daf75ee1df6c4fbf64b", size = 3320526, upload-time = "2025-12-15T14:34:32.96Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/49/14/c98e5b564b425e4fc7aabf33f4bf9f40c43057424a555f023bcd8e334874/llama_stack-0.3.4-py3-none-any.whl", hash = "sha256:3e302db1efb2ed6c974526b8c6b04b9e54891f3959d0d83c004f77e1c21f6147", size = 3637817, upload-time = "2025-12-03T19:00:16.581Z" }, + { url = "https://files.pythonhosted.org/packages/24/70/fb1896f07fc38a94b4c0bfb5999872d1514c6b3259fe77358cadef77a3db/llama_stack-0.3.5-py3-none-any.whl", hash = 
"sha256:93097409c65108e429fc3dda2f246ef4e8d0b07314a32865e941680e537ec366", size = 3636815, upload-time = "2025-12-15T14:34:31.354Z" }, ] [[package]] name = "llama-stack-client" -version = "0.3.4" +version = "0.3.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -841,9 +850,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6a/10/9c198c62e720c647a01506f40ba4e058a5b2a23c947fab1827eb096a94f2/llama_stack_client-0.3.4.tar.gz", hash = "sha256:6afbd10b152911a044e8d038e58981425ce0a34510da3e31cdd3103516e27688", size = 335668, upload-time = "2025-12-03T18:59:25.48Z" } +sdist = { url = "https://files.pythonhosted.org/packages/34/ff/b4bb891249379849e6e273a6254998c7e08562613ca4020817af2da9498e/llama_stack_client-0.3.5.tar.gz", hash = "sha256:2d954429347e920038709ae3e026c06f336ce570bd41245fc4e1e54c78879485", size = 335659, upload-time = "2025-12-15T14:10:16.444Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/b9/bcc815cee68ef87635edf72f9454dd35cef8492d2670f5a6b229b5913f0b/llama_stack_client-0.3.4-py3-none-any.whl", hash = "sha256:949c0a6c9a1c925a2b0d930d85b6485bb8d264ba68d02f36aca3c2539cb7b893", size = 425244, upload-time = "2025-12-03T18:59:24.293Z" }, + { url = "https://files.pythonhosted.org/packages/4d/10/84a4f0ef1cc13f44a692e55bed6a55792671e5320c95a8fd581e02848d61/llama_stack_client-0.3.5-py3-none-any.whl", hash = "sha256:b98acdc660d60839da8b71d5ae59531ba7f059e3e9656ca5ca20edca70f7d6a2", size = 425244, upload-time = "2025-12-15T14:10:14.726Z" }, ] [[package]] @@ -857,6 +866,7 @@ dependencies = [ { name = "autoevals" }, { name = "blobfile" }, { name = "cachetools" }, + { name = "chardet" }, { name = "datasets" }, { name = "faiss-cpu" }, { name = "fastapi" }, @@ -894,6 +904,7 @@ requires-dist = [ { name = "autoevals", specifier = ">=0.0.129" }, { name = "blobfile", specifier = ">=3.0.0" }, { name = "cachetools", specifier = ">=6.1.0" 
}, + { name = "chardet" }, { name = "datasets", specifier = ">=3.6.0" }, { name = "faiss-cpu", specifier = ">=1.11.0" }, { name = "fastapi", specifier = ">=0.115.6" }, @@ -901,8 +912,8 @@ requires-dist = [ { name = "httpx" }, { name = "kubernetes", specifier = ">=30.1.0" }, { name = "litellm", specifier = ">=1.72.1" }, - { name = "llama-stack", specifier = "==0.3.4" }, - { name = "llama-stack-client", specifier = "==0.3.4" }, + { name = "llama-stack", specifier = "==0.3.5" }, + { name = "llama-stack-client", specifier = "==0.3.5" }, { name = "mcp", specifier = ">=1.9.4" }, { name = "ollama", specifier = ">=0.2.0" }, { name = "openai", specifier = ">=1.100.0" }, From f20d1f0b681bb6c459b5a80dd306ed0fb5c9720e Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 13 Jan 2026 12:02:50 -0500 Subject: [PATCH 05/10] update run.yaml to llama v0.3.x standard Signed-off-by: Jordan Dubrick --- run.yaml | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/run.yaml b/run.yaml index c904f53..4041dc1 100644 --- a/run.yaml +++ b/run.yaml @@ -57,9 +57,19 @@ providers: config: project: ${env.VERTEX_AI_PROJECT:=} location: ${env.VERTEX_AI_LOCATION:=us-central1} + # - provider_id: safety-guard + # provider_type: remote::vllm + # config: + # url: ${env.SAFETY_VLLM_URL:=} + # api_token: ${env.SAFETY_VLLM_API_KEY:=token} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} + # safety: + # - provider_id: llama-guard + # provider_type: inline::llama-guard + # config: + # excluded_categories: [] tool_runtime: - provider_id: model-context-protocol provider_type: remote::model-context-protocol @@ -68,7 +78,7 @@ providers: provider_type: inline::rag-runtime config: {} vector_io: - - provider_id: rhdh-product-docs-1_8 + - provider_id: faiss provider_type: inline::faiss config: persistence: @@ -86,17 +96,17 @@ storage: backends: kv_default: type: kv_sqlite - db_path: 
.llama/distributions/ollama/kvstore.db + db_path: /tmp/kvstore.db sql_default: type: sql_sqlite - db_path: .llama/distributions/ollama/sql_store.db + db_path: /tmp/sql_store.db faiss_kv: type: kv_sqlite - db_path: /app-root/vector_db/rhdh_product_docs/1.8/faiss_store.db + db_path: /rag-content/vector_db/rhdh_product_docs/1.8/faiss_store.db stores: metadata: namespace: registry - backend: kv_default + backend: faiss_kv inference: table_name: inference_store backend: sql_default @@ -112,16 +122,24 @@ registered_resources: embedding_dimension: 768 model_type: embedding provider_id: sentence-transformers - provider_model_id: '/app-root/embeddings_model' + provider_model_id: /rag-content/embeddings_model + # - model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} + # provider_id: safety-guard + # provider_model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} + # model_type: llm + # metadata: {} + # shields: + # - shield_id: llama-guard-shield + # provider_id: llama-guard + # provider_shield_id: safety-guard/${env.SAFETY_MODEL:=llama-guard3:8b} tool_groups: - provider_id: rag-runtime toolgroup_id: builtin::rag - vector_stores: - - vector_store_id: rhdh-product-docs-1_8 - embedding_dimension: 768 + vector_dbs: + - vector_db_id: rhdh-product-docs-1_8 embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: rhdh-product-docs-1_8 - provider_vector_store_id: rhdh-product-docs-1_8 + embedding_dimension: 768 + provider_id: faiss server: auth: host: From bbc5350be6f1c2685ef9ba3867592eff37b410c7 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 13 Jan 2026 12:04:18 -0500 Subject: [PATCH 06/10] update mount reference to use 'rag-content' Signed-off-by: Jordan Dubrick --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5f1e481..e0fa0de 100644 --- a/README.md +++ b/README.md @@ -123,12 +123,12 @@ You will need to set the following environment variables to ensure functionality ### Running Locally ``` -podman run -it 
-p 8321:8321 --env-file ./env/values.env -v ./embeddings_model:/app-root/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/app-root/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:latest +podman run -it -p 8321:8321 --env-file ./env/values.env -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:latest ``` Or if using the host network: ``` -podman run -it -p 8321:8321 --env-file ./env/values.env --network host -v ./embeddings_model:/app-root/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/app-root/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:latest +podman run -it -p 8321:8321 --env-file ./env/values.env --network host -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:latest ``` Latest Lightspeed Core Developer Image: From 1ccd7ae85672fb19bb68688d526ccb7fd207fd1b Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 13 Jan 2026 14:48:03 -0500 Subject: [PATCH 07/10] add llama guard Signed-off-by: Jordan Dubrick --- env/default-values.env | 8 +++ run-no-guard.yaml | 136 +++++++++++++++++++++++++++++++++++++++++ run.yaml | 38 ++++++------ 3 files changed, 163 insertions(+), 19 deletions(-) create mode 100644 run-no-guard.yaml diff --git a/env/default-values.env b/env/default-values.env index 970f464..5d1d2e3 100644 --- a/env/default-values.env +++ b/env/default-values.env @@ -35,5 +35,13 @@ OLLAMA_URL= VALIDATION_PROVIDER= VALIDATION_MODEL_NAME= +# Llama Guard Settings +## Defaults to llama-guard3:8b if not set +SAFETY_MODEL= +## Defaults to http://host.docker.internal:11434/v1 if not set +SAFETY_URL= +## Only required for non-local environments with an API key +SAFETY_API_KEY= + # Other LLAMA_STACK_LOGGING= \ No newline at end of file diff --git a/run-no-guard.yaml b/run-no-guard.yaml new file mode 
100644 index 0000000..92859e3 --- /dev/null +++ b/run-no-guard.yaml @@ -0,0 +1,136 @@ +# +# +# Copyright Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +version: 2 +image_name: redhat-ai-dev-llama-stack +apis: + - agents + - inference + - safety + - tool_runtime + - vector_io + - files +container_image: +external_providers_dir: +providers: + agents: + - config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference + inference: + - provider_id: ${env.ENABLE_VLLM:+vllm} + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=} + api_token: ${env.VLLM_API_KEY:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.ENABLE_OLLAMA:+ollama} + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + - provider_id: ${env.ENABLE_OPENAI:+openai} + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + - provider_id: ${env.ENABLE_VERTEX_AI:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: safety-guard + provider_type: remote::vllm + config: + url: ${env.SAFETY_URL:=http://host.docker.internal:11434/v1} + api_token: ${env.SAFETY_API_KEY:=token} + - provider_id: sentence-transformers + provider_type: 
inline::sentence-transformers + config: {} + tool_runtime: + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: faiss_kv + files: + - provider_id: localfs + provider_type: inline::localfs + config: + storage_dir: /tmp/llama-stack-files + metadata_store: + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: /tmp/kvstore.db + sql_default: + type: sql_sqlite + db_path: /tmp/sql_store.db + faiss_kv: + type: kv_sqlite + db_path: /rag-content/vector_db/rhdh_product_docs/1.8/faiss_store.db + stores: + metadata: + namespace: registry + backend: faiss_kv + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: + - model_id: sentence-transformers/all-mpnet-base-v2 + metadata: + embedding_dimension: 768 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: /rag-content/embeddings_model + tool_groups: + - provider_id: rag-runtime + toolgroup_id: builtin::rag + vector_dbs: + - vector_db_id: rhdh-product-docs-1_8 + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + provider_id: faiss +server: + auth: + host: + port: 8321 + quota: + tls_cafile: + tls_certfile: + tls_keyfile: diff --git a/run.yaml b/run.yaml index 4041dc1..d507144 100644 --- a/run.yaml +++ b/run.yaml @@ -57,19 +57,19 @@ providers: config: project: ${env.VERTEX_AI_PROJECT:=} location: ${env.VERTEX_AI_LOCATION:=us-central1} - # - provider_id: safety-guard - # provider_type: remote::vllm - # config: - # url: ${env.SAFETY_VLLM_URL:=} - # api_token: 
${env.SAFETY_VLLM_API_KEY:=token} + - provider_id: safety-guard + provider_type: remote::vllm + config: + url: ${env.SAFETY_URL:=http://host.docker.internal:11434/v1} + api_token: ${env.SAFETY_API_KEY:=token} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} - # safety: - # - provider_id: llama-guard - # provider_type: inline::llama-guard - # config: - # excluded_categories: [] + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] tool_runtime: - provider_id: model-context-protocol provider_type: remote::model-context-protocol @@ -123,15 +123,15 @@ registered_resources: model_type: embedding provider_id: sentence-transformers provider_model_id: /rag-content/embeddings_model - # - model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} - # provider_id: safety-guard - # provider_model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} - # model_type: llm - # metadata: {} - # shields: - # - shield_id: llama-guard-shield - # provider_id: llama-guard - # provider_shield_id: safety-guard/${env.SAFETY_MODEL:=llama-guard3:8b} + - model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} + provider_id: safety-guard + provider_model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} + model_type: llm + metadata: {} + shields: + - shield_id: llama-guard-shield + provider_id: llama-guard + provider_shield_id: safety-guard/${env.SAFETY_MODEL:=llama-guard3:8b} tool_groups: - provider_id: rag-runtime toolgroup_id: builtin::rag From 1b59fdbcd11d9a3817ed4aedcdda449766cadd8b Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 13 Jan 2026 14:48:48 -0500 Subject: [PATCH 08/10] overhaul readme Signed-off-by: Jordan Dubrick --- README.md | 81 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index e0fa0de..2de78d4 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,48 @@ # Redhat-AI-Dev Llama Stack [![Apache2.0 
License](https://img.shields.io/badge/license-Apache2.0-brightgreen.svg)](LICENSE) +[![Llama Stack Version](https://img.shields.io/badge/llama_stack-v0.3.5-blue)](https://llamastack.github.io/docs/v0.3.5) +[![Python Version](https://img.shields.io/badge/python-3.12-blue)](https://www.python.org/downloads/release/python-3120/) - [Image Availability](#image-availability) + - [Latest Stable Release](#latest-stable-release) + - [Latest Developer Release](#latest-developer-release) - [Usage](#usage) - [Available Inferences](#available-inferences) - [vLLM](#vllm) - [Ollama](#ollama) - [OpenAI](#openai) + - [Vertex AI (Gemini)](#vertex-ai-gemini) - [Configuring RAG](#configuring-rag) - - [Configuring Question Validation](#configuring-question-validation) - - [Running Locally](#running-locally) - - [Running on a Cluster](#running-on-a-cluster) + - [Configuring Safety Guards](#configuring-safety-guards) +- [Running Locally](#running-locally) +- [Running on a Cluster](#running-on-a-cluster) - [Makefile Commands](#makefile-commands) - [Contributing](#contributing) + - [Local Development Requirements](#local-development-requirements) + - [Updating YAML Files](#updating-yaml-files) - [Troubleshooting](#troubleshooting) -## Image Availability +# Image Availability -### Latest Stable Release +## Latest Stable Release ``` quay.io/redhat-ai-dev/llama-stack:0.1.1 ``` -### Latest Developer Release +## Latest Developer Release ``` quay.io/redhat-ai-dev/llama-stack:latest ``` -## Usage +# Usage > [!IMPORTANT] > The default Llama Stack configuration file that is baked into the built image contains tools. Ensure your provided inference server has tool calling **enabled**. -**Note:** You can enable `DEBUG` logging by setting: -``` -LLAMA_STACK_LOGGING=all=DEBUG -``` - -### Available Inferences +## Available Inferences Each inference has its own set of environment variables. You can include all of these variables in a `.env` file and pass that instead to your container. 
See [default-values.env](./env/default-values.env) for a template. It is recommended you copy that file to `values.env` to avoid committing it to Git. @@ -51,7 +53,7 @@ Each inference has its own set of environment variables. You can include all of > > VLLM_API_KEY="token" ❌ -#### vLLM +### vLLM **Required** ```env @@ -65,7 +67,7 @@ VLLM_MAX_TOKENS= VLLM_TLS_VERIFY= ``` -#### Ollama +### Ollama **Required** ```env @@ -77,7 +79,7 @@ The value of `OLLAMA_URL` is the default `http://localhost:11434`, when you are The value of `OLLAMA_URL` is `http://host.containers.internal:11434` if you are running llama-stack inside a container i.e.; if you run llama-stack with the podman run command above, it needs to access the Ollama endpoint on your laptop not inside the container. **If you are using Linux**, ensure your firewall allows port 11434 to your podman container's network, some Linux distributions firewalls block all traffic by default. Alternatively you can use `OLLAMA_URL=http://localhost:11434` and set the `--network host` flag when you run your podman container. -#### OpenAI +### OpenAI **Required** ```env @@ -87,7 +89,7 @@ OPENAI_API_KEY= To get your API Key, go to [platform.openai.com](https://platform.openai.com/settings/organization/api-keys). -#### Vertex AI (Gemini) +### Vertex AI (Gemini) **Required** ```env @@ -99,7 +101,7 @@ GOOGLE_APPLICATION_CREDENTIALS= For information about these variables see: https://llamastack.github.io/v0.2.18/providers/inference/remote_vertexai.html. -### Configuring RAG +## Configuring RAG The `run.yaml` file that is included in the container image has a RAG tool enabled. In order for this tool to have the necessary reference content, you need to run: @@ -109,18 +111,27 @@ make get-rag This will fetch the necessary reference content and add it to your local project directory. 
-### Configuring Question Validation +## Configuring Safety Guards > [!IMPORTANT] -> Currently question validation is removed from the default run.yaml file. -> This is due to the way Llama Stack Safety Shields are intended to be used in v0.3 and above. +> If you want to omit the safety guards for development purposes, you can use [run-no-guard.yaml](./run-no-guard.yaml) instead. -You will need to set the following environment variables to ensure functionality: +In the main [run.yaml](./run.yaml) file, Llama Guard is enabled by default. In order to avoid issues during startup you will need to ensure you have an instance of Llama Guard running. + +You can do so by running the following to start an Ollama container with Llama Guard: -- `VALIDATION_PROVIDER`: The provider you want to use for question validation. This should match what the provider value you are using under `inference`, such as `vllm`, `ollama`, `openai`. Defaults to `vllm` -- `VALIDATION_MODEL_NAME`: The name of the LLM you want to use for question validation +```sh +podman run -d --name ollama -p 11434:11434 docker.io/ollama/ollama:latest +podman exec ollama ollama pull llama-guard3:8b +``` +**Note:** Ensure the Ollama container is started and the model is ready before trying to query if deploying the containers manually. -### Running Locally +You will need to set the following environment variables to ensure functionality: +- `SAFETY_MODEL`: The name of the Llama Guard model being used. Defaults to `llama-guard3:8b` +- `SAFETY_URL`: The URL where the container is available. Defaults to `http://host.docker.internal:11434/v1` +- `SAFETY_API_KEY`: The API key required for access to the safety model. Not required for local. 
+ +# Running Locally ``` podman run -it -p 8321:8321 --env-file ./env/values.env -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:latest @@ -143,7 +154,7 @@ podman run -it -p 8080:8080 -v ./lightspeed-stack.yaml:/app-root/lightspeed-stac **Note:** If you have built your own version of Lightspeed Core you can replace the image referenced with your own build. Additionally, you can use the Llama Stack container along with the `lightspeed-stack.yaml` file to run Lightspeed Core locally with `uv` from their [repository](https://github.com/lightspeed-core/lightspeed-stack). -### Running on a Cluster +# Running on a Cluster To deploy on a cluster see [DEPLOYMENT.md](./docs/DEPLOYMENT.md). @@ -153,17 +164,17 @@ To deploy on a cluster see [DEPLOYMENT.md](./docs/DEPLOYMENT.md). | ---- | ----| | **get-rag** | Gets the RAG data and the embeddings model from the rag-content image registry to your local project directory | | **update-question-validation** | Updates the question validation content in `providers.d` | -| **validate-prompt-templates** | Validates prompt values in run.yaml. **Requires Python >= 3.11** | -| **update-prompt-templates** | Updates the prompt values in run.yaml. **Requires Python >= 3.11** | +| **validate-prompt-templates** | Validates prompt values in run.yaml. | +| **update-prompt-templates** | Updates the prompt values in run.yaml. | -## Contributing +# Contributing -### Local Development Requirements +## Local Development Requirements - [Yarn](https://yarnpkg.com/) - [Node.js >= v22](https://nodejs.org/en/about/previous-releases) -### Updating YAML Files +## Updating YAML Files This repository implements Prettier to handle all YAML formatting. 
```sh @@ -173,7 +184,13 @@ yarn verify # Runs Prettier to check the YAML files in this repository If you wish to try new changes with Llama Stack, you can build your own image using the `Containerfile` in the root of this repository. -## Troubleshooting +# Troubleshooting + +>[!NOTE] +> You can enable `DEBUG` logging by setting: +>``` +>LLAMA_STACK_LOGGING=all=DEBUG +>``` If you experience an error related to permissions for the `vector_db`, such as: From 82d0091915dc2b86df52dd7eb8edc6c94f67b1e7 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Wed, 14 Jan 2026 11:59:30 -0500 Subject: [PATCH 09/10] update no guard run Signed-off-by: Jordan Dubrick --- run-no-guard.yaml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/run-no-guard.yaml b/run-no-guard.yaml index 92859e3..a8247cd 100644 --- a/run-no-guard.yaml +++ b/run-no-guard.yaml @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. version: 2 -image_name: redhat-ai-dev-llama-stack +image_name: redhat-ai-dev-llama-stack-no-guard apis: - agents - inference @@ -57,11 +57,6 @@ providers: config: project: ${env.VERTEX_AI_PROJECT:=} location: ${env.VERTEX_AI_LOCATION:=us-central1} - - provider_id: safety-guard - provider_type: remote::vllm - config: - url: ${env.SAFETY_URL:=http://host.docker.internal:11434/v1} - api_token: ${env.SAFETY_API_KEY:=token} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} From 580976b712b7f475414fec6ae0b56d89894d23a2 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Wed, 14 Jan 2026 11:59:41 -0500 Subject: [PATCH 10/10] use experimental 1.8 rag build Signed-off-by: Jordan Dubrick --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e8e8c16..4f17f56 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -RAG_CONTENT_IMAGE ?= quay.io/redhat-ai-dev/rag-content:release-1.8-lcs +RAG_CONTENT_IMAGE ?= quay.io/redhat-ai-dev/rag-content:experimental-release-1.8-lcs VENV := $(CURDIR)/scripts/python-scripts/.venv PYTHON := $(VENV)/bin/python3 PIP := $(VENV)/bin/pip3