From ea5a27c77d290ec653cec883e3d4716fc8ef6f6e Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 6 Jan 2026 14:16:45 -0500 Subject: [PATCH 01/10] move to llama stack 0.3.4 and remove safety shield Signed-off-by: Jordan Dubrick --- pyproject.toml | 7 +- run.yaml | 242 +++++++++++++------------------------------------ uv.lock | 134 +++++++++++---------------- 3 files changed, 117 insertions(+), 266 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a411c67..7c167a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,10 +7,10 @@ dependencies = [ "fastapi>=0.115.6", "uvicorn>=0.34.3", "kubernetes>=30.1.0", - "llama-stack==0.2.18", - "llama-stack-client==0.2.18", + "llama-stack==0.3.4", + "llama-stack-client==0.3.4", "ollama>=0.2.0", - "openai==1.99.9", + "openai>=1.100.0", "rich>=14.0.0", "cachetools>=6.1.0", "prometheus-client>=0.22.1", @@ -34,7 +34,6 @@ dependencies = [ "greenlet", "torch", "sentence-transformers>=5.0.0", - "lightspeed_stack_providers @ git+https://github.com/Jdubrick/lightspeed-providers.git@devai", "pydantic>=2.10.6", "httpx", ] diff --git a/run.yaml b/run.yaml index 8e869d4..c904f53 100644 --- a/run.yaml +++ b/run.yaml @@ -13,72 +13,27 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-version: '2' +version: 2 image_name: redhat-ai-dev-llama-stack - apis: - agents - - datasetio - - eval - inference - - post_training - safety - - scoring - - telemetry - tool_runtime - vector_io -benchmarks: [] + - files container_image: -datasets: [] -external_providers_dir: '/app-root/config/providers.d' -inference_store: - db_path: .llama/distributions/ollama/inference_store.db - type: sqlite -logging: -metadata_store: - db_path: .llama/distributions/ollama/registry.db - namespace: - type: sqlite -models: - - model_id: sentence-transformers/all-mpnet-base-v2 - metadata: - embedding_dimension: 768 - model_type: embedding - provider_id: sentence-transformers - provider_model_id: '/app-root/embeddings_model' +external_providers_dir: providers: agents: - config: - persistence_store: - db_path: .llama/distributions/ollama/agents_store.db - namespace: - type: sqlite - responses_store: - db_path: .llama/distributions/ollama/responses_store.db - type: sqlite - provider_id: meta-reference - provider_type: inline::meta-reference - datasetio: - - config: - kvstore: - db_path: .llama/distributions/ollama/huggingface_datasetio.db - namespace: - type: sqlite - provider_id: huggingface - provider_type: remote::huggingface - - config: - kvstore: - db_path: .llama/distributions/ollama/localfs_datasetio.db - namespace: - type: sqlite - provider_id: localfs - provider_type: inline::localfs - eval: - - config: - kvstore: - db_path: .llama/distributions/ollama/meta_reference_eval.db - namespace: - type: sqlite + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default provider_id: meta-reference provider_type: inline::meta-reference inference: @@ -105,106 +60,6 @@ providers: - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} - post_training: - - config: - checkpoint_format: huggingface - device: cpu - distributed_backend: - dpo_output_dir: '.' 
- provider_id: huggingface - provider_type: inline::huggingface - safety: - - config: - excluded_categories: [] - provider_id: llama-guard - provider_type: inline::llama-guard - - provider_id: lightspeed_question_validity - provider_type: inline::lightspeed_question_validity - config: - model_id: ${env.VALIDATION_PROVIDER:=vllm}/${env.VALIDATION_MODEL_NAME} - model_prompt: |- - Instructions: - - You area question classification tool. You are an expert in the following categories: - - Backstage - - Red Hat Developer Hub (RHDH) - - Developer Lightspeed - - Lightspeed - - Artificial Intelligence (AI) Models - - Large Language Models (LLMs) - - Kubernetes - - Openshift - - CI/CD - - GitOps - - Pipelines - - Developer Portals - - Deployments - - Software Catalogs - - Software Templates - - Tech Docs - - Your job is to determine if a user's question is related to the categories you are an expert in. If the question is related to those categories, \ - or any features that may be related to those categories, you will answer with ${allowed}. - - If a question is not related to your expert categories, answer with ${rejected}. - - You do not need to explain your answer. - - Below are some example questions: - Example Question: - Why is the sky blue? - Example Response: - ${rejected} - - Example Question: - Can you help configure my cluster to automatically scale? - Example Response: - ${allowed} - - Example Question: - How do I create import an existing software template in Backstage? - Example Response: - ${allowed} - - Example Question: - How do I accomplish a task in RHDH? - Example Response: - ${allowed} - - Example Question: - How do I explore a component in RHDH catalog? - Example Response: - ${allowed} - - Example Question: - How can I integrate GitOps into my pipeline? 
- Example Response: - ${allowed} - - Question: - ${message} - Response: - invalid_question_response: |- - Hi, I'm the Red Hat Developer Hub Lightspeed assistant, I can help you with questions about Red Hat Developer Hub or Backstage. - Please ensure your question is about these topics, and feel free to ask again! - scoring: - - config: {} - provider_id: basic - provider_type: inline::basic - - config: {} - provider_id: llm-as-judge - provider_type: inline::llm-as-judge - - config: - openai_api_key: '********' - provider_id: braintrust - provider_type: inline::braintrust - telemetry: - - config: - service_name: 'lightspeed-stack-telemetry' - sinks: sqlite - sqlite_db_path: .llama/distributions/ollama/trace_store.db - provider_id: meta-reference - provider_type: inline::meta-reference tool_runtime: - provider_id: model-context-protocol provider_type: remote::model-context-protocol @@ -213,21 +68,60 @@ providers: provider_type: inline::rag-runtime config: {} vector_io: - - config: - kvstore: - db_path: .llama/distributions/ollama/faiss_store.db - namespace: - type: sqlite - provider_id: faiss - provider_type: inline::faiss - - provider_id: rhdh-docs + - provider_id: rhdh-product-docs-1_8 provider_type: inline::faiss config: - kvstore: - type: sqlite - namespace: - db_path: /app-root/vector_db/rhdh_product_docs/1.8/faiss_store.db -scoring_fns: [] + persistence: + namespace: vector_io::faiss + backend: faiss_kv + files: + - provider_id: localfs + provider_type: inline::localfs + config: + storage_dir: /tmp/llama-stack-files + metadata_store: + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: .llama/distributions/ollama/kvstore.db + sql_default: + type: sql_sqlite + db_path: .llama/distributions/ollama/sql_store.db + faiss_kv: + type: kv_sqlite + db_path: /app-root/vector_db/rhdh_product_docs/1.8/faiss_store.db + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: 
inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: + - model_id: sentence-transformers/all-mpnet-base-v2 + metadata: + embedding_dimension: 768 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: '/app-root/embeddings_model' + tool_groups: + - provider_id: rag-runtime + toolgroup_id: builtin::rag + vector_stores: + - vector_store_id: rhdh-product-docs-1_8 + embedding_dimension: 768 + embedding_model: sentence-transformers/all-mpnet-base-v2 + provider_id: rhdh-product-docs-1_8 + provider_vector_store_id: rhdh-product-docs-1_8 server: auth: host: @@ -236,15 +130,3 @@ server: tls_cafile: tls_certfile: tls_keyfile: -shields: - - shield_id: lightspeed_question_validity-shield - provider_id: lightspeed_question_validity -tool_groups: - - provider_id: rag-runtime - toolgroup_id: builtin::rag - description: 'Only use for questions specifically about Red Hat Developer Hub (RHDH). Searches technical documentation for RHDH installation, discovery, configuration, release, upgrade, control access, integration, observability, and extending with plugins. Do not use for any other topic outside RHDH.' 
-vector_dbs: - - embedding_dimension: 768 - embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: rhdh-docs - vector_db_id: rhdh-product-docs-1_8 diff --git a/uv.lock b/uv.lock index b6413d1..4a2213d 100644 --- a/uv.lock +++ b/uv.lock @@ -112,6 +112,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f5/10/6c25ed6de94c49f88a91fa5018cb4c0f3625f31d5be9f771ebe5cc7cd506/aiosqlite-0.21.0-py3-none-any.whl", hash = "sha256:2549cf4057f95f53dcba16f2b64e8e2791d7e1adedb13197dd8ed77bb226d7d0", size = 15792, upload-time = "2025-02-03T07:30:13.6Z" }, ] +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -385,18 +394,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b0/0d/9feae160378a3553fa9a339b0e9c1a048e147a4127210e286ef18b730f03/durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286", size = 3922, upload-time = "2025-05-17T13:52:36.463Z" }, ] -[[package]] -name = "ecdsa" -version = "0.19.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "six" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c0/1f/924e3caae75f471eae4b26bd13b698f6af2c44279f67af317439c2f4c46a/ecdsa-0.19.1.tar.gz", hash = 
"sha256:478cba7b62555866fcb3bb3fe985e06decbdb68ef55713c4e5ab98c57d508e61", size = 201793, upload-time = "2025-03-13T11:52:43.25Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/a3/460c57f094a4a165c84a1341c373b0a4f5ec6ac244b998d5021aade89b77/ecdsa-0.19.1-py2.py3-none-any.whl", hash = "sha256:30638e27cf77b7e15c4c4cc1973720149e1033827cfd00661ca5c8cc0cdb24c3", size = 150607, upload-time = "2025-03-13T11:52:41.757Z" }, -] - [[package]] name = "faiss-cpu" version = "1.12.0" @@ -419,16 +416,17 @@ wheels = [ [[package]] name = "fastapi" -version = "0.117.1" +version = "0.124.4" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "annotated-doc" }, { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7e/7e/d9788300deaf416178f61fb3c2ceb16b7d0dc9f82a08fdb87a5e64ee3cc7/fastapi-0.117.1.tar.gz", hash = "sha256:fb2d42082d22b185f904ca0ecad2e195b851030bd6c5e4c032d1c981240c631a", size = 307155, upload-time = "2025-09-20T20:16:56.663Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/21/ade3ff6745a82ea8ad88552b4139d27941549e4f19125879f848ac8f3c3d/fastapi-0.124.4.tar.gz", hash = "sha256:0e9422e8d6b797515f33f500309f6e1c98ee4e85563ba0f2debb282df6343763", size = 378460, upload-time = "2025-12-12T15:00:43.891Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6d/45/d9d3e8eeefbe93be1c50060a9d9a9f366dba66f288bb518a9566a23a8631/fastapi-0.117.1-py3-none-any.whl", hash = "sha256:33c51a0d21cab2b9722d4e56dbb9316f3687155be6b276191790d8da03507552", size = 95959, upload-time = "2025-09-20T20:16:53.661Z" }, + { url = "https://files.pythonhosted.org/packages/3e/57/aa70121b5008f44031be645a61a7c4abc24e0e888ad3fc8fda916f4d188e/fastapi-0.124.4-py3-none-any.whl", hash = "sha256:6d1e703698443ccb89e50abe4893f3c84d9d6689c0cf1ca4fad6d3c15cf69f15", size = 113281, upload-time = "2025-12-12T15:00:42.44Z" }, ] [[package]] @@ -545,6 +543,8 @@ wheels = [ { 
url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, { url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" }, + { url = "https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846, upload-time = "2025-11-04T12:42:15.191Z" }, + { url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814, upload-time = "2025-11-04T12:42:17.175Z" }, { url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" }, ] @@ -760,16 +760,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/89/43/d9bebfc3db7dea6ec80df5cb2aad8d274dd18ec2edd6c4f21f32c237cbbb/kubernetes-33.1.0-py2.py3-none-any.whl", hash = "sha256:544de42b24b64287f7e0aa9513c93cb503f7f40eea39b20f66810011a86eabc5", size = 1941335, upload-time = "2025-06-09T21:57:56.327Z" }, ] -[[package]] -name = "lightspeed-stack-providers" -version = "0.1.15" -source = { git = "https://github.com/Jdubrick/lightspeed-providers.git?rev=devai#6ac0937c526ca285ef4f6bcc3775d05613ee26e7" } -dependencies = [ - { name = "httpx" }, - { name = "llama-stack" }, - { name = "pydantic" }, -] - [[package]] name = "litellm" version = "1.77.3" @@ -794,26 +784,9 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/b2/122602255b582fdcf630f8e44b5c9175391abe10be5e2f4db6a7d4173df1/litellm-1.77.3-py3-none-any.whl", hash = "sha256:f0c8c6bcfa2c9cd9e9fa0304f9a94894d252e7c74f118c37a8f2e4e525b2592b", size = 9118886, upload-time = "2025-09-21T00:59:06.178Z" }, ] -[[package]] -name = "llama-api-client" -version = "0.4.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anyio" }, - { name = "distro" }, - { name = "httpx" }, - { name = "pydantic" }, - { name = "sniffio" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f6/fe/937935f9f8a869efbda9b563f64cd8c3d433981f9dada40521ad8eadc9dd/llama_api_client-0.4.0.tar.gz", hash = "sha256:45d37086bd7004846d90746347449ea56cc20109c06cc8d908bbaf7f36fbb931", size = 120975, upload-time = "2025-09-17T21:04:00.558Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/43/ac/0152123825a2674e06fbf1094d8f19fc2b931e84b70007c4340cc0775ce5/llama_api_client-0.4.0-py3-none-any.whl", hash = "sha256:adafdc22faaeefe944d59ff9de65f205efc79acee52d80a3f18fd8a940597368", size = 87986, upload-time = "2025-09-17T21:03:59.686Z" }, -] - [[package]] name = "llama-stack" -version = "0.2.18" +version = "0.3.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { 
name = "aiohttp" }, @@ -823,10 +796,8 @@ dependencies = [ { name = "fire" }, { name = "h11" }, { name = "httpx" }, - { name = "huggingface-hub" }, { name = "jinja2" }, { name = "jsonschema" }, - { name = "llama-api-client" }, { name = "llama-stack-client" }, { name = "openai" }, { name = "opentelemetry-exporter-otlp-proto-http" }, @@ -834,23 +805,24 @@ dependencies = [ { name = "pillow" }, { name = "prompt-toolkit" }, { name = "pydantic" }, + { name = "pyjwt", extra = ["crypto"] }, { name = "python-dotenv" }, - { name = "python-jose", extra = ["cryptography"] }, { name = "python-multipart" }, { name = "rich" }, + { name = "sqlalchemy", extra = ["asyncio"] }, { name = "starlette" }, { name = "termcolor" }, { name = "tiktoken" }, { name = "uvicorn" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ed/e1/16c52856746e1412274c085a6e6a21829133f9db3d4932a009700594f4a2/llama_stack-0.2.18.tar.gz", hash = "sha256:0ea6e150140047568e45f98100027a79e20340711e5feff083d9b9dfe42d2605", size = 3321726, upload-time = "2025-08-19T22:12:17.257Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8f/c5/ade666e8ce894066c0358988e831b31c81840e7b285aa8b5f70236e33681/llama_stack-0.3.4.tar.gz", hash = "sha256:bdb489e4341559465d604c9eba554460ab0d17c5dc005ee2d40aa892b94e2e9b", size = 3322494, upload-time = "2025-12-03T19:00:18.397Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/65/72/c68c50be2d2808fe162c3f344f976bc29839f0cee7a6d951cc3805f8482d/llama_stack-0.2.18-py3-none-any.whl", hash = "sha256:3383fb4da1cc6e77a58ae425ef49ce470bca784ca85051dd6b2b70966f936bea", size = 3650850, upload-time = "2025-08-19T22:12:15.857Z" }, + { url = "https://files.pythonhosted.org/packages/49/14/c98e5b564b425e4fc7aabf33f4bf9f40c43057424a555f023bcd8e334874/llama_stack-0.3.4-py3-none-any.whl", hash = "sha256:3e302db1efb2ed6c974526b8c6b04b9e54891f3959d0d83c004f77e1c21f6147", size = 3637817, upload-time = "2025-12-03T19:00:16.581Z" }, ] [[package]] name = "llama-stack-client" 
-version = "0.2.18" +version = "0.3.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -869,14 +841,14 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/69/da/5e5a745495f8a2b8ef24fc4d01fe9031aa2277c36447cb22192ec8c8cc1e/llama_stack_client-0.2.18.tar.gz", hash = "sha256:860c885c9e549445178ac55cc9422e6e2a91215ac7aff5aaccfb42f3ce07e79e", size = 277284, upload-time = "2025-08-19T22:12:09.106Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6a/10/9c198c62e720c647a01506f40ba4e058a5b2a23c947fab1827eb096a94f2/llama_stack_client-0.3.4.tar.gz", hash = "sha256:6afbd10b152911a044e8d038e58981425ce0a34510da3e31cdd3103516e27688", size = 335668, upload-time = "2025-12-03T18:59:25.48Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/e4/e97f8fdd8a07aa1efc7f7e37b5657d84357b664bf70dd1885a437edc0699/llama_stack_client-0.2.18-py3-none-any.whl", hash = "sha256:90f827d5476f7fc15fd993f1863af6a6e72bd064646bf6a99435eb43a1327f70", size = 367586, upload-time = "2025-08-19T22:12:07.899Z" }, + { url = "https://files.pythonhosted.org/packages/ae/b9/bcc815cee68ef87635edf72f9454dd35cef8492d2670f5a6b229b5913f0b/llama_stack_client-0.3.4-py3-none-any.whl", hash = "sha256:949c0a6c9a1c925a2b0d930d85b6485bb8d264ba68d02f36aca3c2539cb7b893", size = 425244, upload-time = "2025-12-03T18:59:24.293Z" }, ] [[package]] name = "llama-stack-runner" -version = "0.1.0" +version = "0.1.1" source = { virtual = "." 
} dependencies = [ { name = "aiohttp" }, @@ -891,7 +863,6 @@ dependencies = [ { name = "greenlet" }, { name = "httpx" }, { name = "kubernetes" }, - { name = "lightspeed-stack-providers" }, { name = "litellm" }, { name = "llama-stack" }, { name = "llama-stack-client" }, @@ -929,13 +900,12 @@ requires-dist = [ { name = "greenlet" }, { name = "httpx" }, { name = "kubernetes", specifier = ">=30.1.0" }, - { name = "lightspeed-stack-providers", git = "https://github.com/Jdubrick/lightspeed-providers.git?rev=devai" }, { name = "litellm", specifier = ">=1.72.1" }, - { name = "llama-stack", specifier = "==0.2.18" }, - { name = "llama-stack-client", specifier = "==0.2.18" }, + { name = "llama-stack", specifier = "==0.3.4" }, + { name = "llama-stack-client", specifier = "==0.3.4" }, { name = "mcp", specifier = ">=1.9.4" }, { name = "ollama", specifier = ">=0.2.0" }, - { name = "openai", specifier = "==1.99.9" }, + { name = "openai", specifier = ">=1.100.0" }, { name = "opentelemetry-exporter-otlp", specifier = ">=1.34.0" }, { name = "opentelemetry-instrumentation", specifier = ">=0.55b0" }, { name = "opentelemetry-sdk", specifier = ">=1.34.0" }, @@ -1149,7 +1119,7 @@ wheels = [ [[package]] name = "openai" -version = "1.99.9" +version = "2.9.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1161,9 +1131,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8a/d2/ef89c6f3f36b13b06e271d3cc984ddd2f62508a0972c1cbcc8485a6644ff/openai-1.99.9.tar.gz", hash = "sha256:f2082d155b1ad22e83247c3de3958eb4255b20ccf4a1de2e6681b6957b554e92", size = 506992, upload-time = "2025-08-12T02:31:10.054Z" } +sdist = { url = "https://files.pythonhosted.org/packages/09/48/516290f38745cc1e72856f50e8afed4a7f9ac396a5a18f39e892ab89dfc2/openai-2.9.0.tar.gz", hash = "sha256:b52ec65727fc8f1eed2fbc86c8eac0998900c7ef63aa2eb5c24b69717c56fa5f", size = 608202, upload-time = 
"2025-12-04T18:15:09.01Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e8/fb/df274ca10698ee77b07bff952f302ea627cc12dac6b85289485dd77db6de/openai-1.99.9-py3-none-any.whl", hash = "sha256:9dbcdb425553bae1ac5d947147bebbd630d91bbfc7788394d4c4f3a35682ab3a", size = 786816, upload-time = "2025-08-12T02:31:08.34Z" }, + { url = "https://files.pythonhosted.org/packages/59/fd/ae2da789cd923dd033c99b8d544071a827c92046b150db01cfa5cea5b3fd/openai-2.9.0-py3-none-any.whl", hash = "sha256:0d168a490fbb45630ad508a6f3022013c155a68fd708069b6a1a01a5e8f0ffad", size = 1030836, upload-time = "2025-12-04T18:15:07.063Z" }, ] [[package]] @@ -1607,6 +1577,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pyjwt" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785, upload-time = "2024-11-28T03:43:29.933Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997, upload-time = "2024-11-28T03:43:27.893Z" }, +] + +[package.optional-dependencies] +crypto = [ + { name = "cryptography" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1628,25 +1612,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5f/ed/539768cf28c661b5b068d66d96a2f155c4971a5d55684a514c1a0e0dec2f/python_dotenv-1.1.1-py3-none-any.whl", hash = 
"sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc", size = 20556, upload-time = "2025-06-24T04:21:06.073Z" }, ] -[[package]] -name = "python-jose" -version = "3.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "ecdsa" }, - { name = "pyasn1" }, - { name = "rsa" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c6/77/3a1c9039db7124eb039772b935f2244fbb73fc8ee65b9acf2375da1c07bf/python_jose-3.5.0.tar.gz", hash = "sha256:fb4eaa44dbeb1c26dcc69e4bd7ec54a1cb8dd64d3b4d81ef08d90ff453f2b01b", size = 92726, upload-time = "2025-05-28T17:31:54.288Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/c3/0bd11992072e6a1c513b16500a5d07f91a24017c5909b02c72c62d7ad024/python_jose-3.5.0-py2.py3-none-any.whl", hash = "sha256:abd1202f23d34dfad2c3d28cb8617b90acf34132c7afd60abd0b0b7d3cb55771", size = 34624, upload-time = "2025-05-28T17:31:52.802Z" }, -] - -[package.optional-dependencies] -cryptography = [ - { name = "cryptography" }, -] - [[package]] name = "python-multipart" version = "0.0.20" @@ -1934,6 +1899,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b8/d9/13bdde6521f322861fab67473cec4b1cc8999f3871953531cf61945fad92/sqlalchemy-2.0.43-py3-none-any.whl", hash = "sha256:1681c21dd2ccee222c2fe0bef671d1aef7c504087c9c4e800371cfcc8ac966fc", size = 1924759, upload-time = "2025-08-11T15:39:53.024Z" }, ] +[package.optional-dependencies] +asyncio = [ + { name = "greenlet" }, +] + [[package]] name = "sse-starlette" version = "3.0.2" @@ -1948,15 +1918,15 @@ wheels = [ [[package]] name = "starlette" -version = "0.48.0" +version = "0.50.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a7/a5/d6f429d43394057b67a6b5bbe6eae2f77a6bf7459d961fdb224bf206eee6/starlette-0.48.0.tar.gz", hash = "sha256:7e8cee469a8ab2352911528110ce9088fdc6a37d9876926e73da7ce4aa4c7a46", size 
= 2652949, upload-time = "2025-09-13T08:41:05.699Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = "2025-11-01T15:25:27.516Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/be/72/2db2f49247d0a18b4f1bb9a5a39a0162869acf235f3a96418363947b3d46/starlette-0.48.0-py3-none-any.whl", hash = "sha256:0764ca97b097582558ecb498132ed0c7d942f233f365b86ba37770e026510659", size = 73736, upload-time = "2025-09-13T08:41:03.869Z" }, + { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" }, ] [[package]] From ad5997453c5b9d983628861158c5d2e935a5e843 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 6 Jan 2026 14:24:17 -0500 Subject: [PATCH 02/10] update readme with 0.3 info Signed-off-by: Jordan Dubrick --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 95c7deb..5f1e481 100644 --- a/README.md +++ b/README.md @@ -111,7 +111,11 @@ This will fetch the necessary reference content and add it to your local project ### Configuring Question Validation -By default this Llama Stack has a Safety Shield for question validation enabled. You will need to set the following environment variables to ensure functionality: +> [!IMPORTANT] +> Currently question validation is removed from the default run.yaml file. +> This is due to the way Llama Stack Safety Shields are intended to be used in v0.3 and above. + +You will need to set the following environment variables to ensure functionality: - `VALIDATION_PROVIDER`: The provider you want to use for question validation. 
This should match what the provider value you are using under `inference`, such as `vllm`, `ollama`, `openai`. Defaults to `vllm` - `VALIDATION_MODEL_NAME`: The name of the LLM you want to use for question validation @@ -127,7 +131,7 @@ Or if using the host network: podman run -it -p 8321:8321 --env-file ./env/values.env --network host -v ./embeddings_model:/app-root/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/app-root/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:latest ``` -Latest Lightspeed Core developer image: +Latest Lightspeed Core Developer Image: ``` quay.io/lightspeed-core/lightspeed-stack:dev-latest ``` From f3114dfe559bf21ff8f78870401413ceadb12287 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 6 Jan 2026 14:24:44 -0500 Subject: [PATCH 03/10] update lightspeed provider tag (could become redundant) Signed-off-by: Jordan Dubrick --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 33600cf..e8e8c16 100644 --- a/Makefile +++ b/Makefile @@ -36,9 +36,8 @@ help: ## Show this help screen awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-33s\033[0m %s\n", $$1, $$2}' @echo '' -# TODO (Jdubrick): Replace reference to lightspeed-core/lightspeed-providers once bug is addressed. 
update-question-validation: - curl -o ./config/providers.d/inline/safety/lightspeed_question_validity.yaml https://raw.githubusercontent.com/Jdubrick/lightspeed-providers/refs/heads/devai/resources/external_providers/inline/safety/lightspeed_question_validity.yaml + curl -o ./config/providers.d/inline/safety/lightspeed_question_validity.yaml https://raw.githubusercontent.com/lightspeed-core/lightspeed-providers/refs/tags/0.1.17/resources/external_providers/inline/safety/lightspeed_question_validity.yaml $(VENV)/bin/activate: ./scripts/python-scripts/requirements.txt python3 -m venv $(VENV) From ecf88c62721c23bfec689fdc616189328112212a Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 13 Jan 2026 12:02:26 -0500 Subject: [PATCH 04/10] update llama stack to 0.3.5 Signed-off-by: Jordan Dubrick --- pyproject.toml | 5 +++-- uv.lock | 27 +++++++++++++++++++-------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7c167a4..0bcab6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,8 +7,8 @@ dependencies = [ "fastapi>=0.115.6", "uvicorn>=0.34.3", "kubernetes>=30.1.0", - "llama-stack==0.3.4", - "llama-stack-client==0.3.4", + "llama-stack==0.3.5", + "llama-stack-client==0.3.5", "ollama>=0.2.0", "openai>=1.100.0", "rich>=14.0.0", @@ -36,6 +36,7 @@ dependencies = [ "sentence-transformers>=5.0.0", "pydantic>=2.10.6", "httpx", + "chardet", ] requires-python = "==3.12.*" readme = "README.md" diff --git a/uv.lock b/uv.lock index 4a2213d..326006a 100644 --- a/uv.lock +++ b/uv.lock @@ -252,6 +252,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" }, ] +[[package]] +name = "chardet" +version = "5.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618, upload-time = "2023-08-01T19:23:02.662Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385, upload-time = "2023-08-01T19:23:00.661Z" }, +] + [[package]] name = "charset-normalizer" version = "3.4.3" @@ -786,7 +795,7 @@ wheels = [ [[package]] name = "llama-stack" -version = "0.3.4" +version = "0.3.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -815,14 +824,14 @@ dependencies = [ { name = "tiktoken" }, { name = "uvicorn" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8f/c5/ade666e8ce894066c0358988e831b31c81840e7b285aa8b5f70236e33681/llama_stack-0.3.4.tar.gz", hash = "sha256:bdb489e4341559465d604c9eba554460ab0d17c5dc005ee2d40aa892b94e2e9b", size = 3322494, upload-time = "2025-12-03T19:00:18.397Z" } +sdist = { url = "https://files.pythonhosted.org/packages/af/68/967f95e5fe3a650b9bb6a18c4beeb39e734695d92f1ab1525c5b9bfadb1b/llama_stack-0.3.5.tar.gz", hash = "sha256:4a0ce8014b17d14a06858251736f1170f12580fafc519daf75ee1df6c4fbf64b", size = 3320526, upload-time = "2025-12-15T14:34:32.96Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/49/14/c98e5b564b425e4fc7aabf33f4bf9f40c43057424a555f023bcd8e334874/llama_stack-0.3.4-py3-none-any.whl", hash = "sha256:3e302db1efb2ed6c974526b8c6b04b9e54891f3959d0d83c004f77e1c21f6147", size = 3637817, upload-time = "2025-12-03T19:00:16.581Z" }, + { url = "https://files.pythonhosted.org/packages/24/70/fb1896f07fc38a94b4c0bfb5999872d1514c6b3259fe77358cadef77a3db/llama_stack-0.3.5-py3-none-any.whl", hash = 
"sha256:93097409c65108e429fc3dda2f246ef4e8d0b07314a32865e941680e537ec366", size = 3636815, upload-time = "2025-12-15T14:34:31.354Z" }, ] [[package]] name = "llama-stack-client" -version = "0.3.4" +version = "0.3.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -841,9 +850,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6a/10/9c198c62e720c647a01506f40ba4e058a5b2a23c947fab1827eb096a94f2/llama_stack_client-0.3.4.tar.gz", hash = "sha256:6afbd10b152911a044e8d038e58981425ce0a34510da3e31cdd3103516e27688", size = 335668, upload-time = "2025-12-03T18:59:25.48Z" } +sdist = { url = "https://files.pythonhosted.org/packages/34/ff/b4bb891249379849e6e273a6254998c7e08562613ca4020817af2da9498e/llama_stack_client-0.3.5.tar.gz", hash = "sha256:2d954429347e920038709ae3e026c06f336ce570bd41245fc4e1e54c78879485", size = 335659, upload-time = "2025-12-15T14:10:16.444Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ae/b9/bcc815cee68ef87635edf72f9454dd35cef8492d2670f5a6b229b5913f0b/llama_stack_client-0.3.4-py3-none-any.whl", hash = "sha256:949c0a6c9a1c925a2b0d930d85b6485bb8d264ba68d02f36aca3c2539cb7b893", size = 425244, upload-time = "2025-12-03T18:59:24.293Z" }, + { url = "https://files.pythonhosted.org/packages/4d/10/84a4f0ef1cc13f44a692e55bed6a55792671e5320c95a8fd581e02848d61/llama_stack_client-0.3.5-py3-none-any.whl", hash = "sha256:b98acdc660d60839da8b71d5ae59531ba7f059e3e9656ca5ca20edca70f7d6a2", size = 425244, upload-time = "2025-12-15T14:10:14.726Z" }, ] [[package]] @@ -857,6 +866,7 @@ dependencies = [ { name = "autoevals" }, { name = "blobfile" }, { name = "cachetools" }, + { name = "chardet" }, { name = "datasets" }, { name = "faiss-cpu" }, { name = "fastapi" }, @@ -894,6 +904,7 @@ requires-dist = [ { name = "autoevals", specifier = ">=0.0.129" }, { name = "blobfile", specifier = ">=3.0.0" }, { name = "cachetools", specifier = ">=6.1.0" 
}, + { name = "chardet" }, { name = "datasets", specifier = ">=3.6.0" }, { name = "faiss-cpu", specifier = ">=1.11.0" }, { name = "fastapi", specifier = ">=0.115.6" }, @@ -901,8 +912,8 @@ requires-dist = [ { name = "httpx" }, { name = "kubernetes", specifier = ">=30.1.0" }, { name = "litellm", specifier = ">=1.72.1" }, - { name = "llama-stack", specifier = "==0.3.4" }, - { name = "llama-stack-client", specifier = "==0.3.4" }, + { name = "llama-stack", specifier = "==0.3.5" }, + { name = "llama-stack-client", specifier = "==0.3.5" }, { name = "mcp", specifier = ">=1.9.4" }, { name = "ollama", specifier = ">=0.2.0" }, { name = "openai", specifier = ">=1.100.0" }, From f20d1f0b681bb6c459b5a80dd306ed0fb5c9720e Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 13 Jan 2026 12:02:50 -0500 Subject: [PATCH 05/10] update run.yaml to llama v0.3.x standard Signed-off-by: Jordan Dubrick --- run.yaml | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/run.yaml b/run.yaml index c904f53..4041dc1 100644 --- a/run.yaml +++ b/run.yaml @@ -57,9 +57,19 @@ providers: config: project: ${env.VERTEX_AI_PROJECT:=} location: ${env.VERTEX_AI_LOCATION:=us-central1} + # - provider_id: safety-guard + # provider_type: remote::vllm + # config: + # url: ${env.SAFETY_VLLM_URL:=} + # api_token: ${env.SAFETY_VLLM_API_KEY:=token} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} + # safety: + # - provider_id: llama-guard + # provider_type: inline::llama-guard + # config: + # excluded_categories: [] tool_runtime: - provider_id: model-context-protocol provider_type: remote::model-context-protocol @@ -68,7 +78,7 @@ providers: provider_type: inline::rag-runtime config: {} vector_io: - - provider_id: rhdh-product-docs-1_8 + - provider_id: faiss provider_type: inline::faiss config: persistence: @@ -86,17 +96,17 @@ storage: backends: kv_default: type: kv_sqlite - db_path: 
.llama/distributions/ollama/kvstore.db + db_path: /tmp/kvstore.db sql_default: type: sql_sqlite - db_path: .llama/distributions/ollama/sql_store.db + db_path: /tmp/sql_store.db faiss_kv: type: kv_sqlite - db_path: /app-root/vector_db/rhdh_product_docs/1.8/faiss_store.db + db_path: /rag-content/vector_db/rhdh_product_docs/1.8/faiss_store.db stores: metadata: namespace: registry - backend: kv_default + backend: faiss_kv inference: table_name: inference_store backend: sql_default @@ -112,16 +122,24 @@ registered_resources: embedding_dimension: 768 model_type: embedding provider_id: sentence-transformers - provider_model_id: '/app-root/embeddings_model' + provider_model_id: /rag-content/embeddings_model + # - model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} + # provider_id: safety-guard + # provider_model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} + # model_type: llm + # metadata: {} + # shields: + # - shield_id: llama-guard-shield + # provider_id: llama-guard + # provider_shield_id: safety-guard/${env.SAFETY_MODEL:=llama-guard3:8b} tool_groups: - provider_id: rag-runtime toolgroup_id: builtin::rag - vector_stores: - - vector_store_id: rhdh-product-docs-1_8 - embedding_dimension: 768 + vector_dbs: + - vector_db_id: rhdh-product-docs-1_8 embedding_model: sentence-transformers/all-mpnet-base-v2 - provider_id: rhdh-product-docs-1_8 - provider_vector_store_id: rhdh-product-docs-1_8 + embedding_dimension: 768 + provider_id: faiss server: auth: host: From bbc5350be6f1c2685ef9ba3867592eff37b410c7 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 13 Jan 2026 12:04:18 -0500 Subject: [PATCH 06/10] update mount reference to use 'rag-content' Signed-off-by: Jordan Dubrick --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5f1e481..e0fa0de 100644 --- a/README.md +++ b/README.md @@ -123,12 +123,12 @@ You will need to set the following environment variables to ensure functionality ### Running Locally ``` -podman run -it 
-p 8321:8321 --env-file ./env/values.env -v ./embeddings_model:/app-root/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/app-root/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:latest +podman run -it -p 8321:8321 --env-file ./env/values.env -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:latest ``` Or if using the host network: ``` -podman run -it -p 8321:8321 --env-file ./env/values.env --network host -v ./embeddings_model:/app-root/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/app-root/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:latest +podman run -it -p 8321:8321 --env-file ./env/values.env --network host -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:latest ``` Latest Lightspeed Core Developer Image: From 1ccd7ae85672fb19bb68688d526ccb7fd207fd1b Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 13 Jan 2026 14:48:03 -0500 Subject: [PATCH 07/10] add llama guard Signed-off-by: Jordan Dubrick --- env/default-values.env | 8 +++ run-no-guard.yaml | 136 +++++++++++++++++++++++++++++++++++++++++ run.yaml | 38 ++++++------ 3 files changed, 163 insertions(+), 19 deletions(-) create mode 100644 run-no-guard.yaml diff --git a/env/default-values.env b/env/default-values.env index 970f464..5d1d2e3 100644 --- a/env/default-values.env +++ b/env/default-values.env @@ -35,5 +35,13 @@ OLLAMA_URL= VALIDATION_PROVIDER= VALIDATION_MODEL_NAME= +# Llama Guard Settings +## Defaults to llama-guard3:8b if not set +SAFETY_MODEL= +## Defaults to http://host.docker.internal:11434/v1 if not set +SAFETY_URL= +## Only required for non-local environments with an API key +SAFETY_API_KEY= + # Other LLAMA_STACK_LOGGING= \ No newline at end of file diff --git a/run-no-guard.yaml b/run-no-guard.yaml new file mode 
100644 index 0000000..92859e3 --- /dev/null +++ b/run-no-guard.yaml @@ -0,0 +1,136 @@ +# +# +# Copyright Red Hat +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +version: 2 +image_name: redhat-ai-dev-llama-stack +apis: + - agents + - inference + - safety + - tool_runtime + - vector_io + - files +container_image: +external_providers_dir: +providers: + agents: + - config: + persistence: + agent_state: + namespace: agents + backend: kv_default + responses: + table_name: responses + backend: sql_default + provider_id: meta-reference + provider_type: inline::meta-reference + inference: + - provider_id: ${env.ENABLE_VLLM:+vllm} + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=} + api_token: ${env.VLLM_API_KEY:=} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: ${env.ENABLE_OLLAMA:+ollama} + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + - provider_id: ${env.ENABLE_OPENAI:+openai} + provider_type: remote::openai + config: + api_key: ${env.OPENAI_API_KEY:=} + - provider_id: ${env.ENABLE_VERTEX_AI:+vertexai} + provider_type: remote::vertexai + config: + project: ${env.VERTEX_AI_PROJECT:=} + location: ${env.VERTEX_AI_LOCATION:=us-central1} + - provider_id: safety-guard + provider_type: remote::vllm + config: + url: ${env.SAFETY_URL:=http://host.docker.internal:11434/v1} + api_token: ${env.SAFETY_API_KEY:=token} + - provider_id: sentence-transformers + provider_type: 
inline::sentence-transformers + config: {} + tool_runtime: + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + persistence: + namespace: vector_io::faiss + backend: faiss_kv + files: + - provider_id: localfs + provider_type: inline::localfs + config: + storage_dir: /tmp/llama-stack-files + metadata_store: + table_name: files_metadata + backend: sql_default +storage: + backends: + kv_default: + type: kv_sqlite + db_path: /tmp/kvstore.db + sql_default: + type: sql_sqlite + db_path: /tmp/sql_store.db + faiss_kv: + type: kv_sqlite + db_path: /rag-content/vector_db/rhdh_product_docs/1.8/faiss_store.db + stores: + metadata: + namespace: registry + backend: faiss_kv + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default +registered_resources: + models: + - model_id: sentence-transformers/all-mpnet-base-v2 + metadata: + embedding_dimension: 768 + model_type: embedding + provider_id: sentence-transformers + provider_model_id: /rag-content/embeddings_model + tool_groups: + - provider_id: rag-runtime + toolgroup_id: builtin::rag + vector_dbs: + - vector_db_id: rhdh-product-docs-1_8 + embedding_model: sentence-transformers/all-mpnet-base-v2 + embedding_dimension: 768 + provider_id: faiss +server: + auth: + host: + port: 8321 + quota: + tls_cafile: + tls_certfile: + tls_keyfile: diff --git a/run.yaml b/run.yaml index 4041dc1..d507144 100644 --- a/run.yaml +++ b/run.yaml @@ -57,19 +57,19 @@ providers: config: project: ${env.VERTEX_AI_PROJECT:=} location: ${env.VERTEX_AI_LOCATION:=us-central1} - # - provider_id: safety-guard - # provider_type: remote::vllm - # config: - # url: ${env.SAFETY_VLLM_URL:=} - # api_token: 
${env.SAFETY_VLLM_API_KEY:=token} + - provider_id: safety-guard + provider_type: remote::vllm + config: + url: ${env.SAFETY_URL:=http://host.docker.internal:11434/v1} + api_token: ${env.SAFETY_API_KEY:=token} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} - # safety: - # - provider_id: llama-guard - # provider_type: inline::llama-guard - # config: - # excluded_categories: [] + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] tool_runtime: - provider_id: model-context-protocol provider_type: remote::model-context-protocol @@ -123,15 +123,15 @@ registered_resources: model_type: embedding provider_id: sentence-transformers provider_model_id: /rag-content/embeddings_model - # - model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} - # provider_id: safety-guard - # provider_model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} - # model_type: llm - # metadata: {} - # shields: - # - shield_id: llama-guard-shield - # provider_id: llama-guard - # provider_shield_id: safety-guard/${env.SAFETY_MODEL:=llama-guard3:8b} + - model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} + provider_id: safety-guard + provider_model_id: ${env.SAFETY_MODEL:=llama-guard3:8b} + model_type: llm + metadata: {} + shields: + - shield_id: llama-guard-shield + provider_id: llama-guard + provider_shield_id: safety-guard/${env.SAFETY_MODEL:=llama-guard3:8b} tool_groups: - provider_id: rag-runtime toolgroup_id: builtin::rag From 1b59fdbcd11d9a3817ed4aedcdda449766cadd8b Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Tue, 13 Jan 2026 14:48:48 -0500 Subject: [PATCH 08/10] overhaul readme Signed-off-by: Jordan Dubrick --- README.md | 81 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index e0fa0de..2de78d4 100644 --- a/README.md +++ b/README.md @@ -1,46 +1,48 @@ # Redhat-AI-Dev Llama Stack [![Apache2.0 
License](https://img.shields.io/badge/license-Apache2.0-brightgreen.svg)](LICENSE) +[![Llama Stack Version](https://img.shields.io/badge/llama_stack-v0.3.5-blue)](https://llamastack.github.io/docs/v0.3.5) +[![Python Version](https://img.shields.io/badge/python-3.12-blue)](https://www.python.org/downloads/release/python-3120/) - [Image Availability](#image-availability) + - [Latest Stable Release](#latest-stable-release) + - [Latest Developer Release](#latest-developer-release) - [Usage](#usage) - [Available Inferences](#available-inferences) - [vLLM](#vllm) - [Ollama](#ollama) - [OpenAI](#openai) + - [Vertex AI (Gemini)](#vertex-ai-gemini) - [Configuring RAG](#configuring-rag) - - [Configuring Question Validation](#configuring-question-validation) - - [Running Locally](#running-locally) - - [Running on a Cluster](#running-on-a-cluster) + - [Configuring Safety Guards](#configuring-safety-guards) +- [Running Locally](#running-locally) +- [Running on a Cluster](#running-on-a-cluster) - [Makefile Commands](#makefile-commands) - [Contributing](#contributing) + - [Local Development Requirements](#local-development-requirements) + - [Updating YAML Files](#updating-yaml-files) - [Troubleshooting](#troubleshooting) -## Image Availability +# Image Availability -### Latest Stable Release +## Latest Stable Release ``` quay.io/redhat-ai-dev/llama-stack:0.1.1 ``` -### Latest Developer Release +## Latest Developer Release ``` quay.io/redhat-ai-dev/llama-stack:latest ``` -## Usage +# Usage > [!IMPORTANT] > The default Llama Stack configuration file that is baked into the built image contains tools. Ensure your provided inference server has tool calling **enabled**. -**Note:** You can enable `DEBUG` logging by setting: -``` -LLAMA_STACK_LOGGING=all=DEBUG -``` - -### Available Inferences +## Available Inferences Each inference has its own set of environment variables. You can include all of these variables in a `.env` file and pass that instead to your container. 
See [default-values.env](./env/default-values.env) for a template. It is recommended you copy that file to `values.env` to avoid committing it to Git. @@ -51,7 +53,7 @@ Each inference has its own set of environment variables. You can include all of > > VLLM_API_KEY="token" ❌ -#### vLLM +### vLLM **Required** ```env @@ -65,7 +67,7 @@ VLLM_MAX_TOKENS= VLLM_TLS_VERIFY= ``` -#### Ollama +### Ollama **Required** ```env @@ -77,7 +79,7 @@ The value of `OLLAMA_URL` is the default `http://localhost:11434`, when you are The value of `OLLAMA_URL` is `http://host.containers.internal:11434` if you are running llama-stack inside a container i.e.; if you run llama-stack with the podman run command above, it needs to access the Ollama endpoint on your laptop not inside the container. **If you are using Linux**, ensure your firewall allows port 11434 to your podman container's network, some Linux distributions firewalls block all traffic by default. Alternatively you can use `OLLAMA_URL=http://localhost:11434` and set the `--network host` flag when you run your podman container. -#### OpenAI +### OpenAI **Required** ```env @@ -87,7 +89,7 @@ OPENAI_API_KEY= To get your API Key, go to [platform.openai.com](https://platform.openai.com/settings/organization/api-keys). -#### Vertex AI (Gemini) +### Vertex AI (Gemini) **Required** ```env @@ -99,7 +101,7 @@ GOOGLE_APPLICATION_CREDENTIALS= For information about these variables see: https://llamastack.github.io/v0.2.18/providers/inference/remote_vertexai.html. -### Configuring RAG +## Configuring RAG The `run.yaml` file that is included in the container image has a RAG tool enabled. In order for this tool to have the necessary reference content, you need to run: @@ -109,18 +111,27 @@ make get-rag This will fetch the necessary reference content and add it to your local project directory. 
-### Configuring Question Validation +## Configuring Safety Guards > [!IMPORTANT] -> Currently question validation is removed from the default run.yaml file. -> This is due to the way Llama Stack Safety Shields are intended to be used in v0.3 and above. +> If you want to omit the safety guards for development purposes, you can use [run-no-guard.yaml](./run-no-guard.yaml) instead. -You will need to set the following environment variables to ensure functionality: +In the main [run.yaml](./run.yaml) file, Llama Guard is enabled by default. In order to avoid issues during startup you will need to ensure you have an instance of Llama Guard running. + +You can do so by running the following to start an Ollama container with Llama Guard: -- `VALIDATION_PROVIDER`: The provider you want to use for question validation. This should match what the provider value you are using under `inference`, such as `vllm`, `ollama`, `openai`. Defaults to `vllm` -- `VALIDATION_MODEL_NAME`: The name of the LLM you want to use for question validation +```sh +podman run -d --name ollama -p 11434:11434 docker.io/ollama/ollama:latest +podman exec ollama ollama pull llama-guard3:8b +``` +**Note:** Ensure the Ollama container is started and the model is ready before trying to query if deploying the containers manually. -### Running Locally +You will need to set the following environment variables to ensure functionality: +- `SAFETY_MODEL`: The name of the Llama Guard model being used. Defaults to `llama-guard3:8b` +- `SAFETY_URL`: The URL where the container is available. Defaults to `http://host.docker.internal:11434/v1` +- `SAFETY_API_KEY`: The API key required for access to the safety model. Not required for local. 
+ +# Running Locally ``` podman run -it -p 8321:8321 --env-file ./env/values.env -v ./embeddings_model:/rag-content/embeddings_model:Z -v ./vector_db/rhdh_product_docs:/rag-content/vector_db/rhdh_product_docs:Z quay.io/redhat-ai-dev/llama-stack:latest @@ -143,7 +154,7 @@ podman run -it -p 8080:8080 -v ./lightspeed-stack.yaml:/app-root/lightspeed-stac **Note:** If you have built your own version of Lightspeed Core you can replace the image referenced with your own build. Additionally, you can use the Llama Stack container along with the `lightspeed-stack.yaml` file to run Lightspeed Core locally with `uv` from their [repository](https://github.com/lightspeed-core/lightspeed-stack). -### Running on a Cluster +# Running on a Cluster To deploy on a cluster see [DEPLOYMENT.md](./docs/DEPLOYMENT.md). @@ -153,17 +164,17 @@ To deploy on a cluster see [DEPLOYMENT.md](./docs/DEPLOYMENT.md). | ---- | ----| | **get-rag** | Gets the RAG data and the embeddings model from the rag-content image registry to your local project directory | | **update-question-validation** | Updates the question validation content in `providers.d` | -| **validate-prompt-templates** | Validates prompt values in run.yaml. **Requires Python >= 3.11** | -| **update-prompt-templates** | Updates the prompt values in run.yaml. **Requires Python >= 3.11** | +| **validate-prompt-templates** | Validates prompt values in run.yaml. | +| **update-prompt-templates** | Updates the prompt values in run.yaml. | -## Contributing +# Contributing -### Local Development Requirements +## Local Development Requirements - [Yarn](https://yarnpkg.com/) - [Node.js >= v22](https://nodejs.org/en/about/previous-releases) -### Updating YAML Files +## Updating YAML Files This repository implements Prettier to handle all YAML formatting. 
```sh @@ -173,7 +184,13 @@ yarn verify # Runs Prettier to check the YAML files in this repository If you wish to try new changes with Llama Stack, you can build your own image using the `Containerfile` in the root of this repository. -## Troubleshooting +# Troubleshooting + +>[!NOTE] +> You can enable `DEBUG` logging by setting: +>``` +>LLAMA_STACK_LOGGING=all=DEBUG +>``` If you experience an error related to permissions for the `vector_db`, such as: From 82d0091915dc2b86df52dd7eb8edc6c94f67b1e7 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Wed, 14 Jan 2026 11:59:30 -0500 Subject: [PATCH 09/10] update no guard run Signed-off-by: Jordan Dubrick --- run-no-guard.yaml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/run-no-guard.yaml b/run-no-guard.yaml index 92859e3..a8247cd 100644 --- a/run-no-guard.yaml +++ b/run-no-guard.yaml @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. version: 2 -image_name: redhat-ai-dev-llama-stack +image_name: redhat-ai-dev-llama-stack-no-guard apis: - agents - inference @@ -57,11 +57,6 @@ providers: config: project: ${env.VERTEX_AI_PROJECT:=} location: ${env.VERTEX_AI_LOCATION:=us-central1} - - provider_id: safety-guard - provider_type: remote::vllm - config: - url: ${env.SAFETY_URL:=http://host.docker.internal:11434/v1} - api_token: ${env.SAFETY_API_KEY:=token} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} From 580976b712b7f475414fec6ae0b56d89894d23a2 Mon Sep 17 00:00:00 2001 From: Jordan Dubrick Date: Wed, 14 Jan 2026 11:59:41 -0500 Subject: [PATCH 10/10] use experimental 1.8 rag build Signed-off-by: Jordan Dubrick --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e8e8c16..4f17f56 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -RAG_CONTENT_IMAGE ?= quay.io/redhat-ai-dev/rag-content:release-1.8-lcs +RAG_CONTENT_IMAGE ?= quay.io/redhat-ai-dev/rag-content:experimental-release-1.8-lcs VENV := $(CURDIR)/scripts/python-scripts/.venv PYTHON := $(VENV)/bin/python3 PIP := $(VENV)/bin/pip3