From e1401bcb2cf82d612d3f49c558091b1f1bbae47d Mon Sep 17 00:00:00 2001
From: arpannookala-12 <ganesh.arpan.nookala@cloud2labs.com>
Date: Tue, 21 Apr 2026 14:35:19 -0500
Subject: [PATCH 1/4] feat: add Mistral-7B-Instruct-v0.3 model card and
 deployment guide for Dell EI

Signed-off-by: arpannookala-12 <ganesh.arpan.nookala@cloud2labs.com>
---
 .../Mistral-7B-Instruct-v0.3/deployment.md    | 61 +++++++++++++++++++
 .../Mistral-7B-Instruct-v0.3/model-card.md    | 61 +++++++++++++++++++
 2 files changed, 122 insertions(+)
 create mode 100644 third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/deployment.md
 create mode 100644 third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/model-card.md

diff --git a/third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/deployment.md b/third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/deployment.md
new file mode 100644
index 00000000..4eae9bd7
--- /dev/null
+++ b/third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/deployment.md
@@ -0,0 +1,61 @@
+
+# Deployed with EI Version-1.2
+
+## Step 1: Set Environment Variables
+
+```bash
+# Export Hugging Face token
+export HUGGING_FACE_HUB_TOKEN="your_token_here"
+
+# Set your base URL and API token
+export BASE_HOST="your-cluster-url"
+
+#generate keyclock token
+export BASE_URL="https://your-cluster-url"
+export KEYCLOAK_CLIENT_ID=api
+export KEYCLOAK_CLIENT_SECRET="your keyclock client secret"
+export TOKEN=$(curl -k -X POST $BASE_URL/token  -H 'Content-Type: application/x-www-form-urlencoded' -d "grant_type=client_credentials&client_id=${KEYCLOAK_CLIENT_ID}&client_secret=${KEYCLOAK_CLIENT_SECRET}" | jq -r .access_token)"
+```
+
+## Step 2: Deploy Mistral-7b-Instruct Model
+
+```bash
+helm install vllm-mistral-7b ./core/helm-charts/vllm \
+--values ./core/helm-charts/vllm/gaudi3-values.yaml \
+--set LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" \
+--set global.HUGGINGFACEHUB_API_TOKEN="$HUGGING_FACE_HUB_TOKEN" \
+--set ingress.enabled=true \
+--set ingress.host="$BASE_HOST" \
+--set ingress.secretname="$BASE_HOST" \
+--force
+```
+
+## Step 3: Test the Deployed Model
+
+```bash
+curl -k ${BASE_URL}/Mistral-7B-Instruct-v0.3/v1/completions \
+  -X POST \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer $TOKEN" \
+  -d '{
+    "model": "mistralai/Mistral-7B-Instruct-v0.3",
+    "prompt": "What is Deep Learning?",
+    "max_tokens": 25,
+    "temperature": 0
+  }'
+```
+
+## To undeploy the model
+
+```bash
+helm uninstall vllm-mistral-7b
+```
+## Parameters
+
+| Parameter                                                 | Description                                                                                           |
+| --------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- |
+| `--set LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"` | Defines the target model from **Hugging Face** to deploy.                                             |
+| `--set global.HUGGINGFACEHUB_API_TOKEN="..."`             | Authenticates access to gated or private Hugging Face models. Replace with your own secure token.     |
+| `--set ingress.enabled=true`                              | Enables Kubernetes **Ingress** to expose the model service externally.                                |
+| `--set ingress.host="replace-ingress"`                    | Public hostname or FQDN for the inference endpoint (maps to your Ingress controller IP).              |
+| `--set ingress.secretname="replace-secret"`               | Kubernetes **TLS Secret** used for HTTPS termination at the ingress layer.                            |
diff --git a/third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/model-card.md b/third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/model-card.md
new file mode 100644
index 00000000..f5d01075
--- /dev/null
+++ b/third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/model-card.md
@@ -0,0 +1,61 @@
+# Mistral 7B
+
+This model uses Mistral 7B, a compact yet high-performance large-language model developed by Mistral AI. It represents a 7 billion-parameter class model optimized for efficient inference, strong reasoning and code capabilities, and broad usage scenario support. The model uses advanced attention mechanisms (e.g., grouped-query attention, sliding-window attention) to deliver performance on par with much larger models while maintaining operational efficiency.
+
+For full details including model specifications, licensing, intended use, safety guidance, and example prompts, please visit the official Hugging Face page: **Official Hugging Face Page**
+
+https://huggingface.co/mistralai/Mistral-7B-v0.1
+
+
+### Model Attribution
+
+**Developer:**	Mistral AI
+
+**purpose:** High-efficiency general-purpose LLM for text, code, reasoning
+
+**Sizes/Variants:**	7B base (≈ 7.3 billion parameters)
+
+**Modalities:**	Text → Text (Natural language generation, reasoning, code)
+
+**Parameter Size:** ~7 billion
+
+**Max Context:**	Varies by variant; supports long-context sliding window attention. 
+
+**License:** Apache 2.0 (open-weight release)
+
+**Minimum required PCIe Cards:** 1
+
+### Usage Notice
+
+**By using this model, you agree that:**
+
+- Inputs and outputs are processed via the Mistral 7B model and you accept its licensing terms under Apache 2.0.
+- You must review generated content (text or code) for accuracy, compliance, and suitability before deploying in production.
+- The model should not be used to generate malicious content, disallowed content, or to automate decisions in high-risk or regulated settings without appropriate guardrails.
+- Because the model is an open-weight release under Apache 2.0, you are free to use, fine-tune and deploy it in many scenarios, but you remain responsible for ensuring usage compliance and monitoring output safety.
+
+### Intended Applications
+
+- General-purpose text generation (summarization, translation, creative writing)
+- Reasoning tasks (commonsense, mathematics, logic, multi-step problem solving)
+- Code generation and completion (the model has strong performance in code tasks)
+- Instruction-following variants (via the Instruct versions) for chat-bots, assistants, interactive agents
+- Research and experimentation in efficient LLMs, fine-tuning, quantization, custom deployment
+
+### Limitations
+
+- Although strong, the model can still generate inaccurate, irrelevant or hallucinatory outputs — human review remains essential.
+- The base model (and many instruct versions) may come without built-in moderation or guardrails.
+- For highly safety-critical, regulated, or commercial production systems you may require additional guardrails, monitoring or a more fully audited model.
+- The size (~7B) means there are trade-offs compared to much larger models (for extremely large context reasoning, multimodal tasks, etc.).
+- Deployment still requires hardware resources and may require techniques like quantization, efficient inference backends, to cost-effectively run in production.
+
+
+### References
+
+Open model documentation by Mistral AI. https://docs.mistral.ai/getting-started/models
+
+Model card on Hugging Face. https://huggingface.co/mistralai/Mistral-7B-v0.1
+
+“Mistral 7B” announcement blog post. https://mistral.ai/news/announcing-mistral-7b
+

From 8e428548f5313e68a9ab2d9a8fcf2ede864e7bf0 Mon Sep 17 00:00:00 2001
From: arpannookala-12 <ganesh.arpan.nookala@cloud2labs.com>
Date: Tue, 21 Apr 2026 14:39:12 -0500
Subject: [PATCH 2/4] feat: add Mistral-7B-v0.3 model card and deployment guide
 for Dell EI

Signed-off-by: arpannookala-12 <ganesh.arpan.nookala@cloud2labs.com>
---
 .../Mistral-7B-Instruct-v0.3/model-card.md    | 61 ---------------
 .../deployment.md                             | 37 ++++-----
 .../Mistral-7B-v0.3/model-card.md             | 76 +++++++++++++++++++
 3 files changed, 92 insertions(+), 82 deletions(-)
 delete mode 100644 third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/model-card.md
 rename third_party/Dell/model-deployment/{Mistral-7B-Instruct-v0.3 => Mistral-7B-v0.3}/deployment.md (50%)
 create mode 100644 third_party/Dell/model-deployment/Mistral-7B-v0.3/model-card.md

diff --git a/third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/model-card.md b/third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/model-card.md
deleted file mode 100644
index f5d01075..00000000
--- a/third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/model-card.md
+++ /dev/null
@@ -1,61 +0,0 @@
-# Mistral 7B
-
-This model uses Mistral 7B, a compact yet high-performance large-language model developed by Mistral AI. It represents a 7 billion-parameter class model optimized for efficient inference, strong reasoning and code capabilities, and broad usage scenario support. The model uses advanced attention mechanisms (e.g., grouped-query attention, sliding-window attention) to deliver performance on par with much larger models while maintaining operational efficiency.
-
-For full details including model specifications, licensing, intended use, safety guidance, and example prompts, please visit the official Hugging Face page: **Official Hugging Face Page**
-
-https://huggingface.co/mistralai/Mistral-7B-v0.1
-
-
-### Model Attribution
-
-**Developer:**	Mistral AI
-
-**purpose:** High-efficiency general-purpose LLM for text, code, reasoning
-
-**Sizes/Variants:**	7B base (≈ 7.3 billion parameters)
-
-**Modalities:**	Text → Text (Natural language generation, reasoning, code)
-
-**Parameter Size:** ~7 billion
-
-**Max Context:**	Varies by variant; supports long-context sliding window attention. 
-
-**License:** Apache 2.0 (open-weight release)
-
-**Minimum required PCIe Cards:** 1
-
-### Usage Notice
-
-**By using this model, you agree that:**
-
-- Inputs and outputs are processed via the Mistral 7B model and you accept its licensing terms under Apache 2.0.
-- You must review generated content (text or code) for accuracy, compliance, and suitability before deploying in production.
-- The model should not be used to generate malicious content, disallowed content, or to automate decisions in high-risk or regulated settings without appropriate guardrails.
-- Because the model is an open-weight release under Apache 2.0, you are free to use, fine-tune and deploy it in many scenarios, but you remain responsible for ensuring usage compliance and monitoring output safety.
-
-### Intended Applications
-
-- General-purpose text generation (summarization, translation, creative writing)
-- Reasoning tasks (commonsense, mathematics, logic, multi-step problem solving)
-- Code generation and completion (the model has strong performance in code tasks)
-- Instruction-following variants (via the Instruct versions) for chat-bots, assistants, interactive agents
-- Research and experimentation in efficient LLMs, fine-tuning, quantization, custom deployment
-
-### Limitations
-
-- Although strong, the model can still generate inaccurate, irrelevant or hallucinatory outputs — human review remains essential.
-- The base model (and many instruct versions) may come without built-in moderation or guardrails.
-- For highly safety-critical, regulated, or commercial production systems you may require additional guardrails, monitoring or a more fully audited model.
-- The size (~7B) means there are trade-offs compared to much larger models (for extremely large context reasoning, multimodal tasks, etc.).
-- Deployment still requires hardware resources and may require techniques like quantization, efficient inference backends, to cost-effectively run in production.
-
-
-### References
-
-Open model documentation by Mistral AI. https://docs.mistral.ai/getting-started/models
-
-Model card on Hugging Face. https://huggingface.co/mistralai/Mistral-7B-v0.1
-
-“Mistral 7B” announcement blog post. https://mistral.ai/news/announcing-mistral-7b
-
diff --git a/third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/deployment.md b/third_party/Dell/model-deployment/Mistral-7B-v0.3/deployment.md
similarity index 50%
rename from third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/deployment.md
rename to third_party/Dell/model-deployment/Mistral-7B-v0.3/deployment.md
index 4eae9bd7..c953478c 100644
--- a/third_party/Dell/model-deployment/Mistral-7B-Instruct-v0.3/deployment.md
+++ b/third_party/Dell/model-deployment/Mistral-7B-v0.3/deployment.md
@@ -1,5 +1,5 @@
 
-# Deployed with EI Version-1.2
+# Deployed with EI Version-1.4
 
 ## Step 1: Set Environment Variables
 
@@ -10,52 +10,47 @@ export HUGGING_FACE_HUB_TOKEN="your_token_here"
 # Set your base URL and API token
 export BASE_HOST="your-cluster-url"
 
-#generate keyclock token
-export BASE_URL="https://your-cluster-url"
-export KEYCLOAK_CLIENT_ID=api
-export KEYCLOAK_CLIENT_SECRET="your keyclock client secret"
-export TOKEN=$(curl -k -X POST $BASE_URL/token  -H 'Content-Type: application/x-www-form-urlencoded' -d "grant_type=client_credentials&client_id=${KEYCLOAK_CLIENT_ID}&client_secret=${KEYCLOAK_CLIENT_SECRET}" | jq -r .access_token)"
 ```
 
 ## Step 2: Deploy Mistral-7b-Instruct Model
 
 ```bash
-helm install vllm-mistral-7b ./core/helm-charts/vllm \
---values ./core/helm-charts/vllm/gaudi3-values.yaml \
---set LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" \
---set global.HUGGINGFACEHUB_API_TOKEN="$HUGGING_FACE_HUB_TOKEN" \
---set ingress.enabled=true \
---set ingress.host="$BASE_HOST" \
---set ingress.secretname="$BASE_HOST" \
---force
+helm install mistral-7b-v3 ./core/helm-charts/vllm \
+  --values ./core/helm-charts/vllm/xeon-values.yaml \
+  --set LLM_MODEL_ID="mistralai/Mistral-7B-v0.3" \
+  --set global.HUGGINGFACEHUB_API_TOKEN="$HUGGING_FACE_HUB_TOKEN" \
+  --set ingress.enabled=false \
+  --set ingress.host="$BASE_HOST" \
+  --set ingress.secretname="$BASE_HOST"
 ```
 
 ## Step 3: Test the Deployed Model
 
 ```bash
-curl -k ${BASE_URL}/Mistral-7B-Instruct-v0.3/v1/completions \
+curl -k ${BASE_URL}/v1/completions \
   -X POST \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer $TOKEN" \
+  -H "Authorization: Bearer {API_KEY}" \
   -d '{
-    "model": "mistralai/Mistral-7B-Instruct-v0.3",
+    "model": "mistralai/Mistral-7B-v0.3",
     "prompt": "What is Deep Learning?",
     "max_tokens": 25,
     "temperature": 0
-  }'
+ }'
 ```
 
 ## To undeploy the model
 
 ```bash
-helm uninstall vllm-mistral-7b
+helm uninstall mistral-7b-v3
 ```
 ## Parameters
 
 | Parameter                                                 | Description                                                                                           |
 | --------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- |
-| `--set LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3"` | Defines the target model from **Hugging Face** to deploy.                                             |
+| `--set LLM_MODEL_ID="mistralai/Mistral-7B-v0.3"` | Defines the target model from **Hugging Face** to deploy.                                             |                                            |
 | `--set global.HUGGINGFACEHUB_API_TOKEN="..."`             | Authenticates access to gated or private Hugging Face models. Replace with your own secure token.     |
-| `--set ingress.enabled=true`                              | Enables Kubernetes **Ingress** to expose the model service externally.                                |
+| `--set ingress.enabled=false`                              | Enables Kubernetes **Ingress** to expose the model service externally.                                |
 | `--set ingress.host="replace-ingress"`                    | Public hostname or FQDN for the inference endpoint (maps to your Ingress controller IP).              |
 | `--set ingress.secretname="replace-secret"`               | Kubernetes **TLS Secret** used for HTTPS termination at the ingress layer.                            |
+| `--API_KEY`             | Genai gateway api-key|
\ No newline at end of file
diff --git a/third_party/Dell/model-deployment/Mistral-7B-v0.3/model-card.md b/third_party/Dell/model-deployment/Mistral-7B-v0.3/model-card.md
new file mode 100644
index 00000000..8b03ce8b
--- /dev/null
+++ b/third_party/Dell/model-deployment/Mistral-7B-v0.3/model-card.md
@@ -0,0 +1,76 @@
+## Mistral 7B v0.3
+
+This model uses **Mistral-7B v0.3**, a next-generation 7-billion-parameter transformer language model developed by **Mistral AI**. It represents a compact, efficient, and high-performance LLM architecture optimized for general-purpose text generation, research, and downstream fine-tuning. Compared to earlier releases, the v0.3 iteration integrates tokenizer improvements, extended context length support, and architectural refinements for stronger performance and interoperability in modern LLM ecosystems.
+
+For full details including model specifications, licensing, intended use, and technical documentation, please visit the official Hugging Face page:
+
+https://huggingface.co/mistralai/Mistral-7B-v0.3
+
+---
+
+### Model Attribution
+
+**Developer:** Mistral AI
+
+**Purpose:** Foundation model for general NLP tasks, downstream fine-tuning, and integration into custom pipelines
+
+**Sizes / Variants:**  
+7B (≈ 7 billion parameters)
+
+**Modalities:**  
+Text → Text (autoregressive language modeling)
+
+**Parameter Size:**  
+~7 billion
+
+**Max Context:**  
+Extended context window supported (exact length may depend on inference backend and configuration)
+
+**License:**  
+Apache 2.0 (open-weight release)
+
+**Minimum Required PCIe Cards:**  
+1–2 (varies by precision, quantization, and inference framework)
+
+---
+
+### Usage Notice
+
+By using this model, you agree that:
+
+- Inputs and outputs are processed via the Mistral-7B v0.3 model and you accept its licensing terms under Apache 2.0.
+- Model outputs must be reviewed for accuracy, suitability, and safety before use in commercial or production contexts.
+- This base model has not been instruction-tuned or safety-aligned, and may therefore produce literal, unfiltered, or undesired content.
+- You remain responsible for monitoring, filtering, and enforcing compliance, especially in sensitive, regulated, or user-facing deployments.
+
+---
+
+### Intended Applications
+
+- Research in transformer and LLM architectures
+- Pre-training or continued training for domain-specific LLMs
+- Fine-tuning for instruction following, chat roles, code, or domain tasks
+- General-purpose text generation and language modeling
+- Embedding into autonomous or semi-autonomous agents with external alignment layers
+- Experimental or academic benchmarking on open-weight LLMs
+
+---
+
+### Limitations
+
+- As a base model, it lacks instruction tuning and safety alignment, making outputs potentially unstructured or unsafe without further processing.
+- May generate hallucinated, biased, or factually incorrect content; human validation is recommended.
+- Safety-critical and regulated use cases require external safeguards, filtering, or moderation systems.
+- Operational performance varies with context length, quantization, and hardware backend; optimization may be required for real-time workloads.
+
+---
+
+### References
+
+- Official Model Card on Hugging Face: https://huggingface.co/mistralai/Mistral-7B-v0.3
+
+- Open model documentation by Mistral AI.
+  https://docs.mistral.ai/getting-started/models
+
+- “Mistral 7B” announcement blog post.
+  https://mistral.ai/news/announcing-mistral-7b

From 1869a04c7ab272172df5b959fea8f5eb670a6c37 Mon Sep 17 00:00:00 2001
From: Harika <codewith3@gmail.com>
Date: Tue, 5 May 2026 12:05:04 -0500
Subject: [PATCH 3/4] update mistral 7b 0.3 deployment.md

---
 .../Mistral-7B-v0.3/deployment.md             | 89 ++++++++++++++-----
 1 file changed, 67 insertions(+), 22 deletions(-)

diff --git a/third_party/Dell/model-deployment/Mistral-7B-v0.3/deployment.md b/third_party/Dell/model-deployment/Mistral-7B-v0.3/deployment.md
index c953478c..8dbb882f 100644
--- a/third_party/Dell/model-deployment/Mistral-7B-v0.3/deployment.md
+++ b/third_party/Dell/model-deployment/Mistral-7B-v0.3/deployment.md
@@ -1,56 +1,101 @@
+## Step 1: Prerequisites to Deploy Mistral-7B-v0.3 Model on Xeon with Keycloak
 
-# Deployed with EI Version-1.4
+Ensure the Enterprise Inference stack with Keycloak is already deployed before proceeding.
 
-## Step 1: Set Environment Variables
+Edit `core/scripts/generate-token.sh` and set your values before sourcing it:
+
+| Variable                  | Description                                                              |
+| ------------------------- | ------------------------------------------------------------------------ |
+| `BASE_URL`                | Hostname of your cluster (e.g. `api.example.com`), without `https://`   |
+| `KEYCLOAK_ADMIN_USERNAME` | Keycloak admin username                                                  |
+| `KEYCLOAK_PASSWORD`       | Keycloak admin password                                                  |
+| `KEYCLOAK_CLIENT_ID`      | Keycloak client ID configured during EI deployment                       |
+
+Then run:
 
 ```bash
-# Export Hugging Face token
 export HUGGING_FACE_HUB_TOKEN="your_token_here"
 
-# Set your base URL and API token
-export BASE_HOST="your-cluster-url"
-
+cd ~/Enterprise-Inference
+source core/scripts/generate-token.sh
 ```
 
-## Step 2: Deploy Mistral-7b-Instruct Model
+This exports: `BASE_URL`, `KEYCLOAK_CLIENT_ID`, `KEYCLOAK_CLIENT_SECRET`, and `TOKEN`.
+
+## Step 2: Deploy Mistral-7B-v0.3 Model
 
 ```bash
 helm install mistral-7b-v3 ./core/helm-charts/vllm \
   --values ./core/helm-charts/vllm/xeon-values.yaml \
   --set LLM_MODEL_ID="mistralai/Mistral-7B-v0.3" \
   --set global.HUGGINGFACEHUB_API_TOKEN="$HUGGING_FACE_HUB_TOKEN" \
-  --set ingress.enabled=false \
-  --set ingress.host="$BASE_HOST" \
-  --set ingress.secretname="$BASE_HOST"
+  --set ingress.enabled=true \
+  --set ingress.secretname="${BASE_URL}" \
+  --set ingress.host="${BASE_URL}" \
+  --set oidc.client_id="$KEYCLOAK_CLIENT_ID" \
+  --set oidc.client_secret="$KEYCLOAK_CLIENT_SECRET" \
+  --set apisix.enabled=true \
+  --set tensor_parallel_size="1" \
+  --set pipeline_parallel_size="1"
+```
+
+## Step 3: Verify the Deployment
+
+```bash
+kubectl get pods
+kubectl get apisixroutes
+```
+
+Expected Output:
+
+```
+NAME                                READY   STATUS    RESTARTS
+keycloak-0                          1/1     Running   0
+keycloak-postgresql-0               1/1     Running   0
+mistral-7b-v3-<hash>-<hash>         1/1     Running   0
 ```
 
-## Step 3: Test the Deployed Model
+> Note: The pod name suffix `<hash>-<hash>` is auto-generated by Kubernetes and will differ on each deployment. Ensure all pods show `1/1 Running`.
+
+```
+NAME                          HOSTS
+mistral-7b-v3-apisixroute     api.example.com
+```
+
+## Step 4: Test the Deployed Model
 
 ```bash
-curl -k ${BASE_URL}/v1/completions \
+curl -k https://${BASE_URL}/Mistral-7B-v0.3-vllmcpu/v1/completions \
   -X POST \
   -H "Content-Type: application/json" \
-  -H "Authorization: Bearer {API_KEY}" \
+  -H "Authorization: Bearer $TOKEN" \
   -d '{
     "model": "mistralai/Mistral-7B-v0.3",
     "prompt": "What is Deep Learning?",
     "max_tokens": 25,
     "temperature": 0
- }'
+  }'
 ```
 
+If successful, the model will return a completion response.
+
 ## To undeploy the model
 
 ```bash
 helm uninstall mistral-7b-v3
 ```
+
 ## Parameters
 
-| Parameter                                                 | Description                                                                                           |
-| --------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- |
-| `--set LLM_MODEL_ID="mistralai/Mistral-7B-v0.3"` | Defines the target model from **Hugging Face** to deploy.                                             |                                            |
-| `--set global.HUGGINGFACEHUB_API_TOKEN="..."`             | Authenticates access to gated or private Hugging Face models. Replace with your own secure token.     |
-| `--set ingress.enabled=false`                              | Enables Kubernetes **Ingress** to expose the model service externally.                                |
-| `--set ingress.host="replace-ingress"`                    | Public hostname or FQDN for the inference endpoint (maps to your Ingress controller IP).              |
-| `--set ingress.secretname="replace-secret"`               | Kubernetes **TLS Secret** used for HTTPS termination at the ingress layer.                            |
-| `--API_KEY`             | Genai gateway api-key|
\ No newline at end of file
+| Parameter                                        | Description                                                                                       |
+| ------------------------------------------------ | ------------------------------------------------------------------------------------------------- |
+| `--set LLM_MODEL_ID="mistralai/Mistral-7B-v0.3"` | Defines the target model from **Hugging Face** to deploy.                                         |
+| `--set global.HUGGINGFACEHUB_API_TOKEN="..."`    | Authenticates access to gated or private Hugging Face models. Replace with your own secure token. |
+| `--set ingress.enabled=true`                     | Enables Kubernetes **Ingress** to expose the model service externally.                            |
+| `--set ingress.host="${BASE_URL}"`               | Public hostname or FQDN for the inference endpoint (maps to your Ingress controller IP).          |
+| `--set ingress.secretname="${BASE_URL}"`         | Kubernetes **TLS Secret** used for HTTPS termination at the ingress layer.                        |
+| `--set oidc.client_id="..."`                     | Keycloak OIDC client ID used for token-based authentication.                                      |
+| `--set oidc.client_secret="..."`                 | Keycloak OIDC client secret corresponding to the client ID.                                       |
+| `--set apisix.enabled=true`                      | Enables **APISIX** as the API gateway for routing and authentication.                             |
+| `--set tensor_parallel_size="1"`                 | Number of tensor-parallel workers (CPU sockets or accelerator cards, not cores) the model is sharded across; `1` runs it unsharded. |
+| `--set pipeline_parallel_size="1"`               | Number of pipeline parallel stages. Typically `1` for single-node deployments.                   |

From 2fa62d6c9b8775fcaee87f963a55c21224280b45 Mon Sep 17 00:00:00 2001
From: Harika <codewith3@gmail.com>
Date: Wed, 27 May 2026 17:51:42 -0500
Subject: [PATCH 4/4] Remove README.md from model-deployment folder

---
 third_party/Dell/model-deployment/README.md | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 third_party/Dell/model-deployment/README.md

diff --git a/third_party/Dell/model-deployment/README.md b/third_party/Dell/model-deployment/README.md
deleted file mode 100644
index 43d98118..00000000
--- a/third_party/Dell/model-deployment/README.md
+++ /dev/null
@@ -1 +0,0 @@
-# PLACEHOLDER
\ No newline at end of file