From 3fa8f5ea33dae7dde4e9045b69c68c67acbdea47 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-56-243.us-west-2.compute.internal>
Date: Mon, 28 Oct 2024 07:53:59 +0000
Subject: [PATCH] remote service with multi model support

---
 ChatQnA/Dockerfile.wrapper                    |  36 ++
 ChatQnA/chatqna_wrapper.py                    |  68 +++
 .../docker_compose/intel/hpu/gaudi/README.md  | 554 ++++++++++++++++++
 .../intel/hpu/gaudi/README_remote.md          | 276 +++++++++
 .../intel/hpu/gaudi/compose.yaml              | 398 +++++++++++++
 .../intel/hpu/gaudi/compose_remote.yaml       | 306 ++++++++++
 .../intel/hpu/gaudi/compose_tgi_remote.yaml   | 354 +++++++++++
 .../docker_compose/intel/hpu/gaudi/set_env.sh |  48 ++
 .../intel/hpu/gaudi/set_env_remote.sh         |  57 ++
 ProductivitySuite/ui/react/src/App.tsx        |   4 +-
 .../react/src/components/CodeGen/CodeGen.tsx  |   2 +-
 .../components/Conversation/Conversation.tsx  |  18 +-
 .../src/components/Conversation/settings.tsx  |  66 ++-
 .../ui/react/src/components/DocSum/DocSum.tsx |  34 +-
 .../src/components/DocSum/docSum.module.scss  |   1 +
 .../ui/react/src/components/FaqGen/FaqGen.tsx |   2 +-
 .../Shared/CodeRender/CodeRender.tsx          |   1 -
 .../src/redux/Conversation/Conversation.ts    |   8 +
 .../redux/Conversation/ConversationSlice.ts   |  26 +-
 .../src/styles/components/context.module.scss |   3 +-
 20 files changed, 2227 insertions(+), 35 deletions(-)
 create mode 100644 ChatQnA/Dockerfile.wrapper
 create mode 100644 ChatQnA/chatqna_wrapper.py
 create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/README.md
 create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/README_remote.md
 create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/compose.yaml
 create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml
 create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml
 create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env.sh
 create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env_remote.sh

diff --git a/ChatQnA/Dockerfile.wrapper b/ChatQnA/Dockerfile.wrapper
new file mode 100644
index 0000000000..1baf63e460
--- /dev/null
+++ b/ChatQnA/Dockerfile.wrapper
@@ -0,0 +1,36 @@
+
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+FROM python:3.11-slim
+# Install the OS packages used by the image (GL runtime library, jemalloc, git).
+RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
+    libgl1-mesa-glx \
+    libjemalloc-dev \
+    git
+# Create the account "user" and give it ownership of its home directory.
+RUN useradd -m -s /bin/bash user && \
+    mkdir -p /home/user && \
+    chown -R user /home/user/
+# COPY sources are resolved from the build-context root, so the context must contain GenAIComps/ (used instead of the commented-out clone below).
+COPY /GenAIComps /home/user/GenAIComps
+WORKDIR /home/user/
+#RUN git clone --branch v1.0 https://github.com/opea-project/GenAIComps.git
+
+# Install the Python requirements of the copied GenAIComps checkout.
+WORKDIR /home/user/GenAIComps
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt
+# The wrapper is copied in as chatqna.py, the script ENTRYPOINT runs; the build context must also contain GenAIExamples/.
+COPY /GenAIExamples/ChatQnA/chatqna_wrapper.py /home/user/chatqna.py
+# Put the GenAIComps checkout on the Python import path (chatqna_wrapper.py does "from comps import ...").
+ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps
+
+USER user
+
+WORKDIR /home/user
+# NOTE(review): ~/.bashrc is only read by bash; ENTRYPOINT runs python directly, so this ulimit may never take effect - confirm.
+RUN echo 'ulimit -S -n 999999' >> ~/.bashrc
+
+ENTRYPOINT ["python", "chatqna.py"]
diff --git a/ChatQnA/chatqna_wrapper.py b/ChatQnA/chatqna_wrapper.py
new file mode 100644
index 0000000000..09062b5d27
--- /dev/null
+++ b/ChatQnA/chatqna_wrapper.py
@@ -0,0 +1,68 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+
+from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType
+
+MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0")  # host handed to ChatQnAService in __main__
+MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888))  # port handed to ChatQnAService; env values are strings, hence int()
+EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0")  # embedding microservice host
+EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000))  # embedding microservice port
+RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0")  # retriever microservice host
+RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000))  # retriever microservice port
+RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0")  # rerank microservice host
+RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000))  # rerank microservice port
+LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0")  # llm microservice host
+LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000))  # llm microservice port
+
+
+class ChatQnAService:  # Assembles the ChatQnA megaservice from four remote microservices.
+    def __init__(self, host="0.0.0.0", port=8000):  # host/port: address intended for the megaservice gateway
+        self.host = host  # stored only; add_remote_service() hard-codes host="0.0.0.0" for the gateway
+        self.port = port  # passed to ChatQnAGateway in add_remote_service()
+        self.megaservice = ServiceOrchestrator()  # orchestrator the microservices are added to
+
+    def add_remote_service(self):  # Registers the four stages, chains them in order, then creates self.gateway.
+        embedding = MicroService(
+            name="embedding",
+            host=EMBEDDING_SERVICE_HOST_IP,
+            port=EMBEDDING_SERVICE_PORT,
+            endpoint="/v1/embeddings",
+            use_remote_service=True,
+            service_type=ServiceType.EMBEDDING,
+        )
+        retriever = MicroService(
+            name="retriever",
+            host=RETRIEVER_SERVICE_HOST_IP,
+            port=RETRIEVER_SERVICE_PORT,
+            endpoint="/v1/retrieval",
+            use_remote_service=True,
+            service_type=ServiceType.RETRIEVER,
+        )
+        rerank = MicroService(
+            name="rerank",
+            host=RERANK_SERVICE_HOST_IP,
+            port=RERANK_SERVICE_PORT,
+            endpoint="/v1/reranking",
+            use_remote_service=True,
+            service_type=ServiceType.RERANK,
+        )
+        llm = MicroService(
+            name="llm",
+            host=LLM_SERVICE_HOST_IP,
+            port=LLM_SERVICE_PORT,
+            endpoint="/v1/chat/completions",
+            use_remote_service=True,
+            service_type=ServiceType.LLM,
+        )
+        self.megaservice.add(embedding).add(retriever).add(rerank).add(llm)  # register every stage with the orchestrator
+        self.megaservice.flow_to(embedding, retriever)  # embedding -> retriever
+        self.megaservice.flow_to(retriever, rerank)  # retriever -> rerank
+        self.megaservice.flow_to(rerank, llm)  # rerank -> llm
+        self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port)  # NOTE(review): self.host is ignored here - confirm that is intended
+
+
+if __name__ == "__main__":  # script entry point (Dockerfile.wrapper runs this file as chatqna.py)
+    chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT)
+    chatqna.add_remote_service()  # NOTE(review): nothing blocks after this call; presumably ChatQnAGateway starts serving when constructed - confirm
diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/README.md b/ProductivitySuite/docker_compose/intel/hpu/gaudi/README.md
new file mode 100644
index 0000000000..c5463ad103
--- /dev/null
+++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/README.md
@@ -0,0 +1,554 @@
+# Build Mega Service of Productivity Suite on Xeon
+
+This document outlines the deployment process for OPEA Productivity Suite utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server and [GenAIExamples](https://github.com/opea-project/GenAIExamples.git) solutions. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`. We will publish the Docker images to Docker Hub soon, which will simplify the deployment process for this service.
+
+---
+
+## 🐳 Build Docker Images
+
+First, build the Docker images locally from the GenAIComps and GenAIExamples sources as described below.
+
+### 1. Build Embedding Image
+
+```bash
+git clone https://github.com/opea-project/GenAIComps.git
+cd GenAIComps
+docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
+```
+
+### 2. Build Retriever Image
+
+```bash
+docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile .
+```
+
+### 3. Build Rerank Image
+
+```bash
+docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
+```
+
+### 4. Build LLM Image
+
+#### Use TGI as backend
+
+```bash
+docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
+```
+
+### 5. Build Dataprep Image
+
+```bash
+docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile .
+```
+
+### 6. Build Prompt Registry Image
+
+```bash
+docker build -t opea/promptregistry-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/mongo/Dockerfile .
+```
+
+### 7. Build Chat History Image
+
+```bash
+docker build -t opea/chathistory-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/mongo/Dockerfile .
+cd ..
+```
+
+### 8. Build MegaService Docker Images
+
+The Productivity Suite is composed of multiple GenAIExample reference solutions composed together.
+
+#### 8.1 Build ChatQnA MegaService Docker Images
+
+```bash
+git clone https://github.com/opea-project/GenAIExamples.git
+cd GenAIExamples/ChatQnA/
+docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+```
+
+#### 8.2 Build DocSum Megaservice Docker Images
+
+```bash
+cd GenAIExamples/DocSum
+docker build --no-cache -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+```
+
+#### 8.3 Build CodeGen Megaservice Docker Images
+
+```bash
+cd GenAIExamples/CodeGen
+docker build --no-cache -t opea/codegen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+```
+
+#### 8.4 Build FAQGen Megaservice Docker Images
+
+```bash
+cd GenAIExamples/FaqGen
+docker build --no-cache -t opea/faqgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
+```
+
+### 9. Build UI Docker Image
+
+Build the frontend Docker image with the command below:
+
+**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
+
+```bash
+cd GenAIExamples/ProductivitySuite/ui
+docker build --no-cache -t opea/productivity-suite-react-ui-server:latest -f docker/Dockerfile.react .
+```
+
+---
+
+## 🚀 Start Microservices
+
+### Setup Environment Variables
+
+Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
+
+**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable**
+
+> Change the External_Public_IP below with the actual IPV4 value
+
+```
+export host_ip="External_Public_IP"
+```
+
+**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable**
+
+> Replace Your_Huggingface_API_Token below with your actual Huggingface API Token value
+
+```
+export your_hf_api_token="Your_Huggingface_API_Token"
+```
+
+**Append the value of the public IP address to the no_proxy list**
+
+```
+export your_no_proxy=${your_no_proxy},"External_Public_IP"
+```
+
+```bash
+export MONGO_HOST=${host_ip}
+export MONGO_PORT=27017
+export DB_NAME="test"
+export COLLECTION_NAME="Conversations"
+export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+export RERANK_MODEL_ID="BAAI/bge-reranker-base"
+export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf"
+export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
+export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
+export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
+export REDIS_URL="redis://${host_ip}:6379"
+export INDEX_NAME="rag-redis"
+export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token}
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export EMBEDDING_SERVICE_HOST_IP=${host_ip}
+export RETRIEVER_SERVICE_HOST_IP=${host_ip}
+export RERANK_SERVICE_HOST_IP=${host_ip}
+export LLM_SERVICE_HOST_IP=${host_ip}
+export LLM_SERVICE_HOST_IP_DOCSUM=${host_ip}
+export LLM_SERVICE_HOST_IP_FAQGEN=${host_ip}
+export LLM_SERVICE_HOST_IP_CODEGEN=${host_ip}
+export LLM_SERVICE_HOST_IP_CHATQNA=${host_ip}
+export TGI_LLM_ENDPOINT_CHATQNA="http://${host_ip}:9009"
+export TGI_LLM_ENDPOINT_CODEGEN="http://${host_ip}:8028"
+export TGI_LLM_ENDPOINT_FAQGEN="http://${host_ip}:9009"
+export TGI_LLM_ENDPOINT_DOCSUM="http://${host_ip}:9009"
+export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna"
+export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file"
+export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen"
+export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen"
+export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
+export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create"
+export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create"
+export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete"
+export CHAT_HISTORY_GET_ENDPOINT="http://${host_ip}:6012/v1/chathistory/get"
+export PROMPT_SERVICE_GET_ENDPOINT="http://${host_ip}:6018/v1/prompt/get"
+export PROMPT_SERVICE_CREATE_ENDPOINT="http://${host_ip}:6018/v1/prompt/create"
+export KEYCLOAK_SERVICE_ENDPOINT="http://${host_ip}:8080"
+export LLM_SERVICE_HOST_PORT_FAQGEN=9002
+export LLM_SERVICE_HOST_PORT_CODEGEN=9001
+export LLM_SERVICE_HOST_PORT_DOCSUM=9003
+export PROMPT_COLLECTION_NAME="prompt"
+```
+
+Note: Please replace `host_ip` with your external IP address; do not use localhost.
+
+### Start all the services Docker Containers
+
+> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file
+
+```bash
+cd GenAIExamples/ProductivitySuite/docker_compose/intel/hpu/gaudi
+
+docker compose -f compose.yaml up -d
+```
+
+---
+
+### 🔐 Setup Keycloak
+
+Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more detail related to Keycloak configuration setup.
+
+---
+
+### ✅ Validate Microservices
+
+1. TEI Embedding Service
+
+   ```bash
+   curl ${host_ip}:6006/embed \
+       -X POST \
+       -d '{"inputs":"What is Deep Learning?"}' \
+       -H 'Content-Type: application/json'
+   ```
+
+2. Embedding Microservice
+
+   ```bash
+   curl http://${host_ip}:6000/v1/embeddings\
+     -X POST \
+     -d '{"text":"hello"}' \
+     -H 'Content-Type: application/json'
+   ```
+
+3. Retriever Microservice
+
+   To consume the retriever microservice, you need to generate a mock embedding vector by Python script. The length of embedding vector
+   is determined by the embedding model.
+   Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, whose vector size is 768.
+
+   Check the vector dimension of your embedding model and set the dimension of `your_embedding` to match it.
+
+   ```bash
+   export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
+   curl http://${host_ip}:7000/v1/retrieval \
+     -X POST \
+     -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \
+     -H 'Content-Type: application/json'
+   ```
+
+4. TEI Reranking Service
+
+   ```bash
+   curl http://${host_ip}:8808/rerank \
+       -X POST \
+       -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \
+       -H 'Content-Type: application/json'
+   ```
+
+5. Reranking Microservice
+
+   ```bash
+   curl http://${host_ip}:8000/v1/reranking\
+     -X POST \
+     -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
+     -H 'Content-Type: application/json'
+   ```
+
+6. LLM backend Service (ChatQnA, DocSum, FAQGen)
+
+   ```bash
+   curl http://${host_ip}:9009/generate \
+     -X POST \
+     -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \
+     -H 'Content-Type: application/json'
+   ```
+
+7. LLM backend Service (CodeGen)
+
+   ```bash
+   curl http://${host_ip}:8028/generate \
+     -X POST \
+     -d '{"inputs":"def print_hello_world():","parameters":{"max_new_tokens":256, "do_sample": true}}' \
+     -H 'Content-Type: application/json'
+   ```
+
+8. ChatQnA LLM Microservice
+
+   ```bash
+   curl http://${host_ip}:9000/v1/chat/completions\
+     -X POST \
+     -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \
+     -H 'Content-Type: application/json'
+   ```
+
+9. CodeGen LLM Microservice
+
+   ```bash
+   curl http://${host_ip}:9001/v1/chat/completions\
+     -X POST \
+     -d '{"query":"def print_hello_world():"}' \
+     -H 'Content-Type: application/json'
+   ```
+
+10. DocSum LLM Microservice
+
+    ```bash
+    curl http://${host_ip}:9002/v1/chat/docsum\
+      -X POST \
+      -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5"}' \
+      -H 'Content-Type: application/json'
+    ```
+
+11. FAQGen LLM Microservice
+
+    ```bash
+    curl http://${host_ip}:9003/v1/faqgen\
+      -X POST \
+      -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5"}' \
+      -H 'Content-Type: application/json'
+    ```
+
+12. ChatQnA MegaService
+
+    ```bash
+    curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{
+         "messages": "What is the revenue of Nike in 2023?"
+         }'
+    ```
+
+13. FAQGen MegaService
+
+    ```bash
+    curl http://${host_ip}:8889/v1/faqgen -H "Content-Type: application/json" -d '{
+         "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
+         }'
+    ```
+
+14. DocSum MegaService
+
+    ```bash
+    curl http://${host_ip}:8890/v1/docsum -H "Content-Type: application/json" -d '{
+         "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."
+         }'
+    ```
+
+15. CodeGen MegaService
+
+    ```bash
+    curl http://${host_ip}:7778/v1/codegen -H "Content-Type: application/json" -d '{
+         "messages": "def print_hello_world():"
+         }'
+    ```
+
+16. Dataprep Microservice
+
+    If you want to update the default knowledge base, you can use the following commands:
+
+    Update Knowledge Base via Local File Upload:
+
+    ```bash
+    curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+         -H "Content-Type: multipart/form-data" \
+         -F "files=@./nke-10k-2023.pdf"
+    ```
+
+    This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment.
+
+    Add Knowledge Base via HTTP Links:
+
+    ```bash
+    curl -X POST "http://${host_ip}:6007/v1/dataprep" \
+         -H "Content-Type: multipart/form-data" \
+         -F 'link_list=["https://opea.dev"]'
+    ```
+
+    This command updates a knowledge base by submitting a list of HTTP links for processing.
+
+    Also, you are able to get the file list that you uploaded:
+
+    ```bash
+    curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \
+         -H "Content-Type: application/json"
+    ```
+
+    To delete the file/link you uploaded:
+
+    ```bash
+    # delete link
+    curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+         -d '{"file_path": "https://opea.dev.txt"}' \
+         -H "Content-Type: application/json"
+
+    # delete file
+    curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+         -d '{"file_path": "nke-10k-2023.pdf"}' \
+         -H "Content-Type: application/json"
+
+    # delete all uploaded files and links
+    curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \
+         -d '{"file_path": "all"}' \
+         -H "Content-Type: application/json"
+    ```
+
+17. Prompt Registry Microservice
+
+    If you want to update the default Prompts in the application for your user, you can use the following commands:
+
+    ```bash
+    curl -X 'POST' \
+      http://${host_ip}:6018/v1/prompt/create \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json' \
+      -d '{
+        "prompt_text": "test prompt", "user": "test"
+    }'
+    ```
+
+    Retrieve prompt from database based on user or prompt_id
+
+    ```bash
+    curl -X 'POST' \
+      http://${host_ip}:6018/v1/prompt/get \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json' \
+      -d '{
+      "user": "test"}'
+
+    curl -X 'POST' \
+      http://${host_ip}:6018/v1/prompt/get \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json' \
+      -d '{
+      "user": "test", "prompt_id":"{prompt_id returned from save prompt route above}"}'
+    ```
+
+    Delete prompt from database based on prompt_id provided
+
+    ```bash
+    curl -X 'POST' \
+      http://${host_ip}:6018/v1/prompt/delete \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json' \
+      -d '{
+      "user": "test", "prompt_id":"{prompt_id to be deleted}"}'
+    ```
+
+18. Chat History Microservice
+
+    To validate the chatHistory Microservice, you can use the following commands.
+
+    Create a sample conversation and get the message ID.
+
+    ```bash
+    curl -X 'POST' \
+      http://${host_ip}:6012/v1/chathistory/create \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json' \
+      -d '{
+      "data": {
+        "messages": "test Messages", "user": "test"
+      }
+    }'
+    ```
+
+    Retrieve the conversation based on user or conversation id
+
+    ```bash
+    curl -X 'POST' \
+      http://${host_ip}:6012/v1/chathistory/get \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json' \
+      -d '{
+      "user": "test"}'
+
+    curl -X 'POST' \
+      http://${host_ip}:6012/v1/chathistory/get \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json' \
+      -d '{
+      "user": "test", "id":"{Conversation id to retrieve }"}'
+    ```
+
+    Delete Conversation from database based on conversation id provided.
+
+    ```bash
+    curl -X 'POST' \
+      http://${host_ip}:6012/v1/chathistory/delete \
+      -H 'accept: application/json' \
+      -H 'Content-Type: application/json' \
+      -d '{
+      "user": "test", "id":"{Conversation id to Delete}"}'
+    ```
+
+---
+
+## 🚀 Launch the UI
+
+To access the frontend, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below:
+
+```yaml
+  productivity-suite-xeon-react-ui-server:
+    image: opea/productivity-suite-react-ui-server:latest
+    ...
+    ports:
+      - "5715:80" # Map port 5715 on the host to port 80 in the container.
+```
+
+Here is an example of running Productivity Suite
+![project-screenshot](../../../../assets/img/chat_qna_init.png)
+![project-screenshot](../../../../assets/img/Login_page.png)
+
+---
+
+## 🛠️ Key Features
+
+Here are some of the project's features:
+
+### 💬ChatQnA
+
+- **Start a Text Chat**: Initiate a text chat with the ability to input written conversations, where the dialogue content can also be customized based on uploaded files.
+- **Context Awareness**: The AI assistant maintains the context of the conversation, understanding references to previous statements or questions. This allows for more natural and coherent exchanges.
+
+### 🎛️ Data Source
+
+- **File Upload or Remote Link**: The choice between uploading locally or copying a remote link. Chat according to uploaded knowledge base.
+- **File Management**: Uploaded files are listed, and the user can add or remove files/links.
+
+#### Screenshots
+
+![project-screenshot](../../../../assets/img/data_source.png)
+
+- **Clear Chat**: Clear the record of the current dialog box without retaining the contents of the dialog box.
+- **Chat history**: Historical chat records can still be retained after refreshing, making it easier for users to view the context.
+- **Conversational Chat**: The application maintains a history of the conversation, allowing users to review previous messages and the AI to refer back to earlier points in the dialogue when necessary.
+
+#### Screenshots
+
+![project-screenshot](../../../../assets/img/chat_qna_init.png)
+![project-screenshot](../../../../assets/img/chatqna_with_conversation.png)
+
+### 💻 Codegen
+
+- **Generate code**: generate the corresponding code based on the current user's input.
+
+#### Screenshots
+
+![project-screenshot](../../../../assets/img/codegen.png)
+
+### 📚 Document Summarization
+
+- **Summarizing Uploaded Files**: Upload files from their local device, then click 'Generate Summary' to summarize the content of the uploaded file. The summary will be displayed on the 'Summary' box.
+- **Summarizing Text via Pasting**: Paste the text to be summarized into the text box, then click 'Generate Summary' to produce a condensed summary of the content, which will be displayed in the 'Summary' box on the right.
+- **Scroll to Bottom**: The summarized content will automatically scroll to the bottom.
+
+#### Screenshots
+
+![project-screenshot](../../../../assets/img/doc_summary_paste.png)
+![project-screenshot](../../../../assets/img/doc_summary_file.png)
+
+### ❓ FAQ Generator
+
+- **Generate FAQs from Text via Pasting**: Paste the text into the text box, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
+
+- **Generate FAQs from Text via txt file Upload**: Upload the file in the Upload bar, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below.
+
+#### Screenshots
+
+![project-screenshot](../../../../assets/img/faq_generator.png)
diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/README_remote.md b/ProductivitySuite/docker_compose/intel/hpu/gaudi/README_remote.md
new file mode 100644
index 0000000000..991dd2a9a4
--- /dev/null
+++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/README_remote.md
@@ -0,0 +1,276 @@
+# Build Mega Service of Productivity Suite
+
+This document outlines the deployment process for OPEA Productivity Suite utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server and [GenAIExamples](https://github.com/opea-project/GenAIExamples.git) solutions. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`. 
+
+## 🚀 Build Docker Images
+
+Create a directory and clone the GenAIComps repository
+
+```bash
+mkdir genai && cd genai
+git clone --branch v1.0 https://github.com/opea-project/GenAIComps.git
+```
+Copy the patch files related to GenAIComps into the GenAIComps folder and apply them
+
+```bash
+cd GenAIComps
+git am *.patch
+```
+
+### 1. Build Embedding Image
+
+```bash
+docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile .
+```
+
+### 2. Build Rerank Image
+
+```bash
+docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile .
+```
+
+### 3. Build LLM Images
+
+#### Use TGI as backend to build FAQ Generation
+
+```bash
+docker build -t opea/llm-faqgen-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/faq-generation/tgi/langchain/Dockerfile .
+```
+
+#### Use TGI as backend to build Doc Summarization
+
+```bash
+docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/langchain/Dockerfile .
+```
+
+#### Use TGI as backend to build Text Generation
+
+```bash
+docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile .
+```
+
+### 4. Build Prompt Registry Image
+
+```bash
+docker build -t opea/promptregistry-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/mongo/Dockerfile .
+```
+
+### 5. Build Productivity Suite Docker Images
+
+The Productivity Suite is composed of multiple GenAIExample reference solutions composed together.
+
+```bash
+cd ..
+git clone --branch v1.0 https://github.com/opea-project/GenAIExamples.git
+cd GenAIExamples
+```
+
+Copy the patch files related to GenAIExamples into above cloned GenAIExamples folder
+
+Apply the patches
+```bash
+git am *.patch
+```
+
+#### 5.1 Build ChatQnA MegaService Docker Images
+
+```bash
+cd ..
+docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f GenAIExamples/ChatQnA/Dockerfile .
+```
+
+### 6. Build UI Docker Image
+
+Build the frontend Docker image with the command below:
+
+**Export the value of the public IP address of your server to the `host_ip` environment variable**
+
+```bash
+cd GenAIExamples/ProductivitySuite/ui
+docker build --no-cache -t opea/productivity-suite-react-ui-server:latest -f docker/Dockerfile.react .
+```
+
+## 🚀 Start Microservices
+
+### Setup Environment Variables
+
+Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below.
+
+**Export the value of the public IP address of your server to the `host_ip` environment variable**
+
+> Change the External_Public_IP below with the actual IPV4 value
+
+```
+export host_ip="External_Public_IP"
+```
+
+**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable**
+
+> Replace Your_Huggingface_API_Token below with your actual Huggingface API Token value
+
+```
+export your_hf_api_token="Your_Huggingface_API_Token"
+```
+
+**Append the value of the public IP address to the no_proxy list**
+
+```
+export your_no_proxy=${your_no_proxy},"External_Public_IP"
+```
+
+**Export the value of your remote host to the `remote_host` environment variable (Only if you are using remote TGI/TEI)**
+
+> Change the Your_Remote_Host below with your actual API Gateway Host value
+
+```
+export remote_host="Your_Remote_Host"
+```
+
+**Set ClientId, Client_Secret and Token URL only if the remote API is protected with OAuth Client Credentials Flow**
+
+**Export the value of your Remote API ClientId to the `clientid` environment variable**
+
+> Change the Your_API_ClientId below with your actual ClientId value
+
+```
+export clientid="Your_API_ClientId"
+```
+
+**Export the value of your Remote API client secret to the `client_secret` environment variable**
+
+> Change the Your_API_ClientSecret below with your actual ClientSecret value
+
+```
+export client_secret="Your_API_ClientSecret"
+```
+
+**Export the value of your Remote API token url to the `token_url` environment variable**
+
+> Change the Your_API_TokenUrl below with your actual Token URL value
+
+```
+export token_url="Your_API_TokenUrl"
+```
+
+**Export the value of your Remote Embedding Endpoint to the `embedding_endpoint` environment variable (Set this if you have tei embedding running remotely)**
+
+> Change the Your_Remote_Embedding_Endpoint below with your actual embedding endpoint value
+
+```
+export embedding_endpoint="Your_Remote_Embedding_Endpoint"
+```
+
+**Export the value of your Remote Reranking Endpoint to the `reranking_endpoint` environment variable (Set this if you have reranking running remotely)**
+
+> Change the Your_Remote_Reranking_Endpoint below with your actual reranking endpoint value
+
+```
+export reranking_endpoint="Your_Remote_Reranking_Endpoint"
+```
+
+**Export the value of your Remote TGI Endpoint to the `tgi_endpoint` environment variable (Set this if you have tgi running remotely)**
+
+> Change the Your_Remote_TGI_Endpoint below with your actual TGI endpoint value
+
+```
+export tgi_endpoint="Your_Remote_TGI_Endpoint"
+```
+
+**To use multiple TGI models**
+> Create the model_configs.json file under /GenAIExamples/ProductivitySuite/docker_compose/intel/hpu/gaudi folder
+> Add the model details as shown in the below example
+
+
+```bash
+cd GenAIExamples/ProductivitySuite/docker_compose/intel/hpu/gaudi
+touch model_configs.json
+```
+
+File Structure:
+
+[
+    {
+        "model_name": "Your Model Name",
+        "displayName": "Model Display Name for the UI",
+        "endpoint": "Model Endpoint with http/https",
+        "minToken": 100, //Min Token Value
+        "maxToken": 2000 //Max Token Value
+    },
+    {
+        "model_name": "Your Model Name",
+        "displayName": "Model Display Name for the UI",
+        "endpoint": "Model Endpoint with http/https",
+        "minToken": 100, //Min Token Value
+        "maxToken": 2000 //Max Token Value
+    }
+]
+
+Example:
+
+[
+    {
+        "model_name": "meta-llama/Meta-Llama-3.1-70B-Instruct",
+        "displayName": "llama-3.1-70B",
+        "endpoint": "https://<host>/<endpoint>",
+        "minToken": 100,
+        "maxToken": 2000
+    },
+    {
+        "model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+        "displayName": "llama-3.1-8B",
+        "endpoint": "https://<host>/<endpoint>",
+        "minToken": 100,
+        "maxToken": 2000
+    },
+    {
+        "model_name": "Intel/neural-chat-7b-v3-3",
+        "displayName": "neural chat",
+        "endpoint": "https://<host>/<endpoint>",
+        "minToken": 100,
+        "maxToken": 1000
+    }
+]
+
+> After creating the model_configs.json file and adding the model details, copy the same file into the public folder of the UI
+
+```bash
+cd ../../../../
+cp docker_compose/intel/hpu/gaudi/model_configs.json ui/react/public/model_configs.json
+```
+
+> Navigate to GenAIExamples/ProductivitySuite/docker_compose/intel/hpu/gaudi and run set_env_remote.sh
+
+```bash
+cd GenAIExamples/ProductivitySuite/docker_compose/intel/hpu/gaudi
+chmod +x set_env_remote.sh
+source set_env_remote.sh
+```
+
+Note: Please replace `host_ip` with your external IP address; do not use localhost.
+
+### Start all the services Docker Containers
+
+#### Run all services locally
+```bash
+docker compose -f compose.yaml up -d
+```
+
+#### Run TGI and TEI inference remote
+```bash
+docker compose -f compose_remote.yaml up -d
+```
+
+#### Run only TGI remote
+```bash
+docker compose -f compose_tgi_remote.yaml up -d
+```
+
+### Setup Keycloak
+
+Please refer to [keycloak_setup_guide](keycloak_setup_guide.md) for more detail related to Keycloak configuration setup.
+
+
+## 🚀 Launch the UI
+
+To access the frontend, open the following URL in your browser: http://{host_ip}:5174.
+
diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose.yaml b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose.yaml
new file mode 100644
index 0000000000..d15856e275
--- /dev/null
+++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -0,0 +1,398 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+version: "3.3"
+services:
+  redis-vector-db:
+    image: redis/redis-stack:7.2.0-v9
+    container_name: redis-vector-db
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    ports:
+      - "6379:6379"
+      - "8001:8001"
+  dataprep-redis-service:
+    image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
+    container_name: dataprep-redis-server
+    depends_on:
+      - redis-vector-db
+    ports:
+      - "6007:6007"
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      REDIS_URL: ${REDIS_URL}
+      INDEX_NAME: ${INDEX_NAME}
+  tei-embedding-service:
+    image: ghcr.io/huggingface/tei-gaudi:latest
+    container_name: tei-embedding-gaudi-server
+    ports:
+      - "6006:80"
+    volumes:
+      - "./data_embedding:/data"
+    shm_size: 1g
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      MAX_WARMUP_SEQUENCE_LENGTH: 512
+    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
+  embedding:
+    image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
+    container_name: embedding-tei-server
+    depends_on:
+      - tei-embedding-service
+    ports:
+      - "6000:6000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-embedding-service"
+    restart: unless-stopped
+  retriever:
+    image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
+    container_name: retriever-redis-server
+    depends_on:
+      - redis-vector-db
+    ports:
+      - "7000:7000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      REDIS_URL: ${REDIS_URL}
+      INDEX_NAME: ${INDEX_NAME}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    restart: unless-stopped
+  tei-reranking-service:
+    image: ghcr.io/huggingface/tei-gaudi:latest
+    container_name: tei-reranking-server
+    ports:
+      - "8808:80"
+    volumes:
+      - "./data_tei:/data"
+    shm_size: 1g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      MAX_WARMUP_SEQUENCE_LENGTH: 512
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    ipc: host
+    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
+  reranking:
+    image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
+    container_name: reranking-tei-gaudi-server
+    depends_on:
+      - tei-reranking-service
+    ports:
+      - "8000:8000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-reranking-service"
+    restart: unless-stopped
+  tgi_service:
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    container_name: tgi-service
+    ports:
+      - "9009:80"
+    volumes:
+      - "./data:/data"
+    shm_size: 1g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      ENABLE_HPU_GRAPH: true
+      LIMIT_HPU_GRAPH: true
+      USE_FLASH_ATTENTION: true
+      FLASH_ATTENTION_RECOMPUTE: true
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    ipc: host
+    command: --model-id ${LLM_MODEL_ID}
+  llm:
+    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
+    container_name: llm-tgi-gaudi-server
+    depends_on:
+      - tgi_service
+    ports:
+      - "9000:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+      
+    restart: unless-stopped
+  chatqna-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
+    container_name: chatqna-gaudi-backend-server
+    depends_on:
+      - redis-vector-db
+      - tei-embedding-service
+      - embedding
+      - retriever
+      - tei-reranking-service
+      - reranking
+      - tgi_service
+      - llm
+    ports:
+      - "8888:8888"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
+      RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
+      RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
+      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
+    ipc: host
+    restart: always
+  tgi_service_codegen:
+    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
+    container_name: tgi_service_codegen
+    ports:
+      - "8028:80"
+    volumes:
+      - "./data_codegen:/data"
+    shm_size: 1g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      ENABLE_HPU_GRAPH: true
+      LIMIT_HPU_GRAPH: true
+      USE_FLASH_ATTENTION: true
+      FLASH_ATTENTION_RECOMPUTE: true
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    ipc: host
+    command: --model-id ${LLM_MODEL_ID_CODEGEN} --max-input-length 1024 --max-total-tokens 2048
+  llm_codegen:
+    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
+    container_name: llm-tgi-server-codegen
+    depends_on:
+      - tgi_service_codegen
+    ports:
+      - "9001:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CODEGEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+    restart: unless-stopped
+  codegen-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
+    container_name: codegen-gaudi-backend-server
+    depends_on:
+      - llm
+    ports:
+      - "7778:7778"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CODEGEN}
+      LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_CODEGEN}
+    ipc: host
+    restart: always
+  llm_faqgen:
+    image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
+    container_name: llm-faqgen-server
+    depends_on:
+      - tgi_service
+    ports:
+      - "9002:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_FAQGEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+    restart: unless-stopped
+  faqgen-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
+    container_name: faqgen-gaudi-backend-server
+    depends_on:
+      - tgi_service
+      - llm_faqgen
+    ports:
+      - "8889:8888"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_FAQGEN}
+      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_FAQGEN}
+    ipc: host
+    restart: always
+  llm_docsum_server:
+    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+    container_name: llm-docsum-server
+    depends_on:
+      - tgi_service
+    ports:
+      - "9003:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_DOCSUM}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+    restart: unless-stopped
+  docsum-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
+    container_name: docsum-gaudi-backend-server
+    depends_on:
+      - tgi_service
+      - llm_docsum_server
+    ports:
+      - "8890:8888"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_DOCSUM}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_DOCSUM}
+    ipc: host
+    restart: always
+  mongo:
+    image: mongo:7.0.11
+    container_name: mongodb
+    ports:
+      - 27017:27017
+    environment:
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      no_proxy: ${no_proxy}
+    command: mongod --quiet --logpath /dev/null
+  chathistory-mongo:
+    image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest}
+    container_name: chathistory-mongo-server
+    ports:
+      - "6012:6012"
+    ipc: host
+    environment:
+      http_proxy: ${http_proxy}
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      MONGO_HOST: ${MONGO_HOST}
+      MONGO_PORT: ${MONGO_PORT}
+      COLLECTION_NAME: ${COLLECTION_NAME}
+    restart: unless-stopped
+  promptregistry-mongo:
+    image: ${REGISTRY:-opea}/promptregistry-mongo-server:${TAG:-latest}
+    container_name: promptregistry-mongo-server
+    ports:
+      - "6018:6018"
+    ipc: host
+    environment:
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      no_proxy: ${no_proxy}
+      MONGO_HOST: ${MONGO_HOST}
+      MONGO_PORT: ${MONGO_PORT}
+      COLLECTION_NAME: ${PROMPT_COLLECTION_NAME}
+    restart: unless-stopped
+  keycloak:
+    image: quay.io/keycloak/keycloak:25.0.2
+    container_name: keycloak-server
+    ports:
+      - 8081:8080
+    environment:
+      - KEYCLOAK_ADMIN=admin
+      - KEYCLOAK_ADMIN_PASSWORD=admin
+      - KC_PROXY=edge
+    ipc: host
+    command: start-dev
+    restart: always
+  productivity-suite-gaudi-react-ui-server:
+    image: ${REGISTRY:-opea}/productivity-suite-react-ui-server:${TAG:-latest}
+    container_name: productivity-suite-gaudi-react-ui-server
+    ports:
+      - "5174:80"
+    environment:
+      - APP_BACKEND_SERVICE_ENDPOINT_CHATQNA=${BACKEND_SERVICE_ENDPOINT_CHATQNA}
+      - APP_BACKEND_SERVICE_ENDPOINT_CODEGEN=${BACKEND_SERVICE_ENDPOINT_CODEGEN}
+      - APP_BACKEND_SERVICE_ENDPOINT_DOCSUM=${BACKEND_SERVICE_ENDPOINT_DOCSUM}
+      - APP_BACKEND_SERVICE_ENDPOINT_FAQGEN=${BACKEND_SERVICE_ENDPOINT_FAQGEN}
+      - APP_DATAPREP_SERVICE_ENDPOINT=${DATAPREP_SERVICE_ENDPOINT}
+      - APP_DATAPREP_GET_FILE_ENDPOINT=${DATAPREP_GET_FILE_ENDPOINT}
+      - APP_DATAPREP_DELETE_FILE_ENDPOINT=${DATAPREP_DELETE_FILE_ENDPOINT}
+      - APP_CHAT_HISTORY_CREATE_ENDPOINT=${CHAT_HISTORY_CREATE_ENDPOINT}
+      - APP_CHAT_HISTORY_DELETE_ENDPOINT=${CHAT_HISTORY_DELETE_ENDPOINT}
+      - APP_CHAT_HISTORY_GET_ENDPOINT=${CHAT_HISTORY_GET_ENDPOINT}
+      - APP_PROMPT_SERVICE_GET_ENDPOINT=${PROMPT_SERVICE_GET_ENDPOINT}
+      - APP_PROMPT_SERVICE_CREATE_ENDPOINT=${PROMPT_SERVICE_CREATE_ENDPOINT}
+      - APP_KEYCLOAK_SERVICE_ENDPOINT=${KEYCLOAK_SERVICE_ENDPOINT}
+    ipc: host
+    restart: always
+networks:
+  default:
+    driver: bridge
\ No newline at end of file
diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml
new file mode 100644
index 0000000000..82408a33fe
--- /dev/null
+++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml
@@ -0,0 +1,306 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+version: "3.3"
+services:
+  redis-vector-db:
+    image: redis/redis-stack:7.2.0-v9
+    container_name: redis-vector-db
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    ports:
+      - "6379:6379"
+      - "8001:8001"
+  dataprep-redis-service:
+    image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
+    container_name: dataprep-redis-server
+    depends_on:
+      - redis-vector-db
+    ports:
+      - "6007:6007"
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      REDIS_URL: ${REDIS_URL}
+      INDEX_NAME: ${INDEX_NAME}
+  embedding:
+    image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
+    container_name: embedding-tei-server
+    ports:
+      - "6000:6000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-embedding-service"
+      CLIENTID: ${CLIENTID}
+      CLIENT_SECRET: ${CLIENT_SECRET}
+      TOKEN_URL: ${TOKEN_URL}
+    restart: unless-stopped
+  retriever:
+    image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
+    container_name: retriever-redis-server
+    depends_on:
+      - redis-vector-db
+    ports:
+      - "7000:7000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      REDIS_URL: ${REDIS_URL}
+      INDEX_NAME: ${INDEX_NAME}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    restart: unless-stopped
+  reranking:
+    image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
+    container_name: reranking-tei-gaudi-server
+    ports:
+      - "8000:8000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-reranking-service"
+      CLIENTID: ${CLIENTID}
+      CLIENT_SECRET: ${CLIENT_SECRET}
+      TOKEN_URL: ${TOKEN_URL}
+    restart: unless-stopped
+  llm:
+    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
+    container_name: llm-tgi-gaudi-server
+    ports:
+      - "9000:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+      MODEL_CONFIGS: ${MODEL_CONFIGS}
+      CLIENTID: ${CLIENTID}
+      CLIENT_SECRET: ${CLIENT_SECRET}
+      TOKEN_URL: ${TOKEN_URL}
+      
+    restart: unless-stopped
+  chatqna-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
+    container_name: chatqna-gaudi-backend-server
+    depends_on:
+      - redis-vector-db
+      - embedding
+      - retriever
+      - reranking
+      - llm
+    ports:
+      - "8888:8888"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
+      RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
+      RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
+      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
+    ipc: host
+    restart: always
+  llm_codegen:
+    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
+    container_name: llm-tgi-server-codegen
+    ports:
+      - "9001:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CODEGEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+      CLIENTID: ${CLIENTID}
+      CLIENT_SECRET: ${CLIENT_SECRET}
+      TOKEN_URL: ${TOKEN_URL}
+    restart: unless-stopped
+  codegen-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
+    container_name: codegen-gaudi-backend-server
+    ports:
+      - "7778:7778"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CODEGEN}
+      LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_CODEGEN}
+    ipc: host
+    restart: always
+  llm_faqgen:
+    image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
+    container_name: llm-faqgen-server
+    ports:
+      - "9002:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_FAQGEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+      CLIENTID: ${CLIENTID}
+      CLIENT_SECRET: ${CLIENT_SECRET}
+      TOKEN_URL: ${TOKEN_URL}
+    restart: unless-stopped
+  faqgen-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
+    container_name: faqgen-gaudi-backend-server
+    depends_on:
+      - llm_faqgen
+    ports:
+      - "8889:8888"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_FAQGEN}
+      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_FAQGEN}
+    ipc: host
+    restart: always
+  llm_docsum_server:
+    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+    container_name: llm-docsum-server
+    ports:
+      - "9003:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_DOCSUM}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+      CLIENTID: ${CLIENTID}
+      CLIENT_SECRET: ${CLIENT_SECRET}
+      TOKEN_URL: ${TOKEN_URL}
+    restart: unless-stopped
+  docsum-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
+    container_name: docsum-gaudi-backend-server
+    depends_on:
+      - llm_docsum_server
+    ports:
+      - "8890:8888"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_DOCSUM}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_DOCSUM}
+    ipc: host
+    restart: always
+  mongo:
+    image: mongo:7.0.11
+    container_name: mongodb
+    ports:
+      - 27017:27017
+    environment:
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      no_proxy: ${no_proxy}
+    command: mongod --quiet --logpath /dev/null
+  chathistory-mongo:
+    image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest}
+    container_name: chathistory-mongo-server
+    ports:
+      - "6012:6012"
+    ipc: host
+    environment:
+      http_proxy: ${http_proxy}
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      MONGO_HOST: ${MONGO_HOST}
+      MONGO_PORT: ${MONGO_PORT}
+      COLLECTION_NAME: ${COLLECTION_NAME}
+    restart: unless-stopped
+  promptregistry-mongo:
+    image: ${REGISTRY:-opea}/promptregistry-mongo-server:${TAG:-latest}
+    container_name: promptregistry-mongo-server
+    ports:
+      - "6018:6018"
+    ipc: host
+    environment:
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      no_proxy: ${no_proxy}
+      MONGO_HOST: ${MONGO_HOST}
+      MONGO_PORT: ${MONGO_PORT}
+      COLLECTION_NAME: ${PROMPT_COLLECTION_NAME}
+    restart: unless-stopped
+  keycloak:
+    image: quay.io/keycloak/keycloak:25.0.2
+    container_name: keycloak-server
+    ports:
+      - 8080:8080
+    environment:
+      - KEYCLOAK_ADMIN=admin
+      - KEYCLOAK_ADMIN_PASSWORD=admin
+      - KC_PROXY=edge
+    ipc: host
+    command: 
+      - start-dev
+    restart: always
+  productivity-suite-gaudi-react-ui-server:
+    image: ${REGISTRY:-opea}/productivity-suite-react-ui-server:${TAG:-latest}
+    container_name: productivity-suite-gaudi-react-ui-server
+    ports:
+      - "5174:80"
+    environment:
+      - APP_BACKEND_SERVICE_ENDPOINT_CHATQNA=${BACKEND_SERVICE_ENDPOINT_CHATQNA}
+      - APP_BACKEND_SERVICE_ENDPOINT_CODEGEN=${BACKEND_SERVICE_ENDPOINT_CODEGEN}
+      - APP_BACKEND_SERVICE_ENDPOINT_DOCSUM=${BACKEND_SERVICE_ENDPOINT_DOCSUM}
+      - APP_BACKEND_SERVICE_ENDPOINT_FAQGEN=${BACKEND_SERVICE_ENDPOINT_FAQGEN}
+      - APP_DATAPREP_SERVICE_ENDPOINT=${DATAPREP_SERVICE_ENDPOINT}
+      - APP_DATAPREP_GET_FILE_ENDPOINT=${DATAPREP_GET_FILE_ENDPOINT}
+      - APP_DATAPREP_DELETE_FILE_ENDPOINT=${DATAPREP_DELETE_FILE_ENDPOINT}
+      - APP_CHAT_HISTORY_CREATE_ENDPOINT=${CHAT_HISTORY_CREATE_ENDPOINT}
+      - APP_CHAT_HISTORY_DELETE_ENDPOINT=${CHAT_HISTORY_DELETE_ENDPOINT}
+      - APP_CHAT_HISTORY_GET_ENDPOINT=${CHAT_HISTORY_GET_ENDPOINT}
+      - APP_PROMPT_SERVICE_GET_ENDPOINT=${PROMPT_SERVICE_GET_ENDPOINT}
+      - APP_PROMPT_SERVICE_CREATE_ENDPOINT=${PROMPT_SERVICE_CREATE_ENDPOINT}
+      - APP_KEYCLOAK_SERVICE_ENDPOINT=${KEYCLOAK_SERVICE_ENDPOINT}
+    ipc: host
+    restart: always
+networks:
+  default:
+    driver: bridge
\ No newline at end of file
diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml
new file mode 100644
index 0000000000..02ab491e5f
--- /dev/null
+++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml
@@ -0,0 +1,354 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+version: "3.3"
+services:
+  redis-vector-db:
+    image: redis/redis-stack:7.2.0-v9
+    container_name: redis-vector-db
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    ports:
+      - "6379:6379"
+      - "8001:8001"
+  dataprep-redis-service:
+    image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest}
+    container_name: dataprep-redis-server
+    depends_on:
+      - redis-vector-db
+    ports:
+      - "6007:6007"
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      REDIS_URL: ${REDIS_URL}
+      INDEX_NAME: ${INDEX_NAME}
+  tei-embedding-service:
+    image: ghcr.io/huggingface/tei-gaudi:latest
+    container_name: tei-embedding-gaudi-server
+    ports:
+      - "6006:80"
+    volumes:
+      - "./data_embedding:/data"
+    shm_size: 1g
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      MAX_WARMUP_SEQUENCE_LENGTH: 512
+    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
+  embedding:
+    image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest}
+    container_name: embedding-tei-server
+    depends_on:
+      - tei-embedding-service
+    ports:
+      - "6000:6000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-embedding-service"
+    restart: unless-stopped
+  retriever:
+    image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
+    container_name: retriever-redis-server
+    depends_on:
+      - redis-vector-db
+    ports:
+      - "7000:7000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      REDIS_URL: ${REDIS_URL}
+      INDEX_NAME: ${INDEX_NAME}
+      TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+    restart: unless-stopped
+  tei-reranking-service:
+    image: ghcr.io/huggingface/tei-gaudi:latest
+    container_name: tei-reranking-server
+    ports:
+      - "8808:80"
+    volumes:
+      - "./data_tei:/data"
+    shm_size: 1g
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+      MAX_WARMUP_SEQUENCE_LENGTH: 512
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    ipc: host
+    command: --model-id ${RERANK_MODEL_ID} --auto-truncate
+  reranking:
+    image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest}
+    container_name: reranking-tei-gaudi-server
+    depends_on:
+      - tei-reranking-service
+    ports:
+      - "8000:8000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-reranking-service"
+    restart: unless-stopped
+  llm:
+    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
+    container_name: llm-tgi-gaudi-server
+    # No depends_on: the TGI backend is reached through TGI_LLM_ENDPOINT_CHATQNA and
+    # this file defines no tgi_service, so depending on it makes the project invalid.
+    ports:
+      - "9000:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      HF_HUB_DISABLE_PROGRESS_BARS: 1
+      HF_HUB_ENABLE_HF_TRANSFER: 0
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+      MODEL_CONFIGS: ${MODEL_CONFIGS}
+      CLIENTID: ${CLIENTID}
+      CLIENT_SECRET: ${CLIENT_SECRET}
+      TOKEN_URL: ${TOKEN_URL}
+
+    restart: unless-stopped
+  chatqna-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/chatqna:${TAG:-latest}
+    container_name: chatqna-gaudi-backend-server
+    depends_on:
+      - redis-vector-db
+      - tei-embedding-service
+      - embedding
+      - retriever
+      - tei-reranking-service
+      - reranking
+      # tgi_service is intentionally absent: this file defines no such service.
+      - llm
+    ports:
+      - "8888:8888"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP}
+      RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP}
+      RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP}
+      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA}
+    ipc: host
+    restart: always
+  llm_codegen:
+    image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
+    container_name: llm-tgi-server-codegen
+    ports:
+      - "9001:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CODEGEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+      CLIENTID: ${CLIENTID}
+      CLIENT_SECRET: ${CLIENT_SECRET}
+      TOKEN_URL: ${TOKEN_URL}
+    restart: unless-stopped
+  codegen-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/codegen:${TAG:-latest}
+    container_name: codegen-gaudi-backend-server
+    depends_on:
+      - llm_codegen
+    ports:
+      - "7778:7778"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CODEGEN}
+      LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_CODEGEN}
+    ipc: host
+    restart: always
+  llm_faqgen:
+    image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest}
+    container_name: llm-faqgen-server
+    ports:
+      - "9002:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_FAQGEN}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+      CLIENTID: ${CLIENTID}
+      CLIENT_SECRET: ${CLIENT_SECRET}
+      TOKEN_URL: ${TOKEN_URL}
+    restart: unless-stopped
+  faqgen-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/faqgen:${TAG:-latest}
+    container_name: faqgen-gaudi-backend-server
+    depends_on:
+      - llm_faqgen
+    ports:
+      - "8889:8888"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_FAQGEN}
+      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_FAQGEN}
+    ipc: host
+    restart: always
+  llm_docsum_server:
+    image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest}
+    container_name: llm-docsum-server
+    ports:
+      - "9003:9000"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_DOCSUM}
+      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
+      LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2}
+      LANGCHAIN_PROJECT: "opea-llm-service"
+      CLIENTID: ${CLIENTID}
+      CLIENT_SECRET: ${CLIENT_SECRET}
+      TOKEN_URL: ${TOKEN_URL}
+    restart: unless-stopped
+  docsum-gaudi-backend-server:
+    image: ${REGISTRY:-opea}/docsum:${TAG:-latest}
+    container_name: docsum-gaudi-backend-server
+    depends_on:
+      # tgi_service is intentionally absent: this file defines no such service.
+      - llm_docsum_server
+    ports:
+      - "8890:8888"
+    environment:
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      http_proxy: ${http_proxy}
+      LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_DOCSUM}
+      MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP}
+      LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_DOCSUM}
+    ipc: host
+    restart: always
+  mongo:
+    image: mongo:7.0.11
+    container_name: mongodb
+    ports:
+      - 27017:27017
+    environment:
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      no_proxy: ${no_proxy}
+    command: mongod --quiet --logpath /dev/null
+  chathistory-mongo:
+    image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest}
+    container_name: chathistory-mongo-server
+    ports:
+      - "6012:6012"
+    ipc: host
+    environment:
+      http_proxy: ${http_proxy}
+      no_proxy: ${no_proxy}
+      https_proxy: ${https_proxy}
+      MONGO_HOST: ${MONGO_HOST}
+      MONGO_PORT: ${MONGO_PORT}
+      COLLECTION_NAME: ${COLLECTION_NAME}
+    restart: unless-stopped
+  promptregistry-mongo:
+    image: ${REGISTRY:-opea}/promptregistry-mongo-server:${TAG:-latest}
+    container_name: promptregistry-mongo-server
+    ports:
+      - "6018:6018"
+    ipc: host
+    environment:
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      no_proxy: ${no_proxy}
+      MONGO_HOST: ${MONGO_HOST}
+      MONGO_PORT: ${MONGO_PORT}
+      COLLECTION_NAME: ${PROMPT_COLLECTION_NAME}
+    restart: unless-stopped
+  keycloak:
+    image: quay.io/keycloak/keycloak:25.0.2
+    container_name: keycloak-server
+    ports:
+      - "8080:8080"
+    environment:
+      - KEYCLOAK_ADMIN=${KEYCLOAK_ADMIN:-admin}
+      - KEYCLOAK_ADMIN_PASSWORD=${KEYCLOAK_ADMIN_PASSWORD:-admin}
+      - KC_PROXY=edge
+    ipc: host
+    command: start-dev
+    restart: always
+  productivity-suite-gaudi-react-ui-server:
+    image: ${REGISTRY:-opea}/productivity-suite-react-ui-server:${TAG:-latest}
+    container_name: productivity-suite-gaudi-react-ui-server
+    ports:
+      - "5174:80"
+    environment:
+      - APP_BACKEND_SERVICE_ENDPOINT_CHATQNA=${BACKEND_SERVICE_ENDPOINT_CHATQNA}
+      - APP_BACKEND_SERVICE_ENDPOINT_CODEGEN=${BACKEND_SERVICE_ENDPOINT_CODEGEN}
+      - APP_BACKEND_SERVICE_ENDPOINT_DOCSUM=${BACKEND_SERVICE_ENDPOINT_DOCSUM}
+      - APP_BACKEND_SERVICE_ENDPOINT_FAQGEN=${BACKEND_SERVICE_ENDPOINT_FAQGEN}
+      - APP_DATAPREP_SERVICE_ENDPOINT=${DATAPREP_SERVICE_ENDPOINT}
+      - APP_DATAPREP_GET_FILE_ENDPOINT=${DATAPREP_GET_FILE_ENDPOINT}
+      - APP_DATAPREP_DELETE_FILE_ENDPOINT=${DATAPREP_DELETE_FILE_ENDPOINT}
+      - APP_CHAT_HISTORY_CREATE_ENDPOINT=${CHAT_HISTORY_CREATE_ENDPOINT}
+      - APP_CHAT_HISTORY_DELETE_ENDPOINT=${CHAT_HISTORY_DELETE_ENDPOINT}
+      - APP_CHAT_HISTORY_GET_ENDPOINT=${CHAT_HISTORY_GET_ENDPOINT}
+      - APP_PROMPT_SERVICE_GET_ENDPOINT=${PROMPT_SERVICE_GET_ENDPOINT}
+      - APP_PROMPT_SERVICE_CREATE_ENDPOINT=${PROMPT_SERVICE_CREATE_ENDPOINT}
+      - APP_KEYCLOAK_SERVICE_ENDPOINT=${KEYCLOAK_SERVICE_ENDPOINT}
+    ipc: host
+    restart: always
+networks:
+  default:
+    driver: bridge
\ No newline at end of file
diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env.sh b/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env.sh
new file mode 100644
index 0000000000..0139b532af
--- /dev/null
+++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env.sh
@@ -0,0 +1,48 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+export MONGO_HOST=${host_ip}
+export MONGO_PORT=27017
+export DB_NAME="opea"
+export COLLECTION_NAME="Conversations"
+export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+export RERANK_MODEL_ID="BAAI/bge-reranker-base"
+export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
+export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf"
+export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006"
+export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808"
+export TGI_LLM_ENDPOINT="http://${host_ip}:9009"
+export REDIS_URL="redis://${host_ip}:6379"
+export INDEX_NAME="rag-redis"
+export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN}
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export EMBEDDING_SERVICE_HOST_IP=${host_ip}
+export RETRIEVER_SERVICE_HOST_IP=${host_ip}
+export RERANK_SERVICE_HOST_IP=${host_ip}
+export LLM_SERVICE_HOST_IP=${host_ip}
+export LLM_SERVICE_HOST_IP_DOCSUM=${host_ip}
+export LLM_SERVICE_HOST_IP_FAQGEN=${host_ip}
+export LLM_SERVICE_HOST_IP_CODEGEN=${host_ip}
+export LLM_SERVICE_HOST_IP_CHATQNA=${host_ip}
+export TGI_LLM_ENDPOINT_CHATQNA="http://${host_ip}:9009"
+export TGI_LLM_ENDPOINT_CODEGEN="http://${host_ip}:8028"
+export TGI_LLM_ENDPOINT_FAQGEN="http://${host_ip}:9009"
+export TGI_LLM_ENDPOINT_DOCSUM="http://${host_ip}:9009"
+export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna"
+export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
+export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen"
+export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen"
+export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
+# Chat history, prompt registry and Keycloak endpoints handed to the React UI.
+export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create"
+export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete"
+export CHAT_HISTORY_GET_ENDPOINT="http://${host_ip}:6012/v1/chathistory/get"
+export PROMPT_SERVICE_GET_ENDPOINT="http://${host_ip}:6018/v1/prompt/get"
+export PROMPT_SERVICE_CREATE_ENDPOINT="http://${host_ip}:6018/v1/prompt/create"
+export KEYCLOAK_SERVICE_ENDPOINT="http://${host_ip}:8080"
+export LLM_SERVICE_HOST_PORT_FAQGEN=9002
+export LLM_SERVICE_HOST_PORT_CODEGEN=9001
+export LLM_SERVICE_HOST_PORT_DOCSUM=9003
+export PROMPT_COLLECTION_NAME="prompt"
diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env_remote.sh b/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env_remote.sh
new file mode 100644
index 0000000000..67bad368bd
--- /dev/null
+++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env_remote.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Check if the model_configs.json file exists
+if [ -f model_configs.json ]; then
+    # If the file exists, set the MODEL_CONFIGS environment variable using the content of the file
+    export MODEL_CONFIGS="$(jq -c . model_configs.json)"
+fi
+export MONGO_HOST=${host_ip}
+export MONGO_PORT=27017
+export DB_NAME="opea"
+export COLLECTION_NAME="Conversations"
+export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
+export RERANK_MODEL_ID="BAAI/bge-reranker-base"
+export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
+export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf"
+export TEI_EMBEDDING_ENDPOINT="${remote_host}/${embedding_endpoint}"
+export TEI_RERANKING_ENDPOINT="${remote_host}/${reranking_endpoint}"
+export TGI_LLM_ENDPOINT="${remote_host}/${tgi_endpoint}"
+export REDIS_URL="redis://${host_ip}:6379"
+export INDEX_NAME="rag-redis"
+export HUGGINGFACEHUB_API_TOKEN=${hf_api_token}
+export MEGA_SERVICE_HOST_IP=${host_ip}
+export EMBEDDING_SERVICE_HOST_IP=${host_ip}
+export RETRIEVER_SERVICE_HOST_IP=${host_ip}
+export RERANK_SERVICE_HOST_IP=${host_ip}
+export LLM_SERVICE_HOST_IP=${host_ip}
+export LLM_SERVICE_HOST_IP_DOCSUM=${host_ip}
+export LLM_SERVICE_HOST_IP_FAQGEN=${host_ip}
+export LLM_SERVICE_HOST_IP_CODEGEN=${host_ip}
+export LLM_SERVICE_HOST_IP_CHATQNA=${host_ip}
+export TGI_LLM_ENDPOINT_CHATQNA="${remote_host}/${tgi_endpoint}"
+export TGI_LLM_ENDPOINT_CODEGEN="${remote_host}/${tgi_endpoint}"
+export TGI_LLM_ENDPOINT_FAQGEN="${remote_host}/${tgi_endpoint}"
+export TGI_LLM_ENDPOINT_DOCSUM="${remote_host}/${tgi_endpoint}"
+export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna"
+export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file"
+export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen"
+export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen"
+export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum"
+export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep"
+export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file"
+# Chat history, prompt registry and Keycloak endpoints handed to the React UI.
+export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create"
+export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete"
+export CHAT_HISTORY_GET_ENDPOINT="http://${host_ip}:6012/v1/chathistory/get"
+export PROMPT_SERVICE_GET_ENDPOINT="http://${host_ip}:6018/v1/prompt/get"
+export PROMPT_SERVICE_CREATE_ENDPOINT="http://${host_ip}:6018/v1/prompt/create"
+export KEYCLOAK_SERVICE_ENDPOINT="http://${host_ip}:8080"
+export LLM_SERVICE_HOST_PORT_FAQGEN=9002
+export LLM_SERVICE_HOST_PORT_CODEGEN=9001
+export LLM_SERVICE_HOST_PORT_DOCSUM=9003
+export PROMPT_COLLECTION_NAME="prompt"
+export CLIENTID=${clientid}
+export CLIENT_SECRET=${client_secret}
+export TOKEN_URL=${token_url}
diff --git a/ProductivitySuite/ui/react/src/App.tsx b/ProductivitySuite/ui/react/src/App.tsx
index c12ee1d8fa..ebaeaf9d0a 100644
--- a/ProductivitySuite/ui/react/src/App.tsx
+++ b/ProductivitySuite/ui/react/src/App.tsx
@@ -18,9 +18,9 @@ import { useAppDispatch } from "./redux/store";
 import { setUser } from "./redux/User/userSlice";
 import { useEffect } from "react";
 
-const title = "Chat QnA"
+const title = "Digital Assistant"
 const navList: SidebarNavList = [
-  { icon: IconMessages, label: "Chat Qna", path: "/", children: <Conversation title={title} /> },
+  { icon: IconMessages, label: "Digital Assistant", path: "/", children: <Conversation title={title} /> },
   { icon: IconCode, label: "CodeGen", path: "/codegen", children: <CodeGen /> },
   { icon: IconFileTextAi, label: "DocSum", path: "/docsum", children: <DocSum /> },
   { icon: IconFileInfo, label: "FaqGen", path: "/faqgen", children: <FaqGen /> },
diff --git a/ProductivitySuite/ui/react/src/components/CodeGen/CodeGen.tsx b/ProductivitySuite/ui/react/src/components/CodeGen/CodeGen.tsx
index 29c96f61cb..1811b1bf0d 100644
--- a/ProductivitySuite/ui/react/src/components/CodeGen/CodeGen.tsx
+++ b/ProductivitySuite/ui/react/src/components/CodeGen/CodeGen.tsx
@@ -97,7 +97,7 @@ const CodeGen = () => {
             <div className={styleClasses.codeGenContent}>
                 <div className={styleClasses.codeGenContentMessages}>
                     <div className={styleClasses.codeGenTitle}>
-                        <Title order={3}>CodeGen</Title>
+                        <Title order={3}>Code Generator</Title>
                     </div>
 
                     <div className={styleClasses.historyContainer} ref={scrollViewport}>
diff --git a/ProductivitySuite/ui/react/src/components/Conversation/Conversation.tsx b/ProductivitySuite/ui/react/src/components/Conversation/Conversation.tsx
index e772248f39..e6455c85eb 100644
--- a/ProductivitySuite/ui/react/src/components/Conversation/Conversation.tsx
+++ b/ProductivitySuite/ui/react/src/components/Conversation/Conversation.tsx
@@ -4,8 +4,8 @@
 import { KeyboardEventHandler, SyntheticEvent, useEffect, useRef, useState } from 'react'
 import styleClasses from "./conversation.module.scss"
 import { ActionIcon, Group, Textarea, Title, Tooltip, rem } from '@mantine/core'
-import { IconArrowDown, IconArrowRight, IconArrowUp, IconMessagePlus } from '@tabler/icons-react'
-import { conversationSelector, doConversation, getAllConversations, newConversation, setSystemPrompt } from '../../redux/Conversation/ConversationSlice'
+import { IconArrowDown, IconArrowRight, IconMessagePlus, IconPencil } from '@tabler/icons-react'
+import { conversationSelector, doConversation, getAllConversations, newConversation, setSystemPrompt} from '../../redux/Conversation/ConversationSlice'
 import { ConversationMessage } from '../Message/conversationMessage'
 import { useAppDispatch, useAppSelector } from '../../redux/store'
 import { Message, MessageRole } from '../../redux/Conversation/Conversation'
@@ -21,7 +21,7 @@ type ConversationProps = {
 
 const Conversation = ({ title }: ConversationProps) => {
   const [prompt, setPrompt] = useState<string>("")
-  const [updateSystemPrompt, setUpdateSystemPrompt] = useState(false)
+  const [updateSystemPrompt, setUpdateSystemPrompt] = useState(true)
 
   const dispatch = useAppDispatch();
   const promptInputRef = useRef<HTMLTextAreaElement>(null)
@@ -53,8 +53,11 @@ const Conversation = ({ title }: ConversationProps) => {
     //     return { role: message.role, content: message.content }
     //   })
     // }
-
-    messages = [systemPromptObject, ...(selectedConversationHistory)]
+    if(selectedConversationHistory.length==0){
+      messages = [systemPromptObject, ...(selectedConversationHistory)]
+    }else{
+      messages = [...(selectedConversationHistory)]
+    }
 
     doConversation({
       conversationId: selectedConversationId,
@@ -111,7 +114,7 @@ const Conversation = ({ title }: ConversationProps) => {
     <div className={styleClasses.conversationWrapper}>
       <ConversationSideBar title={title} />
       <div className={styleClasses.conversationContent}>
-        <div className={styleClasses.conversationContentMessages} style={updateSystemPrompt ? { gridTemplateRows: `60px 1fr 160px` } : {} }>
+        <div className={styleClasses.conversationContentMessages} style={updateSystemPrompt ? { gridTemplateRows: `60px 1fr 180px` } : {} }>
           <div className={styleClasses.conversationTitle}>
             <Title order={3} className={styleClasses.title}>{selectedConversation?.first_query || ""} </Title>
             <span className={styleClasses.spacer}></span>
@@ -151,6 +154,7 @@ const Conversation = ({ title }: ConversationProps) => {
 
           <div className={styleClasses.conversationActions}>
             <Textarea
+              label="System Prompt"
               style={{
                 display: updateSystemPrompt ? 'block' : 'none',
                 marginBottom: '10px',
@@ -166,7 +170,7 @@ const Conversation = ({ title }: ConversationProps) => {
               <Tooltip label="update system prompt">
                 <ActionIcon onClick={() => setUpdateSystemPrompt((prev) => !prev)} size={32} radius="xl" variant="filled">
                   {updateSystemPrompt ? (<IconArrowDown style={{ width: rem(18), height: rem(18) }} stroke={1.5} />) :
-                    (<IconArrowUp style={{ width: rem(18), height: rem(18) }} stroke={1.5} />)}
+                    (<IconPencil style={{ width: rem(18), height: rem(18) }} stroke={1.5} />)}
                 </ActionIcon>
               </Tooltip>
               
diff --git a/ProductivitySuite/ui/react/src/components/Conversation/settings.tsx b/ProductivitySuite/ui/react/src/components/Conversation/settings.tsx
index 61fd87e9b8..b9b4b57d84 100644
--- a/ProductivitySuite/ui/react/src/components/Conversation/settings.tsx
+++ b/ProductivitySuite/ui/react/src/components/Conversation/settings.tsx
@@ -1,13 +1,33 @@
-import { NumberInput, Slider, Text, Title } from "@mantine/core"
+import { NumberInput, Select, Slider, Text, Title } from "@mantine/core"
 import { useAppDispatch, useAppSelector } from "../../redux/store"
-import { conversationSelector, setTemperature, setToken } from "../../redux/Conversation/ConversationSlice"
-
+import { conversationSelector, setTemperature, setToken, setModel, setMinToken, setMaxToken, setModels} from "../../redux/Conversation/ConversationSlice"
+import { useEffect } from "react";
 
 
 function Settings() {
-    const { token,maxTemperature, minTemperature, maxToken, minToken, temperature} = useAppSelector(conversationSelector)
+    const { token, maxTemperature, minTemperature, maxToken, minToken, temperature, models, model } = useAppSelector(conversationSelector)
     const dispatch = useAppDispatch();
-    
+
+    const modelOptions = models.map(model => ({
+        value: model.model_name,
+        label: model.displayName,
+        minToken: model.minToken,
+        maxToken: model.maxToken,
+    }));
+
+    const onModelChange = (value: string | null) => {
+        if (value) {
+            const selectedModel = models.find(m => m.model_name === value);
+            if (selectedModel) {
+                dispatch(setModel(value));
+                dispatch(setTemperature(0.4)); // Assuming you want to reset to a default value
+                dispatch(setToken(selectedModel.minToken));
+                dispatch(setMinToken(selectedModel.minToken));
+                dispatch(setMaxToken(selectedModel.maxToken));
+                // You might also want to update the min and max token values in the redux state here
+            }
+        }
+    };
     const onTemperatureChange = (value: number) => {
         dispatch(setTemperature(value))
     }
@@ -15,11 +35,45 @@ function Settings() {
         dispatch(setToken(Number(value)))
     }
 
+    // Loads the optional /model_configs.json; on any failure the redux defaults stay in place.
+    const callFunctions = async () => {
+        try {
+            const response = await fetch('/model_configs.json');
+            if (!response.ok) throw new Error(`HTTP error! status: ${response.status}`);
+            const model_configs: unknown = await response.json();
+            // Validate before dispatching: a non-array in `models` would crash `models.map` on render.
+            if (!Array.isArray(model_configs) || model_configs.length === 0) throw new Error('model_configs.json must be a non-empty array');
+            const [firstModel] = model_configs;
+            dispatch(setModels(model_configs));
+            dispatch(setMinToken(firstModel.minToken));
+            dispatch(setMaxToken(firstModel.maxToken));
+            dispatch(setModel(firstModel.model_name));
+        } catch (error) {
+            console.warn('model_configs.json missing or invalid, using default configuration.', error);
+        }
+    }
+    
+    useEffect(() => {
+        callFunctions()
+    }, [])
+
     return (
         <>
+        
             <div>
                 <Title order={4}>Settings</Title>
             </div>
+            {models.length > 0 && (
+                <div>
+                    <Select
+                        label="Model"
+                        placeholder="Pick a model"
+                        value={model}
+                        onChange={onModelChange}
+                        data={modelOptions}
+                    />
+                </div>
+            )}
             <div>
                 <Text>Temperature</Text>
                 <Slider
@@ -41,7 +95,7 @@ function Settings() {
                 />
             </div>
         </>
-
+        
     )
 }
 
diff --git a/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx b/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx
index 9e7472c658..550ec0f274 100644
--- a/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx
+++ b/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx
@@ -17,6 +17,17 @@ const DocSum = () => {
     const [fileContent, setFileContent] = useState<string>('');
     const [response, setResponse] = useState<string>('');
     
+    let messagesEnd:HTMLDivElement;
+
+    const scrollToView = () => {
+        if (messagesEnd) {
+            messagesEnd.scrollTop = messagesEnd.scrollHeight;
+        }
+    };
+    useEffect(()=>{
+        scrollToView()
+    },[response])
+
     useEffect(() => {
         if(isFile){
             setValue('')
@@ -72,17 +83,11 @@ const DocSum = () => {
         onmessage(msg) {
             if (msg?.data != "[DONE]") {
                 try {
-                    const res = JSON.parse(msg.data)
-                    const logs = res.ops;
-                    logs.forEach((log: { op: string; path: string; value: string }) => {
-                        if (log.op === "add") {
-                            if (
-                                log.value !== "</s>" && log.path.endsWith("/streamed_output/-") && log.path.length > "/streamed_output/-".length
-                            ) {
-                               setResponse(prev=>prev+log.value);
-                            }
-                        }
-                    });
+                    const match = msg.data.match(/b'([^']*)'/);
+                    if (match && match[1] != "</s>") {
+                        const extractedText = match[1];
+                        setResponse(prev => (prev + extractedText.replace("<|eot_id|>", "").replace(/\\n/g, "\n")));
+                    }
                 } catch (e) {
                     console.log("something wrong in msg", e);
                     throw e;
@@ -106,7 +111,7 @@ const DocSum = () => {
             <div className={styleClasses.docSumContent}>
                 <div className={styleClasses.docSumContentMessages}>
                     <div className={styleClasses.docSumTitle}>
-                        <Title order={3}>Doc Summary</Title>
+                        <Title order={3}>Content Summarizer</Title>
                     </div>
                     <div>
                         <Text size="lg" >Please upload file or paste content for summarization.</Text>
@@ -139,7 +144,10 @@ const DocSum = () => {
                         <Button loading={isGenerating} loaderProps={{ type: 'dots' }} onClick={handleSubmit}>Generate Summary</Button>
                     </div>
                     {response && (
-                        <div className={styleClasses.docSumResult}>
+                        <div className={styleClasses.docSumResult} ref={(el) => {
+                            if(el)
+                                messagesEnd = el;
+                        }}>
                             <Markdown content={response} />
                         </div>
                     )}
diff --git a/ProductivitySuite/ui/react/src/components/DocSum/docSum.module.scss b/ProductivitySuite/ui/react/src/components/DocSum/docSum.module.scss
index 399e979391..a5061d8151 100644
--- a/ProductivitySuite/ui/react/src/components/DocSum/docSum.module.scss
+++ b/ProductivitySuite/ui/react/src/components/DocSum/docSum.module.scss
@@ -38,6 +38,7 @@
         }
       }
       .docSumResult {
+        overflow-y: auto;
       }
     }
   }
diff --git a/ProductivitySuite/ui/react/src/components/FaqGen/FaqGen.tsx b/ProductivitySuite/ui/react/src/components/FaqGen/FaqGen.tsx
index ca731cbf8b..cbb74d4ad2 100644
--- a/ProductivitySuite/ui/react/src/components/FaqGen/FaqGen.tsx
+++ b/ProductivitySuite/ui/react/src/components/FaqGen/FaqGen.tsx
@@ -90,7 +90,7 @@ const FaqGen = () => {
                             if (
                                 log.value !== "</s>" && log.path.endsWith("/streamed_output/-") && log.path.length > "/streamed_output/-".length
                             ) {
-                               setResponse(prev=>prev+log.value);
+                                setResponse(prev => prev + log.value.replace("<|eot_id|>", "").replace(/\\n/g, "\n"));
                             }
                         }
                     });
diff --git a/ProductivitySuite/ui/react/src/components/Shared/CodeRender/CodeRender.tsx b/ProductivitySuite/ui/react/src/components/Shared/CodeRender/CodeRender.tsx
index 479034cece..a5ee1a16c2 100644
--- a/ProductivitySuite/ui/react/src/components/Shared/CodeRender/CodeRender.tsx
+++ b/ProductivitySuite/ui/react/src/components/Shared/CodeRender/CodeRender.tsx
@@ -11,7 +11,6 @@ type CodeRenderProps = {
 }
 const CodeRender = ({ cleanCode, language, inline }:CodeRenderProps) => {
     cleanCode = String(cleanCode).replace(/\n$/, '').replace(/^\s*[\r\n]/gm, '') //right trim and remove empty lines from the input
-    console.log(styles)
     try {
         return inline ? (<code className='inline-code'><i>{cleanCode}</i></code>) : (
             <div className={styles.code}>
diff --git a/ProductivitySuite/ui/react/src/redux/Conversation/Conversation.ts b/ProductivitySuite/ui/react/src/redux/Conversation/Conversation.ts
index 57ebb5ece2..72d8d12ca3 100644
--- a/ProductivitySuite/ui/react/src/redux/Conversation/Conversation.ts
+++ b/ProductivitySuite/ui/react/src/redux/Conversation/Conversation.ts
@@ -30,6 +30,13 @@ type file = {
   name: string;
 };
 
+export type Model = {
+  model_name: string;
+  displayName: string;
+  minToken: number;
+  maxToken: number;
+};
+
 export interface ConversationReducer {
   selectedConversationId: string;
   conversations: Conversation[];
@@ -37,6 +44,7 @@ export interface ConversationReducer {
   onGoingResult: string;
   filesInDataSource: file[];
   systemPrompt: string;
+  models: Model[];
   model: string;
   minToken: number;
   maxToken: number;
diff --git a/ProductivitySuite/ui/react/src/redux/Conversation/ConversationSlice.ts b/ProductivitySuite/ui/react/src/redux/Conversation/ConversationSlice.ts
index ea7617d82c..40475234ca 100644
--- a/ProductivitySuite/ui/react/src/redux/Conversation/ConversationSlice.ts
+++ b/ProductivitySuite/ui/react/src/redux/Conversation/ConversationSlice.ts
@@ -18,6 +18,7 @@ import {
   CHAT_HISTORY_GET,
   CHAT_HISTORY_DELETE,
 } from "../../config";
+import { Model } from './Conversation';
 
 const initialState: ConversationReducer = {
   conversations: [],
@@ -25,6 +26,7 @@ const initialState: ConversationReducer = {
   selectedConversationHistory: [],
   onGoingResult: "",
   filesInDataSource: [],
+  models: [],
   model: "Intel/neural-chat-7b-v3-3",
   systemPrompt: "You are helpful assistant",
   minToken: 100,
@@ -68,6 +70,18 @@ export const ConversationSlice = createSlice({
     setSystemPrompt: (state, action: PayloadAction<string>) => {
       state.systemPrompt = action.payload;
     },
+    setModel: (state, action: PayloadAction<string>) => {
+      state.model = action.payload;
+    },
+    setMinToken: (state, action: PayloadAction<number>) => {
+      state.minToken = action.payload;
+    },
+    setMaxToken: (state, action: PayloadAction<number>) => {
+      state.maxToken = action.payload;
+    },
+    setModels: (state, action: PayloadAction<Model []>) => {
+      state.models = action.payload;
+    }
   },
   extraReducers(builder) {
     builder.addCase(uploadFile.fulfilled, () => {
@@ -231,7 +245,9 @@ export const deleteConversation = createAsyncThunkWrapper(
 
 export const doConversation = (conversationRequest: ConversationRequest) => {
   const { conversationId, userPrompt, messages, model, token, temperature } = conversationRequest;
-  store.dispatch(addMessageToMessages(messages[0]));
+  if (messages.length === 1) {
+    store.dispatch(addMessageToMessages(messages[0]));
+  }
   store.dispatch(addMessageToMessages(userPrompt));
   const userPromptWithoutTime = {
     role: userPrompt.role,
@@ -240,7 +256,7 @@ export const doConversation = (conversationRequest: ConversationRequest) => {
   const body = {
     messages: [...messages, userPromptWithoutTime],
     model,
-    max_new_tokens: token,
+    max_tokens: token,
     temperature: temperature,
   };
 
@@ -271,7 +287,7 @@ export const doConversation = (conversationRequest: ConversationRequest) => {
             const match = msg.data.match(/b'([^']*)'/);
             if (match && match[1] != "</s>") {
               const extractedText = match[1];
-              result += extractedText;
+              result += extractedText.replace(/<\|eot_id\|>/g, "").replace(/\\n/g, "\n");
               store.dispatch(setOnGoingResult(result));
             }
           } catch (e) {
@@ -321,6 +337,10 @@ export const {
   setTemperature,
   setToken,
   setSystemPrompt,
+  setModel,
+  setMinToken,
+  setMaxToken,
+  setModels
 } = ConversationSlice.actions;
 export const conversationSelector = (state: RootState) => state.conversationReducer;
 export default ConversationSlice.reducer;
diff --git a/ProductivitySuite/ui/react/src/styles/components/context.module.scss b/ProductivitySuite/ui/react/src/styles/components/context.module.scss
index e2d3caafaa..cac58cdfdf 100644
--- a/ProductivitySuite/ui/react/src/styles/components/context.module.scss
+++ b/ProductivitySuite/ui/react/src/styles/components/context.module.scss
@@ -15,7 +15,7 @@
     "settings";
 
   grid-template-columns: auto;
-  grid-template-rows: 70px 1fr 175px;
+  grid-template-rows: 70px 1fr 240px;
 
   .contextTitle {
     grid-area: title;
@@ -30,6 +30,7 @@
     width: 100%;
     height: 60px;
     border-bottom: 1px solid light-dark(var(--mantine-color-gray-3), var(--mantine-color-dark-7));
+    font-size: 1.3vw !important;
   }
 
   .contextList {