From 3fa8f5ea33dae7dde4e9045b69c68c67acbdea47 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 28 Oct 2024 07:53:59 +0000 Subject: [PATCH] remote service with multi model support --- ChatQnA/Dockerfile.wrapper | 36 ++ ChatQnA/chatqna_wrapper.py | 68 +++ .../docker_compose/intel/hpu/gaudi/README.md | 554 ++++++++++++++++++ .../intel/hpu/gaudi/README_remote.md | 276 +++++++++ .../intel/hpu/gaudi/compose.yaml | 398 +++++++++++++ .../intel/hpu/gaudi/compose_remote.yaml | 306 ++++++++++ .../intel/hpu/gaudi/compose_tgi_remote.yaml | 354 +++++++++++ .../docker_compose/intel/hpu/gaudi/set_env.sh | 48 ++ .../intel/hpu/gaudi/set_env_remote.sh | 57 ++ ProductivitySuite/ui/react/src/App.tsx | 4 +- .../react/src/components/CodeGen/CodeGen.tsx | 2 +- .../components/Conversation/Conversation.tsx | 18 +- .../src/components/Conversation/settings.tsx | 66 ++- .../ui/react/src/components/DocSum/DocSum.tsx | 34 +- .../src/components/DocSum/docSum.module.scss | 1 + .../ui/react/src/components/FaqGen/FaqGen.tsx | 2 +- .../Shared/CodeRender/CodeRender.tsx | 1 - .../src/redux/Conversation/Conversation.ts | 8 + .../redux/Conversation/ConversationSlice.ts | 26 +- .../src/styles/components/context.module.scss | 3 +- 20 files changed, 2227 insertions(+), 35 deletions(-) create mode 100644 ChatQnA/Dockerfile.wrapper create mode 100644 ChatQnA/chatqna_wrapper.py create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/README.md create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/README_remote.md create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/compose.yaml create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env.sh create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env_remote.sh diff --git a/ChatQnA/Dockerfile.wrapper 
b/ChatQnA/Dockerfile.wrapper new file mode 100644 index 0000000000..1baf63e460 --- /dev/null +++ b/ChatQnA/Dockerfile.wrapper @@ -0,0 +1,36 @@ + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + git + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY /GenAIComps /home/user/GenAIComps +WORKDIR /home/user/ +#RUN git clone --branch v1.0 https://github.com/opea-project/GenAIComps.git + + +WORKDIR /home/user/GenAIComps +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt + +COPY /GenAIExamples/ChatQnA/chatqna_wrapper.py /home/user/chatqna.py + +ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps + +USER user + +WORKDIR /home/user + +RUN echo 'ulimit -S -n 999999' >> ~/.bashrc + +ENTRYPOINT ["python", "chatqna.py"] diff --git a/ChatQnA/chatqna_wrapper.py b/ChatQnA/chatqna_wrapper.py new file mode 100644 index 0000000000..09062b5d27 --- /dev/null +++ b/ChatQnA/chatqna_wrapper.py @@ -0,0 +1,68 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType + +MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0") +MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888)) +EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0") +EMBEDDING_SERVICE_PORT = int(os.getenv("EMBEDDING_SERVICE_PORT", 6000)) +RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0") +RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000)) +RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0") +RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000)) +LLM_SERVICE_HOST_IP = 
os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") +LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000)) + + +class ChatQnAService: + def __init__(self, host="0.0.0.0", port=8000): + self.host = host + self.port = port + self.megaservice = ServiceOrchestrator() + + def add_remote_service(self): + embedding = MicroService( + name="embedding", + host=EMBEDDING_SERVICE_HOST_IP, + port=EMBEDDING_SERVICE_PORT, + endpoint="/v1/embeddings", + use_remote_service=True, + service_type=ServiceType.EMBEDDING, + ) + retriever = MicroService( + name="retriever", + host=RETRIEVER_SERVICE_HOST_IP, + port=RETRIEVER_SERVICE_PORT, + endpoint="/v1/retrieval", + use_remote_service=True, + service_type=ServiceType.RETRIEVER, + ) + rerank = MicroService( + name="rerank", + host=RERANK_SERVICE_HOST_IP, + port=RERANK_SERVICE_PORT, + endpoint="/v1/reranking", + use_remote_service=True, + service_type=ServiceType.RERANK, + ) + llm = MicroService( + name="llm", + host=LLM_SERVICE_HOST_IP, + port=LLM_SERVICE_PORT, + endpoint="/v1/chat/completions", + use_remote_service=True, + service_type=ServiceType.LLM, + ) + self.megaservice.add(embedding).add(retriever).add(rerank).add(llm) + self.megaservice.flow_to(embedding, retriever) + self.megaservice.flow_to(retriever, rerank) + self.megaservice.flow_to(rerank, llm) + self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port) + + +if __name__ == "__main__": + chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT) + chatqna.add_remote_service() diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/README.md b/ProductivitySuite/docker_compose/intel/hpu/gaudi/README.md new file mode 100644 index 0000000000..c5463ad103 --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/README.md @@ -0,0 +1,554 @@ +# Build Mega Service of Productivity Suite on Xeon + +This document outlines the deployment process for OPEA Productivity Suite utilizing the 
[GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server and [GenAIExamples](https://github.com/opea-project/GenAIExamples.git) solutions. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`. We will publish the Docker images to Docker Hub soon, it will simplify the deployment process for this service. + +--- + +## 🐳 Build Docker Images + +First of all, you need to build Docker Images locally and install the python package of it. + +### 1. Build Embedding Image + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile . +``` + +### 2. Build Retriever Image + +```bash +docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile . +``` + +### 3. Build Rerank Image + +```bash +docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile . +``` + +### 4. Build LLM Image + +#### Use TGI as backend + +```bash +docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile . +``` + +### 5. Build Dataprep Image + +```bash +docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile . +``` + +### 6. 
Build Prompt Registry Image + +```bash +docker build -t opea/promptregistry-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/mongo/Dockerfile . +``` + +### 7. Build Chat History Image + +```bash +docker build -t opea/chathistory-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/mongo/Dockerfile . +cd .. +``` + +### 8. Build MegaService Docker Images + +The Productivity Suite is built from multiple GenAIExamples reference solutions combined together. + +#### 8.1 Build ChatQnA MegaService Docker Images + +```bash +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/ChatQnA/ +docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +#### 8.2 Build DocSum Megaservice Docker Images + +```bash +cd GenAIExamples/DocSum +docker build --no-cache -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +#### 8.3 Build CodeGen Megaservice Docker Images + +```bash +cd GenAIExamples/CodeGen +docker build --no-cache -t opea/codegen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +#### 8.4 Build FAQGen Megaservice Docker Images + +```bash +cd GenAIExamples/FaqGen +docker build --no-cache -t opea/faqgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### 9. Build UI Docker Image + +Build the frontend Docker image with the command below: + +**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** + +```bash +cd GenAIExamples/ProductivitySuite/ui +docker build --no-cache -t opea/productivity-suite-react-ui-server:latest -f docker/Dockerfile.react . 
+``` + +--- + +## 🚀 Start Microservices + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as shown below. + +**Export the value of the public IP address of your Xeon server to the `host_ip` environment variable** + +> Change the External_Public_IP below with the actual IPv4 value + +``` +export host_ip="External_Public_IP" +``` + +**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable** + +> Change the Your_Huggingface_API_Token below with your actual Huggingface API Token value + +``` +export your_hf_api_token="Your_Huggingface_API_Token" +``` + +**Append the value of the public IP address to the no_proxy list** + +``` +export your_no_proxy=${your_no_proxy},"External_Public_IP" +``` + +```bash +export MONGO_HOST=${host_ip} +export MONGO_PORT=27017 +export DB_NAME="test" +export COLLECTION_NAME="Conversations" +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export RERANK_MODEL_ID="BAAI/bge-reranker-base" +export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf" +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" +export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" +export TGI_LLM_ENDPOINT="http://${host_ip}:9009" +export REDIS_URL="redis://${host_ip}:6379" +export INDEX_NAME="rag-redis" +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export MEGA_SERVICE_HOST_IP=${host_ip} +export EMBEDDING_SERVICE_HOST_IP=${host_ip} +export RETRIEVER_SERVICE_HOST_IP=${host_ip} +export RERANK_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP_DOCSUM=${host_ip} +export LLM_SERVICE_HOST_IP_FAQGEN=${host_ip} +export LLM_SERVICE_HOST_IP_CODEGEN=${host_ip} +export LLM_SERVICE_HOST_IP_CHATQNA=${host_ip} +export TGI_LLM_ENDPOINT_CHATQNA="http://${host_ip}:9009" +export TGI_LLM_ENDPOINT_CODEGEN="http://${host_ip}:8028" +export 
TGI_LLM_ENDPOINT_FAQGEN="http://${host_ip}:9009" +export TGI_LLM_ENDPOINT_DOCSUM="http://${host_ip}:9009" +export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file" +export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen" +export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen" +export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" +export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" +export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete" +export CHAT_HISTORY_GET_ENDPOINT="http://${host_ip}:6012/v1/chathistory/get" +export PROMPT_SERVICE_GET_ENDPOINT="http://${host_ip}:6018/v1/prompt/get" +export PROMPT_SERVICE_CREATE_ENDPOINT="http://${host_ip}:6018/v1/prompt/create" +export KEYCLOAK_SERVICE_ENDPOINT="http://${host_ip}:8080" +export LLM_SERVICE_HOST_PORT_FAQGEN=9002 +export LLM_SERVICE_HOST_PORT_CODEGEN=9001 +export LLM_SERVICE_HOST_PORT_DOCSUM=9003 +export PROMPT_COLLECTION_NAME="prompt" +``` + +Note: Please replace `host_ip` with your external IP address; do not use localhost. + +### Start all the services Docker Containers + +> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file + +```bash +cd GenAIExamples/ProductivitySuite/docker_compose/intel/cpu/xeon + +docker compose -f compose.yaml up -d +``` + +--- + +### 🔐 Setup Keycloak + +Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more details on Keycloak configuration setup. + +--- + +### ✅ Validate Microservices + +1. 
TEI Embedding Service + + ```bash + curl ${host_ip}:6006/embed \ + -X POST \ + -d '{"inputs":"What is Deep Learning?"}' \ + -H 'Content-Type: application/json' + ``` + +2. Embedding Microservice + + ```bash + curl http://${host_ip}:6000/v1/embeddings\ + -X POST \ + -d '{"text":"hello"}' \ + -H 'Content-Type: application/json' + ``` + +3. Retriever Microservice + + To consume the retriever microservice, you need to generate a mock embedding vector by Python script. The length of embedding vector + is determined by the embedding model. + Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, which vector size is 768. + + Check the vector dimension of your embedding model, set `your_embedding` dimension equals to it. + + ```bash + export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + curl http://${host_ip}:7000/v1/retrieval \ + -X POST \ + -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' + ``` + +4. TEI Reranking Service + + ```bash + curl http://${host_ip}:8808/rerank \ + -X POST \ + -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \ + -H 'Content-Type: application/json' + ``` + +5. Reranking Microservice + + ```bash + curl http://${host_ip}:8000/v1/reranking\ + -X POST \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ + -H 'Content-Type: application/json' + ``` + +6. LLM backend Service (ChatQnA, DocSum, FAQGen) + + ```bash + curl http://${host_ip}:9009/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +7. 
LLM backend Service (CodeGen) + + ```bash + curl http://${host_ip}:8028/generate \ + -X POST \ + -d '{"inputs":"def print_hello_world():","parameters":{"max_new_tokens":256, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +8. ChatQnA LLM Microservice + + ```bash + curl http://${host_ip}:9000/v1/chat/completions\ + -X POST \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -H 'Content-Type: application/json' + ``` + +9. CodeGen LLM Microservice + + ```bash + curl http://${host_ip}:9001/v1/chat/completions\ + -X POST \ + -d '{"query":"def print_hello_world():"}' \ + -H 'Content-Type: application/json' + ``` + +10. DocSum LLM Microservice + + ```bash + curl http://${host_ip}:9002/v1/chat/docsum\ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5"}' \ + -H 'Content-Type: application/json' + ``` + +11. FAQGen LLM Microservice + + ```bash + curl http://${host_ip}:9003/v1/faqgen\ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5"}' \ + -H 'Content-Type: application/json' + ``` + +12. ChatQnA MegaService + + ```bash + curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{ + "messages": "What is the revenue of Nike in 2023?" + }' + ``` + +13. 
FAQGen MegaService + + ```bash + curl http://${host_ip}:8889/v1/faqgen -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + }' + ``` + +14. DocSum MegaService + + ```bash + curl http://${host_ip}:8890/v1/docsum -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + }' + ``` + +15. CodeGen MegaService + + ```bash + curl http://${host_ip}:7778/v1/codegen -H "Content-Type: application/json" -d '{ + "messages": "def print_hello_world():" + }' + ``` + +16. Dataprep Microservice + + If you want to update the default knowledge base, you can use the following commands: + + Update Knowledge Base via Local File Upload: + + ```bash + curl -X POST "http://${host_ip}:6007/v1/dataprep" \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./nke-10k-2023.pdf" + ``` + + This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment. + + Add Knowledge Base via HTTP Links: + + ```bash + curl -X POST "http://${host_ip}:6007/v1/dataprep" \ + -H "Content-Type: multipart/form-data" \ + -F 'link_list=["https://opea.dev"]' + ``` + + This command updates a knowledge base by submitting a list of HTTP links for processing. 
+ + Also, you are able to get the file list that you uploaded: + + ```bash + curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \ + -H "Content-Type: application/json" + ``` + + To delete the file/link you uploaded: + + ```bash + # delete link + curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ + -d '{"file_path": "https://opea.dev.txt"}' \ + -H "Content-Type: application/json" + + # delete file + curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ + -d '{"file_path": "nke-10k-2023.pdf"}' \ + -H "Content-Type: application/json" + + # delete all uploaded files and links + curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ + -d '{"file_path": "all"}' \ + -H "Content-Type: application/json" + ``` + +17. Prompt Registry Microservice + + If you want to update the default Prompts in the application for your user, you can use the following commands: + + ```bash + curl -X 'POST' \ + http://${host_ip}:6018/v1/prompt/create \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "prompt_text": "test prompt", "user": "test" + }' + ``` + + Retrieve prompt from database based on user or prompt_id + + ```bash + curl -X 'POST' \ + http://${host_ip}:6018/v1/prompt/get \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "user": "test"}' + + curl -X 'POST' \ + http://${host_ip}:6018/v1/prompt/get \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "user": "test", "prompt_id":"{prompt_id returned from save prompt route above}"}' + ``` + + Delete prompt from database based on prompt_id provided + + ```bash + curl -X 'POST' \ + http://${host_ip}:6018/v1/prompt/delete \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "user": "test", "prompt_id":"{prompt_id to be deleted}"}' + ``` + +18. Chat History Microservice + + To validate the chatHistory Microservice, you can use the following commands. 
+ + Create a sample conversation and get the message ID. + + ```bash + curl -X 'POST' \ + http://${host_ip}:6012/v1/chathistory/create \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "data": { + "messages": "test Messages", "user": "test" + } + }' + ``` + + Retrieve the conversation based on user or conversation id + + ```bash + curl -X 'POST' \ + http://${host_ip}:6012/v1/chathistory/get \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "user": "test"}' + + curl -X 'POST' \ + http://${host_ip}:6012/v1/chathistory/get \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "user": "test", "id":"{Conversation id to retrieve }"}' + ``` + + Delete Conversation from database based on conversation id provided. + + ```bash + curl -X 'POST' \ + http://${host_ip}:6012/v1/chathistory/delete \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "user": "test", "id":"{Conversation id to Delete}"}' + ``` + +--- + +## πŸš€ Launch the UI + +To access the frontend, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: + +```yaml + productivity-suite-xeon-react-ui-server: + image: opea/productivity-suite-react-ui-server:latest + ... + ports: + - "5715:80" # Map port 5715 on the host to port 80 in the container. 
+``` + +Here is an example of running Productivity Suite +![project-screenshot](../../../../assets/img/chat_qna_init.png) +![project-screenshot](../../../../assets/img/Login_page.png) + +--- + +## 🛠️ Key Features + +Here are some of the project's features: + +### 💬 ChatQnA + +- **Start a Text Chat**: Initiate a text chat with the ability to input written conversations, where the dialogue content can also be customized based on uploaded files. +- **Context Awareness**: The AI assistant maintains the context of the conversation, understanding references to previous statements or questions. This allows for more natural and coherent exchanges. + +### 🎛️ Data Source + +- **File Upload or Remote Link**: Choose between uploading a local file or providing a remote link, then chat based on the uploaded knowledge base. +- **File Management**: Uploaded files are listed, and the user can add or remove files/links. + +#### Screenshots + +![project-screenshot](../../../../assets/img/data_source.png) + +- **Clear Chat**: Clear the record of the current dialog box without retaining the contents of the dialog box. +- **Chat history**: Historical chat records can still be retained after refreshing, making it easier for users to view the context. +- **Conversational Chat**: The application maintains a history of the conversation, allowing users to review previous messages and the AI to refer back to earlier points in the dialogue when necessary. + +#### Screenshots + +![project-screenshot](../../../../assets/img/chat_qna_init.png) +![project-screenshot](../../../../assets/img/chatqna_with_conversation.png) + +### 💻 Codegen + +- **Generate code**: Generate the corresponding code based on the current user's input. 
+ +#### Screenshots + +![project-screenshot](../../../../assets/img/codegen.png) + +### πŸ“š Document Summarization + +- **Summarizing Uploaded Files**: Upload files from their local device, then click 'Generate Summary' to summarize the content of the uploaded file. The summary will be displayed on the 'Summary' box. +- **Summarizing Text via Pasting**: Paste the text to be summarized into the text box, then click 'Generate Summary' to produce a condensed summary of the content, which will be displayed in the 'Summary' box on the right. +- **Scroll to Bottom**: The summarized content will automatically scroll to the bottom. + +#### Screenshots + +![project-screenshot](../../../../assets/img/doc_summary_paste.png) +![project-screenshot](../../../../assets/img/doc_summary_file.png) + +### ❓ FAQ Generator + +- **Generate FAQs from Text via Pasting**: Paste the text to into the text box, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below. + +- **Generate FAQs from Text via txt file Upload**: Upload the file in the Upload bar, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below. + +#### Screenshots + +![project-screenshot](../../../../assets/img/faq_generator.png) diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/README_remote.md b/ProductivitySuite/docker_compose/intel/hpu/gaudi/README_remote.md new file mode 100644 index 0000000000..991dd2a9a4 --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/README_remote.md @@ -0,0 +1,276 @@ +# Build Mega Service of Productivity Suite + +This document outlines the deployment process for OPEA Productivity Suite utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Xeon server and [GenAIExamples](https://github.com/opea-project/GenAIExamples.git) solutions. 
The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`. + +## πŸš€ Build Docker Images + +Create a directory and clone the GenAIComps repository + +```bash +mkdir genai +git clone --branch v1.0 https://github.com/opea-project/GenAIComps.git +``` +Copy patch files related to GenAIComps inside GenAIComps folder and apply the patch + +```bash +cd GenAIComps +git am *.patch +``` + +### 1. Build Embedding Image + +```bash +docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile . +``` + +### 2. Build Rerank Image + +```bash +docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile . +``` + +### 3. Build LLM Images + +#### Use TGI as backend to build FAQ Generation + +```bash +docker build -t opea/llm-faqgen-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/faq-generation/tgi/langchain/Dockerfile . +``` + +#### Use TGI as backend to build Doc Summarization + +```bash +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/langchain/Dockerfile . +``` + +#### Use TGI as backend to build Text Generation + +```bash +docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile . +``` + +### 4. Build Prompt Registry Image + +```bash +docker build -t opea/promptregistry-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/mongo/Dockerfile . +``` + +### 5. 
Build Productivity Suite Docker Images + +The Productivity Suite is composed of multiple GenAIExample reference solutions composed together. + +```bash +cd .. +git clone --branch v1.0 https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples +``` + +Copy the patch files related to GenAIExamples into above cloned GenAIExamples folder + +Apply the patches +```bash +git am *.patch +``` + +#### 5.1 Build ChatQnA MegaService Docker Images + +```bash +cd .. +docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f GenAIExamples/ChatQnA/Dockerfile . +``` + +### 6. Build UI Docker Image + +Build frontend Docker image that enables via below command: + +**Export the value of the public IP address of your server to the `host_ip` environment variable** + +```bash +cd GenAIExamples/ProductivitySuite/ui +docker build --no-cache -t opea/productivity-suite-react-ui-server:latest -f docker/Dockerfile.react . +``` + +## πŸš€ Start Microservices + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to setup them in advance as below. 
+ +**Export the value of the public IP address of your server to the `host_ip` environment variable** + +> Change the External_Public_IP below with the actual IPV4 value + +``` +export host_ip="External_Public_IP" +``` + +**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable** + +> Change the Your_Huggingface_API_Token below with your actual Huggingface API Token value + +``` +export your_hf_api_token="Your_Huggingface_API_Token" +``` + +**Append the value of the public IP address to the no_proxy list** + +``` +export your_no_proxy=${your_no_proxy},"External_Public_IP" +``` + +**Export the value of your remote host to the `remote_host` environment variable (Only if you are using remote TGI/TEI)** + +> Change the Your_Remote_Host below with your actual API Gateway Host value + +``` +export remote_host="Your_Remote_Host" +``` + +**Set ClientId, Client_Secret and Token URL only if the remote API is protected with OAuth Client Credentials Flow** + +**Export the value of your Remote API ClientId to the `clientid` environment variable** + +> Change the Your_API_ClientId below with your actual ClientId value + +``` +export clientid="Your_API_ClientId" +``` + +**Export the value of your Remote API client secret to the `client_secret` environment variable** + +> Change the Your_API_ClientSecret below with your actual ClientSecret value + +``` +export client_secret="Your_API_ClientSecret" +``` + +**Export the value of your Remote API token url to the `token_url` environment variable** + +> Change the Your_API_TokenUrl below with your actual Token URL value + +``` +export token_url="Your_API_TokenUrl" +``` + +**Export the value of your Remote Embedding Endpoint to the `embedding_endpoint` environment variable (Set this if you have tei embedding running remotely)** + +> Change the Your_Remote_Embedding_Endpoint below with your actual embedding endpoint value + +``` +export embedding_endpoint="Your_Remote_Embedding_Endpoint" +``` + 
+**Export the value of your Remote Reranking Endpoint to the `reranking_endpoint` environment variable (Set this if you have reranking running remotely)** + +> Change the Your_Remote_Reranking_Endpoint below with your actual reranking endpoint value + +``` +export reranking_endpoint="Your_Remote_Reranking_Endpoint" +``` + +**Export the value of your Remote TGI Endpoint to the `tgi_endpoint` environment variable (Set this if you have tgi running remotely)** + +> Change the Your_Remote_TGI_Endpoint below with your actual tgi endpoint value + +``` +export tgi_endpoint="Your_Remote_TGI_Endpoint" +``` + +**To use multiple TGI models** +> Create the model_configs.json file under /GenAIExamples/ProductivitySuite/docker_compose/intel/hpu/gaudi folder +> Add the model details as shown in the example below + + +```bash +cd GenAIExamples/ProductivitySuite/docker_compose/intel/hpu/gaudi +touch model_configs.json +``` + +File Structure (the `//` annotations are explanatory only; JSON does not allow comments, so omit them in the actual file): + +[ + { + "model_name": "Your Model Name", + "displayName": "Model Display Name for the UI", + "endpoint": "Model Endpoint with http/https", + "minToken": 100, //Min Token Value + "maxToken": 2000 //Max Token Value + }, + { + "model_name": "Your Model Name", + "displayName": "Model Display Name for the UI", + "endpoint": "Model Endpoint with http/https", + "minToken": 100, //Min Token Value + "maxToken": 2000 //Max Token Value + } +] + +Example: + +[ + { + "model_name": "meta-llama/Meta-Llama-3.1-70B-Instruct", + "displayName": "llama-3.1-70B", + "endpoint": "https:///", + "minToken": 100, + "maxToken": 2000 + }, + { + "model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "displayName": "llama-3.1-8B", + "endpoint": "https:///", + "minToken": 100, + "maxToken": 2000 + }, + { + "model_name": "Intel/neural-chat-7b-v3-3", + "displayName": "neural chat", + "endpoint": "https:///", + "minToken": 100, + "maxToken": 1000 + } +] + +> After creating and adding details in the model_configs.json file. 
Copy the same file into the public folder of the UI + +```bash +cd ../../../../ +cp docker_compose/intel/hpu/gaudi/model_configs.json ui/react/public/model_configs.json +``` + +> Navigate to GenAIExamples/ProductivitySuite/docker_compose/intel/hpu/gaudi and run set_env_remote.sh + +```bash +cd GenAIExamples/ProductivitySuite/docker_compose/intel/hpu/gaudi +chmod +x set_env_remote.sh +source set_env_remote.sh +``` + +Note: Please replace `host_ip` with your external IP address; do not use localhost. + +### Start all the services Docker Containers + +#### Run all services locally +```bash +docker compose -f compose.yaml up -d +``` + +#### Run TGI and TEI inference remotely +```bash +docker compose -f compose_remote.yaml up -d +``` + +#### Run only TGI remotely +```bash +docker compose -f compose_tgi_remote.yaml up -d +``` + +### Setup Keycloak + +Please refer to [keycloak_setup_guide](keycloak_setup_guide.md) for more details on Keycloak configuration setup. + + +## 🚀 Launch the UI + +To access the frontend, open the following URL in your browser: http://{host_ip}:5174. 
+ diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose.yaml b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose.yaml new file mode 100644 index 0000000000..d15856e275 --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose.yaml @@ -0,0 +1,398 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +version: "3.3" +services: + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + ports: + - "6379:6379" + - "8001:8001" + dataprep-redis-service: + image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "6007:6007" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + tei-embedding-service: + image: ghcr.io/huggingface/tei-gaudi:latest + container_name: tei-embedding-gaudi-server + ports: + - "6006:80" + volumes: + - "./data_embedding:/data" + shm_size: 1g + runtime: habana + cap_add: + - SYS_NICE + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate + embedding: + image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest} + container_name: embedding-tei-server + depends_on: + - tei-embedding-service + ports: + - "6000:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-embedding-service" + restart: unless-stopped + retriever: + image: 
${REGISTRY:-opea}/retriever-redis:${TAG:-latest} + container_name: retriever-redis-server + depends_on: + - redis-vector-db + ports: + - "7000:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/tei-gaudi:latest + container_name: tei-reranking-server + ports: + - "8808:80" + volumes: + - "./data_tei:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 + runtime: habana + cap_add: + - SYS_NICE + ipc: host + command: --model-id ${RERANK_MODEL_ID} --auto-truncate + reranking: + image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} + container_name: reranking-tei-gaudi-server + depends_on: + - tei-reranking-service + ports: + - "8000:8000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-reranking-service" + restart: unless-stopped + tgi_service: + image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + container_name: tgi-service + ports: + - "9009:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGING_FACE_HUB_TOKEN: 
${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + ENABLE_HPU_GRAPH: true + LIMIT_HPU_GRAPH: true + USE_FLASH_ATTENTION: true + FLASH_ATTENTION_RECOMPUTE: true + runtime: habana + cap_add: + - SYS_NICE + ipc: host + command: --model-id ${LLM_MODEL_ID} + llm: + image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} + container_name: llm-tgi-gaudi-server + depends_on: + - tgi_service + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + + restart: unless-stopped + chatqna-gaudi-backend-server: + image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} + container_name: chatqna-gaudi-backend-server + depends_on: + - redis-vector-db + - tei-embedding-service + - embedding + - retriever + - tei-reranking-service + - reranking + - tgi_service + - llm + ports: + - "8888:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP} + RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP} + RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA} + ipc: host + restart: always + tgi_service_codegen: + image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + container_name: tgi_service_codegen + ports: + - "8028:80" + volumes: + - "./data_codegen:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + 
HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + ENABLE_HPU_GRAPH: true + LIMIT_HPU_GRAPH: true + USE_FLASH_ATTENTION: true + FLASH_ATTENTION_RECOMPUTE: true + runtime: habana + cap_add: + - SYS_NICE + ipc: host + command: --model-id ${LLM_MODEL_ID_CODEGEN} --max-input-length 1024 --max-total-tokens 2048 + llm_codegen: + image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} + container_name: llm-tgi-server-codegen + depends_on: + - tgi_service_codegen + ports: + - "9001:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CODEGEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + restart: unless-stopped + codegen-gaudi-backend-server: + image: ${REGISTRY:-opea}/codegen:${TAG:-latest} + container_name: codegen-gaudi-backend-server + depends_on: + - llm_codegen + ports: + - "7778:7778" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CODEGEN} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_CODEGEN} + ipc: host + restart: always + llm_faqgen: + image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest} + container_name: llm-faqgen-server + depends_on: + - tgi_service + ports: + - "9002:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_FAQGEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + restart: unless-stopped + faqgen-gaudi-backend-server: + image: ${REGISTRY:-opea}/faqgen:${TAG:-latest} + container_name: faqgen-gaudi-backend-server + 
depends_on: + - tgi_service + - llm_faqgen + ports: + - "8889:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_FAQGEN} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_FAQGEN} + ipc: host + restart: always + llm_docsum_server: + image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + container_name: llm-docsum-server + depends_on: + - tgi_service + ports: + - "9003:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_DOCSUM} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + restart: unless-stopped + docsum-gaudi-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-gaudi-backend-server + depends_on: + - tgi_service + - llm_docsum_server + ports: + - "8890:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_DOCSUM} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_DOCSUM} + ipc: host + restart: always + mongo: + image: mongo:7.0.11 + container_name: mongodb + ports: + - 27017:27017 + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + command: mongod --quiet --logpath /dev/null + chathistory-mongo: + image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + container_name: chathistory-mongo-server + ports: + - "6012:6012" + ipc: host + environment: + http_proxy: ${http_proxy} + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + MONGO_HOST: ${MONGO_HOST} + MONGO_PORT: ${MONGO_PORT} + COLLECTION_NAME: ${COLLECTION_NAME} + restart: unless-stopped + 
promptregistry-mongo: + image: ${REGISTRY:-opea}/promptregistry-mongo-server:${TAG:-latest} + container_name: promptregistry-mongo-server + ports: + - "6018:6018" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + MONGO_HOST: ${MONGO_HOST} + MONGO_PORT: ${MONGO_PORT} + COLLECTION_NAME: ${PROMPT_COLLECTION_NAME} + restart: unless-stopped + keycloak: + image: quay.io/keycloak/keycloak:25.0.2 + container_name: keycloak-server + ports: + - 8081:8080 + environment: + - KEYCLOAK_ADMIN=admin + - KEYCLOAK_ADMIN_PASSWORD=admin + - KC_PROXY=edge + ipc: host + command: start-dev + restart: always + productivity-suite-gaudi-react-ui-server: + image: ${REGISTRY:-opea}/productivity-suite-react-ui-server:${TAG:-latest} + container_name: productivity-suite-gaudi-react-ui-server + ports: + - "5174:80" + environment: + - APP_BACKEND_SERVICE_ENDPOINT_CHATQNA=${BACKEND_SERVICE_ENDPOINT_CHATQNA} + - APP_BACKEND_SERVICE_ENDPOINT_CODEGEN=${BACKEND_SERVICE_ENDPOINT_CODEGEN} + - APP_BACKEND_SERVICE_ENDPOINT_DOCSUM=${BACKEND_SERVICE_ENDPOINT_DOCSUM} + - APP_BACKEND_SERVICE_ENDPOINT_FAQGEN=${BACKEND_SERVICE_ENDPOINT_FAQGEN} + - APP_DATAPREP_SERVICE_ENDPOINT=${DATAPREP_SERVICE_ENDPOINT} + - APP_DATAPREP_GET_FILE_ENDPOINT=${DATAPREP_GET_FILE_ENDPOINT} + - APP_DATAPREP_DELETE_FILE_ENDPOINT=${DATAPREP_DELETE_FILE_ENDPOINT} + - APP_CHAT_HISTORY_CREATE_ENDPOINT=${CHAT_HISTORY_CREATE_ENDPOINT} + - APP_CHAT_HISTORY_DELETE_ENDPOINT=${CHAT_HISTORY_DELETE_ENDPOINT} + - APP_CHAT_HISTORY_GET_ENDPOINT=${CHAT_HISTORY_GET_ENDPOINT} + - APP_PROMPT_SERVICE_GET_ENDPOINT=${PROMPT_SERVICE_GET_ENDPOINT} + - APP_PROMPT_SERVICE_CREATE_ENDPOINT=${PROMPT_SERVICE_CREATE_ENDPOINT} + - APP_KEYCLOAK_SERVICE_ENDPOINT=${KEYCLOAK_SERVICE_ENDPOINT} + ipc: host + restart: always +networks: + default: + driver: bridge \ No newline at end of file diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml 
b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml new file mode 100644 index 0000000000..82408a33fe --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml @@ -0,0 +1,306 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +version: "3.3" +services: + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + ports: + - "6379:6379" + - "8001:8001" + dataprep-redis-service: + image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "6007:6007" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + embedding: + image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest} + container_name: embedding-tei-server + ports: + - "6000:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-embedding-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + retriever: + image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} + container_name: retriever-redis-server + depends_on: + - redis-vector-db + ports: + - "7000:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + restart: unless-stopped + reranking: + image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} + container_name: 
reranking-tei-gaudi-server + ports: + - "8000:8000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-reranking-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + llm: + image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} + container_name: llm-tgi-gaudi-server + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + MODEL_CONFIGS: ${MODEL_CONFIGS} + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + + restart: unless-stopped + chatqna-gaudi-backend-server: + image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} + container_name: chatqna-gaudi-backend-server + depends_on: + - redis-vector-db + - embedding + - retriever + - reranking + - llm + ports: + - "8888:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP} + RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP} + RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA} + ipc: host + restart: always + llm_codegen: + image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} + container_name: llm-tgi-server-codegen + 
ports: + - "9001:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CODEGEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + codegen-gaudi-backend-server: + image: ${REGISTRY:-opea}/codegen:${TAG:-latest} + container_name: codegen-gaudi-backend-server + ports: + - "7778:7778" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CODEGEN} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_CODEGEN} + ipc: host + restart: always + llm_faqgen: + image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest} + container_name: llm-faqgen-server + ports: + - "9002:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_FAQGEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + faqgen-gaudi-backend-server: + image: ${REGISTRY:-opea}/faqgen:${TAG:-latest} + container_name: faqgen-gaudi-backend-server + depends_on: + - llm_faqgen + ports: + - "8889:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_FAQGEN} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_FAQGEN} + ipc: host + restart: always + llm_docsum_server: + image: 
${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + container_name: llm-docsum-server + ports: + - "9003:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_DOCSUM} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + docsum-gaudi-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-gaudi-backend-server + depends_on: + - llm_docsum_server + ports: + - "8890:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_DOCSUM} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_DOCSUM} + ipc: host + restart: always + mongo: + image: mongo:7.0.11 + container_name: mongodb + ports: + - 27017:27017 + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + command: mongod --quiet --logpath /dev/null + chathistory-mongo: + image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + container_name: chathistory-mongo-server + ports: + - "6012:6012" + ipc: host + environment: + http_proxy: ${http_proxy} + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + MONGO_HOST: ${MONGO_HOST} + MONGO_PORT: ${MONGO_PORT} + COLLECTION_NAME: ${COLLECTION_NAME} + restart: unless-stopped + promptregistry-mongo: + image: ${REGISTRY:-opea}/promptregistry-mongo-server:${TAG:-latest} + container_name: promptregistry-mongo-server + ports: + - "6018:6018" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + MONGO_HOST: ${MONGO_HOST} + MONGO_PORT: ${MONGO_PORT} + COLLECTION_NAME: 
${PROMPT_COLLECTION_NAME} + restart: unless-stopped + keycloak: + image: quay.io/keycloak/keycloak:25.0.2 + container_name: keycloak-server + ports: + - 8080:8080 + environment: + - KEYCLOAK_ADMIN=admin + - KEYCLOAK_ADMIN_PASSWORD=admin + - KC_PROXY=edge + ipc: host + command: + - start-dev + restart: always + productivity-suite-gaudi-react-ui-server: + image: ${REGISTRY:-opea}/productivity-suite-react-ui-server:${TAG:-latest} + container_name: productivity-suite-gaudi-react-ui-server + ports: + - "5174:80" + environment: + - APP_BACKEND_SERVICE_ENDPOINT_CHATQNA=${BACKEND_SERVICE_ENDPOINT_CHATQNA} + - APP_BACKEND_SERVICE_ENDPOINT_CODEGEN=${BACKEND_SERVICE_ENDPOINT_CODEGEN} + - APP_BACKEND_SERVICE_ENDPOINT_DOCSUM=${BACKEND_SERVICE_ENDPOINT_DOCSUM} + - APP_BACKEND_SERVICE_ENDPOINT_FAQGEN=${BACKEND_SERVICE_ENDPOINT_FAQGEN} + - APP_DATAPREP_SERVICE_ENDPOINT=${DATAPREP_SERVICE_ENDPOINT} + - APP_DATAPREP_GET_FILE_ENDPOINT=${DATAPREP_GET_FILE_ENDPOINT} + - APP_DATAPREP_DELETE_FILE_ENDPOINT=${DATAPREP_DELETE_FILE_ENDPOINT} + - APP_CHAT_HISTORY_CREATE_ENDPOINT=${CHAT_HISTORY_CREATE_ENDPOINT} + - APP_CHAT_HISTORY_DELETE_ENDPOINT=${CHAT_HISTORY_DELETE_ENDPOINT} + - APP_CHAT_HISTORY_GET_ENDPOINT=${CHAT_HISTORY_GET_ENDPOINT} + - APP_PROMPT_SERVICE_GET_ENDPOINT=${PROMPT_SERVICE_GET_ENDPOINT} + - APP_PROMPT_SERVICE_CREATE_ENDPOINT=${PROMPT_SERVICE_CREATE_ENDPOINT} + - APP_KEYCLOAK_SERVICE_ENDPOINT=${KEYCLOAK_SERVICE_ENDPOINT} + ipc: host + restart: always +networks: + default: + driver: bridge \ No newline at end of file diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml new file mode 100644 index 0000000000..02ab491e5f --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml @@ -0,0 +1,354 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +version: "3.3" +services: + redis-vector-db: + image: 
redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + ports: + - "6379:6379" + - "8001:8001" + dataprep-redis-service: + image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "6007:6007" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + tei-embedding-service: + image: ghcr.io/huggingface/tei-gaudi:latest + container_name: tei-embedding-gaudi-server + ports: + - "6006:80" + volumes: + - "./data_embedding:/data" + shm_size: 1g + runtime: habana + cap_add: + - SYS_NICE + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate + embedding: + image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest} + container_name: embedding-tei-server + depends_on: + - tei-embedding-service + ports: + - "6000:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-embedding-service" + restart: unless-stopped + retriever: + image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} + container_name: retriever-redis-server + depends_on: + - redis-vector-db + ports: + - "7000:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + restart: 
unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/tei-gaudi:latest + container_name: tei-reranking-server + ports: + - "8808:80" + volumes: + - "./data_tei:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 + runtime: habana + cap_add: + - SYS_NICE + ipc: host + command: --model-id ${RERANK_MODEL_ID} --auto-truncate + reranking: + image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} + container_name: reranking-tei-gaudi-server + depends_on: + - tei-reranking-service + ports: + - "8000:8000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-reranking-service" + restart: unless-stopped + llm: + image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} + container_name: llm-tgi-gaudi-server + # No depends_on: TGI is served remotely in this compose file, so there is + # no local tgi_service container to wait for. + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + MODEL_CONFIGS: ${MODEL_CONFIGS} + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + + restart: unless-stopped + chatqna-gaudi-backend-server: 
+ image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} + container_name: chatqna-gaudi-backend-server + depends_on: + - redis-vector-db + - tei-embedding-service + - embedding + - retriever + - tei-reranking-service + - reranking + # tgi_service omitted: TGI is served remotely in this compose file + - llm + ports: + - "8888:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP} + RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP} + RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA} + ipc: host + restart: always + llm_codegen: + image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} + container_name: llm-tgi-server-codegen + ports: + - "9001:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CODEGEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + codegen-gaudi-backend-server: + image: ${REGISTRY:-opea}/codegen:${TAG:-latest} + container_name: codegen-gaudi-backend-server + depends_on: + - llm_codegen + ports: + - "7778:7778" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CODEGEN} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_CODEGEN} + ipc: host + restart: always + llm_faqgen: + image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest} + container_name: llm-faqgen-server + ports: + - "9002:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: 
${TGI_LLM_ENDPOINT_FAQGEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + faqgen-gaudi-backend-server: + image: ${REGISTRY:-opea}/faqgen:${TAG:-latest} + container_name: faqgen-gaudi-backend-server + depends_on: + - llm_faqgen + ports: + - "8889:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_FAQGEN} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_FAQGEN} + ipc: host + restart: always + llm_docsum_server: + image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + container_name: llm-docsum-server + ports: + - "9003:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_DOCSUM} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + docsum-gaudi-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-gaudi-backend-server + depends_on: + # tgi_service omitted: TGI is served remotely in this compose file + - llm_docsum_server + ports: + - "8890:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_DOCSUM} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_DOCSUM} + ipc: host + restart: always + mongo: + image: mongo:7.0.11 + container_name: mongodb + ports: + - 27017:27017 + environment: + http_proxy: ${http_proxy} + https_proxy: 
${https_proxy} + no_proxy: ${no_proxy} + command: mongod --quiet --logpath /dev/null + chathistory-mongo: + image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + container_name: chathistory-mongo-server + ports: + - "6012:6012" + ipc: host + environment: + http_proxy: ${http_proxy} + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + MONGO_HOST: ${MONGO_HOST} + MONGO_PORT: ${MONGO_PORT} + COLLECTION_NAME: ${COLLECTION_NAME} + restart: unless-stopped + promptregistry-mongo: + image: ${REGISTRY:-opea}/promptregistry-mongo-server:${TAG:-latest} + container_name: promptregistry-mongo-server + ports: + - "6018:6018" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + MONGO_HOST: ${MONGO_HOST} + MONGO_PORT: ${MONGO_PORT} + COLLECTION_NAME: ${PROMPT_COLLECTION_NAME} + restart: unless-stopped + keycloak: + image: quay.io/keycloak/keycloak:25.0.2 + container_name: keycloak-server + ports: + - 8081:8080 + environment: + - KEYCLOAK_ADMIN=admin + - KEYCLOAK_ADMIN_PASSWORD=admin + - KC_PROXY=edge + ipc: host + command: start-dev + restart: always + productivity-suite-gaudi-react-ui-server: + image: ${REGISTRY:-opea}/productivity-suite-react-ui-server:${TAG:-latest} + container_name: productivity-suite-gaudi-react-ui-server + ports: + - "5174:80" + environment: + - APP_BACKEND_SERVICE_ENDPOINT_CHATQNA=${BACKEND_SERVICE_ENDPOINT_CHATQNA} + - APP_BACKEND_SERVICE_ENDPOINT_CODEGEN=${BACKEND_SERVICE_ENDPOINT_CODEGEN} + - APP_BACKEND_SERVICE_ENDPOINT_DOCSUM=${BACKEND_SERVICE_ENDPOINT_DOCSUM} + - APP_BACKEND_SERVICE_ENDPOINT_FAQGEN=${BACKEND_SERVICE_ENDPOINT_FAQGEN} + - APP_DATAPREP_SERVICE_ENDPOINT=${DATAPREP_SERVICE_ENDPOINT} + - APP_DATAPREP_GET_FILE_ENDPOINT=${DATAPREP_GET_FILE_ENDPOINT} + - APP_DATAPREP_DELETE_FILE_ENDPOINT=${DATAPREP_DELETE_FILE_ENDPOINT} + - APP_CHAT_HISTORY_CREATE_ENDPOINT=${CHAT_HISTORY_CREATE_ENDPOINT} + - APP_CHAT_HISTORY_DELETE_ENDPOINT=${CHAT_HISTORY_DELETE_ENDPOINT} + - 
APP_CHAT_HISTORY_GET_ENDPOINT=${CHAT_HISTORY_GET_ENDPOINT} + - APP_PROMPT_SERVICE_GET_ENDPOINT=${PROMPT_SERVICE_GET_ENDPOINT} + - APP_PROMPT_SERVICE_CREATE_ENDPOINT=${PROMPT_SERVICE_CREATE_ENDPOINT} + - APP_KEYCLOAK_SERVICE_ENDPOINT=${KEYCLOAK_SERVICE_ENDPOINT} + ipc: host + restart: always +networks: + default: + driver: bridge \ No newline at end of file diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env.sh b/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env.sh new file mode 100644 index 0000000000..0139b532af --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env.sh @@ -0,0 +1,48 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +export MONGO_HOST=${host_ip} +export MONGO_PORT=27017 +export DB_NAME="opea" +export COLLECTION_NAME="Conversations" +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export RERANK_MODEL_ID="BAAI/bge-reranker-base" +export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf" +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" +export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" +export TGI_LLM_ENDPOINT="http://${host_ip}:9009" +export REDIS_URL="redis://${host_ip}:6379" +export INDEX_NAME="rag-redis" +export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export MEGA_SERVICE_HOST_IP=${host_ip} +export EMBEDDING_SERVICE_HOST_IP=${host_ip} +export RETRIEVER_SERVICE_HOST_IP=${host_ip} +export RERANK_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP_DOCSUM=${host_ip} +export LLM_SERVICE_HOST_IP_FAQGEN=${host_ip} +export LLM_SERVICE_HOST_IP_CODEGEN=${host_ip} +export LLM_SERVICE_HOST_IP_CHATQNA=${host_ip} +export TGI_LLM_ENDPOINT_CHATQNA="http://${host_ip}:9009" +export TGI_LLM_ENDPOINT_CODEGEN="http://${host_ip}:8028" +export TGI_LLM_ENDPOINT_FAQGEN="http://${host_ip}:9009" +export TGI_LLM_ENDPOINT_DOCSUM="http://${host_ip}:9009" +export 
BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file" +export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen" +export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen" +export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" +export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" +export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" +export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete" +export CHAT_HISTORY_GET_ENDPOINT="http://${host_ip}:6012/v1/chathistory/get" +export PROMPT_SERVICE_GET_ENDPOINT="http://${host_ip}:6018/v1/prompt/get" +export PROMPT_SERVICE_CREATE_ENDPOINT="http://${host_ip}:6018/v1/prompt/create" +export KEYCLOAK_SERVICE_ENDPOINT="http://${host_ip}:8080" +export LLM_SERVICE_HOST_PORT_FAQGEN=9002 +export LLM_SERVICE_HOST_PORT_CODEGEN=9001 +export LLM_SERVICE_HOST_PORT_DOCSUM=9003 +export PROMPT_COLLECTION_NAME="prompt" diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env_remote.sh b/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env_remote.sh new file mode 100644 index 0000000000..67bad368bd --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env_remote.sh @@ -0,0 +1,57 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Check if the model_configs.json file exists +if [ -f model_configs.json ]; then + # If the file exists, set the MODEL_CONFIGS environment variable using the content of the file + export MODEL_CONFIGS=$(jq -c . 
model_configs.json) +fi +export MONGO_HOST=${host_ip} +export MONGO_PORT=27017 +export DB_NAME="opea" +export COLLECTION_NAME="Conversations" +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export RERANK_MODEL_ID="BAAI/bge-reranker-base" +export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct" +export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf" +export TEI_EMBEDDING_ENDPOINT="${remote_host}/${embedding_endpoint}" +export TEI_RERANKING_ENDPOINT="${remote_host}/${reranking_endpoint}" +export TGI_LLM_ENDPOINT="${remote_host}/${tgi_endpoint}" +export REDIS_URL="redis://${host_ip}:6379" +export INDEX_NAME="rag-redis" +export HUGGINGFACEHUB_API_TOKEN=${hf_api_token} +export MEGA_SERVICE_HOST_IP=${host_ip} +export EMBEDDING_SERVICE_HOST_IP=${host_ip} +export RETRIEVER_SERVICE_HOST_IP=${host_ip} +export RERANK_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP_DOCSUM=${host_ip} +export LLM_SERVICE_HOST_IP_FAQGEN=${host_ip} +export LLM_SERVICE_HOST_IP_CODEGEN=${host_ip} +export LLM_SERVICE_HOST_IP_CHATQNA=${host_ip} +export TGI_LLM_ENDPOINT_CHATQNA="${remote_host}/${tgi_endpoint}" +export TGI_LLM_ENDPOINT_CODEGEN="${remote_host}/${tgi_endpoint}" +export TGI_LLM_ENDPOINT_FAQGEN="${remote_host}/${tgi_endpoint}" +export TGI_LLM_ENDPOINT_DOCSUM="${remote_host}/${tgi_endpoint}" +export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file" +export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen" +export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen" +export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" +export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" +export 
CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" +export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete" +export CHAT_HISTORY_GET_ENDPOINT="http://${host_ip}:6012/v1/chathistory/get" +export PROMPT_SERVICE_GET_ENDPOINT="http://${host_ip}:6018/v1/prompt/get" +export PROMPT_SERVICE_CREATE_ENDPOINT="http://${host_ip}:6018/v1/prompt/create" +export KEYCLOAK_SERVICE_ENDPOINT="http://${host_ip}:8080" +export LLM_SERVICE_HOST_PORT_FAQGEN=9002 +export LLM_SERVICE_HOST_PORT_CODEGEN=9001 +export LLM_SERVICE_HOST_PORT_DOCSUM=9003 +export PROMPT_COLLECTION_NAME="prompt" +export CLIENTID=${clientid} +export CLIENT_SECRET=${client_secret} +export TOKEN_URL=${token_url} \ No newline at end of file diff --git a/ProductivitySuite/ui/react/src/App.tsx b/ProductivitySuite/ui/react/src/App.tsx index c12ee1d8fa..ebaeaf9d0a 100644 --- a/ProductivitySuite/ui/react/src/App.tsx +++ b/ProductivitySuite/ui/react/src/App.tsx @@ -18,9 +18,9 @@ import { useAppDispatch } from "./redux/store"; import { setUser } from "./redux/User/userSlice"; import { useEffect } from "react"; -const title = "Chat QnA" +const title = "Digital Assistant" const navList: SidebarNavList = [ - { icon: IconMessages, label: "Chat Qna", path: "/", children: }, + { icon: IconMessages, label: "Digital Assistant", path: "/", children: }, { icon: IconCode, label: "CodeGen", path: "/codegen", children: }, { icon: IconFileTextAi, label: "DocSum", path: "/docsum", children: }, { icon: IconFileInfo, label: "FaqGen", path: "/faqgen", children: }, diff --git a/ProductivitySuite/ui/react/src/components/CodeGen/CodeGen.tsx b/ProductivitySuite/ui/react/src/components/CodeGen/CodeGen.tsx index 29c96f61cb..1811b1bf0d 100644 --- a/ProductivitySuite/ui/react/src/components/CodeGen/CodeGen.tsx +++ b/ProductivitySuite/ui/react/src/components/CodeGen/CodeGen.tsx @@ -97,7 +97,7 @@ const CodeGen = () => {
- CodeGen + Code Generator
diff --git a/ProductivitySuite/ui/react/src/components/Conversation/Conversation.tsx b/ProductivitySuite/ui/react/src/components/Conversation/Conversation.tsx index e772248f39..e6455c85eb 100644 --- a/ProductivitySuite/ui/react/src/components/Conversation/Conversation.tsx +++ b/ProductivitySuite/ui/react/src/components/Conversation/Conversation.tsx @@ -4,8 +4,8 @@ import { KeyboardEventHandler, SyntheticEvent, useEffect, useRef, useState } from 'react' import styleClasses from "./conversation.module.scss" import { ActionIcon, Group, Textarea, Title, Tooltip, rem } from '@mantine/core' -import { IconArrowDown, IconArrowRight, IconArrowUp, IconMessagePlus } from '@tabler/icons-react' -import { conversationSelector, doConversation, getAllConversations, newConversation, setSystemPrompt } from '../../redux/Conversation/ConversationSlice' +import { IconArrowDown, IconArrowRight, IconMessagePlus, IconPencil } from '@tabler/icons-react' +import { conversationSelector, doConversation, getAllConversations, newConversation, setSystemPrompt} from '../../redux/Conversation/ConversationSlice' import { ConversationMessage } from '../Message/conversationMessage' import { useAppDispatch, useAppSelector } from '../../redux/store' import { Message, MessageRole } from '../../redux/Conversation/Conversation' @@ -21,7 +21,7 @@ type ConversationProps = { const Conversation = ({ title }: ConversationProps) => { const [prompt, setPrompt] = useState("") - const [updateSystemPrompt, setUpdateSystemPrompt] = useState(false) + const [updateSystemPrompt, setUpdateSystemPrompt] = useState(true) const dispatch = useAppDispatch(); const promptInputRef = useRef(null) @@ -53,8 +53,11 @@ const Conversation = ({ title }: ConversationProps) => { // return { role: message.role, content: message.content } // }) // } - - messages = [systemPromptObject, ...(selectedConversationHistory)] + if (selectedConversationHistory.length === 0) { + messages = [systemPromptObject, ...(selectedConversationHistory)] + } else { 
+ messages = [...(selectedConversationHistory)] + } doConversation({ conversationId: selectedConversationId, @@ -111,7 +114,7 @@ const Conversation = ({ title }: ConversationProps) => {
-
+
{selectedConversation?.first_query || ""} @@ -151,6 +154,7 @@ const Conversation = ({ title }: ConversationProps) => {