From a2afce1675fbdd835e71d387bab7404e221fd318 Mon Sep 17 00:00:00 2001 From: XinyaoWa Date: Mon, 28 Oct 2024 09:11:54 +0800 Subject: [PATCH 01/91] update codetrans default model (#1015) Signed-off-by: Xinyao Wang Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CodeTrans/README.md | 6 +++--- CodeTrans/docker_compose/intel/cpu/xeon/README.md | 6 +++--- CodeTrans/docker_compose/intel/hpu/gaudi/README.md | 6 +++--- CodeTrans/docker_compose/set_env.sh | 2 +- CodeTrans/kubernetes/intel/README.md | 2 +- CodeTrans/kubernetes/intel/README_gmc.md | 2 +- CodeTrans/kubernetes/intel/cpu/xeon/gmc/codetrans_xeon.yaml | 2 +- CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml | 2 +- .../kubernetes/intel/hpu/gaudi/gmc/codetrans_gaudi.yaml | 2 +- .../kubernetes/intel/hpu/gaudi/manifest/codetrans.yaml | 2 +- CodeTrans/tests/test_compose_on_gaudi.sh | 2 +- CodeTrans/tests/test_compose_on_xeon.sh | 2 +- supported_examples.md | 6 +++--- 13 files changed, 21 insertions(+), 21 deletions(-) diff --git a/CodeTrans/README.md b/CodeTrans/README.md index b70666273f..5cfa9b27e0 100644 --- a/CodeTrans/README.md +++ b/CodeTrans/README.md @@ -77,9 +77,9 @@ Currently we support two ways of deploying Code Translation services on docker: By default, the LLM model is set to a default value as listed below: -| Service | Model | -| ------- | ----------------------------- | -| LLM | HuggingFaceH4/mistral-7b-grok | +| Service | Model | +| ------- | ---------------------------------- | +| LLM | mistralai/Mistral-7B-Instruct-v0.3 | Change the `LLM_MODEL_ID` in `docker_compose/set_env.sh` for your needs. 
diff --git a/CodeTrans/docker_compose/intel/cpu/xeon/README.md b/CodeTrans/docker_compose/intel/cpu/xeon/README.md index fd29ce2103..15f6414f04 100755 --- a/CodeTrans/docker_compose/intel/cpu/xeon/README.md +++ b/CodeTrans/docker_compose/intel/cpu/xeon/README.md @@ -57,9 +57,9 @@ Then run the command `docker images`, you will have the following Docker Images: By default, the LLM model is set to a default value as listed below: -| Service | Model | -| ------- | ----------------------------- | -| LLM | HuggingFaceH4/mistral-7b-grok | +| Service | Model | +| ------- | ---------------------------------- | +| LLM | mistralai/Mistral-7B-Instruct-v0.3 | Change the `LLM_MODEL_ID` below for your needs. diff --git a/CodeTrans/docker_compose/intel/hpu/gaudi/README.md b/CodeTrans/docker_compose/intel/hpu/gaudi/README.md index 1eb1812f22..04858bc235 100755 --- a/CodeTrans/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeTrans/docker_compose/intel/hpu/gaudi/README.md @@ -49,9 +49,9 @@ Then run the command `docker images`, you will have the following Docker Images: By default, the LLM model is set to a default value as listed below: -| Service | Model | -| ------- | ----------------------------- | -| LLM | HuggingFaceH4/mistral-7b-grok | +| Service | Model | +| ------- | ---------------------------------- | +| LLM | mistralai/Mistral-7B-Instruct-v0.3 | Change the `LLM_MODEL_ID` below for your needs. 
diff --git a/CodeTrans/docker_compose/set_env.sh b/CodeTrans/docker_compose/set_env.sh index 5eae8f0cda..b4defd88c5 100644 --- a/CodeTrans/docker_compose/set_env.sh +++ b/CodeTrans/docker_compose/set_env.sh @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 -export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" +export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export TGI_LLM_ENDPOINT="http://${host_ip}:8008" export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} diff --git a/CodeTrans/kubernetes/intel/README.md b/CodeTrans/kubernetes/intel/README.md index 9d6e63f8be..3799f98321 100644 --- a/CodeTrans/kubernetes/intel/README.md +++ b/CodeTrans/kubernetes/intel/README.md @@ -14,7 +14,7 @@ By default, the LLM model is set to a default value as listed below: |Service |Model | |---------|-------------------------| -|LLM |HuggingFaceH4/mistral-7b-grok| +|LLM |mistralai/Mistral-7B-Instruct-v0.3| Change the `MODEL_ID` in `codetrans.yaml` for your needs. diff --git a/CodeTrans/kubernetes/intel/README_gmc.md b/CodeTrans/kubernetes/intel/README_gmc.md index 1b932f4ea2..0f66407d16 100644 --- a/CodeTrans/kubernetes/intel/README_gmc.md +++ b/CodeTrans/kubernetes/intel/README_gmc.md @@ -13,7 +13,7 @@ By default, the LLM model is set to a default value as listed below: |Service |Model | |---------|-------------------------| -|LLM |HuggingFaceH4/mistral-7b-grok| +|LLM |mistralai/Mistral-7B-Instruct-v0.3| Change the `MODEL_ID` in `codetrans_xeon.yaml` for your needs. 
diff --git a/CodeTrans/kubernetes/intel/cpu/xeon/gmc/codetrans_xeon.yaml b/CodeTrans/kubernetes/intel/cpu/xeon/gmc/codetrans_xeon.yaml index 889a1d21a6..244e7eb54a 100644 --- a/CodeTrans/kubernetes/intel/cpu/xeon/gmc/codetrans_xeon.yaml +++ b/CodeTrans/kubernetes/intel/cpu/xeon/gmc/codetrans_xeon.yaml @@ -29,6 +29,6 @@ spec: internalService: serviceName: tgi-service config: - MODEL_ID: HuggingFaceH4/mistral-7b-grok + MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3 endpoint: /generate isDownstreamService: true diff --git a/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml b/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml index a68768e2f8..4429083432 100644 --- a/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml +++ b/CodeTrans/kubernetes/intel/cpu/xeon/manifest/codetrans.yaml @@ -64,7 +64,7 @@ metadata: app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm data: - MODEL_ID: "HuggingFaceH4/mistral-7b-grok" + MODEL_ID: "mistralai/Mistral-7B-Instruct-v0.3" PORT: "2080" HF_TOKEN: "insert-your-huggingface-token-here" http_proxy: "" diff --git a/CodeTrans/kubernetes/intel/hpu/gaudi/gmc/codetrans_gaudi.yaml b/CodeTrans/kubernetes/intel/hpu/gaudi/gmc/codetrans_gaudi.yaml index 5bc1bd5e2d..b61ffef3ec 100644 --- a/CodeTrans/kubernetes/intel/hpu/gaudi/gmc/codetrans_gaudi.yaml +++ b/CodeTrans/kubernetes/intel/hpu/gaudi/gmc/codetrans_gaudi.yaml @@ -29,6 +29,6 @@ spec: internalService: serviceName: tgi-gaudi-svc config: - MODEL_ID: HuggingFaceH4/mistral-7b-grok + MODEL_ID: mistralai/Mistral-7B-Instruct-v0.3 endpoint: /generate isDownstreamService: true diff --git a/CodeTrans/kubernetes/intel/hpu/gaudi/manifest/codetrans.yaml b/CodeTrans/kubernetes/intel/hpu/gaudi/manifest/codetrans.yaml index 541f311799..076104e77a 100644 --- a/CodeTrans/kubernetes/intel/hpu/gaudi/manifest/codetrans.yaml +++ b/CodeTrans/kubernetes/intel/hpu/gaudi/manifest/codetrans.yaml @@ -64,7 +64,7 @@ metadata: app.kubernetes.io/version: "2.1.0" 
app.kubernetes.io/managed-by: Helm data: - MODEL_ID: "HuggingFaceH4/mistral-7b-grok" + MODEL_ID: "mistralai/Mistral-7B-Instruct-v0.3" PORT: "2080" HF_TOKEN: "insert-your-huggingface-token-here" http_proxy: "" diff --git a/CodeTrans/tests/test_compose_on_gaudi.sh b/CodeTrans/tests/test_compose_on_gaudi.sh index b246f4dc91..884f2dffa9 100644 --- a/CodeTrans/tests/test_compose_on_gaudi.sh +++ b/CodeTrans/tests/test_compose_on_gaudi.sh @@ -31,7 +31,7 @@ function start_services() { export http_proxy=${http_proxy} export https_proxy=${http_proxy} - export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" + export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} diff --git a/CodeTrans/tests/test_compose_on_xeon.sh b/CodeTrans/tests/test_compose_on_xeon.sh index 8cbcb23208..d1f55c9a3d 100644 --- a/CodeTrans/tests/test_compose_on_xeon.sh +++ b/CodeTrans/tests/test_compose_on_xeon.sh @@ -30,7 +30,7 @@ function start_services() { cd $WORKPATH/docker_compose/intel/cpu/xeon/ export http_proxy=${http_proxy} export https_proxy=${http_proxy} - export LLM_MODEL_ID="HuggingFaceH4/mistral-7b-grok" + export LLM_MODEL_ID="mistralai/Mistral-7B-Instruct-v0.3" export TGI_LLM_ENDPOINT="http://${ip_address}:8008" export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} export MEGA_SERVICE_HOST_IP=${ip_address} diff --git a/supported_examples.md b/supported_examples.md index e913fd22e2..ec0624b1ac 100644 --- a/supported_examples.md +++ b/supported_examples.md @@ -71,9 +71,9 @@ This document introduces the supported examples of GenAIExamples. The supported [CodeTrans](./CodeTrans/README.md) is an example of chatbot for converting code written in one programming language to another programming language while maintaining the same functionality. 
-| Framework | LLM | Serving | HW | Description | -| ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------- | --------------------------------------------------------------- | ----------- | ---------------- | -| [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [HuggingFaceH4/mistral-7b-grok](https://huggingface.co/HuggingFaceH4/mistral-7b-grok) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon/Gaudi2 | Code Translation | +| Framework | LLM | Serving | HW | Description | +| ------------------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | ----------- | ---------------- | +| [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [mistralai/Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon/Gaudi2 | Code Translation | ### DocSum From 2332d229508a5f40da71c8650631a6774dbd34df Mon Sep 17 00:00:00 2001 From: Yao Qing Date: Mon, 28 Oct 2024 09:18:01 +0800 Subject: [PATCH 02/91] [Codegen] Replace codegen default Model to Qwen/Qwen2.5-Coder-7B-Instruct. 
(#1013) Signed-off-by: Yao, Qing Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CodeGen/README.md | 10 +++++----- CodeGen/docker_compose/intel/cpu/xeon/README.md | 2 +- CodeGen/docker_compose/intel/hpu/gaudi/README.md | 2 +- CodeGen/docker_compose/set_env.sh | 2 +- CodeGen/kubernetes/intel/README.md | 2 +- .../kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml | 2 +- .../kubernetes/intel/cpu/xeon/manifest/codegen.yaml | 2 +- .../kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml | 2 +- .../kubernetes/intel/hpu/gaudi/manifest/codegen.yaml | 2 +- supported_examples.md | 6 +++--- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/CodeGen/README.md b/CodeGen/README.md index 03288fb2df..013c31d373 100644 --- a/CodeGen/README.md +++ b/CodeGen/README.md @@ -85,12 +85,12 @@ Currently we support two ways of deploying ChatQnA services with docker compose: By default, the LLM model is set to a default value as listed below: -| Service | Model | -| ------------ | ------------------------------------------------------------------------------- | -| LLM_MODEL_ID | [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf) | +| Service | Model | +| ------------ | --------------------------------------------------------------------------------------- | +| LLM_MODEL_ID | [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) | -[meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf) is a gated model that requires submitting an access request through Hugging Face. You can replace it with another model. 
-Change the `LLM_MODEL_ID` below for your needs, such as: [Qwen/CodeQwen1.5-7B-Chat](https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat), [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) +[Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) may be a gated model that requires submitting an access request through Hugging Face. You can replace it with another model. +Change the `LLM_MODEL_ID` below for your needs, such as: [deepseek-ai/deepseek-coder-6.7b-instruct](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct) If you choose to use `meta-llama/CodeLlama-7b-hf` as LLM model, you will need to visit [here](https://huggingface.co/meta-llama/CodeLlama-7b-hf), click the `Expand to review and access` button to ask for model access. diff --git a/CodeGen/docker_compose/intel/cpu/xeon/README.md b/CodeGen/docker_compose/intel/cpu/xeon/README.md index 8bdde1f755..5332d719a3 100644 --- a/CodeGen/docker_compose/intel/cpu/xeon/README.md +++ b/CodeGen/docker_compose/intel/cpu/xeon/README.md @@ -105,7 +105,7 @@ export your_no_proxy=${your_no_proxy},"External_Public_IP" export no_proxy=${your_no_proxy} export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} -export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf" +export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/README.md b/CodeGen/docker_compose/intel/hpu/gaudi/README.md index 2a5040ea03..31cfad2929 100644 --- a/CodeGen/docker_compose/intel/hpu/gaudi/README.md +++ b/CodeGen/docker_compose/intel/hpu/gaudi/README.md @@ -85,7 +85,7 @@ Since the `compose.yaml` will consume some environment variables, you need to se export no_proxy=${your_no_proxy} export http_proxy=${your_http_proxy} export https_proxy=${your_http_proxy} 
-export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf" +export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} export MEGA_SERVICE_HOST_IP=${host_ip} diff --git a/CodeGen/docker_compose/set_env.sh b/CodeGen/docker_compose/set_env.sh index d66a120af2..dba717b64a 100644 --- a/CodeGen/docker_compose/set_env.sh +++ b/CodeGen/docker_compose/set_env.sh @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 -export LLM_MODEL_ID="meta-llama/CodeLlama-7b-hf" +export LLM_MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" export TGI_LLM_ENDPOINT="http://${host_ip}:8028" export MEGA_SERVICE_HOST_IP=${host_ip} export LLM_SERVICE_HOST_IP=${host_ip} diff --git a/CodeGen/kubernetes/intel/README.md b/CodeGen/kubernetes/intel/README.md index be18003b83..a4bb446811 100644 --- a/CodeGen/kubernetes/intel/README.md +++ b/CodeGen/kubernetes/intel/README.md @@ -14,7 +14,7 @@ ``` cd GenAIExamples/CodeGen/kubernetes/intel/cpu/xeon/manifests export HUGGINGFACEHUB_API_TOKEN="YourOwnToken" -export MODEL_ID="meta-llama/CodeLlama-7b-hf" +export MODEL_ID="Qwen/Qwen2.5-Coder-7B-Instruct" sed -i "s/insert-your-huggingface-token-here/${HUGGINGFACEHUB_API_TOKEN}/g" codegen.yaml sed -i "s/meta-llama\/CodeLlama-7b-hf/${MODEL_ID}/g" codegen.yaml kubectl apply -f codegen.yaml diff --git a/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml b/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml index dd1675ce3c..8dd3c2b574 100644 --- a/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml +++ b/CodeGen/kubernetes/intel/cpu/xeon/gmc/codegen_xeon.yaml @@ -29,6 +29,6 @@ spec: internalService: serviceName: tgi-service config: - MODEL_ID: meta-llama/CodeLlama-7b-hf + MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct endpoint: /generate isDownstreamService: true diff --git a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml index 96cc682660..4e6d8f91c8 100644 --- 
a/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml +++ b/CodeGen/kubernetes/intel/cpu/xeon/manifest/codegen.yaml @@ -64,7 +64,7 @@ metadata: app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm data: - MODEL_ID: "meta-llama/CodeLlama-7b-hf" + MODEL_ID: "Qwen/Qwen2.5-Coder-7B-Instruct" PORT: "2080" HF_TOKEN: "insert-your-huggingface-token-here" http_proxy: "" diff --git a/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml b/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml index 2e37820577..d9a927e5c4 100644 --- a/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml +++ b/CodeGen/kubernetes/intel/hpu/gaudi/gmc/codegen_gaudi.yaml @@ -29,6 +29,6 @@ spec: internalService: serviceName: tgi-gaudi-svc config: - MODEL_ID: meta-llama/CodeLlama-7b-hf + MODEL_ID: Qwen/Qwen2.5-Coder-7B-Instruct endpoint: /generate isDownstreamService: true diff --git a/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml b/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml index c4a43a7c3c..b506d17d49 100644 --- a/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml +++ b/CodeGen/kubernetes/intel/hpu/gaudi/manifest/codegen.yaml @@ -64,7 +64,7 @@ metadata: app.kubernetes.io/version: "2.1.0" app.kubernetes.io/managed-by: Helm data: - MODEL_ID: "meta-llama/CodeLlama-7b-hf" + MODEL_ID: "Qwen/Qwen2.5-Coder-7B-Instruct" PORT: "2080" HF_TOKEN: "insert-your-huggingface-token-here" http_proxy: "" diff --git a/supported_examples.md b/supported_examples.md index ec0624b1ac..33b02f71d5 100644 --- a/supported_examples.md +++ b/supported_examples.md @@ -63,9 +63,9 @@ This document introduces the supported examples of GenAIExamples. The supported [CodeGen](./CodeGen/README.md) is an example of copilot designed for code generation in Visual Studio Code. 
-| Framework | LLM | Serving | HW | Description | -| ------------------------------------------------------------------------------ | ------------------------------------------------------------------------------- | --------------------------------------------------------------- | ----------- | ----------- | -| [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [meta-llama/CodeLlama-7b-hf](https://huggingface.co/meta-llama/CodeLlama-7b-hf) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon/Gaudi2 | Copilot | +| Framework | LLM | Serving | HW | Description | +| ------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------- | --------------------------------------------------------------- | ----------- | ----------- | +| [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [Qwen/Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct) | [TGI](https://github.com/huggingface/text-generation-inference) | Xeon/Gaudi2 | Copilot | ### CodeTrans From bc47930ce15ea274230bcda6ff1d0df9c201e0b0 Mon Sep 17 00:00:00 2001 From: Lianhao Lu Date: Mon, 28 Oct 2024 11:51:24 +0800 Subject: [PATCH 03/91] manifest CI: repopulate the failure from inner test script (#1032) Signed-off-by: Lianhao Lu --- .github/workflows/_manifest-e2e.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/_manifest-e2e.yml b/.github/workflows/_manifest-e2e.yml index 69a080506d..77386afa89 100644 --- a/.github/workflows/_manifest-e2e.yml +++ b/.github/workflows/_manifest-e2e.yml @@ -91,6 +91,7 @@ jobs: else echo "Validate ${{ inputs.example }} failure!!!" 
.github/workflows/scripts/k8s-utils.sh dump_all_pod_logs $NAMESPACE + exit 1 fi fi From fde5996192710a6788a0ec0ac5d0ddd58d1d55ca Mon Sep 17 00:00:00 2001 From: XinyaoWa Date: Tue, 29 Oct 2024 16:34:11 +0800 Subject: [PATCH 04/91] fix FaqGen accuracy scripts bug (#1039) Signed-off-by: Xinyao Wang --- FaqGen/benchmark/accuracy/evaluate.py | 2 +- FaqGen/benchmark/accuracy/launch_tgi.sh | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/FaqGen/benchmark/accuracy/evaluate.py b/FaqGen/benchmark/accuracy/evaluate.py index 30998da4dd..da75502ce0 100644 --- a/FaqGen/benchmark/accuracy/evaluate.py +++ b/FaqGen/benchmark/accuracy/evaluate.py @@ -35,7 +35,7 @@ contexts.append([inputs_faq]) embeddings = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en-v1.5") -metrics_faq = ["answer_relevancy", "faithfulness", "context_utilization", "reference_free_rubrics_score"] +metrics_faq = ["answer_relevancy", "faithfulness", "context_utilization", "rubrics_score_without_reference"] metric = RagasMetric(threshold=0.5, model=llm_endpoint, embeddings=embeddings, metrics=metrics_faq) test_case = {"question": question, "answer": answer, "ground_truth": ground_truth, "contexts": contexts} diff --git a/FaqGen/benchmark/accuracy/launch_tgi.sh b/FaqGen/benchmark/accuracy/launch_tgi.sh index f4ac9eade4..a504f2a41f 100644 --- a/FaqGen/benchmark/accuracy/launch_tgi.sh +++ b/FaqGen/benchmark/accuracy/launch_tgi.sh @@ -11,7 +11,6 @@ docker run -it --rm \ -p $port_number:80 \ -v $volume:/data \ --runtime=habana \ - --restart always \ -e HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN \ -e HABANA_VISIBLE_DEVICES=all \ -e OMPI_MCA_btl_vader_single_copy_mechanism=none \ From a3ef26006822bc9359a653d8718fd1cb09b5ab22 Mon Sep 17 00:00:00 2001 From: sgurunat Date: Tue, 29 Oct 2024 12:31:31 +0000 Subject: [PATCH 05/91] Add chatqna_wrapper.py along with updated Dockerfile.wrapper. 
To support multiple models chatqna with wrapper is required --- ChatQnA/Dockerfile.wrapper | 36 ++++++++++++++++++++ ChatQnA/chatqna_wrapper.py | 68 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 ChatQnA/Dockerfile.wrapper create mode 100644 ChatQnA/chatqna_wrapper.py diff --git a/ChatQnA/Dockerfile.wrapper b/ChatQnA/Dockerfile.wrapper new file mode 100644 index 0000000000..1baf63e460 --- /dev/null +++ b/ChatQnA/Dockerfile.wrapper @@ -0,0 +1,36 @@ + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + git + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +COPY /GenAIComps /home/user/GenAIComps +WORKDIR /home/user/ +#RUN git clone --branch v1.0 https://github.com/opea-project/GenAIComps.git + + +WORKDIR /home/user/GenAIComps +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt + +COPY /GenAIExamples/ChatQnA/chatqna_wrapper.py /home/user/chatqna.py + +ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps + +USER user + +WORKDIR /home/user + +RUN echo 'ulimit -S -n 999999' >> ~/.bashrc + +ENTRYPOINT ["python", "chatqna.py"] diff --git a/ChatQnA/chatqna_wrapper.py b/ChatQnA/chatqna_wrapper.py new file mode 100644 index 0000000000..09062b5d27 --- /dev/null +++ b/ChatQnA/chatqna_wrapper.py @@ -0,0 +1,68 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os + +from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType + +MEGA_SERVICE_HOST_IP = os.getenv("MEGA_SERVICE_HOST_IP", "0.0.0.0") +MEGA_SERVICE_PORT = int(os.getenv("MEGA_SERVICE_PORT", 8888)) +EMBEDDING_SERVICE_HOST_IP = os.getenv("EMBEDDING_SERVICE_HOST_IP", "0.0.0.0") +EMBEDDING_SERVICE_PORT = 
int(os.getenv("EMBEDDING_SERVICE_PORT", 6000)) +RETRIEVER_SERVICE_HOST_IP = os.getenv("RETRIEVER_SERVICE_HOST_IP", "0.0.0.0") +RETRIEVER_SERVICE_PORT = int(os.getenv("RETRIEVER_SERVICE_PORT", 7000)) +RERANK_SERVICE_HOST_IP = os.getenv("RERANK_SERVICE_HOST_IP", "0.0.0.0") +RERANK_SERVICE_PORT = int(os.getenv("RERANK_SERVICE_PORT", 8000)) +LLM_SERVICE_HOST_IP = os.getenv("LLM_SERVICE_HOST_IP", "0.0.0.0") +LLM_SERVICE_PORT = int(os.getenv("LLM_SERVICE_PORT", 9000)) + + +class ChatQnAService: + def __init__(self, host="0.0.0.0", port=8000): + self.host = host + self.port = port + self.megaservice = ServiceOrchestrator() + + def add_remote_service(self): + embedding = MicroService( + name="embedding", + host=EMBEDDING_SERVICE_HOST_IP, + port=EMBEDDING_SERVICE_PORT, + endpoint="/v1/embeddings", + use_remote_service=True, + service_type=ServiceType.EMBEDDING, + ) + retriever = MicroService( + name="retriever", + host=RETRIEVER_SERVICE_HOST_IP, + port=RETRIEVER_SERVICE_PORT, + endpoint="/v1/retrieval", + use_remote_service=True, + service_type=ServiceType.RETRIEVER, + ) + rerank = MicroService( + name="rerank", + host=RERANK_SERVICE_HOST_IP, + port=RERANK_SERVICE_PORT, + endpoint="/v1/reranking", + use_remote_service=True, + service_type=ServiceType.RERANK, + ) + llm = MicroService( + name="llm", + host=LLM_SERVICE_HOST_IP, + port=LLM_SERVICE_PORT, + endpoint="/v1/chat/completions", + use_remote_service=True, + service_type=ServiceType.LLM, + ) + self.megaservice.add(embedding).add(retriever).add(rerank).add(llm) + self.megaservice.flow_to(embedding, retriever) + self.megaservice.flow_to(retriever, rerank) + self.megaservice.flow_to(rerank, llm) + self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=self.port) + + +if __name__ == "__main__": + chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT) + chatqna.add_remote_service() From 1d30bff42b1282149a0ef8e4c86be5293828d753 Mon Sep 17 00:00:00 2001 From: sgurunat Date: Tue, 
29 Oct 2024 12:38:48 +0000 Subject: [PATCH 06/91] ProductivitySuite: Add docker compose files for Intel Gaudi server along with remote tgi/tei service with instructions --- .../docker_compose/intel/hpu/gaudi/README.md | 554 ++++++++++++++++++ .../intel/hpu/gaudi/README_remote.md | 277 +++++++++ .../intel/hpu/gaudi/compose.yaml | 398 +++++++++++++ .../intel/hpu/gaudi/compose_remote.yaml | 306 ++++++++++ .../intel/hpu/gaudi/compose_tgi_remote.yaml | 354 +++++++++++ .../docker_compose/intel/hpu/gaudi/set_env.sh | 48 ++ .../intel/hpu/gaudi/set_env_remote.sh | 57 ++ 7 files changed, 1994 insertions(+) create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/README.md create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/README_remote.md create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/compose.yaml create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env.sh create mode 100644 ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env_remote.sh diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/README.md b/ProductivitySuite/docker_compose/intel/hpu/gaudi/README.md new file mode 100644 index 0000000000..60faeb8c7b --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/README.md @@ -0,0 +1,554 @@ +# Build Mega Service of Productivity Suite on Gaudi + +This document outlines the deployment process for OPEA Productivity Suite utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Gaudi server and [GenAIExamples](https://github.com/opea-project/GenAIExamples.git) solutions. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`. 
We will publish the Docker images to Docker Hub soon, which will simplify the deployment process for this service. + +--- + +## 🐳 Build Docker Images + +First of all, you need to build the Docker images locally and install the corresponding Python package. + +### 1. Build Embedding Image + +```bash +git clone https://github.com/opea-project/GenAIComps.git +cd GenAIComps +docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile . +``` + +### 2. Build Retriever Image + +```bash +docker build --no-cache -t opea/retriever-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/retrievers/redis/langchain/Dockerfile . +``` + +### 3. Build Rerank Image + +```bash +docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile . +``` + +### 4. Build LLM Image + +#### Use TGI as backend + +```bash +docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile . +``` + +### 5. Build Dataprep Image + +```bash +docker build --no-cache -t opea/dataprep-redis:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/dataprep/redis/langchain/Dockerfile . +``` + +### 6. Build Prompt Registry Image + +```bash +docker build -t opea/promptregistry-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/mongo/Dockerfile . +``` + +### 7. Build Chat History Image + +```bash +docker build -t opea/chathistory-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/chathistory/mongo/Dockerfile . +cd .. +``` + +### 8. 
Build MegaService Docker Images + +The Productivity Suite is built from multiple GenAIExample reference solutions combined together. + +#### 8.1 Build ChatQnA MegaService Docker Images + +```bash +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/ChatQnA/ +docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +#### 8.2 Build DocSum Megaservice Docker Images + +```bash +cd GenAIExamples/DocSum +docker build --no-cache -t opea/docsum:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +#### 8.3 Build CodeGen Megaservice Docker Images + +```bash +cd GenAIExamples/CodeGen +docker build --no-cache -t opea/codegen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +#### 8.4 Build FAQGen Megaservice Docker Images + +```bash +cd GenAIExamples/FaqGen +docker build --no-cache -t opea/faqgen:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile . +``` + +### 9. Build UI Docker Image + +Build the frontend Docker image with the command below: + +**Export the value of the public IP address of your server to the `host_ip` environment variable** + +```bash +cd GenAIExamples/ProductivitySuite/ui +docker build --no-cache -t opea/productivity-suite-react-ui-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f docker/Dockerfile.react . +``` + +--- + +## 🚀 Start Microservices + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as shown below. 
+ +**Export the value of the public IP address of your server to the `host_ip` environment variable** + +> Change the External_Public_IP below with the actual IPv4 value + +``` +export host_ip="External_Public_IP" +``` + +**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable** + +> Change the Your_Huggingface_API_Token below with your actual Huggingface API Token value + +``` +export your_hf_api_token="Your_Huggingface_API_Token" +``` + +**Append the value of the public IP address to the no_proxy list** + +``` +export your_no_proxy=${your_no_proxy},"External_Public_IP" +``` + +```bash +export MONGO_HOST=${host_ip} +export MONGO_PORT=27017 +export DB_NAME="test" +export COLLECTION_NAME="Conversations" +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export RERANK_MODEL_ID="BAAI/bge-reranker-base" +export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf" +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" +export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" +export TGI_LLM_ENDPOINT="http://${host_ip}:9009" +export REDIS_URL="redis://${host_ip}:6379" +export INDEX_NAME="rag-redis" +export HUGGINGFACEHUB_API_TOKEN=${your_hf_api_token} +export MEGA_SERVICE_HOST_IP=${host_ip} +export EMBEDDING_SERVICE_HOST_IP=${host_ip} +export RETRIEVER_SERVICE_HOST_IP=${host_ip} +export RERANK_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP_DOCSUM=${host_ip} +export LLM_SERVICE_HOST_IP_FAQGEN=${host_ip} +export LLM_SERVICE_HOST_IP_CODEGEN=${host_ip} +export LLM_SERVICE_HOST_IP_CHATQNA=${host_ip} +export TGI_LLM_ENDPOINT_CHATQNA="http://${host_ip}:9009" +export TGI_LLM_ENDPOINT_CODEGEN="http://${host_ip}:8028" +export TGI_LLM_ENDPOINT_FAQGEN="http://${host_ip}:9009" +export TGI_LLM_ENDPOINT_DOCSUM="http://${host_ip}:9009" +export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna" +export 
DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file" +export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen" +export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen" +export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" +export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" +export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" +export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete" +export CHAT_HISTORY_GET_ENDPOINT="http://${host_ip}:6012/v1/chathistory/get" +export PROMPT_SERVICE_GET_ENDPOINT="http://${host_ip}:6018/v1/prompt/get" +export PROMPT_SERVICE_CREATE_ENDPOINT="http://${host_ip}:6018/v1/prompt/create" +export KEYCLOAK_SERVICE_ENDPOINT="http://${host_ip}:8080" +export LLM_SERVICE_HOST_PORT_FAQGEN=9002 +export LLM_SERVICE_HOST_PORT_CODEGEN=9001 +export LLM_SERVICE_HOST_PORT_DOCSUM=9003 +export PROMPT_COLLECTION_NAME="prompt" +``` + +Note: Please replace `host_ip` with your external IP address; do not use localhost. + +### Start all the services Docker Containers + +> Before running the docker compose command, you need to be in the folder that has the docker compose yaml file + +```bash +cd GenAIExamples/ProductivitySuite/docker_compose/intel/hpu/gaudi + +docker compose -f compose.yaml up -d +``` + +--- + +### 🔐 Setup Keycloak + +Please refer to **[keycloak_setup_guide](keycloak_setup_guide.md)** for more detail related to Keycloak configuration setup. + +--- + +### ✅ Validate Microservices + +1. TEI Embedding Service + + ```bash + curl ${host_ip}:6006/embed \ + -X POST \ + -d '{"inputs":"What is Deep Learning?"}' \ + -H 'Content-Type: application/json' + ``` + +2.
Embedding Microservice + + ```bash + curl http://${host_ip}:6000/v1/embeddings\ + -X POST \ + -d '{"text":"hello"}' \ + -H 'Content-Type: application/json' + ``` + +3. Retriever Microservice + + To consume the retriever microservice, you need to generate a mock embedding vector by Python script. The length of embedding vector + is determined by the embedding model. + Here we use the model `EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"`, which vector size is 768. + + Check the vector dimension of your embedding model, set `your_embedding` dimension equals to it. + + ```bash + export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)") + curl http://${host_ip}:7000/v1/retrieval \ + -X POST \ + -d "{\"text\":\"test\",\"embedding\":${your_embedding}}" \ + -H 'Content-Type: application/json' + ``` + +4. TEI Reranking Service + + ```bash + curl http://${host_ip}:8808/rerank \ + -X POST \ + -d '{"query":"What is Deep Learning?", "texts": ["Deep Learning is not...", "Deep learning is..."]}' \ + -H 'Content-Type: application/json' + ``` + +5. Reranking Microservice + + ```bash + curl http://${host_ip}:8000/v1/reranking\ + -X POST \ + -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \ + -H 'Content-Type: application/json' + ``` + +6. LLM backend Service (ChatQnA, DocSum, FAQGen) + + ```bash + curl http://${host_ip}:9009/generate \ + -X POST \ + -d '{"inputs":"What is Deep Learning?","parameters":{"max_new_tokens":17, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +7. LLM backend Service (CodeGen) + + ```bash + curl http://${host_ip}:8028/generate \ + -X POST \ + -d '{"inputs":"def print_hello_world():","parameters":{"max_new_tokens":256, "do_sample": true}}' \ + -H 'Content-Type: application/json' + ``` + +8. 
ChatQnA LLM Microservice + + ```bash + curl http://${host_ip}:9000/v1/chat/completions\ + -X POST \ + -d '{"query":"What is Deep Learning?","max_tokens":17,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' \ + -H 'Content-Type: application/json' + ``` + +9. CodeGen LLM Microservice + + ```bash + curl http://${host_ip}:9001/v1/chat/completions\ + -X POST \ + -d '{"query":"def print_hello_world():"}' \ + -H 'Content-Type: application/json' + ``` + +10. DocSum LLM Microservice + + ```bash + curl http://${host_ip}:9003/v1/chat/docsum\ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5"}' \ + -H 'Content-Type: application/json' + ``` + +11. FAQGen LLM Microservice + + ```bash + curl http://${host_ip}:9002/v1/faqgen\ + -X POST \ + -d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5"}' \ + -H 'Content-Type: application/json' + ``` + +12. ChatQnA MegaService + + ```bash + curl http://${host_ip}:8888/v1/chatqna -H "Content-Type: application/json" -d '{ + "messages": "What is the revenue of Nike in 2023?" + }' + ``` + +13. FAQGen MegaService + + ```bash + curl http://${host_ip}:8889/v1/faqgen -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + }' + ``` + +14.
DocSum MegaService + + ```bash + curl http://${host_ip}:8890/v1/docsum -H "Content-Type: application/json" -d '{ + "messages": "Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5." + }' + ``` + +15. CodeGen MegaService + + ```bash + curl http://${host_ip}:7778/v1/codegen -H "Content-Type: application/json" -d '{ + "messages": "def print_hello_world():" + }' + ``` + +16. Dataprep Microservice + + If you want to update the default knowledge base, you can use the following commands: + + Update Knowledge Base via Local File Upload: + + ```bash + curl -X POST "http://${host_ip}:6007/v1/dataprep" \ + -H "Content-Type: multipart/form-data" \ + -F "files=@./nke-10k-2023.pdf" + ``` + + This command updates a knowledge base by uploading a local file for processing. Update the file path according to your environment. + + Add Knowledge Base via HTTP Links: + + ```bash + curl -X POST "http://${host_ip}:6007/v1/dataprep" \ + -H "Content-Type: multipart/form-data" \ + -F 'link_list=["https://opea.dev"]' + ``` + + This command updates a knowledge base by submitting a list of HTTP links for processing. 
+ + Also, you are able to get the file list that you uploaded: + + ```bash + curl -X POST "http://${host_ip}:6007/v1/dataprep/get_file" \ + -H "Content-Type: application/json" + ``` + + To delete the file/link you uploaded: + + ```bash + # delete link + curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ + -d '{"file_path": "https://opea.dev.txt"}' \ + -H "Content-Type: application/json" + + # delete file + curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ + -d '{"file_path": "nke-10k-2023.pdf"}' \ + -H "Content-Type: application/json" + + # delete all uploaded files and links + curl -X POST "http://${host_ip}:6007/v1/dataprep/delete_file" \ + -d '{"file_path": "all"}' \ + -H "Content-Type: application/json" + ``` + +17. Prompt Registry Microservice + + If you want to update the default Prompts in the application for your user, you can use the following commands: + + ```bash + curl -X 'POST' \ + http://${host_ip}:6018/v1/prompt/create \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "prompt_text": "test prompt", "user": "test" + }' + ``` + + Retrieve prompt from database based on user or prompt_id + + ```bash + curl -X 'POST' \ + http://${host_ip}:6018/v1/prompt/get \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "user": "test"}' + + curl -X 'POST' \ + http://${host_ip}:6018/v1/prompt/get \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "user": "test", "prompt_id":"{prompt_id returned from save prompt route above}"}' + ``` + + Delete prompt from database based on prompt_id provided + + ```bash + curl -X 'POST' \ + http://${host_ip}:6018/v1/prompt/delete \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "user": "test", "prompt_id":"{prompt_id to be deleted}"}' + ``` + +18. Chat History Microservice + + To validate the chatHistory Microservice, you can use the following commands.
+ + Create a sample conversation and get the message ID. + + ```bash + curl -X 'POST' \ + http://${host_ip}:6012/v1/chathistory/create \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "data": { + "messages": "test Messages", "user": "test" + } + }' + ``` + + Retrieve the conversation based on user or conversation id + + ```bash + curl -X 'POST' \ + http://${host_ip}:6012/v1/chathistory/get \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "user": "test"}' + + curl -X 'POST' \ + http://${host_ip}:6012/v1/chathistory/get \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "user": "test", "id":"{Conversation id to retrieve }"}' + ``` + + Delete Conversation from database based on conversation id provided. + + ```bash + curl -X 'POST' \ + http://${host_ip}:6012/v1/chathistory/delete \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ + -d '{ + "user": "test", "id":"{Conversation id to Delete}"}' + ``` + +--- + +## 🚀 Launch the UI + +To access the frontend, open the following URL in your browser: http://{host_ip}:5174. By default, the UI runs on port 80 internally. If you prefer to use a different host port to access the frontend, you can modify the port mapping in the `compose.yaml` file as shown below: + +```yaml + productivity-suite-gaudi-react-ui-server: + image: opea/productivity-suite-react-ui-server:latest + ... + ports: + - "5715:80" # Map port 5715 on the host to port 80 in the container. +``` + +Here is an example of running Productivity Suite +![project-screenshot](../../../../assets/img/chat_qna_init.png) +![project-screenshot](../../../../assets/img/Login_page.png) + +--- + +## 🛠️ Key Features + +Here're some of the project's features: + +### 💬ChatQnA + +- **Start a Text Chat**:Initiate a text chat with the ability to input written conversations, where the dialogue content can also be customized based on uploaded files. 
+- **Context Awareness**: The AI assistant maintains the context of the conversation, understanding references to previous statements or questions. This allows for more natural and coherent exchanges. + +### 🎛️ Data Source + +- **File Upload or Remote Link**: The choice between uploading locally or copying a remote link. Chat according to uploaded knowledge base. +- **File Management**:Uploaded File would get listed and user would be able add or remove file/links + +#### Screenshots + +![project-screenshot](../../../../assets/img/data_source.png) + +- **Clear Chat**: Clear the record of the current dialog box without retaining the contents of the dialog box. +- **Chat history**: Historical chat records can still be retained after refreshing, making it easier for users to view the context. +- **Conversational Chat**: The application maintains a history of the conversation, allowing users to review previous messages and the AI to refer back to earlier points in the dialogue when necessary. + +#### Screenshots + +![project-screenshot](../../../../assets/img/chat_qna_init.png) +![project-screenshot](../../../../assets/img/chatqna_with_conversation.png) + +### 💻 Codegen + +- **Generate code**: generate the corresponding code based on the current user's input. + +#### Screenshots + +![project-screenshot](../../../../assets/img/codegen.png) + +### 📚 Document Summarization + +- **Summarizing Uploaded Files**: Upload files from their local device, then click 'Generate Summary' to summarize the content of the uploaded file. The summary will be displayed on the 'Summary' box. +- **Summarizing Text via Pasting**: Paste the text to be summarized into the text box, then click 'Generate Summary' to produce a condensed summary of the content, which will be displayed in the 'Summary' box on the right. +- **Scroll to Bottom**: The summarized content will automatically scroll to the bottom. 
+ +#### Screenshots + +![project-screenshot](../../../../assets/img/doc_summary_paste.png) +![project-screenshot](../../../../assets/img/doc_summary_file.png) + +### ❓ FAQ Generator + +- **Generate FAQs from Text via Pasting**: Paste the text to into the text box, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below. + +- **Generate FAQs from Text via txt file Upload**: Upload the file in the Upload bar, then click 'Generate FAQ' to produce a condensed FAQ of the content, which will be displayed in the 'FAQ' box below. + +#### Screenshots + +![project-screenshot](../../../../assets/img/faq_generator.png) diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/README_remote.md b/ProductivitySuite/docker_compose/intel/hpu/gaudi/README_remote.md new file mode 100644 index 0000000000..c6bd1fd023 --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/README_remote.md @@ -0,0 +1,277 @@ +# Build Mega Service of Productivity Suite + +This document outlines the deployment process for OPEA Productivity Suite utilizing the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice pipeline on Intel Gaudi server and [GenAIExamples](https://github.com/opea-project/GenAIExamples.git) solutions. The steps include Docker image creation, container deployment via Docker Compose, and service execution to integrate microservices such as `embedding`, `retriever`, `rerank`, and `llm`. + +## 🚀 Build Docker Images + +Create a directory and clone the GenAIComps repository + +```bash +mkdir genai +git clone --branch v1.0 https://github.com/opea-project/GenAIComps.git +``` +Copy patch files related to GenAIComps inside GenAIComps folder and apply the patch + +```bash +cd GenAIComps +git am *.patch +``` + +### 1. 
Build Embedding Image + +```bash +docker build --no-cache -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/tei/langchain/Dockerfile . +``` + +### 2. Build Rerank Image + +```bash +docker build --no-cache -t opea/reranking-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/reranks/tei/Dockerfile . +``` + +### 3. Build LLM Images + +#### Use TGI as backend to build FAQ Generation + +```bash +docker build -t opea/llm-faqgen-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/faq-generation/tgi/langchain/Dockerfile . +``` + +#### Use TGI as backend to build Doc Summarization + +```bash +docker build -t opea/llm-docsum-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/summarization/tgi/langchain/Dockerfile . +``` + +#### Use TGI as backend to build Text Generation + +```bash +docker build --no-cache -t opea/llm-tgi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/tgi/Dockerfile . +``` + +### 4. Build Prompt Registry Image + +```bash +docker build -t opea/promptregistry-mongo-server:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/prompt_registry/mongo/Dockerfile . +``` + +### 5. Build Productivity Suite Docker Images + +The Productivity Suite is composed of multiple GenAIExample reference solutions composed together. + +```bash +cd .. +git clone --branch v1.0 https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples +``` + +Copy the patch files related to GenAIExamples into above cloned GenAIExamples folder + +Apply the patches +```bash +git am *.patch +``` + +#### 5.1 Build ChatQnA MegaService Docker Images + +```bash +cd .. 
+docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f GenAIExamples/ChatQnA/Dockerfile.wrapper . +``` + +### 6. Build UI Docker Image + +Build the frontend Docker image via the command below: + +**Export the value of the public IP address of your server to the `host_ip` environment variable** + +```bash +cd GenAIExamples/ProductivitySuite/ui +docker build --no-cache -t opea/productivity-suite-react-ui-server:latest -f docker/Dockerfile.react . +``` + +## 🚀 Start Microservices + +### Setup Environment Variables + +Since the `compose.yaml` will consume some environment variables, you need to set them up in advance as below. + +**Export the value of the public IP address of your server to the `host_ip` environment variable** + +> Change the External_Public_IP below with the actual IPV4 value + +``` +export host_ip="External_Public_IP" +``` + +**Export the value of your Huggingface API token to the `your_hf_api_token` environment variable** + +> Change the Your_Huggingface_API_Token below with your actual Huggingface API Token value + +``` +export your_hf_api_token="Your_Huggingface_API_Token" +``` + +**Append the value of the public IP address to the no_proxy list** + +``` +export your_no_proxy=${your_no_proxy},"External_Public_IP" +``` + +**Export the value of your remote host to the `remote_host` environment variable (Only if you are using remote TGI/TEI)** + +> Change the Your_Remote_Host below with your actual API Gateway Host value + +``` +export remote_host="Your_Remote_Host" +``` + +**Set ClientId, Client_Secret and Token URL only if the remote API is protected with OAuth Client Credentials Flow** + +**Export the value of your Remote API ClientId to the `clientid` environment variable** + +> Change the Your_API_ClientId below with your actual ClientId value + +``` +export clientid="Your_API_ClientId" +``` + +**Export the value of your Remote API client secret to the `client_secret`
environment variable** + +> Change the Your_API_ClientSecret below with your actual ClientSecret value + +``` +export client_secret="Your_API_ClientSecret" +``` + +**Export the value of your Remote API token url to the `token_url` environment variable** + +> Change the Your_API_TokenUrl below with your actual Token URL value + +``` +export token_url="Your_API_TokenUrl" +``` + +**Export the value of your Remote Embedding Endpoint to the `embedding_endpoint` environment variable (Set this if you have tei embedding running remotely)** + +> Change the Your_Remote_Embedding_Endpoint below with your actual embedding endpoint value + +``` +export embedding_endpoint="Your_Remote_Embedding_Endpoint" +``` + +**Export the value of your Remote Reranking Endpoint to the `reranking_endpoint` environment variable (Set this if you have reranking running remotely)** + +> Change the Your_Remote_Reranking_Endpoint below with your actual reranking endpoint value + +``` +export reranking_endpoint="Your_Remote_Reranking_Endpoint" +``` + +**Export the value of your Remote TGI Endpoint to the `tgi_endpoint` environment variable (Set this if you have tgi running remotely)** + +> Change the Your_Remote_TGI_Endpoint below with your actual tgi endpoint value + +``` +export tgi_endpoint="Your_Remote_TGI_Endpoint" +``` + +**To use multiple TGI models** +> Create the model_configs.json file under /GenAIExamples/ProductivitySuite/docker_compose/intel/hpu/gaudi folder +> Add the model details as shown in the below example + + +```bash +cd ..
+cd docker_compose/intel/hpu/gaudi +touch model_configs.json +``` + +File Structure: + +[ + { + "model_name": "Your Model Name", + "displayName": "Model Display Name for the UI", + "endpoint": "Model Endpoint with http/https", + "minToken": 100, //Min Token Value + "maxToken": 2000 //Max Token Value + }, + { + "model_name": "Your Model Name", + "displayName": "Model Display Name for the UI", + "endpoint": "Model Endpoint with http/https", + "minToken": 100, //Min Token Value + "maxToken": 2000 //Max Token Value + } +] + +Example: + +[ + { + "model_name": "meta-llama/Meta-Llama-3.1-70B-Instruct", + "displayName": "llama-3.1-70B", + "endpoint": "https:///", + "minToken": 100, + "maxToken": 2000 + }, + { + "model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "displayName": "llama-3.1-8B", + "endpoint": "https:///", + "minToken": 100, + "maxToken": 2000 + }, + { + "model_name": "Intel/neural-chat-7b-v3-3", + "displayName": "neural chat", + "endpoint": "https:///", + "minToken": 100, + "maxToken": 1000 + } +] + +> After creating and adding details in the model_configs.json file. Copy the same file into the public folder of the UI + +```bash +cd ../../../../ +cp docker_compose/intel/hpu/gaudi/model_configs.json ui/react/public/model_configs.json +``` + +> Navigate to GenAIExamples/ProductivitySuite/docker_compose/intel/hpu/gaudi and source set_env_remote.sh + +```bash +cd docker_compose/intel/hpu/gaudi +chmod +x set_env_remote.sh +source set_env_remote.sh +``` + +Note: Please replace `host_ip` with your external IP address; do not use localhost.
+ +### Start all the services Docker Containers + +#### Run all services locally +```bash +docker compose -f compose.yaml up -d +``` + +#### Run TGI and TEI inference remote +```bash +docker compose -f compose_remote.yaml up -d +``` + +#### Run only TGI remote +```bash +docker compose -f compose_tgi_remote.yaml up -d +``` + +### Setup Keycloak + +Please refer to [keycloak_setup_guide](keycloak_setup_guide.md) for more detail related to Keycloak configuration setup. + + +## 🚀 Launch the UI + +To access the frontend, open the following URL in your browser: http://{host_ip}:5174. + diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose.yaml b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose.yaml new file mode 100644 index 0000000000..d15856e275 --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose.yaml @@ -0,0 +1,398 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +version: "3.3" +services: + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + ports: + - "6379:6379" + - "8001:8001" + dataprep-redis-service: + image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "6007:6007" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + tei-embedding-service: + image: ghcr.io/huggingface/tei-gaudi:latest + container_name: tei-embedding-gaudi-server + ports: + - "6006:80" + volumes: + - "./data_embedding:/data" + shm_size: 1g + runtime: habana + cap_add: + - SYS_NICE + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 
+ command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate + embedding: + image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest} + container_name: embedding-tei-server + depends_on: + - tei-embedding-service + ports: + - "6000:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-embedding-service" + restart: unless-stopped + retriever: + image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} + container_name: retriever-redis-server + depends_on: + - redis-vector-db + ports: + - "7000:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/tei-gaudi:latest + container_name: tei-reranking-server + ports: + - "8808:80" + volumes: + - "./data_tei:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 + runtime: habana + cap_add: + - SYS_NICE + ipc: host + command: --model-id ${RERANK_MODEL_ID} --auto-truncate + reranking: + image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} + container_name: reranking-tei-gaudi-server + depends_on: + - tei-reranking-service + ports: + - "8000:8000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} + 
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-reranking-service" + restart: unless-stopped + tgi_service: + image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + container_name: tgi-service + ports: + - "9009:80" + volumes: + - "./data:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + ENABLE_HPU_GRAPH: true + LIMIT_HPU_GRAPH: true + USE_FLASH_ATTENTION: true + FLASH_ATTENTION_RECOMPUTE: true + runtime: habana + cap_add: + - SYS_NICE + ipc: host + command: --model-id ${LLM_MODEL_ID} + llm: + image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} + container_name: llm-tgi-gaudi-server + depends_on: + - tgi_service + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + + restart: unless-stopped + chatqna-gaudi-backend-server: + image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} + container_name: chatqna-gaudi-backend-server + depends_on: + - redis-vector-db + - tei-embedding-service + - embedding + - retriever + - tei-reranking-service + - reranking + - tgi_service + - llm + ports: + - "8888:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + 
EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP} + RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP} + RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA} + ipc: host + restart: always + tgi_service_codegen: + image: ghcr.io/huggingface/tgi-gaudi:2.0.5 + container_name: tgi_service_codegen + ports: + - "8028:80" + volumes: + - "./data_codegen:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + ENABLE_HPU_GRAPH: true + LIMIT_HPU_GRAPH: true + USE_FLASH_ATTENTION: true + FLASH_ATTENTION_RECOMPUTE: true + runtime: habana + cap_add: + - SYS_NICE + ipc: host + command: --model-id ${LLM_MODEL_ID_CODEGEN} --max-input-length 1024 --max-total-tokens 2048 + llm_codegen: + image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} + container_name: llm-tgi-server-codegen + depends_on: + - tgi_service_codegen + ports: + - "9001:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CODEGEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + restart: unless-stopped + codegen-gaudi-backend-server: + image: ${REGISTRY:-opea}/codegen:${TAG:-latest} + container_name: codegen-gaudi-backend-server + depends_on: + - llm + ports: + - "7778:7778" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CODEGEN} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_CODEGEN} + ipc: host + restart: always + llm_faqgen: + image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest} + 
container_name: llm-faqgen-server + depends_on: + - tgi_service + ports: + - "9002:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_FAQGEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + restart: unless-stopped + faqgen-gaudi-backend-server: + image: ${REGISTRY:-opea}/faqgen:${TAG:-latest} + container_name: faqgen-gaudi-backend-server + depends_on: + - tgi_service + - llm_faqgen + ports: + - "8889:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_FAQGEN} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_FAQGEN} + ipc: host + restart: always + llm_docsum_server: + image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + container_name: llm-docsum-server + depends_on: + - tgi_service + ports: + - "9003:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_DOCSUM} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + restart: unless-stopped + docsum-gaudi-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-gaudi-backend-server + depends_on: + - tgi_service + - llm_docsum_server + ports: + - "8890:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_DOCSUM} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_DOCSUM} + ipc: host + restart: always + mongo: + image: mongo:7.0.11 + 
container_name: mongodb + ports: + - 27017:27017 + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + command: mongod --quiet --logpath /dev/null + chathistory-mongo: + image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + container_name: chathistory-mongo-server + ports: + - "6012:6012" + ipc: host + environment: + http_proxy: ${http_proxy} + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + MONGO_HOST: ${MONGO_HOST} + MONGO_PORT: ${MONGO_PORT} + COLLECTION_NAME: ${COLLECTION_NAME} + restart: unless-stopped + promptregistry-mongo: + image: ${REGISTRY:-opea}/promptregistry-mongo-server:${TAG:-latest} + container_name: promptregistry-mongo-server + ports: + - "6018:6018" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + MONGO_HOST: ${MONGO_HOST} + MONGO_PORT: ${MONGO_PORT} + COLLECTION_NAME: ${PROMPT_COLLECTION_NAME} + restart: unless-stopped + keycloak: + image: quay.io/keycloak/keycloak:25.0.2 + container_name: keycloak-server + ports: + - 8081:8080 + environment: + - KEYCLOAK_ADMIN=admin + - KEYCLOAK_ADMIN_PASSWORD=admin + - KC_PROXY=edge + ipc: host + command: start-dev + restart: always + productivity-suite-gaudi-react-ui-server: + image: ${REGISTRY:-opea}/productivity-suite-react-ui-server:${TAG:-latest} + container_name: productivity-suite-gaudi-react-ui-server + ports: + - "5174:80" + environment: + - APP_BACKEND_SERVICE_ENDPOINT_CHATQNA=${BACKEND_SERVICE_ENDPOINT_CHATQNA} + - APP_BACKEND_SERVICE_ENDPOINT_CODEGEN=${BACKEND_SERVICE_ENDPOINT_CODEGEN} + - APP_BACKEND_SERVICE_ENDPOINT_DOCSUM=${BACKEND_SERVICE_ENDPOINT_DOCSUM} + - APP_BACKEND_SERVICE_ENDPOINT_FAQGEN=${BACKEND_SERVICE_ENDPOINT_FAQGEN} + - APP_DATAPREP_SERVICE_ENDPOINT=${DATAPREP_SERVICE_ENDPOINT} + - APP_DATAPREP_GET_FILE_ENDPOINT=${DATAPREP_GET_FILE_ENDPOINT} + - APP_DATAPREP_DELETE_FILE_ENDPOINT=${DATAPREP_DELETE_FILE_ENDPOINT} + - 
APP_CHAT_HISTORY_CREATE_ENDPOINT=${CHAT_HISTORY_CREATE_ENDPOINT} + - APP_CHAT_HISTORY_DELETE_ENDPOINT=${CHAT_HISTORY_DELETE_ENDPOINT} + - APP_CHAT_HISTORY_GET_ENDPOINT=${CHAT_HISTORY_GET_ENDPOINT} + - APP_PROMPT_SERVICE_GET_ENDPOINT=${PROMPT_SERVICE_GET_ENDPOINT} + - APP_PROMPT_SERVICE_CREATE_ENDPOINT=${PROMPT_SERVICE_CREATE_ENDPOINT} + - APP_KEYCLOAK_SERVICE_ENDPOINT=${KEYCLOAK_SERVICE_ENDPOINT} + ipc: host + restart: always +networks: + default: + driver: bridge \ No newline at end of file diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml new file mode 100644 index 0000000000..82408a33fe --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_remote.yaml @@ -0,0 +1,306 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +version: "3.3" +services: + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + ports: + - "6379:6379" + - "8001:8001" + dataprep-redis-service: + image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "6007:6007" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + embedding: + image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest} + container_name: embedding-tei-server + ports: + - "6000:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-embedding-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: 
unless-stopped + retriever: + image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} + container_name: retriever-redis-server + depends_on: + - redis-vector-db + ports: + - "7000:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + restart: unless-stopped + reranking: + image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} + container_name: reranking-tei-gaudi-server + ports: + - "8000:8000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-reranking-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + llm: + image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} + container_name: llm-tgi-gaudi-server + ports: + - "9000:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + MODEL_CONFIGS: ${MODEL_CONFIGS} + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + + restart: unless-stopped + chatqna-gaudi-backend-server: + image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} + container_name: chatqna-gaudi-backend-server + depends_on: + - redis-vector-db + - 
embedding + - retriever + - reranking + - llm + ports: + - "8888:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP} + RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP} + RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA} + ipc: host + restart: always + llm_codegen: + image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} + container_name: llm-tgi-server-codegen + ports: + - "9001:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CODEGEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + codegen-gaudi-backend-server: + image: ${REGISTRY:-opea}/codegen:${TAG:-latest} + container_name: codegen-gaudi-backend-server + ports: + - "7778:7778" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CODEGEN} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_CODEGEN} + ipc: host + restart: always + llm_faqgen: + image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest} + container_name: llm-faqgen-server + ports: + - "9002:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_FAQGEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + CLIENTID: ${CLIENTID} + 
CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + faqgen-gaudi-backend-server: + image: ${REGISTRY:-opea}/faqgen:${TAG:-latest} + container_name: faqgen-gaudi-backend-server + depends_on: + - llm_faqgen + ports: + - "8889:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_FAQGEN} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_FAQGEN} + ipc: host + restart: always + llm_docsum_server: + image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + container_name: llm-docsum-server + ports: + - "9003:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_DOCSUM} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + docsum-gaudi-backend-server: + image: ${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-gaudi-backend-server + depends_on: + - llm_docsum_server + ports: + - "8890:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_DOCSUM} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_DOCSUM} + ipc: host + restart: always + mongo: + image: mongo:7.0.11 + container_name: mongodb + ports: + - 27017:27017 + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + command: mongod --quiet --logpath /dev/null + chathistory-mongo: + image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + container_name: chathistory-mongo-server + ports: + - "6012:6012" + ipc: host + 
environment: + http_proxy: ${http_proxy} + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + MONGO_HOST: ${MONGO_HOST} + MONGO_PORT: ${MONGO_PORT} + COLLECTION_NAME: ${COLLECTION_NAME} + restart: unless-stopped + promptregistry-mongo: + image: ${REGISTRY:-opea}/promptregistry-mongo-server:${TAG:-latest} + container_name: promptregistry-mongo-server + ports: + - "6018:6018" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + MONGO_HOST: ${MONGO_HOST} + MONGO_PORT: ${MONGO_PORT} + COLLECTION_NAME: ${PROMPT_COLLECTION_NAME} + restart: unless-stopped + keycloak: + image: quay.io/keycloak/keycloak:25.0.2 + container_name: keycloak-server + ports: + - 8080:8080 + environment: + - KEYCLOAK_ADMIN=admin + - KEYCLOAK_ADMIN_PASSWORD=admin + - KC_PROXY=edge + ipc: host + command: + - start-dev + restart: always + productivity-suite-gaudi-react-ui-server: + image: ${REGISTRY:-opea}/productivity-suite-react-ui-server:${TAG:-latest} + container_name: productivity-suite-gaudi-react-ui-server + ports: + - "5174:80" + environment: + - APP_BACKEND_SERVICE_ENDPOINT_CHATQNA=${BACKEND_SERVICE_ENDPOINT_CHATQNA} + - APP_BACKEND_SERVICE_ENDPOINT_CODEGEN=${BACKEND_SERVICE_ENDPOINT_CODEGEN} + - APP_BACKEND_SERVICE_ENDPOINT_DOCSUM=${BACKEND_SERVICE_ENDPOINT_DOCSUM} + - APP_BACKEND_SERVICE_ENDPOINT_FAQGEN=${BACKEND_SERVICE_ENDPOINT_FAQGEN} + - APP_DATAPREP_SERVICE_ENDPOINT=${DATAPREP_SERVICE_ENDPOINT} + - APP_DATAPREP_GET_FILE_ENDPOINT=${DATAPREP_GET_FILE_ENDPOINT} + - APP_DATAPREP_DELETE_FILE_ENDPOINT=${DATAPREP_DELETE_FILE_ENDPOINT} + - APP_CHAT_HISTORY_CREATE_ENDPOINT=${CHAT_HISTORY_CREATE_ENDPOINT} + - APP_CHAT_HISTORY_DELETE_ENDPOINT=${CHAT_HISTORY_DELETE_ENDPOINT} + - APP_CHAT_HISTORY_GET_ENDPOINT=${CHAT_HISTORY_GET_ENDPOINT} + - APP_PROMPT_SERVICE_GET_ENDPOINT=${PROMPT_SERVICE_GET_ENDPOINT} + - APP_PROMPT_SERVICE_CREATE_ENDPOINT=${PROMPT_SERVICE_CREATE_ENDPOINT} + - APP_KEYCLOAK_SERVICE_ENDPOINT=${KEYCLOAK_SERVICE_ENDPOINT} 
+ ipc: host + restart: always +networks: + default: + driver: bridge \ No newline at end of file diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml new file mode 100644 index 0000000000..02ab491e5f --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/compose_tgi_remote.yaml @@ -0,0 +1,354 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +version: "3.3" +services: + redis-vector-db: + image: redis/redis-stack:7.2.0-v9 + container_name: redis-vector-db + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + ports: + - "6379:6379" + - "8001:8001" + dataprep-redis-service: + image: ${REGISTRY:-opea}/dataprep-redis:${TAG:-latest} + container_name: dataprep-redis-server + depends_on: + - redis-vector-db + ports: + - "6007:6007" + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + tei-embedding-service: + image: ghcr.io/huggingface/tei-gaudi:latest + container_name: tei-embedding-gaudi-server + ports: + - "6006:80" + volumes: + - "./data_embedding:/data" + shm_size: 1g + runtime: habana + cap_add: + - SYS_NICE + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 + command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate + embedding: + image: ${REGISTRY:-opea}/embedding-tei:${TAG:-latest} + container_name: embedding-tei-server + depends_on: + - tei-embedding-service + ports: + - "6000:6000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + 
LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-embedding-service" + restart: unless-stopped + retriever: + image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} + container_name: retriever-redis-server + depends_on: + - redis-vector-db + ports: + - "7000:7000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + REDIS_URL: ${REDIS_URL} + INDEX_NAME: ${INDEX_NAME} + TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + restart: unless-stopped + tei-reranking-service: + image: ghcr.io/huggingface/tei-gaudi:latest + container_name: tei-reranking-server + ports: + - "8808:80" + volumes: + - "./data_tei:/data" + shm_size: 1g + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 + runtime: habana + cap_add: + - SYS_NICE + ipc: host + command: --model-id ${RERANK_MODEL_ID} --auto-truncate + reranking: + image: ${REGISTRY:-opea}/reranking-tei:${TAG:-latest} + container_name: reranking-tei-gaudi-server + depends_on: + - tei-reranking-service + ports: + - "8000:8000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TEI_RERANKING_ENDPOINT: ${TEI_RERANKING_ENDPOINT} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-reranking-service" + restart: unless-stopped + llm: + image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} + container_name: llm-tgi-gaudi-server + depends_on: + - tgi_service + ports: + - "9000:9000" + ipc: host + 
environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CHATQNA} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + MODEL_CONFIGS: ${MODEL_CONFIGS} + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + + restart: unless-stopped + chatqna-gaudi-backend-server: + image: ${REGISTRY:-opea}/chatqna:${TAG:-latest} + container_name: chatqna-gaudi-backend-server + depends_on: + - redis-vector-db + - tei-embedding-service + - embedding + - retriever + - tei-reranking-service + - reranking + - tgi_service + - llm + ports: + - "8888:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + EMBEDDING_SERVICE_HOST_IP: ${EMBEDDING_SERVICE_HOST_IP} + RETRIEVER_SERVICE_HOST_IP: ${RETRIEVER_SERVICE_HOST_IP} + RERANK_SERVICE_HOST_IP: ${RERANK_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CHATQNA} + ipc: host + restart: always + llm_codegen: + image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest} + container_name: llm-tgi-server-codegen + ports: + - "9001:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_CODEGEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + codegen-gaudi-backend-server: + image: ${REGISTRY:-opea}/codegen:${TAG:-latest} + container_name: codegen-gaudi-backend-server + depends_on: + - llm + ports: + - 
"7778:7778" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_CODEGEN} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_CODEGEN} + ipc: host + restart: always + llm_faqgen: + image: ${REGISTRY:-opea}/llm-faqgen-tgi:${TAG:-latest} + container_name: llm-faqgen-server + ports: + - "9002:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_FAQGEN} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + faqgen-gaudi-backend-server: + image: ${REGISTRY:-opea}/faqgen:${TAG:-latest} + container_name: faqgen-gaudi-backend-server + depends_on: + - llm_faqgen + ports: + - "8889:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_FAQGEN} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_FAQGEN} + ipc: host + restart: always + llm_docsum_server: + image: ${REGISTRY:-opea}/llm-docsum-tgi:${TAG:-latest} + container_name: llm-docsum-server + ports: + - "9003:9000" + ipc: host + environment: + no_proxy: ${no_proxy} + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT_DOCSUM} + HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} + LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY} + LANGCHAIN_TRACING_V2: ${LANGCHAIN_TRACING_V2} + LANGCHAIN_PROJECT: "opea-llm-service" + CLIENTID: ${CLIENTID} + CLIENT_SECRET: ${CLIENT_SECRET} + TOKEN_URL: ${TOKEN_URL} + restart: unless-stopped + docsum-gaudi-backend-server: + image: 
${REGISTRY:-opea}/docsum:${TAG:-latest} + container_name: docsum-gaudi-backend-server + depends_on: + - tgi_service + - llm_docsum_server + ports: + - "8890:8888" + environment: + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + http_proxy: ${http_proxy} + LLM_SERVICE_PORT: ${LLM_SERVICE_HOST_PORT_DOCSUM} + MEGA_SERVICE_HOST_IP: ${MEGA_SERVICE_HOST_IP} + LLM_SERVICE_HOST_IP: ${LLM_SERVICE_HOST_IP_DOCSUM} + ipc: host + restart: always + mongo: + image: mongo:7.0.11 + container_name: mongodb + ports: + - 27017:27017 + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + command: mongod --quiet --logpath /dev/null + chathistory-mongo: + image: ${REGISTRY:-opea}/chathistory-mongo-server:${TAG:-latest} + container_name: chathistory-mongo-server + ports: + - "6012:6012" + ipc: host + environment: + http_proxy: ${http_proxy} + no_proxy: ${no_proxy} + https_proxy: ${https_proxy} + MONGO_HOST: ${MONGO_HOST} + MONGO_PORT: ${MONGO_PORT} + COLLECTION_NAME: ${COLLECTION_NAME} + restart: unless-stopped + promptregistry-mongo: + image: ${REGISTRY:-opea}/promptregistry-mongo-server:${TAG:-latest} + container_name: promptregistry-mongo-server + ports: + - "6018:6018" + ipc: host + environment: + http_proxy: ${http_proxy} + https_proxy: ${https_proxy} + no_proxy: ${no_proxy} + MONGO_HOST: ${MONGO_HOST} + MONGO_PORT: ${MONGO_PORT} + COLLECTION_NAME: ${PROMPT_COLLECTION_NAME} + restart: unless-stopped + keycloak: + image: quay.io/keycloak/keycloak:25.0.2 + container_name: keycloak-server + ports: + - 8081:8080 + environment: + - KEYCLOAK_ADMIN=admin + - KEYCLOAK_ADMIN_PASSWORD=admin + - KC_PROXY=edge + ipc: host + command: start-dev + restart: always + productivity-suite-gaudi-react-ui-server: + image: ${REGISTRY:-opea}/productivity-suite-react-ui-server:${TAG:-latest} + container_name: productivity-suite-gaudi-react-ui-server + ports: + - "5174:80" + environment: + - 
APP_BACKEND_SERVICE_ENDPOINT_CHATQNA=${BACKEND_SERVICE_ENDPOINT_CHATQNA} + - APP_BACKEND_SERVICE_ENDPOINT_CODEGEN=${BACKEND_SERVICE_ENDPOINT_CODEGEN} + - APP_BACKEND_SERVICE_ENDPOINT_DOCSUM=${BACKEND_SERVICE_ENDPOINT_DOCSUM} + - APP_BACKEND_SERVICE_ENDPOINT_FAQGEN=${BACKEND_SERVICE_ENDPOINT_FAQGEN} + - APP_DATAPREP_SERVICE_ENDPOINT=${DATAPREP_SERVICE_ENDPOINT} + - APP_DATAPREP_GET_FILE_ENDPOINT=${DATAPREP_GET_FILE_ENDPOINT} + - APP_DATAPREP_DELETE_FILE_ENDPOINT=${DATAPREP_DELETE_FILE_ENDPOINT} + - APP_CHAT_HISTORY_CREATE_ENDPOINT=${CHAT_HISTORY_CREATE_ENDPOINT} + - APP_CHAT_HISTORY_DELETE_ENDPOINT=${CHAT_HISTORY_DELETE_ENDPOINT} + - APP_CHAT_HISTORY_GET_ENDPOINT=${CHAT_HISTORY_GET_ENDPOINT} + - APP_PROMPT_SERVICE_GET_ENDPOINT=${PROMPT_SERVICE_GET_ENDPOINT} + - APP_PROMPT_SERVICE_CREATE_ENDPOINT=${PROMPT_SERVICE_CREATE_ENDPOINT} + - APP_KEYCLOAK_SERVICE_ENDPOINT=${KEYCLOAK_SERVICE_ENDPOINT} + ipc: host + restart: always +networks: + default: + driver: bridge \ No newline at end of file diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env.sh b/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env.sh new file mode 100644 index 0000000000..0139b532af --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env.sh @@ -0,0 +1,48 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +export MONGO_HOST=${host_ip} +export MONGO_PORT=27017 +export DB_NAME="opea" +export COLLECTION_NAME="Conversations" +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export RERANK_MODEL_ID="BAAI/bge-reranker-base" +export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3" +export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf" +export TEI_EMBEDDING_ENDPOINT="http://${host_ip}:6006" +export TEI_RERANKING_ENDPOINT="http://${host_ip}:8808" +export TGI_LLM_ENDPOINT="http://${host_ip}:9009" +export REDIS_URL="redis://${host_ip}:6379" +export INDEX_NAME="rag-redis" +export HUGGINGFACEHUB_API_TOKEN=${HUGGINGFACEHUB_API_TOKEN} +export 
MEGA_SERVICE_HOST_IP=${host_ip} +export EMBEDDING_SERVICE_HOST_IP=${host_ip} +export RETRIEVER_SERVICE_HOST_IP=${host_ip} +export RERANK_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP_DOCSUM=${host_ip} +export LLM_SERVICE_HOST_IP_FAQGEN=${host_ip} +export LLM_SERVICE_HOST_IP_CODEGEN=${host_ip} +export LLM_SERVICE_HOST_IP_CHATQNA=${host_ip} +export TGI_LLM_ENDPOINT_CHATQNA="http://${host_ip}:9009" +export TGI_LLM_ENDPOINT_CODEGEN="http://${host_ip}:8028" +export TGI_LLM_ENDPOINT_FAQGEN="http://${host_ip}:9009" +export TGI_LLM_ENDPOINT_DOCSUM="http://${host_ip}:9009" +export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna" +export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6009/v1/dataprep/delete_file" +export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen" +export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen" +export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" +export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" +export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" +export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete" +export CHAT_HISTORY_GET_ENDPOINT="http://${host_ip}:6012/v1/chathistory/get" +export PROMPT_SERVICE_GET_ENDPOINT="http://${host_ip}:6018/v1/prompt/get" +export PROMPT_SERVICE_CREATE_ENDPOINT="http://${host_ip}:6018/v1/prompt/create" +export KEYCLOAK_SERVICE_ENDPOINT="http://${host_ip}:8080" +export LLM_SERVICE_HOST_PORT_FAQGEN=9002 +export LLM_SERVICE_HOST_PORT_CODEGEN=9001 +export LLM_SERVICE_HOST_PORT_DOCSUM=9003 +export PROMPT_COLLECTION_NAME="prompt" diff --git a/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env_remote.sh 
b/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env_remote.sh new file mode 100644 index 0000000000..95e2402448 --- /dev/null +++ b/ProductivitySuite/docker_compose/intel/hpu/gaudi/set_env_remote.sh @@ -0,0 +1,57 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +#!/bin/bash + +# Check if the model_configs.json file exists +if [ -f model_configs.json ]; then + # If the file exists, set the MODEL_CONFIGS environment variable using the content of the file + export MODEL_CONFIGS=$(jq -c . model_configs.json) +fi +export MONGO_HOST=${host_ip} +export MONGO_PORT=27017 +export DB_NAME="opea" +export COLLECTION_NAME="Conversations" +export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5" +export RERANK_MODEL_ID="BAAI/bge-reranker-base" +export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct" +export LLM_MODEL_ID_CODEGEN="meta-llama/CodeLlama-7b-hf" +export TEI_EMBEDDING_ENDPOINT="${remote_host}/${embedding_endpoint}" +export TEI_RERANKING_ENDPOINT="${remote_host}/${reranking_endpoint}" +export TGI_LLM_ENDPOINT="${remote_host}/${tgi_endpoint}" +export REDIS_URL="redis://${host_ip}:6379" +export INDEX_NAME="rag-redis" +export HUGGINGFACEHUB_API_TOKEN=${hf_api_token} +export MEGA_SERVICE_HOST_IP=${host_ip} +export EMBEDDING_SERVICE_HOST_IP=${host_ip} +export RETRIEVER_SERVICE_HOST_IP=${host_ip} +export RERANK_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP=${host_ip} +export LLM_SERVICE_HOST_IP_DOCSUM=${host_ip} +export LLM_SERVICE_HOST_IP_FAQGEN=${host_ip} +export LLM_SERVICE_HOST_IP_CODEGEN=${host_ip} +export LLM_SERVICE_HOST_IP_CHATQNA=${host_ip} +export TGI_LLM_ENDPOINT_CHATQNA="${remote_host}/${tgi_endpoint}" +export TGI_LLM_ENDPOINT_CODEGEN="${remote_host}/${tgi_endpoint}" +export TGI_LLM_ENDPOINT_FAQGEN="${remote_host}/${tgi_endpoint}" +export TGI_LLM_ENDPOINT_DOCSUM="${remote_host}/${tgi_endpoint}" +export BACKEND_SERVICE_ENDPOINT_CHATQNA="http://${host_ip}:8888/v1/chatqna" +export 
DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file" +export BACKEND_SERVICE_ENDPOINT_FAQGEN="http://${host_ip}:8889/v1/faqgen" +export BACKEND_SERVICE_ENDPOINT_CODEGEN="http://${host_ip}:7778/v1/codegen" +export BACKEND_SERVICE_ENDPOINT_DOCSUM="http://${host_ip}:8890/v1/docsum" +export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" +export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" +export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" +export CHAT_HISTORY_CREATE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/create" +export CHAT_HISTORY_DELETE_ENDPOINT="http://${host_ip}:6012/v1/chathistory/delete" +export CHAT_HISTORY_GET_ENDPOINT="http://${host_ip}:6012/v1/chathistory/get" +export PROMPT_SERVICE_GET_ENDPOINT="http://${host_ip}:6018/v1/prompt/get" +export PROMPT_SERVICE_CREATE_ENDPOINT="http://${host_ip}:6018/v1/prompt/create" +export KEYCLOAK_SERVICE_ENDPOINT="http://${host_ip}:8080" +export LLM_SERVICE_HOST_PORT_FAQGEN=9002 +export LLM_SERVICE_HOST_PORT_CODEGEN=9001 +export LLM_SERVICE_HOST_PORT_DOCSUM=9003 +export PROMPT_COLLECTION_NAME="prompt" +export CLIENTID=${clientid} +export CLIENT_SECRET=${client_secret} +export TOKEN_URL=${token_url} \ No newline at end of file From 8ec0f6ac4d88d19824a49f1233fd5dfd79d1d1f6 Mon Sep 17 00:00:00 2001 From: sgurunat Date: Tue, 29 Oct 2024 12:44:53 +0000 Subject: [PATCH 07/91] ProductivitySuite UI: Update names of ChatQnA, CodeGen, DocSum to Digital Assistant, Code Generator, Content Summarizer respectively --- ProductivitySuite/ui/react/src/App.tsx | 10 +++++----- .../ui/react/src/components/CodeGen/CodeGen.tsx | 2 +- .../ui/react/src/components/DocSum/DocSum.tsx | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ProductivitySuite/ui/react/src/App.tsx b/ProductivitySuite/ui/react/src/App.tsx index c12ee1d8fa..9f0fdee957 100644 --- a/ProductivitySuite/ui/react/src/App.tsx +++ 
b/ProductivitySuite/ui/react/src/App.tsx @@ -18,12 +18,12 @@ import { useAppDispatch } from "./redux/store"; import { setUser } from "./redux/User/userSlice"; import { useEffect } from "react"; -const title = "Chat QnA" +const title = "Digital Assistant" const navList: SidebarNavList = [ - { icon: IconMessages, label: "Chat Qna", path: "/", children: }, - { icon: IconCode, label: "CodeGen", path: "/codegen", children: }, - { icon: IconFileTextAi, label: "DocSum", path: "/docsum", children: }, - { icon: IconFileInfo, label: "FaqGen", path: "/faqgen", children: }, + { icon: IconMessages, label: "Digital Assistant", path: "/", children: }, + { icon: IconCode, label: "Code Generator", path: "/codegen", children: }, + { icon: IconFileTextAi, label: "Content Summarizer", path: "/docsum", children: }, + { icon: IconFileInfo, label: "Faq Generator", path: "/faqgen", children: }, { icon: IconDatabaseCog, label: "Data Management", path: "/data-management", children: } ] diff --git a/ProductivitySuite/ui/react/src/components/CodeGen/CodeGen.tsx b/ProductivitySuite/ui/react/src/components/CodeGen/CodeGen.tsx index 29c96f61cb..1811b1bf0d 100644 --- a/ProductivitySuite/ui/react/src/components/CodeGen/CodeGen.tsx +++ b/ProductivitySuite/ui/react/src/components/CodeGen/CodeGen.tsx @@ -97,7 +97,7 @@ const CodeGen = () => {
- CodeGen + Code Generator
diff --git a/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx b/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx index 9e7472c658..30591bd55c 100644 --- a/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx +++ b/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx @@ -106,7 +106,7 @@ const DocSum = () => {
- Doc Summary + Content Summarizer
Please upload file or paste content for summarization. From 6216b5a3e5e0b4e643ddf04b9082e221767da2d2 Mon Sep 17 00:00:00 2001 From: sgurunat Date: Tue, 29 Oct 2024 12:51:06 +0000 Subject: [PATCH 08/91] ProductivitySuite UI: Update Docsum to have vertical scroll bar if content exceeds the window height --- .../ui/react/src/components/DocSum/DocSum.tsx | 16 +++++++++++++++- .../src/components/DocSum/docSum.module.scss | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx b/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx index 30591bd55c..848b335bec 100644 --- a/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx +++ b/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx @@ -16,6 +16,17 @@ const DocSum = () => { const [value, setValue] = useState(''); const [fileContent, setFileContent] = useState(''); const [response, setResponse] = useState(''); + + let messagesEnd:HTMLDivElement; + + const scrollToView = () => { + if (messagesEnd) { + messagesEnd.scrollTop = messagesEnd.scrollHeight; + } + }; + useEffect(()=>{ + scrollToView() + },[response]) useEffect(() => { if(isFile){ @@ -139,7 +150,10 @@ const DocSum = () => {
{response && ( -
+
{ + if(el) + messagesEnd = el; + }}>
)} diff --git a/ProductivitySuite/ui/react/src/components/DocSum/docSum.module.scss b/ProductivitySuite/ui/react/src/components/DocSum/docSum.module.scss index 399e979391..a5061d8151 100644 --- a/ProductivitySuite/ui/react/src/components/DocSum/docSum.module.scss +++ b/ProductivitySuite/ui/react/src/components/DocSum/docSum.module.scss @@ -38,6 +38,7 @@ } } .docSumResult { + overflow-y: auto; } } } From a6e4a7dd727d6d340878fca6aafe0b23b9abdf46 Mon Sep 17 00:00:00 2001 From: sgurunat Date: Tue, 29 Oct 2024 12:56:19 +0000 Subject: [PATCH 09/91] ProductivitySuite UI: Remove <|eot_id|> string from the Chat, Docsum and Faqgen response --- .../ui/react/src/components/DocSum/DocSum.tsx | 16 +++++----------- .../ui/react/src/components/FaqGen/FaqGen.tsx | 2 +- .../src/redux/Conversation/ConversationSlice.ts | 2 +- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx b/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx index 848b335bec..8f2eff7e8b 100644 --- a/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx +++ b/ProductivitySuite/ui/react/src/components/DocSum/DocSum.tsx @@ -83,17 +83,11 @@ const DocSum = () => { onmessage(msg) { if (msg?.data != "[DONE]") { try { - const res = JSON.parse(msg.data) - const logs = res.ops; - logs.forEach((log: { op: string; path: string; value: string }) => { - if (log.op === "add") { - if ( - log.value !== "" && log.path.endsWith("/streamed_output/-") && log.path.length > "/streamed_output/-".length - ) { - setResponse(prev=>prev+log.value); - } - } - }); + const match = msg.data.match(/b'([^']*)'/); + if (match && match[1] != "") { + const extractedText = match[1]; + setResponse(prev => (prev + extractedText.replace("<|eot_id|>", "").replace(/\\n/g, "\n"))); + } } catch (e) { console.log("something wrong in msg", e); throw e; diff --git a/ProductivitySuite/ui/react/src/components/FaqGen/FaqGen.tsx 
b/ProductivitySuite/ui/react/src/components/FaqGen/FaqGen.tsx index ca731cbf8b..cbb74d4ad2 100644 --- a/ProductivitySuite/ui/react/src/components/FaqGen/FaqGen.tsx +++ b/ProductivitySuite/ui/react/src/components/FaqGen/FaqGen.tsx @@ -90,7 +90,7 @@ const FaqGen = () => { if ( log.value !== "" && log.path.endsWith("/streamed_output/-") && log.path.length > "/streamed_output/-".length ) { - setResponse(prev=>prev+log.value); + setResponse(prev => prev + log.value.replace("<|eot_id|>", "").replace(/\\n/g, "\n")); } } }); diff --git a/ProductivitySuite/ui/react/src/redux/Conversation/ConversationSlice.ts b/ProductivitySuite/ui/react/src/redux/Conversation/ConversationSlice.ts index ea7617d82c..e1fda0676d 100644 --- a/ProductivitySuite/ui/react/src/redux/Conversation/ConversationSlice.ts +++ b/ProductivitySuite/ui/react/src/redux/Conversation/ConversationSlice.ts @@ -271,7 +271,7 @@ export const doConversation = (conversationRequest: ConversationRequest) => { const match = msg.data.match(/b'([^']*)'/); if (match && match[1] != "") { const extractedText = match[1]; - result += extractedText; + result += extractedText.replace("<|eot_id|>","").replace(/\\n/g, "\n"); store.dispatch(setOnGoingResult(result)); } } catch (e) { From b999077319ad10205a057662c3a87cdc9aa61567 Mon Sep 17 00:00:00 2001 From: sgurunat Date: Tue, 29 Oct 2024 13:01:07 +0000 Subject: [PATCH 10/91] ProductivitySuite UI: Update contextWrapper and contextTitle width to adjust to different screen sizes --- .../ui/react/src/styles/components/context.module.scss | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ProductivitySuite/ui/react/src/styles/components/context.module.scss b/ProductivitySuite/ui/react/src/styles/components/context.module.scss index e2d3caafaa..cac58cdfdf 100644 --- a/ProductivitySuite/ui/react/src/styles/components/context.module.scss +++ b/ProductivitySuite/ui/react/src/styles/components/context.module.scss @@ -15,7 +15,7 @@ "settings"; grid-template-columns: auto; - 
grid-template-rows: 70px 1fr 175px; + grid-template-rows: 70px 1fr 240px; .contextTitle { grid-area: title; @@ -30,6 +30,7 @@ width: 100%; height: 60px; border-bottom: 1px solid light-dark(var(--mantine-color-gray-3), var(--mantine-color-dark-7)); + font-size: 1.3vw !important; } .contextList { From cf96dcc2d34b2e63c8dd3a8af886d58789d9b335 Mon Sep 17 00:00:00 2001 From: sgurunat Date: Tue, 29 Oct 2024 13:06:03 +0000 Subject: [PATCH 11/91] ProductivitySuite UI: Show system prompt input field always to edit in the chatqna prompt section --- .../components/Conversation/Conversation.tsx | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/ProductivitySuite/ui/react/src/components/Conversation/Conversation.tsx b/ProductivitySuite/ui/react/src/components/Conversation/Conversation.tsx index e772248f39..e6455c85eb 100644 --- a/ProductivitySuite/ui/react/src/components/Conversation/Conversation.tsx +++ b/ProductivitySuite/ui/react/src/components/Conversation/Conversation.tsx @@ -4,8 +4,8 @@ import { KeyboardEventHandler, SyntheticEvent, useEffect, useRef, useState } from 'react' import styleClasses from "./conversation.module.scss" import { ActionIcon, Group, Textarea, Title, Tooltip, rem } from '@mantine/core' -import { IconArrowDown, IconArrowRight, IconArrowUp, IconMessagePlus } from '@tabler/icons-react' -import { conversationSelector, doConversation, getAllConversations, newConversation, setSystemPrompt } from '../../redux/Conversation/ConversationSlice' +import { IconArrowDown, IconArrowRight, IconMessagePlus, IconPencil } from '@tabler/icons-react' +import { conversationSelector, doConversation, getAllConversations, newConversation, setSystemPrompt} from '../../redux/Conversation/ConversationSlice' import { ConversationMessage } from '../Message/conversationMessage' import { useAppDispatch, useAppSelector } from '../../redux/store' import { Message, MessageRole } from '../../redux/Conversation/Conversation' @@ -21,7 +21,7 @@ type 
ConversationProps = { const Conversation = ({ title }: ConversationProps) => { const [prompt, setPrompt] = useState("") - const [updateSystemPrompt, setUpdateSystemPrompt] = useState(false) + const [updateSystemPrompt, setUpdateSystemPrompt] = useState(true) const dispatch = useAppDispatch(); const promptInputRef = useRef(null) @@ -53,8 +53,11 @@ const Conversation = ({ title }: ConversationProps) => { // return { role: message.role, content: message.content } // }) // } - - messages = [systemPromptObject, ...(selectedConversationHistory)] + if(selectedConversationHistory.length==0){ + messages = [systemPromptObject, ...(selectedConversationHistory)] + }else{ + messages = [...(selectedConversationHistory)] + } doConversation({ conversationId: selectedConversationId, @@ -111,7 +114,7 @@ const Conversation = ({ title }: ConversationProps) => {
-
+
{selectedConversation?.first_query || ""} @@ -151,6 +154,7 @@ const Conversation = ({ title }: ConversationProps) => {