From 944ae4794812aca806c3dad4c44392e12a167361 Mon Sep 17 00:00:00 2001
From: "Wang, Kai Lawrence" <109344418+wangkl2@users.noreply.github.com>
Date: Wed, 6 Nov 2024 10:22:21 +0800
Subject: [PATCH] [ChatQnA] Fix the service connection issue on GPU and modify
 the emb backend (#1059)

Signed-off-by: Wang, Kai Lawrence
---
 ChatQnA/docker_compose/nvidia/gpu/README.md |  6 ++--
 .../docker_compose/nvidia/gpu/compose.yaml  | 31 +++++++++----------
 2 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/ChatQnA/docker_compose/nvidia/gpu/README.md b/ChatQnA/docker_compose/nvidia/gpu/README.md
index cc8cb7193..31ab0549b 100644
--- a/ChatQnA/docker_compose/nvidia/gpu/README.md
+++ b/ChatQnA/docker_compose/nvidia/gpu/README.md
@@ -97,7 +97,7 @@ To construct the Mega Service, we utilize the [GenAIComps](https://github.com/op
 git clone https://github.com/opea-project/GenAIExamples.git
 cd GenAIExamples/ChatQnA
 docker build --no-cache -t opea/chatqna:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f Dockerfile .
-cd ../../..
+cd ../..
 ```
 
 ### 5. Build UI Docker Image
@@ -107,7 +107,7 @@ Construct the frontend Docker image using the command below:
 ```bash
 cd GenAIExamples/ChatQnA/ui
 docker build --no-cache -t opea/chatqna-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile .
-cd ../../../..
+cd ../../../
 ```
 
 ### 6. Build React UI Docker Image (Optional)
@@ -117,7 +117,7 @@ Construct the frontend Docker image using the command below:
 ```bash
 cd GenAIExamples/ChatQnA/ui
 docker build --no-cache -t opea/chatqna-react-ui:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f ./docker/Dockerfile.react .
-cd ../../../..
+cd ../../..
 ```
 
 ### 7. Build Nginx Docker Image
diff --git a/ChatQnA/docker_compose/nvidia/gpu/compose.yaml b/ChatQnA/docker_compose/nvidia/gpu/compose.yaml
index c35866b10..ba504c2eb 100644
--- a/ChatQnA/docker_compose/nvidia/gpu/compose.yaml
+++ b/ChatQnA/docker_compose/nvidia/gpu/compose.yaml
@@ -20,10 +20,10 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      REDIS_URL: ${REDIS_URL}
-      REDIS_HOST: ${REDIS_HOST}
+      REDIS_URL: redis://redis-vector-db:6379
+      REDIS_HOST: redis-vector-db
       INDEX_NAME: ${INDEX_NAME}
-      TEI_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
+      TEI_ENDPOINT: http://tei-embedding-service:80
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
   tei-embedding-service:
     image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
@@ -39,13 +39,6 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
     command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
     container_name: retriever-redis-server
@@ -58,12 +51,13 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      REDIS_URL: ${REDIS_URL}
+      REDIS_URL: redis://redis-vector-db:6379
+      REDIS_HOST: redis-vector-db
       INDEX_NAME: ${INDEX_NAME}
       TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
     restart: unless-stopped
   tei-reranking-service:
-    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+    image: ghcr.io/huggingface/text-embeddings-inference:1.5
     container_name: tei-reranking-server
     ports:
       - "8808:80"
@@ -123,11 +117,14 @@ services:
       - no_proxy=${no_proxy}
       - https_proxy=${https_proxy}
       - http_proxy=${http_proxy}
-      - MEGA_SERVICE_HOST_IP=${MEGA_SERVICE_HOST_IP}
-      - EMBEDDING_SERVICE_HOST_IP=${EMBEDDING_SERVICE_HOST_IP}
-      - RETRIEVER_SERVICE_HOST_IP=${RETRIEVER_SERVICE_HOST_IP}
-      - RERANK_SERVICE_HOST_IP=${RERANK_SERVICE_HOST_IP}
-      - LLM_SERVICE_HOST_IP=${LLM_SERVICE_HOST_IP}
+      - MEGA_SERVICE_HOST_IP=chaqna-backend-server
+      - EMBEDDING_SERVER_HOST_IP=tei-embedding-service
+      - EMBEDDING_SERVER_PORT=${EMBEDDING_SERVER_PORT:-80}
+      - RETRIEVER_SERVICE_HOST_IP=retriever
+      - RERANK_SERVER_HOST_IP=tei-reranking-service
+      - RERANK_SERVER_PORT=${RERANK_SERVER_PORT:-80}
+      - LLM_SERVER_HOST_IP=tgi-service
+      - LLM_SERVER_PORT=${LLM_SERVER_PORT:-80}
     ipc: host
     restart: always
   chaqna-ui-server:
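
A quick way to sanity-check the rewired connections after applying this patch is to bring the stack up and hit the embedding backend and the mega-service directly. Below is a minimal smoke test, assuming the stack's usual published ports (6006 for tei-embedding-service and 8888 for chaqna-backend-server); both port mappings sit outside the hunks above, so confirm them against your compose.yaml before running.

```bash
# Start the ChatQnA stack defined by the patched compose file.
cd GenAIExamples/ChatQnA/docker_compose/nvidia/gpu
docker compose up -d

# The embedding backend now runs the CPU image of TEI, so it should respond
# even though the GPU reservation was removed. /embed is TEI's standard
# endpoint; port 6006 is an assumption, check your compose.yaml mapping.
curl http://localhost:6006/embed \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "What is Deep Learning?"}'

# End-to-end request through the mega-service; this exercises the
# service-name wiring (tei-embedding-service, retriever, tei-reranking-service,
# tgi-service) introduced above. Port 8888 is likewise an assumption.
curl http://localhost:8888/v1/chatqna \
  -H 'Content-Type: application/json' \
  -d '{"messages": "What is the revenue of Nike in 2023?"}'
```

Because REDIS_URL, REDIS_HOST, and the service host variables are now hard-wired to compose service names rather than host-side environment variables, the containers resolve one another over the compose network's internal DNS, so the deployment no longer breaks when those variables are unset or point at the wrong host IP.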