Make embedding run on CPU for aligning with Gaudi performance benchmark (opea-project#1057)

Signed-off-by: lvliang-intel <[email protected]>
Co-authored-by: chen, suyue <[email protected]>
lvliang-intel and chensuyue authored Nov 7, 2024
1 parent 1da44d9 commit 4635a92
Showing 5 changed files with 10 additions and 50 deletions.
ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml (12 changes: 2 additions & 10 deletions)

@@ -26,25 +26,17 @@ services:
       TEI_ENDPOINT: http://tei-embedding-service:80
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
   tei-embedding-service:
-    image: ghcr.io/huggingface/tei-gaudi:latest
+    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-gaudi-server
     ports:
       - "8090:80"
     volumes:
       - "./data:/data"
-    runtime: habana
-    cap_add:
-      - SYS_NICE
-    ipc: host
+    shm_size: 1g
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      HABANA_VISIBLE_DEVICES: all
-      OMPI_MCA_btl_vader_single_copy_mechanism: none
-      MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml (12 changes: 2 additions & 10 deletions)

@@ -65,25 +65,17 @@ services:
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   tei-embedding-service:
-    image: ghcr.io/huggingface/tei-gaudi:latest
+    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-gaudi-server
     ports:
       - "8090:80"
     volumes:
       - "./data:/data"
-    runtime: habana
-    cap_add:
-      - SYS_NICE
-    ipc: host
+    shm_size: 1g
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      HABANA_VISIBLE_DEVICES: all
-      OMPI_MCA_btl_vader_single_copy_mechanism: none
-      MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml (12 changes: 2 additions & 10 deletions)

@@ -26,25 +26,17 @@ services:
       TEI_ENDPOINT: http://tei-embedding-service:80
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
   tei-embedding-service:
-    image: ghcr.io/huggingface/tei-gaudi:latest
+    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-gaudi-server
     ports:
       - "8090:80"
     volumes:
       - "./data:/data"
-    runtime: habana
-    cap_add:
-      - SYS_NICE
-    ipc: host
+    shm_size: 1g
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      HABANA_VISIBLE_DEVICES: all
-      OMPI_MCA_btl_vader_single_copy_mechanism: none
-      MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml (12 changes: 2 additions & 10 deletions)

@@ -26,25 +26,17 @@ services:
       TEI_ENDPOINT: http://tei-embedding-service:80
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
   tei-embedding-service:
-    image: ghcr.io/huggingface/tei-gaudi:latest
+    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-gaudi-server
     ports:
       - "8090:80"
     volumes:
       - "./data:/data"
-    runtime: habana
-    cap_add:
-      - SYS_NICE
-    ipc: host
+    shm_size: 1g
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      HABANA_VISIBLE_DEVICES: all
-      OMPI_MCA_btl_vader_single_copy_mechanism: none
-      MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml (12 changes: 2 additions & 10 deletions)

@@ -26,25 +26,17 @@ services:
       TEI_ENDPOINT: http://tei-embedding-service:80
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
   tei-embedding-service:
-    image: ghcr.io/huggingface/tei-gaudi:latest
+    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
     container_name: tei-embedding-gaudi-server
     ports:
      - "8090:80"
     volumes:
       - "./data:/data"
-    runtime: habana
-    cap_add:
-      - SYS_NICE
-    ipc: host
+    shm_size: 1g
     environment:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      HF_HUB_DISABLE_PROGRESS_BARS: 1
-      HF_HUB_ENABLE_HF_TRANSFER: 0
-      HABANA_VISIBLE_DEVICES: all
-      OMPI_MCA_btl_vader_single_copy_mechanism: none
-      MAX_WARMUP_SEQUENCE_LENGTH: 512
     command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
   retriever:
     image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
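
The same edit runs through all five compose files: the Habana-specific tei-gaudi image and its runtime settings (the habana runtime, the SYS_NICE capability, host IPC, and the HABANA/OMPI/warm-up environment variables) are dropped, and the embedding service is served by the stock CPU build of text-embeddings-inference instead. Reconstructed from the added and unchanged lines above, the resulting tei-embedding-service definition in each file should look roughly like the sketch below; indentation and key order are assumed to match the surrounding compose files.

  tei-embedding-service:
    image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5   # CPU build of TEI replaces tei-gaudi:latest
    container_name: tei-embedding-gaudi-server
    ports:
      - "8090:80"            # host port 8090 maps to TEI's HTTP port 80 inside the container
    volumes:
      - "./data:/data"       # model data persisted on the host
    shm_size: 1g             # added in this commit in place of the Habana runtime settings
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
    command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate

Only the embedding container changes here; the other services defined in these files keep their existing images and settings, so the embedding step alone moves to CPU for comparison against the Gaudi performance benchmark.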
