From 49f5b01812c362f2d9735d108cc15586e9ddfa05 Mon Sep 17 00:00:00 2001 From: Travis Johnson Date: Thu, 8 Aug 2024 15:43:20 -0600 Subject: [PATCH] feat: allow long max seq length Signed-off-by: Travis Johnson --- Dockerfile.ubi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index 53a6c90ecfbf..39ee4a63c84a 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -183,6 +183,10 @@ RUN --mount=type=cache,target=/root/.cache/pip \ ENV HF_HUB_OFFLINE=1 \ PORT=8000 \ HOME=/home/vllm \ + # Allow requested max length to exceed what is extracted from the + # config.json + # see: https://github.com/vllm-project/vllm/pull/7080 + VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \ VLLM_USAGE_SOURCE=production-docker-image \ VLLM_WORKER_MULTIPROC_METHOD=fork