From cbe008afd8b05d9bc8c4cac8155913ad48749c4b Mon Sep 17 00:00:00 2001
From: Cedar
Date: Fri, 15 Nov 2024 12:03:01 -0800
Subject: [PATCH] reduce max_completion_tokens to 15 to speed up generation

---
 app_tests/integration_tests/llm/cpu_llm_server_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app_tests/integration_tests/llm/cpu_llm_server_test.py b/app_tests/integration_tests/llm/cpu_llm_server_test.py
index c1ed571f..a965bcf8 100644
--- a/app_tests/integration_tests/llm/cpu_llm_server_test.py
+++ b/app_tests/integration_tests/llm/cpu_llm_server_test.py
@@ -37,7 +37,7 @@ def do_generate(prompt, port):
     # Create a GenerateReqInput-like structure
     data = {
         "text": prompt,
-        "sampling_params": {"max_completion_tokens": 50, "temperature": 0.7},
+        "sampling_params": {"max_completion_tokens": 15, "temperature": 0.7},
         "rid": uuid.uuid4().hex,
         "return_logprob": False,
         "logprob_start_len": -1,
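
For context, a minimal sketch of how a request helper like the patched `do_generate` might send this payload to the server under test. The endpoint route, use of `requests`, and return handling are assumptions for illustration; only the payload fields and the function signature come from the hunk above:

```python
import uuid

import requests  # assumed HTTP client; not shown in the patch


def do_generate(prompt, port):
    # Build the GenerateReqInput-like payload, mirroring the patched test.
    # max_completion_tokens is capped at 15 to keep CPU generation fast.
    data = {
        "text": prompt,
        "sampling_params": {"max_completion_tokens": 15, "temperature": 0.7},
        "rid": uuid.uuid4().hex,
        "return_logprob": False,
        "logprob_start_len": -1,
    }
    # Hypothetical endpoint: the real URL/route is not visible in this hunk.
    response = requests.post(f"http://localhost:{port}/generate", json=data)
    response.raise_for_status()
    return response.text
```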