diff --git a/app_tests/benchmark_tests/llm/conftest.py b/app_tests/benchmark_tests/llm/conftest.py
index aac66ca0f..cc354b7eb 100644
--- a/app_tests/benchmark_tests/llm/conftest.py
+++ b/app_tests/benchmark_tests/llm/conftest.py
@@ -21,7 +21,6 @@ def pre_process_model(request, tmp_path_factory):
     settings = request.param["settings"]
     batch_sizes = request.param["batch_sizes"]
 
-    tmp_dir = tmp_path_factory.mktemp("llm_benchmark_test")
     mlir_path = tmp_dir / "model.mlir"
     config_path = tmp_dir / "config.json"
     vmfb_path = tmp_dir / "model.vmfb"
diff --git a/app_tests/benchmark_tests/llm/sglang_benchmark_test.py b/app_tests/benchmark_tests/llm/sglang_benchmark_test.py
index 8027fcea7..0de775795 100644
--- a/app_tests/benchmark_tests/llm/sglang_benchmark_test.py
+++ b/app_tests/benchmark_tests/llm/sglang_benchmark_test.py
@@ -38,7 +38,19 @@
 TOKENIZER_DIR = Path("/data/llama3.1/8b/")
 
 
-@pytest.mark.parametrize("request_rate", [1, 2, 4, 8, 16, 32])
+def log_jsonl_result(file_path):
+    with open(file_path, "r") as file:
+        json_string = file.readline().strip()
+
+    json_data = json.loads(json_string)
+    for key, val in json_data.items():
+        logger.info(f"{key.upper()}: {val}")
+
+
+@pytest.mark.parametrize(
+    "request_rate",
+    [1, 2, 4, 8, 16, 32],
+)
 @pytest.mark.parametrize(
     "pre_process_model",
     [
@@ -101,6 +113,8 @@ def test_sglang_benchmark_server(request_rate, pre_process_model):
         benchmark_process.join()
 
         logger.info(f"Benchmark run completed in {str(time.time() - start)} seconds")
+        logger.info("======== RESULTS ========")
+        log_jsonl_result(benchmark_args.output_file)
     except Exception as e:
         logger.info(e)
 
diff --git a/app_tests/benchmark_tests/llm/utils.py b/app_tests/benchmark_tests/llm/utils.py
index c217720cb..55b01da04 100644
--- a/app_tests/benchmark_tests/llm/utils.py
+++ b/app_tests/benchmark_tests/llm/utils.py
@@ -37,6 +37,7 @@ def as_namespace(self) -> Namespace:
             dataset_name="sharegpt",
             random_input_len=None,
             random_output_len=None,
+            random_range_ratio=0.0,
             dataset_path="",
             sharegpt_output_len=None,
             multi=False,