Commit 255a0ad
Merge remote-tracking branch 'origin/main' into apecloud-main
lynnleelhl committed Jan 25, 2024
2 parents b6a2999 + 3a7dd7e

Showing 232 changed files with 26,165 additions and 6,069 deletions.
63 changes: 63 additions & 0 deletions .buildkite/run-benchmarks.sh
@@ -0,0 +1,63 @@
# This script is run by buildkite to run the benchmarks and upload the results to buildkite

set -ex
set -o pipefail

# cd into parent directory of this file
cd "$(dirname "${BASH_SOURCE[0]}")/.."

(which wget && which curl) || (apt-get update && apt-get install -y wget curl)

# run benchmarks and upload the result to buildkite
python3 benchmarks/benchmark_latency.py 2>&1 | tee benchmark_latency.txt
bench_latency_exit_code=$?

python3 benchmarks/benchmark_throughput.py --input-len 256 --output-len 256 2>&1 | tee benchmark_throughput.txt
bench_throughput_exit_code=$?

python3 -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-2-7b-chat-hf &
server_pid=$!
wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

# wait for server to start, timeout after 600 seconds
timeout 600 bash -c 'until curl localhost:8000/v1/models; do sleep 1; done' || exit 1
python3 benchmarks/benchmark_serving.py \
    --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json \
    --model meta-llama/Llama-2-7b-chat-hf \
    --num-prompts 20 \
    --endpoint /v1/completions \
    --tokenizer meta-llama/Llama-2-7b-chat-hf 2>&1 | tee benchmark_serving.txt
bench_serving_exit_code=$?
kill $server_pid

# write the results into a markdown file
echo "### Latency Benchmarks" >> benchmark_results.md
sed -n '1p' benchmark_latency.txt >> benchmark_results.md # first line
echo "" >> benchmark_results.md
sed -n '$p' benchmark_latency.txt >> benchmark_results.md # last line

echo "### Throughput Benchmarks" >> benchmark_results.md
sed -n '1p' benchmark_throughput.txt >> benchmark_results.md # first line
echo "" >> benchmark_results.md
sed -n '$p' benchmark_throughput.txt >> benchmark_results.md # last line

echo "### Serving Benchmarks" >> benchmark_results.md
sed -n '1p' benchmark_serving.txt >> benchmark_results.md # first line
echo "" >> benchmark_results.md
tail -n 5 benchmark_serving.txt >> benchmark_results.md # last 5 lines

# upload the results to buildkite
/workspace/buildkite-agent annotate --style "info" --context "benchmark-results" < benchmark_results.md

# exit with the exit code of the benchmarks
if [ $bench_latency_exit_code -ne 0 ]; then
exit $bench_latency_exit_code
fi

if [ $bench_throughput_exit_code -ne 0 ]; then
exit $bench_throughput_exit_code
fi

if [ $bench_serving_exit_code -ne 0 ]; then
exit $bench_serving_exit_code
fi
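
For reference, the readiness gate above is just a curl poll against the OpenAI-compatible server's /v1/models route. Below is a minimal Python sketch of the same loop, reusing the script's one-second interval and 600-second budget; the helper is hypothetical and not part of this commit:

# Sketch of the `timeout 600 bash -c 'until curl ...; do sleep 1; done'` gate above.
import time
import urllib.error
import urllib.request

def wait_for_server(url="http://localhost:8000/v1/models", timeout_s=600.0):
    """Poll once per second until the server answers or the time budget runs out."""
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:
                    return True
        except urllib.error.URLError:
            pass  # server not accepting connections yet; retry
        time.sleep(1)
    return False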
51 changes: 51 additions & 0 deletions .buildkite/test-pipeline.yaml
@@ -0,0 +1,51 @@
# In this file, you can add more tests to run either by adding a new step or
# adding a new command to an existing step. See the existing steps for examples
# of the different options. This file will be fed into the Jinja template in
# `test-template.j2` to generate the final pipeline yaml file.

steps:
- label: Regression Test
  command: pytest -v -s test_regression.py
  working_dir: "/vllm-workspace/tests" # optional

- label: AsyncEngine Test
  command: pytest -v -s async_engine

- label: Distributed Test
  command: pytest -v -s test_comm_ops.py
  working_dir: "/vllm-workspace/tests/distributed"
  num_gpus: 2 # only support 1 or 2 for now.

- label: Engine Test
  command: pytest -v -s engine

- label: Entrypoints Test
  command: pytest -v -s entrypoints

- label: Kernels Test
  command: pytest -v -s kernels
  soft_fail: true

- label: Models Test
  commands:
  - pytest -v -s models --forked
  soft_fail: true

- label: Prefix Caching Test
  commands:
  - pytest -v -s prefix_caching

- label: Samplers Test
  command: pytest -v -s samplers --forked

- label: Worker Test
  command: pytest -v -s worker

- label: LoRA Test
  command: pytest -v -s lora

- label: Benchmarks
  working_dir: "/vllm-workspace/.buildkite"
  commands:
  - pip install aiohttp
  - bash run-benchmarks.sh
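
Note that the Benchmarks step installs aiohttp before calling run-benchmarks.sh, presumably because benchmark_serving.py drives its request load through an async HTTP client that the test image does not otherwise ship (an inference from this diff, not something the commit states).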
54 changes: 54 additions & 0 deletions .buildkite/test-template.j2
@@ -0,0 +1,54 @@
{% set docker_image = "us-central1-docker.pkg.dev/vllm-405802/vllm-ci-test-repo/vllm-test:$BUILDKITE_COMMIT" %}
{% set default_num_gpu = 1 %}
{% set default_working_dir = "/vllm-workspace/tests" %}

steps:
  - label: ":docker: build image"
    commands:
      - "docker build --tag {{ docker_image }} --target test --progress plain ."
      - "docker push {{ docker_image }}"
    env:
      DOCKER_BUILDKIT: "1"
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 5
  - wait

  {% for step in steps %}
  - label: "{{ step.label }}"
    agents:
      queue: kubernetes
    soft_fail: {{ step.soft_fail or false }}
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 5
    plugins:
      - kubernetes:
          podSpec:
            volumes:
              - name: dshm
                emptyDir:
                  medium: Memory
            containers:
              - image: "{{ docker_image }}"
                command: ["bash"]
                args:
                  - "-c"
                  - "'cd {{ (step.working_dir or default_working_dir) | safe }} && {{ step.command or (step.commands | join(' && ')) | safe }}'"
                resources:
                  requests:
                    nvidia.com/gpu: "{{ step.num_gpus or default_num_gpu }}"
                  limits:
                    nvidia.com/gpu: "{{ step.num_gpus or default_num_gpu }}"
                env:
                  - name: HF_TOKEN
                    valueFrom:
                      secretKeyRef:
                        name: hf-token-secret
                        key: token
                volumeMounts:
                  - mountPath: /dev/shm
                    name: dshm
  {% endfor %}
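
As the comment at the top of test-pipeline.yaml notes, the step list is fed through this Jinja template to produce the final pipeline. A minimal sketch of that generation step, assuming jinja2 and PyYAML (the commit itself does not show the rendering glue):

# Sketch: render test-template.j2 with the steps from test-pipeline.yaml.
import yaml
from jinja2 import Template

with open(".buildkite/test-pipeline.yaml") as f:
    steps = yaml.safe_load(f)["steps"]

with open(".buildkite/test-template.j2") as f:
    template = Template(f.read())

# Each step dict supplies label, command/commands, working_dir, num_gpus, soft_fail.
print(template.render(steps=steps))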
1 change: 1 addition & 0 deletions .dockerignore
@@ -0,0 +1 @@
vllm/*.so
11 changes: 6 additions & 5 deletions .github/workflows/publish.yml
@@ -43,13 +43,14 @@ jobs:
     name: Build Wheel
     runs-on: ${{ matrix.os }}
     needs: release

     strategy:
       fail-fast: false
       matrix:
         os: ['ubuntu-20.04']
         python-version: ['3.8', '3.9', '3.10', '3.11']
-        cuda-version: ['11.8'] # Github runner can't build anything older than 11.8
+        pytorch-version: ['2.1.2'] # Must be the most recent version that meets requirements.txt.
+        cuda-version: ['11.8', '12.1']

     steps:
       - name: Checkout
@@ -69,9 +70,9 @@ jobs:
         run: |
           bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}

-      - name: Install PyTorch-cu${{ matrix.cuda-version }}
+      - name: Install PyTorch ${{ matrix.pytorch-version }} with CUDA ${{ matrix.cuda-version }}
         run: |
-          bash -x .github/workflows/scripts/pytorch-install.sh ${{ matrix.python-version }} ${{ matrix.cuda-version }}
+          bash -x .github/workflows/scripts/pytorch-install.sh ${{ matrix.python-version }} ${{ matrix.pytorch-version }} ${{ matrix.cuda-version }}

       - name: Build wheel
         shell: bash
@@ -81,7 +82,7 @@ jobs:
           asset_name=${wheel_name//"linux"/"manylinux1"}
           echo "wheel_name=${wheel_name}" >> $GITHUB_ENV
           echo "asset_name=${asset_name}" >> $GITHUB_ENV
       - name: Upload Release Asset
         uses: actions/upload-release-asset@v1
         env:
10 changes: 5 additions & 5 deletions .github/workflows/pylint.yml → .github/workflows/ruff.yml
@@ -1,4 +1,4 @@
-name: pylint
+name: ruff

on:
# Trigger the workflow on push or pull request,
@@ -11,7 +11,7 @@ on:
       - main

 jobs:
-  pylint:
+  ruff:
     runs-on: ubuntu-latest
     strategy:
       matrix:
@@ -25,7 +25,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install pylint==2.8.2
-    - name: Analysing the code with pylint
+        pip install ruff==0.1.5
+    - name: Analysing the code with ruff
       run: |
-        pylint vllm tests
+        ruff vllm tests
3 changes: 3 additions & 0 deletions .github/workflows/scripts/build.sh
@@ -11,5 +11,8 @@ LD_LIBRARY_PATH=${cuda_home}/lib64:$LD_LIBRARY_PATH
 $python_executable -m pip install wheel packaging
 $python_executable -m pip install -r requirements.txt

+# Limit the number of parallel jobs to avoid OOM
+export MAX_JOBS=1
+
 # Build
 $python_executable setup.py bdist_wheel --dist-dir=dist
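
The MAX_JOBS cap presumably takes effect through PyTorch's extension builder (torch.utils.cpp_extension honors the MAX_JOBS environment variable when it drives ninja, and vLLM's setup.py builds on it), so the wheel build compiles its CUDA kernels one at a time: slower, but each parallel nvcc job can use several GB of RAM on the CI runner.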
5 changes: 5 additions & 0 deletions .github/workflows/scripts/cuda-install.sh
@@ -16,3 +16,8 @@ sudo apt clean
 # Test nvcc
 PATH=/usr/local/cuda-$1/bin:${PATH}
 nvcc --version
+
+# Log gcc, g++, c++ versions
+gcc --version
+g++ --version
+c++ --version
5 changes: 3 additions & 2 deletions .github/workflows/scripts/pytorch-install.sh
@@ -1,11 +1,12 @@
 #!/bin/bash

 python_executable=python$1
-cuda_version=$2
+pytorch_version=$2
+cuda_version=$3

 # Install torch
 $python_executable -m pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses setuptools && conda clean -ya
-$python_executable -m pip install torch -f https://download.pytorch.org/whl/cu${cuda_version//./}/torch_stable.html
+$python_executable -m pip install torch==${pytorch_version}+cu${cuda_version//./} --extra-index-url https://download.pytorch.org/whl/cu${cuda_version//./}

 # Print version information
 $python_executable --version
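
Worked example with hypothetical arguments: "bash pytorch-install.sh 3.10 2.1.2 12.1" now expands the install line to "python3.10 -m pip install torch==2.1.2+cu121 --extra-index-url https://download.pytorch.org/whl/cu121", matching the three-argument call site added to publish.yml above.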
2 changes: 1 addition & 1 deletion .github/workflows/yapf.yml
@@ -28,4 +28,4 @@ jobs:
         pip install toml==0.10.2
     - name: Running yapf
       run: |
-        yapf --diff --recursive vllm tests
+        yapf --diff --recursive .
7 changes: 7 additions & 0 deletions .gitignore
@@ -177,3 +177,10 @@ _build/
 # vim swap files
 *.swo
 *.swp
+
+# hip files generated by PyTorch
+*.hip
+*_hip*
+
+# Benchmark dataset
+*.json