Nov 6 rebase (sans vllm-project#6143) #468

Merged · Nov 6, 2024

Changes from all 25 commits:
- 5952d81 · [Frontend] Fix tcp port reservation for api server (#10012) · russellb, Nov 5, 2024
- cd34029 · Refactor TPU requirements file and pin build dependencies (#10010) · richardsliu, Nov 5, 2024
- 09d3550 · [Misc] Add logging for CUDA memory (#10027) · yangalan123, Nov 5, 2024
- 731aec5 · [CI/Build] Limit github CI jobs based on files changed (#9928) · russellb, Nov 5, 2024
- a53046b · [Model] Support quantization of PixtralHFTransformer for PixtralHF (#… · mgoin, Nov 5, 2024
- d2e8033 · [Feature] Update benchmark_throughput.py to support image input (#9851) · lk-chen, Nov 5, 2024
- b9c64c0 · [Misc] Modify BNB parameter name (#9997) · jeejeelee, Nov 5, 2024
- 0246246 · [CI] Prune tests/models/decoder_only/language/* tests (#9940) · mgoin, Nov 5, 2024
- 235366f · [CI] Prune back the number of tests in tests/kernels/* (#9932) · mgoin, Nov 5, 2024
- ca9844b · [bugfix] fix weak ref in piecewise cudagraph and tractable test (#10048) · youkaichao, Nov 5, 2024
- 43300bd · [Bugfix] Properly propagate trust_remote_code settings (#10047) · zifeitong, Nov 6, 2024
- 966e316 · [Bugfix] Fix pickle of input when async output processing is on (#9931) · wallashss, Nov 6, 2024
- 0c63c34 · [Bugfix][SpecDecode] kv corruption with bonus tokens in spec decode (… · llsj14, Nov 6, 2024
- c4cacba · [v1] reduce graph capture time for piecewise cudagraph (#10059) · youkaichao, Nov 6, 2024
- 82bfc38 · [Misc] Sort the list of embedding models (#10037) · DarkLight1337, Nov 6, 2024
- ffc0f2b · [Model][OpenVINO] Fix regressions from #8346 (#10045) · petersalas, Nov 6, 2024
- 2bcbae7 · [Bugfix] Fix edge-case crash when using chat with the Mistral Tekken … · tjohnson31415, Nov 6, 2024
- ea928f6 · [Bugfix] Gpt-j-6B patch kv_scale to k_scale path (#10063) · arakowsk-amd, Nov 6, 2024
- 9d59b75 · [Bugfix] Remove CustomChatCompletionContentPartParam multimodal input… · zifeitong, Nov 6, 2024
- 4089985 · [V1] Integrate Piecewise CUDA graphs (#10058) · WoosukKwon, Nov 6, 2024
- 4be3a45 · [distributed] add function to create ipc buffers directly (#10064) · youkaichao, Nov 6, 2024
- 21063c1 · [CI/Build] drop support for Python 3.8 EOL (#8464) · aarnphm, Nov 6, 2024
- a5fda50 · [CI/Build] Fix large_gpu_mark reason (#10070) · Isotr0py, Nov 6, 2024
- 40882f3 · Merge commit 'a5fda50a10641e47c0c290907f30ef2add6d4e7a' into HEAD · kzawora-intel, Nov 6, 2024
- 8e62377 · format.sh · kzawora-intel, Nov 6, 2024
(file name not captured)

@@ -56,7 +56,7 @@
 
 def read_markdown(file):
     if os.path.exists(file):
-        with open(file, "r") as f:
+        with open(file) as f:
             return f.read() + "\n"
     else:
         return f"{file} not found.\n"
@@ -75,14 +75,14 @@ def results_to_json(latency, throughput, serving):
 # collect results
 for test_file in results_folder.glob("*.json"):
 
-    with open(test_file, "r") as f:
+    with open(test_file) as f:
         raw_result = json.loads(f.read())
 
     if "serving" in str(test_file):
         # this result is generated via `benchmark_serving.py`
 
         # attach the benchmarking command to raw_result
-        with open(test_file.with_suffix(".commands"), "r") as f:
+        with open(test_file.with_suffix(".commands")) as f:
             command = json.loads(f.read())
         raw_result.update(command)
 
@@ -97,7 +97,7 @@ def results_to_json(latency, throughput, serving):
         # this result is generated via `benchmark_latency.py`
 
         # attach the benchmarking command to raw_result
-        with open(test_file.with_suffix(".commands"), "r") as f:
+        with open(test_file.with_suffix(".commands")) as f:
             command = json.loads(f.read())
         raw_result.update(command)
 
@@ -119,7 +119,7 @@ def results_to_json(latency, throughput, serving):
         # this result is generated via `benchmark_throughput.py`
 
         # attach the benchmarking command to raw_result
-        with open(test_file.with_suffix(".commands"), "r") as f:
+        with open(test_file.with_suffix(".commands")) as f:
             command = json.loads(f.read())
         raw_result.update(command)
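The recurring edit in this file just drops a redundant argument: `"r"` (text-mode read) is already `open()`'s default. A minimal standalone check of that equivalence (the temporary file here is invented for illustration):

```python
import os
import tempfile

# "r" (text-mode read) is open()'s default, so open(p) == open(p, "r").
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as tmp:
    tmp.write('{"ok": true}')

with open(tmp.name) as f:        # new style, mode defaulted
    implicit = f.read()
with open(tmp.name, "r") as f:   # old style, mode spelled out
    explicit = f.read()

assert implicit == explicit
os.unlink(tmp.name)  # clean up the temporary file
```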
(file name not captured)

@@ -72,15 +72,15 @@ def main(args):
 
     # collect results
     for test_file in results_folder.glob("*_nightly_results.json"):
-        with open(test_file, "r") as f:
+        with open(test_file) as f:
             results = results + json.loads(f.read())
 
     # generate markdown table
     df = pd.DataFrame.from_dict(results)
 
     md_table = tabulate(df, headers='keys', tablefmt='pipe', showindex=False)
 
-    with open(args.description, "r") as f:
+    with open(args.description) as f:
         description = f.read()
 
     description = description.format(
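For context, the surrounding script renders the collected JSON rows as a markdown table. A minimal sketch of that step, with invented sample rows standing in for the `*_nightly_results.json` contents:

```python
import pandas as pd
from tabulate import tabulate

# Invented sample data; real rows come from *_nightly_results.json files.
results = [
    {"model": "model-a", "tput_tokens_per_s": 42.1},
    {"model": "model-b", "tput_tokens_per_s": 39.8},
]

df = pd.DataFrame.from_dict(results)
# Same call as in the diff above: pipe-style (markdown) table, no index column.
md_table = tabulate(df, headers='keys', tablefmt='pipe', showindex=False)
print(md_table)
```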
(file name not captured)

@@ -36,11 +36,11 @@
 # collect results
 for test_file in results_folder.glob("*.json"):
 
-    with open(test_file, "r") as f:
+    with open(test_file) as f:
         raw_result = json.loads(f.read())
 
     # attach the benchmarking command to raw_result
-    with open(test_file.with_suffix(".commands"), "r") as f:
+    with open(test_file.with_suffix(".commands")) as f:
         command = json.loads(f.read())
     raw_result.update(command)
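The `.commands` sidecar lookup in these scripts relies on `pathlib.Path.with_suffix`, which swaps only the final extension. A quick illustration (the file names are hypothetical):

```python
from pathlib import Path

# Each results file foo.json has a sidecar foo.commands holding the
# benchmark invocation; with_suffix() maps one name to the other.
test_file = Path("results/serving_test.json")
sidecar = test_file.with_suffix(".commands")
assert sidecar == Path("results/serving_test.commands")
```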
2 changes: 1 addition & 1 deletion .buildkite/run-openvino-test.sh

@@ -11,4 +11,4 @@ trap remove_docker_container EXIT
 remove_docker_container
 
 # Run the image and launch offline inference
-docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/vllm/examples/offline_inference.py
+docker run --network host --env VLLM_OPENVINO_KVCACHE_SPACE=1 --name openvino-test openvino-test python3 /workspace/examples/offline_inference.py
4 changes: 2 additions & 2 deletions .buildkite/test-pipeline.yaml

@@ -321,15 +321,14 @@ steps:
   - tests/models/decoder_only/language
   commands:
   - pytest -v -s models/decoder_only/language/test_models.py
-  - pytest -v -s models/decoder_only/language/test_big_models.py
 
 - label: Decoder-only Language Models Test (Extended) # 1h20min
   nightly: true
   source_file_dependencies:
   - vllm/
   - tests/models/decoder_only/language
   commands:
-  - pytest -v -s models/decoder_only/language --ignore=models/decoder_only/language/test_models.py --ignore=models/decoder_only/language/test_big_models.py
+  - pytest -v -s models/decoder_only/language --ignore=models/decoder_only/language/test_models.py
 
 - label: Decoder-only Multi-Modal Models Test (Standard)
   #mirror_hardwares: [amd]
@@ -511,6 +510,7 @@ steps:
   # NOTE: don't test llama model here, it seems hf implementation is buggy
   # see https://github.com/vllm-project/vllm/pull/5689 for details
   - pytest -v -s distributed/test_custom_all_reduce.py
+  - torchrun --nproc_per_node=2 distributed/test_ca_buffer_sharing.py
   - TARGET_TEST_SUITE=A100 pytest basic_correctness/ -v -s -m distributed_2_gpus
   - pytest -v -s -x lora/test_mixtral.py
2 changes: 2 additions & 0 deletions .github/workflows/actionlint.yml

@@ -6,12 +6,14 @@ on:
     paths:
       - '.github/workflows/*.ya?ml'
       - '.github/workflows/actionlint.*'
+      - '.github/workflows/matchers/actionlint.json'
   pull_request:
     branches:
       - "main"
     paths:
       - '.github/workflows/*.ya?ml'
       - '.github/workflows/actionlint.*'
+      - '.github/workflows/matchers/actionlint.json'
 
 env:
   LC_ALL: en_US.UTF-8
12 changes: 12 additions & 0 deletions .github/workflows/clang-format.yml

@@ -6,9 +6,21 @@ on:
   push:
     branches:
       - habana_main
+    paths:
+      - '**/*.h'
+      - '**/*.cpp'
+      - '**/*.cu'
+      - '**/*.cuh'
+      - '.github/workflows/clang-format.yml'
   pull_request:
     branches:
       - habana_main
+    paths:
+      - '**/*.h'
+      - '**/*.cpp'
+      - '**/*.cu'
+      - '**/*.cuh'
+      - '.github/workflows/clang-format.yml'
 
 jobs:
   clang-format:
19 changes: 18 additions & 1 deletion .github/workflows/mypy.yaml

@@ -5,17 +5,34 @@ on:
   # but only for the habana_main branch
   push:
     branches:
+<<<<<<< HEAD
       - habana_main
   pull_request:
     branches:
       - habana_main
+=======
+      - main
+    paths:
+      - '**/*.py'
+      - '.github/workflows/mypy.yaml'
+      - 'tools/mypy.sh'
+      - 'pyproject.toml'
+  pull_request:
+    branches:
+      - main
+    paths:
+      - '**/*.py'
+      - '.github/workflows/mypy.yaml'
+      - 'tools/mypy.sh'
+      - 'pyproject.toml'
+>>>>>>> a5fda50a10641e47c0c290907f30ef2add6d4e7a
 
 jobs:
   mypy:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
     steps:
     - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
     - name: Set up Python ${{ matrix.python-version }}
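Note that the mypy.yaml hunk above adds literal Git conflict markers to the committed file. A minimal sketch of a check that catches such leftovers in workflow files; this helper is hypothetical and not part of this PR:

```python
import re
import sys
from pathlib import Path

# Hypothetical helper (not part of this PR): flag files that still contain
# unresolved merge-conflict markers like those visible in mypy.yaml above.
MARKER = re.compile(r"^(<{7} |={7}$|>{7} )", re.MULTILINE)

def find_conflicts(root: str = ".") -> list[Path]:
    hits = []
    for path in Path(root).rglob("*.y*ml"):  # .yml and .yaml
        if MARKER.search(path.read_text(errors="ignore")):
            hits.append(path)
    return hits

if __name__ == "__main__":
    conflicted = find_conflicts()
    for p in conflicted:
        print(f"unresolved conflict markers: {p}")
    sys.exit(1 if conflicted else 0)
```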
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml

@@ -48,7 +48,7 @@ jobs:
     fail-fast: false
     matrix:
       os: ['ubuntu-20.04']
-      python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+      python-version: ['3.9', '3.10', '3.11', '3.12']
       pytorch-version: ['2.4.0'] # Must be the most recent version that meets requirements-cuda.txt.
       cuda-version: ['11.8', '12.1']
49 changes: 29 additions & 20 deletions .github/workflows/ruff.yml

@@ -6,33 +6,42 @@ on:
   push:
     branches:
       - habana_main
+    paths:
+      - "**/*.py"
+      - pyproject.toml
+      - requirements-lint.txt
+      - .github/workflows/matchers/ruff.json
+      - .github/workflows/ruff.yml
   pull_request:
     branches:
       - habana_main
+    paths:
+      - "**/*.py"
+      - pyproject.toml
+      - requirements-lint.txt
+      - .github/workflows/matchers/ruff.json
+      - .github/workflows/ruff.yml
 
 jobs:
   ruff:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.12"]
     steps:
-      - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install -r requirements-lint.txt
-      - name: Analysing the code with ruff
-        run: |
-          echo "::add-matcher::.github/workflows/matchers/ruff.json"
-          ruff check --output-format github .
-      - name: Spelling check with codespell
-        run: |
-          codespell --toml pyproject.toml
-      - name: Run isort
-        run: |
-          isort . --check-only
+    - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements-lint.txt
+    - name: Analysing the code with ruff
+      run: |
+        echo "::add-matcher::.github/workflows/matchers/ruff.json"
+        ruff check --output-format github .
+    - name: Run isort
+      run: |
+        isort . --check-only
34 changes: 20 additions & 14 deletions .github/workflows/yapf.yml

@@ -6,27 +6,33 @@ on:
   push:
     branches:
       - habana_main
+    paths:
+      - "**/*.py"
+      - .github/workflows/yapf.yml
   pull_request:
     branches:
       - habana_main
+    paths:
+      - "**/*.py"
+      - .github/workflows/yapf.yml
 
 jobs:
   yapf:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+        python-version: ["3.12"]
     steps:
-      - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install yapf==0.32.0
-          pip install toml==0.10.2
-      - name: Running yapf
-        run: |
-          yapf --diff --recursive .
+    - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install yapf==0.32.0
+        pip install toml==0.10.2
+    - name: Running yapf
+      run: |
+        yapf --diff --recursive .
20 changes: 8 additions & 12 deletions .jenkins/lm-eval-harness/test_lm_eval_correctness.py

@@ -76,18 +76,14 @@ def report_performance(task, input_lens, output_lens, time, record_property):
     context_lens = [i + o for i, o in zip(input_lens, output_lens)]
     gen_tput = sum(output_lens) / time
     all_lens = [input_lens, output_lens, context_lens]
-    min_input_tokens, min_output_tokens, min_context_tokens = [
-        min(x) for x in all_lens
-    ]
-    max_input_tokens, max_output_tokens, max_context_tokens = [
-        max(x) for x in all_lens
-    ]
-    mean_input_tokens, mean_output_tokens, mean_context_tokens = [
-        statistics.mean(x) for x in all_lens
-    ]
-    stddev_input_tokens, stddev_output_tokens, stddev_context_tokens = [
-        statistics.stdev(x) for x in all_lens
-    ]
+    min_input_tokens, min_output_tokens, min_context_tokens = (
+        min(x) for x in all_lens)
+    max_input_tokens, max_output_tokens, max_context_tokens = (
+        max(x) for x in all_lens)
+    mean_input_tokens, mean_output_tokens, mean_context_tokens = (
+        statistics.mean(x) for x in all_lens)
+    stddev_input_tokens, stddev_output_tokens, stddev_context_tokens = (
+        statistics.stdev(x) for x in all_lens)
     msg = (
         f'{task} | estimated average generation throughput: {gen_tput:.2f} tokens/s \n'  # noqa: G004, E501
         f'{task} | input_tokens | min: {min_input_tokens} | max: {max_input_tokens} | mean: {mean_input_tokens:.2f} | stddev: {stddev_input_tokens:.2f}\n'  # noqa: E501
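The refactor above swaps throwaway list comprehensions for parenthesized generator expressions; tuple unpacking treats both the same, while the generator skips building an intermediate list. A small self-contained check (the sample lengths are invented):

```python
import statistics

# Invented sample data standing in for input/output/context token lengths.
all_lens = [[3, 5, 8], [2, 2, 6], [5, 7, 14]]

# Old style: materializes a temporary list before unpacking.
min_a, min_b, min_c = [min(x) for x in all_lens]
# New style: unpacks straight from a generator expression.
gmin_a, gmin_b, gmin_c = (min(x) for x in all_lens)
assert (min_a, min_b, min_c) == (gmin_a, gmin_b, gmin_c) == (3, 2, 5)

mean_a, _, _ = (statistics.mean(x) for x in all_lens)
assert round(mean_a, 2) == 5.33
```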
11 changes: 5 additions & 6 deletions .readthedocs.yaml

@@ -6,17 +6,16 @@ version: 2
 build:
   os: ubuntu-22.04
   tools:
-    python: "3.8"
+    python: '3.9'
 
 sphinx:
-    configuration: docs/source/conf.py
-    fail_on_warning: true
+  configuration: docs/source/conf.py
+  fail_on_warning: true
 
 # If using Sphinx, optionally build your docs in additional formats such as PDF
 formats: []
 
 # Optionally declare the Python requirements required to build your docs
 python:
-    install:
-      - requirements: docs/requirements-docs.txt
-
+  install:
+    - requirements: docs/requirements-docs.txt