🧬 Add llava phi 7b genetic algorithm config
1 parent 1d53b4a · commit 3ba68ee
Showing 1 changed file with 50 additions and 0 deletions.

envs:
  MODEL_NAME: xtuner/llava-phi-3-mini-hf
  HF_HUB_ENABLE_HF_TRANSFER: True
  HF_TOKEN: null  # Hugging Face token; pass at launch (e.g. `--env HF_TOKEN=hf_...`) instead of committing it.

resources:
  # accelerators: {L4:4, A100:4, A100:8, A100-80GB:2, A100-80GB:4, A100-80GB:8}  ## Large models
  accelerators: [L4, A10G, A100, A100-80GB, T4, M60]  ## Small models
  # cpus: 32+
  memory: 32+
  # use_spot: True
  # disk_size: 512  # Ensure model checkpoints (~246GB) can fit.
  # disk_tier: best
  ports: 8080  # Expose to internet traffic.

service:
  readiness_probe:
    path: /v1/chat/completions
    post_data:
      model: $MODEL_NAME
      messages:
        - role: user
          content: Hello! What is your name?
      max_tokens: 1
  # readiness_probe: /v1/models  # Simpler GET probe; use this OR the block above -- YAML forbids duplicate keys.
  replica_policy:
    min_replicas: 1
    max_replicas: 10
    target_qps_per_replica: 5
    upscale_delay_seconds: 300
    downscale_delay_seconds: 1200
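
# For illustration only (not part of the SkyPilot schema): the readiness probe
# above amounts to a POST like the following, where <endpoint> stands for the
# address `sky serve status` reports once the service is up:
#
#   curl http://<endpoint>/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "xtuner/llava-phi-3-mini-hf",
#          "messages": [{"role": "user", "content": "Hello! What is your name?"}],
#          "max_tokens": 1}'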

setup: |
  pip install hf_transfer
  # GPU monitoring with Grafana
  pip install nvidia-ml-py boto3
  # git clone --depth=1 https://gist.github.com/ZackBradshaw/5a50952f445596d046649e2d38b12176 gpumon
  wget -O gpumon.py https://gist.githubusercontent.com/ZackBradshaw/5a50952f445596d046649e2d38b12176/raw/45135e5f90183bb6971bdddc276ab1e394ccb670/gpumon.py
  nohup python gpumon.py > gpumon.out 2>&1 &  # Background the monitor so setup can finish.
  cat gpumon.out
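  # Sanity check (an assumption, adjust as needed): confirm the monitor survived startup.
  pgrep -f gpumon.py || echo "gpumon is not running; inspect gpumon.out"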
run: |
  # Serve with Docker (HF_TOKEN comes from the envs section; never commit a real token)
  docker run --runtime nvidia --gpus all \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
    -p 8080:8080 \
    --ipc=host \
    openmmlab/lmdeploy:latest \
    lmdeploy serve api_server $MODEL_NAME --model-name llava-phi --server-port 8080
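
A minimal way to launch and smoke-test this config with SkyPilot, assuming it is saved as llava-phi.yaml and your Hugging Face token is exported as HF_TOKEN in your shell (both names are placeholders):

sky serve up -n llava-phi --env HF_TOKEN llava-phi.yaml

# After `sky serve status llava-phi` shows a READY replica, query the
# OpenAI-compatible endpoint (replace <endpoint> with the reported address).
# Note the model name matches the --model-name passed to lmdeploy above:
curl http://<endpoint>/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "llava-phi", "messages": [{"role": "user", "content": "Hello! What is your name?"}], "max_tokens": 64}'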