-
Notifications
You must be signed in to change notification settings - Fork 532
/
serve.yaml
37 lines (29 loc) · 942 Bytes
/
serve.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Hardware requested for the node that runs the setup and run scripts below.
resources:
  # One A100 GPU, written as <accelerator name>:<count>.
  accelerators: 'A100:1'
  # NOTE(review): unit is presumably GiB per the consuming tool's schema — confirm.
  disk_size: 1024
  disk_tier: best
# One-time provisioning script: prepares the 'chatbot' conda environment and
# installs FastChat into it.
setup: |
  # Reuse the 'chatbot' env when activation succeeds; otherwise create it
  # (Python 3.9) and activate the fresh env.
  if ! conda activate chatbot; then
    conda create -n chatbot python=3.9 -y
    conda activate chatbot
  fi

  # Install dependencies
  pip install "fschat[model_worker,webui]==0.2.24"
  pip install protobuf
# Serving script. Starts, in order:
#   1. the FastChat controller (background, logs to ~/controller.log),
#   2. a model worker for lmsys/vicuna-${MODEL_SIZE}b-v1.3 (background, output
#      mirrored to model_worker.log in the working directory),
#   3. the Gradio web server (foreground, output mirrored to ~/gradio.log).
# MODEL_SIZE comes from the `envs` section of this file.
run: |
  conda activate chatbot

  echo 'Starting controller...'
  python -u -m fastchat.serve.controller --host 127.0.0.1 \
    > ~/controller.log 2>&1 &
  # Fixed grace period for the controller to come up before the worker
  # registers with it.
  sleep 10

  echo 'Starting model worker...'
  # Drop any log left by a previous run so the readiness check below cannot
  # match a stale 'Uvicorn running on' line before tee truncates the file.
  rm -f model_worker.log
  python -u -m fastchat.serve.model_worker \
    --model-path "lmsys/vicuna-${MODEL_SIZE}b-v1.3" \
    --host 127.0.0.1 \
    2>&1 | tee model_worker.log &
  # $! is the PID of the last process of the background pipeline (tee); tee
  # exits once the worker closes its output, so it doubles as a liveness probe.
  worker_pid=$!

  echo 'Waiting for model worker to start...'
  # Poll the log for the server's ready line. 2>/dev/null hides the
  # "No such file" error while tee has not created the log yet.
  until grep -q 'Uvicorn running on' model_worker.log 2>/dev/null; do
    # Fail fast instead of waiting forever if the worker died during startup.
    if ! kill -0 "$worker_pid" 2>/dev/null; then
      echo 'Model worker exited before becoming ready; see model_worker.log' >&2
      exit 1
    fi
    sleep 1
  done

  echo 'Starting gradio server...'
  python -u -m fastchat.serve.gradio_web_server --share | tee ~/gradio.log
# Default environment variables for this task.
envs:
  # Size tag interpolated into the model path by the run script
  # (lmsys/vicuna-${MODEL_SIZE}b-v1.3). Quoted so it is always loaded as a
  # string, never as an integer.
  MODEL_SIZE: '7'