Commit: init_models
abrichr committed Mar 6, 2024
1 parent 94271ee commit 538f289
Showing 1 changed file with 37 additions and 14 deletions.
51 changes: 37 additions & 14 deletions composite_demo/client.py
@@ -27,15 +27,15 @@
     },
     'agent_chat': {
         'path': os.environ.get('MODEL_PATH_AGENT_CHAT', 'THUDM/cogagent-chat-hf'),
-        'device': ['cuda:0']
+        #'device': ['cuda:0']
     },
     'vlm_chat': {
         'path': os.environ.get('MODEL_PATH_VLM_CHAT', 'THUDM/cogvlm-chat-hf'),
-        'device': ['cuda:3']
+        #'device': ['cuda:3']
     },
     'vlm_grounding': {
         'path': os.environ.get('MODEL_PATH_VLM_GROUNDING','THUDM/cogvlm-grounding-generalist-hf'),
-        'device': ['cuda:6']
+        #'device': ['cuda:6']
     }
 }

@@ -50,6 +50,10 @@
 # 'device': ['cuda:0']
 # },
 
+def get_available_devices() -> list[str]:
+    """Returns a list of available GPU devices in the format 'cuda:X', or an empty list if no GPU is available."""
+    n_gpus = torch.cuda.device_count()
+    return [f'cuda:{i}' for i in range(n_gpus)]
 
 
 @st.cache_resource
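
A minimal sketch of how the new helper behaves; the 'cpu' fallback shown here is an illustrative assumption, since the committed helper simply returns an empty list on CPU-only hosts:

import torch

def get_available_devices() -> list[str]:
    return [f'cuda:{i}' for i in range(torch.cuda.device_count())]

devices = get_available_devices() or ['cpu']  # e.g. ['cuda:0', 'cuda:1'], or ['cpu'] when no GPU is visible
print(devices)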
@@ -119,18 +123,36 @@ class HFClient(Client):
     """
     def __init__(self, models_info):
         self.models = {}
         self.tokenizer = AutoTokenizer.from_pretrained(models_info['tokenizer']['path'], trust_remote_code=True)
+        self.init_models(models_info)
+
+    def init_models(self, models_info):
+        """
+        Initializes models on dynamically selected devices based on availability.
+        """
+        available_devices = get_available_devices()
+        device_iterator = iter(available_devices)
 
         for model_name, model_info in models_info.items():
-            if model_name != 'tokenizer':
-                self.models[model_name] = []
-                for device in model_info['device']:
-                    model = AutoModelForCausalLM.from_pretrained(
-                        model_info['path'],
-                        torch_dtype=torch_type,
-                        low_cpu_mem_usage=True,
-                        trust_remote_code=True,
-                    ).to(device).eval()
-                    self.models[model_name].append(model)
+            if model_name == 'tokenizer':  # Skip tokenizer for device allocation
+                continue
+
+            try:
+                device = next(device_iterator)  # Assign next available device
+            except StopIteration:
+                warnings.warn("Not enough GPUs for all models, some models may share devices")
+                device_iterator = iter(available_devices)  # Reset iterator to reuse devices
+                device = next(device_iterator)
+
+            print(f"loading {model_name=} into {device=}")
+            model = AutoModelForCausalLM.from_pretrained(
+                model_info['path'],
+                torch_dtype=torch_type,
+                low_cpu_mem_usage=True,
+                trust_remote_code=True,
+            ).to(device).eval()
+
+            # Store the model and its device in self.models for later reference
+            self.models[model_name] = {'model': model, 'device': device}
 
     def select_best_gpu(self, model_name):
         min_memory_used = None
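
The allocation logic above reduces to a wrap-around iteration over the available devices. A self-contained sketch of that pattern, with the model names and device list as illustrative stand-ins:

import warnings

available = ['cuda:0', 'cuda:1']  # stand-in for get_available_devices()
model_names = ['agent_chat', 'vlm_chat', 'vlm_grounding']

device_iterator = iter(available)
assignment = {}
for name in model_names:
    try:
        device = next(device_iterator)
    except StopIteration:
        warnings.warn("Not enough GPUs for all models, some models may share devices")
        device_iterator = iter(available)  # Wrap around and start reusing devices
        device = next(device_iterator)
    assignment[name] = device

print(assignment)  # {'agent_chat': 'cuda:0', 'vlm_chat': 'cuda:1', 'vlm_grounding': 'cuda:0'}

Note that if get_available_devices() returns an empty list (a CPU-only host), the reset branch raises StopIteration again; the committed code appears to assume at least one visible CUDA device.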
@@ -144,6 +166,7 @@ def select_best_gpu(self, model_name):
                     min_memory_used = mem_used
                     selected_model = model
 
+        print(f"{model_name=} {selected_model=}")
         return selected_model
 
     def generate_stream(self,
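
Part of select_best_gpu is collapsed in this view, but the visible fragment keeps whichever candidate reports the least memory in use. A sketch of that kind of probe using torch.cuda.memory_allocated (a real PyTorch API; the helper name and device-keyed layout are illustrative, since the committed method's exact probe is not visible here):

import torch

def least_loaded_device(devices: list[str]) -> str:
    # Pick the CUDA device with the least memory currently allocated by this process.
    return min(devices, key=lambda d: torch.cuda.memory_allocated(d))

best = least_loaded_device(['cuda:0', 'cuda:1'])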
