When I use this model and ask a question, a moment later I run out of memory:

Traceback (most recent call last):
File "/export/openChatKit/openChatKit/inference/bot.py", line 285, in
main()
File "/export/openChatKit/openChatKit/inference/bot.py", line 281, in main
).cmdloop()
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/cmd.py", line 138, in cmdloop
stop = self.onecmd(line)
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/cmd.py", line 217, in onecmd
return func(arg)
File "/export/openChatKit/openChatKit/inference/bot.py", line 150, in do_say
output = self._model.do_inference(
File "/export/openChatKit/openChatKit/inference/bot.py", line 92, in do_inference
outputs = self._model.generate(
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/transformers/generation_utils.py", line 1326, in generate
return self.sample(
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/transformers/generation_utils.py", line 1944, in sample
outputs = self(
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 619, in forward
outputs = self.gpt_neox(
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 511, in forward
outputs = layer(
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 319, in forward
attention_layer_outputs = self.attention(
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
return forward_call(*input, **kwargs)
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/export/dolly/anaconda3/envs/OpenChatKit/lib/python3.10/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 148, in forward
key = torch.cat((past_key, key), dim=-2)
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 18.00 MiB (GPU 0; 15.89 GiB total capacity; 14.98 GiB already allocated; 15.88 MiB free; 15.16 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
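As the error text itself suggests, when reserved memory is much larger than allocated memory, capping `max_split_size_mb` can reduce caching-allocator fragmentation. It won't recover a genuine capacity shortfall, but it's a cheap first thing to try. A minimal sketch (the 128 MiB value is an arbitrary example; the variable has to be set before torch initializes CUDA):

```python
import os

# Must be set before the CUDA allocator is initialized,
# i.e. before the first CUDA call.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

import torch  # imported after the env var so the allocator picks it up
```

Equivalently, set it from the shell when launching the bot: `PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 python inference/bot.py ...`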
What args are you passing to the command? I see you're trying to run this on 16 GB of VRAM, which means you'll only be able to fit the Pythia model in 8-bit, or you'd have to offload parts of the model to CPU/disk.
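For reference, both options can be expressed directly with transformers and accelerate. This is a minimal sketch, not OpenChatKit's actual loading code; it assumes bitsandbytes is installed for the 8-bit path, and the checkpoint name is only an example, so substitute whichever model you're actually running:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Example checkpoint; replace with the model you are actually loading.
model_name = "togethercomputer/Pythia-Chat-Base-7B"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Option 1: load the weights in 8-bit (requires bitsandbytes),
# roughly halving VRAM use compared to fp16.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    load_in_8bit=True,
)

# Option 2: keep fp16 but let accelerate offload layers that don't
# fit on the GPU to CPU RAM and, failing that, to disk.
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     torch_dtype=torch.float16,
#     offload_folder="offload",
# )
```

Note also that the failure happens in the `torch.cat((past_key, key), ...)` call, i.e. while growing the KV cache: it expands with every generated token, so a model that barely fits at load time can still OOM mid-generation. Lowering `max_new_tokens` or trimming the conversation history passed back into the prompt helps with that part.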