You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Same issue as huggingface/transformers#27925.
if os.path.exists(output_dir) and len(os.listdir(output_dir)) > 0:
logger.warning(
f"Checkpoint destination directory {output_dir} already exists and is non-empty."
"Saving will proceed but saved results may be invalid."
)
staging_output_dir = output_dir
else:
# staging_output_dir = os.path.join(run_dir, f"tmp-{checkpoint_folder}")
staging_output_dir = output_dir
File "/data4/azuryl/DoRA/commonsense_reasoning/finetune.py", line 410, in <module>
[rank0]: fire.Fire(train)
[rank0]: File "/home/azuryl/anaconda3/envs/dora/lib/python3.10/site-packages/fire/core.py", line 141, in Fire
[rank0]: component_trace = _Fire(component, args, parsed_flag_args, context, name)
[rank0]: File "/home/azuryl/anaconda3/envs/dora/lib/python3.10/site-packages/fire/core.py", line 475, in _Fire
[rank0]: component, remaining_args = _CallAndUpdateTrace(
[rank0]: File "/home/azuryl/anaconda3/envs/dora/lib/python3.10/site-packages/fire/core.py", line 691, in _CallAndUpdateTrace
[rank0]: component = fn(*varargs, **kwargs)
[rank0]: File "/data4/azuryl/DoRA/commonsense_reasoning/finetune.py", line 375, in train
[rank0]: trainer.train(resume_from_checkpoint=resume_from_checkpoint)
[rank0]: File "/home/azuryl/anaconda3/envs/dora/lib/python3.10/site-packages/transformers/trainer.py", line 1537, in train
[rank0]: return inner_training_loop(
[rank0]: File "/home/azuryl/anaconda3/envs/dora/lib/python3.10/site-packages/transformers/trainer.py", line 1914, in _inner_training_loop
[rank0]: self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval)
[rank0]: File "/home/azuryl/anaconda3/envs/dora/lib/python3.10/site-packages/transformers/trainer.py", line 2274, in _maybe_log_save_evaluate
[rank0]: self._save_checkpoint(model, trial, metrics=metrics)
[rank0]: File "/home/azuryl/anaconda3/envs/dora/lib/python3.10/site-packages/transformers/trainer.py", line 2350, in _save_checkpoint
[rank0]: self.save_model(staging_output_dir, _internal_call=True)
[rank0]: File "/home/azuryl/anaconda3/envs/dora/lib/python3.10/site-packages/transformers/trainer.py", line 2837, in save_model
[rank0]: self._save(output_dir)
[rank0]: File "/home/azuryl/anaconda3/envs/dora/lib/python3.10/site-packages/transformers/trainer.py", line 2893, in _save
[rank0]: safetensors.torch.save_file(state_dict, os.path.join(output_dir, SAFE_WEIGHTS_NAME))
[rank0]: File "/home/azuryl/anaconda3/envs/dora/lib/python3.10/site-packages/safetensors/torch.py", line 286, in save_file
[rank0]: serialize_file(_flatten(tensors), filename, metadata=metadata)
[rank0]: safetensors_rust.SafetensorError: Error while serializing: IoError(Os { code: 2, kind: NotFound, message: "No such file or directory" })
The text was updated successfully, but these errors were encountered: