Skip to content

Commit

Permalink
Merge pull request #238 from RobotSail/fix-checkpoint-selection
Browse files Browse the repository at this point in the history
fix: updates sorting logic to correctly compare numbers
  • Loading branch information
mergify[bot] authored Oct 1, 2024
2 parents 8b252d8 + 0310cae commit 8e6c160
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions src/instructlab/training/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,8 +870,13 @@ def load_latest_full_state(args, accelerator) -> None:
if not output_dir.is_dir():
return

- # picks checkpoint with the largest number of samples seen, by name.
- checkpoint_list = sorted(list(output_dir.iterdir()), reverse=True)
+ # picks checkpoint with the largest number of samples by splitting the "samples_NNNN" string on _
+ # and comparing the number at the end of the string
+ checkpoint_list = sorted(
+     list(output_dir.iterdir()),
+     reverse=True,
+     key=lambda x: int(str(x).rsplit("_", maxsplit=1)[-1]),
+ )

if len(checkpoint_list) == 0:
log_rank_0(
Expand Down

0 comments on commit 8e6c160

Please sign in to comment.