From 0310caecc22e747d722f7df386d8fe2c2a1881aa Mon Sep 17 00:00:00 2001 From: Oleg S <97077423+RobotSail@users.noreply.github.com> Date: Tue, 1 Oct 2024 16:32:30 +0000 Subject: [PATCH] fix: updates sorting logic to correctly compare numbers Signed-off-by: Oleg S <97077423+RobotSail@users.noreply.github.com> --- src/instructlab/training/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/instructlab/training/utils.py b/src/instructlab/training/utils.py index 9b1a6c8f..0ba0db4e 100644 --- a/src/instructlab/training/utils.py +++ b/src/instructlab/training/utils.py @@ -870,8 +870,13 @@ def load_latest_full_state(args, accelerator) -> None: if not output_dir.is_dir(): return - # picks checkpoint with the largest number of samples seen, by name. - checkpoint_list = sorted(list(output_dir.iterdir()), reverse=True) + # picks checkpoint with the largest number of samples by splitting the "samples_NNNN" string on _ + # and comparing the number at the end of the string + checkpoint_list = sorted( + list(output_dir.iterdir()), + reverse=True, + key=lambda x: int(str(x).rsplit("_", maxsplit=1)[-1]), + ) if len(checkpoint_list) == 0: log_rank_0(