From 39b7588af9fa757c60f65a5a025bf0fdb5e78c24 Mon Sep 17 00:00:00 2001 From: Rakib Hassan Date: Fri, 4 Oct 2024 15:52:09 +1000 Subject: [PATCH] Better handling of aborts in parallel runs --- seismic/bulk_station_orientations.py | 7 ++++++- seismic/misc_p.py | 9 +++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/seismic/bulk_station_orientations.py b/seismic/bulk_station_orientations.py index 8ed5ba45..612a03c5 100644 --- a/seismic/bulk_station_orientations.py +++ b/seismic/bulk_station_orientations.py @@ -34,6 +34,7 @@ from matplotlib.backends.backend_pdf import PdfPages import matplotlib.pyplot as plt from shutil import rmtree +from seismic.misc_p import parallel_abort logging.basicConfig() @@ -212,7 +213,11 @@ def main(src_h5_event_file, network, output_basename, station_list, dump_swp_dat proc_hdfkeys = list(proc_hdfkeys) # trim stations to be processed based on the user-provided network- and station-list - proc_hdfkeys = rf_util.trim_hdf_keys(proc_hdfkeys, network, station_list) + try: + proc_hdfkeys = rf_util.trim_hdf_keys(proc_hdfkeys, network, station_list) + except Exception as e: + parallel_abort(str(e), logger) + # end try # split work-load over all procs proc_hdfkeys = split_list(proc_hdfkeys, nproc) diff --git a/seismic/misc_p.py b/seismic/misc_p.py index 2bfef472..7d79767e 100644 --- a/seismic/misc_p.py +++ b/seismic/misc_p.py @@ -14,6 +14,7 @@ import os from mpi4py import MPI +from logging import Logger class ProgressTracker: def __init__(self, output_folder, restart_mode=False): @@ -51,3 +52,11 @@ def increment(self): # end func # end class +def parallel_abort(msg: str, logger:Logger=None): + comm = MPI.COMM_WORLD + nproc = comm.Get_size() + rank = comm.Get_rank() + + if(logger is not None): logger.error('Aborting job from rank {}: {}'.format(rank, msg)) + comm.Abort() +# end func \ No newline at end of file