Skip to content

Commit

Permalink
Merge pull request #13 from Brown-University-Library/show_progress
Browse files Browse the repository at this point in the history
adds progress-output.
  • Loading branch information
birkin authored Aug 12, 2024
2 parents fe53284 + 403d712 commit 4a6af44
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions save_mods_to_dir/save_mods.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ def check_well_formed_xml( output_filepath: pathlib.Path, pid: str ):
## manager functions ------------------------------------------------


def download_mods( pid: str, output_dir_path: pathlib.Path ) -> None:
# def download_mods( pid: str, output_dir_path: pathlib.Path ) -> None:
def download_mods( pid: str, output_dir_path: pathlib.Path, index: int ) -> None:
""" Manager function.
Downloads MODS files to the specified directory, for given PIDS.
Called by parse_args(). """
Expand All @@ -140,6 +141,9 @@ def download_mods( pid: str, output_dir_path: pathlib.Path ) -> None:
output_filepath: pathlib.Path = make_output_filepath( output_dir_path, pid )
grab_and_save_mods( url, output_filepath, pid )
check_well_formed_xml( output_filepath, pid )
## show progress ------------------------------------------------
if (index + 1) % 10 == 0:
log.info(f'Processed {index + 1} items.')
return


Expand All @@ -153,7 +157,8 @@ def run_multiprocessing( output_dir_path: pathlib.Path, pids_list_path: pathlib.
pids: list = pids_file.read().splitlines()
log.info( f'pids to process, ``{pprint.pformat(pids)}``' )
with Pool( processes=PROCESSES ) as pool:
args = [ (pid, output_dir_path) for pid in pids ]
# args = [ (pid, output_dir_path) for pid in pids ]
args = [ (pid, output_dir_path, index) for index, pid in enumerate(pids) ]
pool.starmap( download_mods, args )
return

Expand Down

0 comments on commit 4a6af44

Please sign in to comment.