From 1817e5389ae99998dfa686395f6599539d9bb022 Mon Sep 17 00:00:00 2001 From: zhangtianning Date: Wed, 25 Sep 2024 18:42:45 +0800 Subject: [PATCH] sync --- batch_running_task/batch_run.sh | 23 +++++++++++--- batch_running_task/get_data_utils.py | 2 +- .../task_rec/batch_deal_with_rec.py | 6 ++-- batch_running_task/task_rec/run_rec.sh | 31 +++++++++++++------ batch_running_task/task_schedule.sh | 12 ++++--- 5 files changed, 51 insertions(+), 23 deletions(-) diff --git a/batch_running_task/batch_run.sh b/batch_running_task/batch_run.sh index a7d4377..97e5647 100644 --- a/batch_running_task/batch_run.sh +++ b/batch_running_task/batch_run.sh @@ -1,20 +1,33 @@ -export LD_LIBRARY_PATH=/mnt/cache/share/gcc/gcc-7.5.0/lib64:${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} -export PATH=/mnt/cache/share/gcc/gcc-7.5.0/bin:$PATH - TOTALNUM=30 CPU_NUM=$1 # Automatically get the number of CPUs if [ -z "$CPU_NUM" ]; then CPU_NUM=$TOTALNUM fi +# Check hostname: if it starts with SH, use the AI4Chem spot partition and the gcc-7.5.0 paths; otherwise use vip_gpu_ailab_low + +if [[ $(hostname) == SH* ]]; then + PARA="--quotatype=spot -p AI4Chem -N1 -c8 --gres=gpu:1" + + export LD_LIBRARY_PATH=/mnt/cache/share/gcc/gcc-7.5.0/lib64:${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + export PATH=/mnt/cache/share/gcc/gcc-7.5.0/bin:$PATH + +else + + PARA="-p vip_gpu_ailab_low -N1 -c8 --gres=gpu:1" +fi +SCRIPT="batch_running_task/task_rec/run_rec.sh" +FILELIST="physics_collection/wait_for_ocr.filelist" + + START=0 for ((CPU=0; CPU&1 | grep "gcc version" | awk '{print $3}') -# Required version -REQUIRED_VERSION="7.5.0" + # Check if the version matches -if [ "$GCC_VERSION" != "$REQUIRED_VERSION" ]; then - echo "[`hostname`] GCC version is not $REQUIRED_VERSION. Exiting." 
- exit 1 + +if [[ $(hostname) == SH* ]]; then + IMAGE_BATCH_SIZE=256 + PDF_BATCH_SIZE=32 + export LD_LIBRARY_PATH=/mnt/cache/share/gcc/gcc-7.5.0/lib64:${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + export PATH=/mnt/cache/share/gcc/gcc-7.5.0/bin:$PATH + GCC_VERSION=$(gcc -v 2>&1 | grep "gcc version" | awk '{print $3}') + # Required version + REQUIRED_VERSION="7.5.0" + if [ "$GCC_VERSION" != "$REQUIRED_VERSION" ]; then + echo "[`hostname`] GCC version is not $REQUIRED_VERSION. Exiting." + exit 1 + else + echo "[`hostname`] GCC version is $REQUIRED_VERSION." + fi else - echo "[`hostname`] GCC version is $REQUIRED_VERSION." + IMAGE_BATCH_SIZE=128 + PDF_BATCH_SIZE=16 + fi -python batch_running_task/task_rec/batch_deal_with_rec.py --root_path $1 --index_part $2 --num_parts $3 --num_workers 8 --update_origin --replace --shuffle #--compile \ No newline at end of file + +python batch_running_task/task_rec/batch_deal_with_rec.py --image_batch_size $IMAGE_BATCH_SIZE --pdf_batch_size $PDF_BATCH_SIZE --root_path $1 --index_part $2 --num_parts $3 --num_workers 8 --update_origin --replace --shuffle #--compile \ No newline at end of file diff --git a/batch_running_task/task_schedule.sh b/batch_running_task/task_schedule.sh index 9b38311..9ab1b76 100644 --- a/batch_running_task/task_schedule.sh +++ b/batch_running_task/task_schedule.sh @@ -1,12 +1,16 @@ #!/bin/bash -TASKLIMIT=70 +TASKLIMIT=30 PENDINGLIMIT=2 # Function to get the count of pending tasks user=`whoami` -partition='AI4Chem' -jobscript="batch_running_task/task_layout/run_layout_for_missing_page.sh" -filelist='scihub_collection/analysis/not_complete_pdf_page_id.pairlist.filelist' +if [[ $(hostname) == SH* ]]; then + partition='AI4Chem' +else + partition='vip_gpu_ailab_low' +fi +# jobscript="batch_running_task/task_layout/run_layout_for_missing_page.sh" +# filelist='scihub_collection/analysis/not_complete_pdf_page_id.pairlist.filelist' jobname='ParseSciHUB' get_pending_count() { squeue -u $user -p $partition -n $jobname | grep 
PD | wc -l