Skip to content

Commit

Permalink
Merge pull request #243 from spacemansteve/master
Browse files Browse the repository at this point in the history
add local cache of classic data files
  • Loading branch information
spacemansteve authored Feb 18, 2020
2 parents 743f45a + a6c9b77 commit e9d6339
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 5 deletions.
9 changes: 4 additions & 5 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,10 @@

# Paths to the index.status files, one per ast/phy/gen/pre directory.
# Order matches their priority.
# NOTE(review): this hunk is a rendered diff without +/- markers; the
# '/proj/ads/abstracts/...' entries appear to be the removed lines and the
# './logs/input/current/...' entries the added ones -- confirm against the commit.
BIBCODE_FILES = [
'/proj/ads/abstracts/ast/load/current/index.status',
'/proj/ads/abstracts/phy/load/current/index.status',
'/proj/ads/abstracts/gen/load/current/index.status',
'/proj/ads/abstracts/pre/load/current/index.status',

'./logs/input/current/ast/load/current/index.status',
'./logs/input/current/phy/load/current/index.status',
'./logs/input/current/gen/load/current/index.status',
'./logs/input/current/pre/load/current/index.status',
]

# NOTE(review): presumably the number of bibcodes handed to each job --
# confirm against the code that reads this value.
BIBCODES_PER_JOB = 100
Expand Down
41 changes: 41 additions & 0 deletions copy_input_files.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env bash
#
# Snapshot the classic index.status files into a timestamped directory under
# ./logs/input/, check that every copy has at least a minimum number of lines,
# and then repoint ./logs/input/current at the new snapshot.
#
# All output paths are relative, so run this from the directory that contains
# ./logs. Exits non-zero if a copy fails or a copied file is too short; in that
# case ./logs/input/current is left untouched.
set -euo pipefail

INPUT_BASE=/proj/ads/abstracts/
TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S)
OUTPUT_BASE=./logs/input/input.$TIMESTAMP/
CURRENT_LINK=./logs/input/current

# Each entry is <path relative to INPUT_BASE>:<minimum acceptable line count>.
FILES_INFO=(
  ast/load/current/index.status:2100000
  phy/load/current/index.status:9000000
  gen/load/current/index.status:1300000
  pre/load/current/index.status:1500000
)

# Delete old input files
if [ -d ./logs/input ]; then
  # Never remove the snapshot that "current" still points at: if recent runs
  # failed validation, it is the only usable copy left.
  CURRENT_NAME=""
  if [[ -L "$CURRENT_LINK" ]]; then
    CURRENT_TARGET=$(readlink "$CURRENT_LINK")
    CURRENT_TARGET=${CURRENT_TARGET%/}
    CURRENT_NAME=${CURRENT_TARGET##*/}
  fi
  # -prune stops find from descending into a directory it is about to delete;
  # without it find reports "No such file or directory", exits non-zero and
  # set -e aborts the whole script.
  find ./logs/input/ -mindepth 1 -maxdepth 1 -type d \
    -name "input.20*-*-*_*-*-*" ! -name "$CURRENT_NAME" -mtime +7 \
    -prune -exec rm -rf -- '{}' +
fi

# create local copies of files
for FILE_INFO in "${FILES_INFO[@]}"; do
  FILE=${FILE_INFO%%:*}
  mkdir -p -- "$(dirname -- "${OUTPUT_BASE}${FILE}")"
  echo "INFO: $(date) copying ${INPUT_BASE}${FILE} to ${OUTPUT_BASE}${FILE}"
  cp -v -- "${INPUT_BASE}${FILE}" "${OUTPUT_BASE}${FILE}"
done

# validate local files
for FILE_INFO in "${FILES_INFO[@]}"; do
  FILE=${FILE_INFO%%:*}
  MIN_LINES=${FILE_INFO##*:}
  echo "INFO: $(date) validating ${OUTPUT_BASE}${FILE} is at least ${MIN_LINES} lines long"
  # Arithmetic context tolerates the leading blanks some wc implementations print.
  LINE_COUNT=$(wc -l < "${OUTPUT_BASE}${FILE}")
  if (( LINE_COUNT < MIN_LINES )); then
    echo "ERROR: file $OUTPUT_BASE$FILE has less than ${MIN_LINES} lines, processing aborted"
    exit 1
  fi
done

# ingest code expects latest files in directory named current
echo "INFO: $(date) linking $PWD/logs/input/current to $PWD/$OUTPUT_BASE"
rm -fv -- "$CURRENT_LINK"
ln -fsv -- "$PWD/$OUTPUT_BASE" "$PWD/logs/input/current"

0 comments on commit e9d6339

Please sign in to comment.