forked from tsutterley/reference-toolkit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
move_journal_articles.py
171 lines (151 loc) · 7.09 KB
/
move_journal_articles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/usr/bin/env python
u"""
move_journal_articles.py (12/2020)
Moves journal articles and supplements to the reference local directory
Given the lead author, journal name, publication year and volume, copies a pdf
file (or any other file if supplement) to the reference path
The input file is only removed afterwards if --cleanup is set
CALLING SEQUENCE:
python move_journal_articles.py --author Rignot --year 2008 \
--journal "Nature Geoscience" --volume 1 ~/Downloads/ngeo102.pdf
will move the file to 2008/Rignot/Rignot_Nat._Geosci.-1_2008.pdf
INPUTS:
file to be moved into the reference path
COMMAND LINE OPTIONS:
-A X, --author X: lead author of publication
-J X, --journal X: corresponding publication journal
-Y X, --year X: corresponding publication year
-V X, --volume X: corresponding publication volume
-N X, --number X: Corresponding publication number
-S, --supplement: file is a supplemental file
-C, --cleanup: Remove input file after moving
PROGRAM DEPENDENCIES:
read_referencerc.py: Sets default file path and file format for output files
language_conversion.py: Outputs map for converting symbols between languages
NOTES:
Lists of journal abbreviations
https://github.com/JabRef/abbrv.jabref.org/tree/master/journals
If using author name with unicode characters: put in quotes and check
unicode characters with http://www.fileformat.info/
UPDATE HISTORY:
Updated 12/2020: using argparse to set command line options
Updated 07/2019: modifications for python3 string compatibility
Updated 07/2018: tilde-expansion of input journal file
Updated 10/2017: use data path and data file format from referencerc file
Written 05/2017
"""
from __future__ import print_function
import sys
import os
import re
import shutil
import inspect
import argparse
from read_referencerc import read_referencerc
from language_conversion import language_conversion
#-- locate this program on disk: filename is the path of the running source
#-- file and filepath is the absolute directory that contains it
filename = inspect.getframeinfo(inspect.currentframe())[0]
filepath = os.path.split(os.path.abspath(filename))[0]
#-- PURPOSE: look up the abbreviation of a journal within an abbreviation list
def _find_journal_abbreviation(journal, abbreviation_contents):
    """
    Find the abbreviation of a journal in the text of an abbreviation file

    The listing is searched for a line of the form
        Journal Name = Abbreviation
    ignoring case and the amount of spacing between words

    Arguments:
        journal: full journal name to look up
        abbreviation_contents: complete text of the abbreviation file

    Returns:
        the abbreviation string of the first matching line
        None if the journal is not listed (or the journal name is blank)
    """
    #-- escape each word so characters such as . ( ) + in a journal name are
    #-- matched literally instead of being read as regular expression syntax
    words = [re.escape(word) for word in journal.split()]
    if not words:
        return None
    #-- anchor to complete lines so the first and last lines of the listing
    #-- can match and the spacing can never run over into another entry
    pattern = r'^{0}[ \t]*=[ \t]*(.*?)[ \t\r]*$'.format(r'[ \t]+'.join(words))
    match = re.search(pattern, abbreviation_contents,
        flags=(re.IGNORECASE | re.MULTILINE))
    return match.group(1) if match else None

#-- PURPOSE: create directories and move a reference file after formatting
def move_journal_articles(fi,author,journal,year,volume,number,SUPPLEMENT,CLEANUP):
    """
    Copy an article (or supplement) into the reference directory tree

    The output is written to datapath/year/author (with an additional
    Supplemental directory for supplements) using the file name format
    read from the .referencerc file.  A numerical suffix is added by
    create_unique_filename() if the output name is already taken.

    Arguments:
        fi: path of the input file
        author: lead author of the publication
        journal: full name of the publication journal
        year: publication year (string)
        volume: publication volume (string)
        number: publication number (string)
        SUPPLEMENT: file is a supplemental file
        CLEANUP: remove the input file after it has been copied
    """
    #-- get reference filepath and reference format from referencerc file
    datapath,dataformat = read_referencerc(os.path.join(filepath,'.referencerc'))
    #-- get extension from file (assume pdf if extension cannot be extracted)
    fileExtension = os.path.splitext(fi)[1] or '.pdf'

    #-- file listing journal abbreviations modified from
    #-- https://github.com/JabRef/abbrv.jabref.org/tree/master/journals
    abbreviation_file = 'journal_abbreviations_webofscience-ts.txt'
    with open(os.path.join(filepath,abbreviation_file),'r') as f:
        abbreviation_contents = f.read()
    #-- if abbreviation not found: just use the whole journal name
    #-- else use the found journal abbreviation
    abbreviation = _find_journal_abbreviation(journal, abbreviation_contents)
    if abbreviation is None:
        print('Abbreviation for {0} not found'.format(journal))
        abbreviation = journal

    #-- replace unicode characters with combining unicode version
    #-- (python2 command line strings are bytes and are decoded first)
    if sys.version_info[0] == 2:
        author = author.decode('unicode-escape')
    #-- 1st column: latex, 2nd: combining unicode, 3rd: unicode, 4th: plain text
    for LV, CV, UV, PV in language_conversion():
        author = author.replace(UV, CV)

    #-- directory path for local file
    if SUPPLEMENT:
        directory = os.path.join(datapath,year,author,'Supplemental')
    else:
        directory = os.path.join(datapath,year,author)
    #-- recursively create the output directory
    #-- attempting the creation (rather than checking first) avoids failing
    #-- if the directory appears between the check and the creation
    try:
        os.makedirs(directory)
    except OSError:
        #-- only ignore the error if the directory is already there
        if not os.path.isdir(directory):
            raise

    #-- format used for saving articles using string formatter
    #-- 0) Author Last Name
    #-- 1) Journal Name
    #-- 2) Journal Abbreviation
    #-- 3) Publication Volume
    #-- 4) Publication Number
    #-- 5) Publication Year
    #-- 6) File Extension (will include period)
    #-- initial test case for output file (will add numbers if not unique in fs)
    args = (author, journal.replace(' ','_'), abbreviation.replace(' ','_'),
        volume, number, year, fileExtension)
    local_file = os.path.join(directory, dataformat.format(*args))

    #-- open input file and copy contents to local file
    with open(fi, 'rb') as f_in, create_unique_filename(local_file) as f_out:
        shutil.copyfileobj(f_in, f_out)
    #-- remove the input file (only after both files have been closed)
    if CLEANUP:
        os.remove(fi)
#-- PURPOSE: open a unique filename adding a numerical instance if existing
def create_unique_filename(filename):
    """
    Create and open a file only if it does not already exist

    If the name is taken, a counter (-1, -2, ...) is inserted between the
    base name and the extension until an unused name is found.  The name
    of the created file is printed with the home directory shown as ~

    Arguments:
        filename: full path of the preferred output file

    Returns:
        binary read/write file object for the newly created file

    Raises:
        OSError: if the file cannot be created for any reason other than
            the name already existing (missing directory, permissions, ...)
    """
    #-- imported here so the function does not depend on a module-level import
    import errno
    #-- split filename into fileBasename and fileExtension
    fileBasename, fileExtension = os.path.splitext(filename)
    #-- O_EXCL makes the creation fail if the file already exists
    #-- O_BINARY is only defined on Windows: requested when available as the
    #-- descriptor is used for binary data
    flags = os.O_CREAT | os.O_EXCL | os.O_RDWR | getattr(os, 'O_BINARY', 0)
    #-- create counter to add to the end of the filename if existing
    counter = 1
    while True:
        try:
            #-- open file descriptor only if the file doesn't exist
            fd = os.open(filename, flags)
        except OSError as exc:
            #-- only a name collision moves on to the next candidate name
            #-- any other failure would repeat for every candidate (forever)
            if exc.errno != errno.EEXIST:
                raise
        else:
            print(filename.replace(os.path.expanduser('~'),'~'))
            return os.fdopen(fd, 'wb+')
        #-- new filename adds the counter between fileBasename and fileExtension
        filename = u'{0}-{1:d}{2}'.format(fileBasename, counter, fileExtension)
        counter += 1
#-- main program that calls move_journal_articles()
def main():
    """
    Parse the command line options and copy the article to the reference path

    The author, journal and year options are required as they are used to
    build the output directory and file name: without them argparse exits
    with a usage message rather than the program failing part way through
    """
    #-- Read the system arguments listed after the program
    parser = argparse.ArgumentParser(
        description="""Moves a journal article to the reference local directory
            """
    )
    #-- command line parameters
    #-- input file with tilde-expansion and conversion to an absolute path
    parser.add_argument('infile',
        type=lambda p: os.path.abspath(os.path.expanduser(p)),
        help='article file to be copied into the reference path')
    parser.add_argument('--author','-A',
        type=str, required=True,
        help='Lead author of publication')
    parser.add_argument('--journal','-J',
        type=str, required=True,
        help='Corresponding publication journal')
    parser.add_argument('--year','-Y',
        type=str, required=True,
        help='Corresponding publication year')
    #-- volume and number are optional: blank strings if not entered
    parser.add_argument('--volume','-V',
        type=str, default='', help='Corresponding publication volume')
    parser.add_argument('--number','-N',
        type=str, default='', help='Corresponding publication number')
    parser.add_argument('--supplement','-S',
        default=False, action='store_true',
        help='File is an article supplement')
    parser.add_argument('--cleanup','-C',
        default=False, action='store_true',
        help='Remove input file after moving')
    args = parser.parse_args()

    #-- move article file to reference directory
    move_journal_articles(args.infile, args.author, args.journal, args.year,
        args.volume, args.number, args.supplement, args.cleanup)
#-- call main() only when this file is executed as a script
if __name__ == '__main__':
    main()