-
Notifications
You must be signed in to change notification settings - Fork 1
/
Satz_WK5_pubmedapi.py
118 lines (94 loc) · 2.74 KB
/
Satz_WK5_pubmedapi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# -*- coding: utf-8 -*-
"""
Created on Sat Oct 1 15:27:59 2016
@author: alexandersatz
"""
from Bio import Entrez
from Bio import Medline
import re
# Query parameters: how many PubMed hits to download and the search term.
MAX_COUNT = 17
TERM = 'Crispr editing'

print('Getting {0} publications containing {1}...'.format(MAX_COUNT, TERM))

# NOTE(review): this literal looks like a redaction/scrape placeholder rather
# than a real address -- confirm and set a valid contact e-mail before running.
Entrez.email = '[email protected]'

# Search PubMed for TERM. The handle is closed as soon as the reply has been
# parsed so the connection is not left open.
h = Entrez.esearch(db='pubmed', retmax=MAX_COUNT, term=TERM)
try:
    result = Entrez.read(h)
finally:
    h.close()

print('Total number of publications containing {0}: {1}'.format(TERM, result['Count']))

# Download the matching records in MEDLINE text format.
ids = result['IdList']
h = Entrez.efetch(db='pubmed', id=ids, rettype='medline', retmode='text')
records = Medline.parse(h)

pubs = []   # one [auth, af, d] entry per record (see loop below)
perm = []   # every parsed record, kept untouched

## Acquire Author and Affiliation Data for each record
try:
    # The records are consumed inside the try block, so the handle must stay
    # open until the loop has finished.
    for record in records:
        perm.append(record)
        au = record.get('AU', '?')   # authors; '?' when the field is absent
        # The author list is stored once per author (len(au) references to the
        # same object). Later code reads the authors as entry[0][0], so this
        # shape is kept as-is.
        auth = [au] * len(au)
        af = record.get('AD', '?')   # affiliation field; '?' when absent
        d = record.get('EDAT', '?')  # EDAT field; '?' when absent
        pubs.append([auth, af, d])
finally:
    h.close()
# Assign authors to each other (source-target pairs).
# audict maps "<source>WWW<target>" to the number of times that pair occurs.
# For every publication each author is paired with itself and with every
# author listed after it.
audict = {}
for entry in pubs:
    names = entry[0][0]  # the author list sits at entry[0][0]
    for position, source in enumerate(names):
        prefix = str(source) + 'WWW'
        for target in names[position:]:
            edge = prefix + str(target)
            audict[edge] = audict.get(edge, 0) + 1
##Assign affiliations to each author
# auaffil maps an author name to the shortest affiliation fragment that has
# been paired with that author so far.
auaffil = {}
# Not used anywhere in the visible script; kept so the module's globals are
# unchanged.
count = 0
bad = 0
for x in pubs:
    au = x[0][0]  # the author list sits at entry[0][0]
    af = x[1]     # raw affiliation field of the record

    # Break the affiliation text into candidate fragments. A value that is
    # not string-like is taken as an already-split sequence.
    # NOTE(review): whether 'AD' arrives as one string or as a list depends on
    # the parser -- confirm for the Biopython version in use.
    if hasattr(af, 'split'):
        fragments = af.split(". ")
    else:
        fragments = list(af)

    # Keep only fragments of 80..400 characters. A new list is built instead
    # of calling remove() while iterating, which skipped the element that
    # followed every removal and so let out-of-range fragments through.
    af = [piece for piece in fragments if 80 <= len(piece) <= 400]

    # Pair authors and fragments by position; the shorter sequence decides how
    # many pairs are made.
    # NOTE(review): after filtering, fragment i is not guaranteed to belong to
    # author i -- the positional pairing is a heuristic.
    for author, affiliation in zip(au, af):
        known = auaffil.get(author)
        if known is None or len(known) > len(affiliation):
            auaffil[author] = affiliation
# Write the co-author pairs as "Source,Target" rows, repeating each row once
# per counted occurrence of the pair.
# The with-block closes the file even if a write fails, and .items() works on
# both Python 2 and Python 3 (.iteritems() exists only on Python 2).
with open('saveCrisprSearch.txt', 'w') as f:
    f.write('Source,Target\n')
    for k, v in audict.items():
        s = k.split('WWW')  # key layout is "<source>WWW<target>"
        row = s[0] + "," + s[1] + "\n"
        for _ in range(v):
            f.write(row)
##in final, assign each author to USA or not.
# "usa" must stand alone as a word; a plain substring test also fires on
# names such as "Lausanne" or "Busan".
_USA_WORD = re.compile(r'\busa\b')

# Phrases that count as a USA marker wherever they occur in the text.
# NOTE(review): 'america' also matches e.g. "South America"; it is kept
# because the original check accepted it -- confirm whether that is wanted.
_USA_PHRASES = ('u.s.a', 'united states', 'america')


def hasUSA(line):
    """Return True if the affiliation text *line* mentions the USA.

    The check is case-insensitive. It succeeds when "usa" occurs as a whole
    word, or when any of "u.s.a", "united states" or "america" occurs
    anywhere in the text. An empty string yields False.
    """
    line = line.lower()
    if _USA_WORD.search(line):
        return True
    return any(phrase in line for phrase in _USA_PHRASES)
# Write one "Author,InUSA" row per author: 1 when the stored affiliation
# mentions the USA according to hasUSA(), otherwise 0.
# The with-block closes the file even if a write fails, and .items() works on
# both Python 2 and Python 3 (.iteritems() exists only on Python 2).
with open('saveCrisprAuthorAffilUSA.txt', 'w') as f:
    f.write('Author,InUSA\n')
    for k, v in auaffil.items():
        flag = "1" if hasUSA(v) else "0"
        f.write(k + "," + flag + "\n")