-
Notifications
You must be signed in to change notification settings - Fork 2
/
utils.py
executable file
·157 lines (136 loc) · 4.71 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
from __future__ import print_function
import os, sys
from math import log10, floor
# Default number of digest characters taken for each path component
# (default for the ``num_chars`` parameter of get_path_from_digest).
default_num_chars = 2
# Default number of path components extracted from a digest
# (default for the ``num_levels`` parameter of get_path_from_digest).
default_num_levels = 2
def log(*args, **kwargs):
    """Print the given values to STDERR.

    All positional and keyword arguments are forwarded to ``print``
    unchanged; only the output stream is fixed to ``sys.stderr``.
    """
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
def get_path_from_digest(digest, num_chars=default_num_chars, num_levels=default_num_levels):
    """Cut the start of a digest into consecutive fixed-width pieces.

    :param digest: sliceable value to cut up (presumably a hash digest string)
    :param num_chars: width of each piece
    :param num_levels: how many pieces to produce
    :return: list of ``num_levels`` consecutive slices of ``digest``; slices
             past the end of ``digest`` come back shorter or empty
    """
    return [
        digest[level * num_chars:(level + 1) * num_chars]
        for level in range(num_levels)
    ]
def expand_path(path):
    """
    Make sure the directory part of a path exists, creating it if needed.

    Only the leading directory component of ``path`` is considered; the final
    component is never created.

    :param path: a filename or directory that might or might not exist
    """
    parent = os.path.dirname(path)
    # Nothing to do for a bare file name or when the directory is already there.
    if not parent or os.path.isdir(parent):
        return
    log('Creating directories for', parent)
    os.makedirs(parent, exist_ok=True)
# Formal charge -> charge flag used in the "ccc" column of a V2000 atom line.
_CHARGE_TO_FLAG = {-3: '7', -2: '6', -1: '5', 1: '3', 2: '2', 3: '1'}


def _format_atom_line(fields, flag):
    """Rebuild a V2000 atom line from its split fields, using ``flag`` as charge flag."""
    # x, y, z are 10 wide; one blank column; symbol is 3 wide and left-aligned
    # (so two-letter elements such as "Cl" keep their separator); mass
    # difference is 2 wide.
    head = "{:>10s}{:>10s}{:>10s} {:<3s}{:>2s}".format(*fields[:5])
    # Charge flag plus the remaining 3-wide columns (at most 16 fields in total).
    tail = "".join("{:>3s}".format(value) for value in [flag] + fields[6:16])
    return head + tail


def UpdateChargeFlagInAtomBlock(mb):
    """
    Copy the charges listed in the "M  CHG" property lines of a molblock into
    the charge-flag column of the corresponding atom lines.

    See https://sourceforge.net/p/rdkit/mailman/message/36425493/

    The molblock is expected to have an empty title line: a leading newline,
    if present, is dropped while parsing, so that the counts line is the third
    remaining line and the atom lines follow it.

    :param mb: the molblock text
    :return: the updated molblock, always starting with a newline and without
             a trailing newline
    :raises IndexError: if the molblock is too short or an atom line has too
                        few fields
    :raises ValueError: if the atom count or a charge record is not an integer
    """
    lines = mb.split("\n")
    if mb.startswith("\n"):
        del lines[0]
    atom_count = int(lines[2].split()[0])

    # Collect (atom index, charge) pairs from every charge property line; a
    # molblock may spread its charges over several such lines.
    charges = []
    for line in lines:
        tokens = line.split()
        if tokens[:2] != ["M", "CHG"]:
            continue
        # "M CHG <count>" is not needed for parsing; the pairs come afterwards.
        records = tokens[3:]
        for pos in range(0, len(records), 2):
            charges.append((int(records[pos]), int(records[pos + 1])))
    # Process in atom order so that any error messages come out in that order.
    charges.sort(key=lambda record: record[0])

    for atom_idx, charge in charges:
        # Indices outside the atom block are silently ignored.
        if not 1 <= atom_idx <= atom_count:
            continue
        flag = _CHARGE_TO_FLAG.get(charge)
        if flag is None:
            # Print the first line, then go on to the next charge.
            print("ERROR! " + str(lines[0]) + "unknown charge flag: " + str(charge))
            continue
        # Atom indices start at 1 and the atom lines start right after the
        # counts line (index 2), hence the offset of 2.
        row = atom_idx + 2
        lines[row] = _format_atom_line(lines[row].split(), flag)

    # Drop the empty element left by a trailing newline, but never a real line.
    if lines and lines[-1] == "":
        del lines[-1]
    return "\n" + "\n".join(lines)
def read_delimiter(input):
    """Translate a delimiter name into the delimiter value.

    The names ``tab``, ``comma`` and ``pipe`` map to the matching character
    and ``space`` maps to ``None``. Any other non-empty value is returned
    as-is, and an empty or missing value gives ``None``.

    :param input: delimiter name or literal delimiter
    :return: the delimiter, or ``None``
    """
    if not input:
        return None
    aliases = (
        ('tab', '\t'),
        ('space', None),
        ('comma', ','),
        ('pipe', '|'),
    )
    for name, delimiter in aliases:
        if name == input:
            return delimiter
    # Not a known name: treat the value itself as the delimiter.
    return input
def calc_geometric_mean(scores):
    """Return the geometric mean of the given scores.

    The scores are multiplied together and the n-th root of the product is
    taken, where n is ``len(scores)``.

    :param scores: sized collection of numbers
    :return: the geometric mean
    """
    product = 1.0
    for value in scores:
        product *= value
    return product ** (1.0 / len(scores))
def round_to_significant_number(val, sig):
    """
    Round the value to the specified number of significant figures.

    :param val: The number to round
    :param sig: Number of significant figures to keep
    :return: The rounded value; zero is returned unchanged
    """
    if val == 0:
        # log10(0) is undefined and would raise "math domain error";
        # zero needs no rounding.
        return val
    # Position of the leading digit relative to the decimal point.
    magnitude = int(floor(log10(abs(val))))
    return round(val, sig - magnitude - 1)
def is_type(value, typ):
    """Try to convert ``value`` with ``typ`` and report the outcome.

    :param value: the value to convert; may be ``None``
    :param typ: callable used for the conversion (e.g. ``int`` or ``float``)
    :return: a ``(status, result)`` tuple: ``(1, converted)`` on success,
             ``(-1, value)`` when the conversion raises, and ``(0, value)``
             when ``value`` is ``None``
    """
    if value is None:
        return 0, value
    try:
        return 1, typ(value)
    except Exception:
        # Only conversion failures are reported; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        return -1, value