-
Notifications
You must be signed in to change notification settings - Fork 0
/
pdb_subset.py
executable file
·146 lines (111 loc) · 3.95 KB
/
pdb_subset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/usr/bin/env python
# Copyright 2007, Michael J. Harms
# This program is distributed under General Public License v. 3. See the file
# COPYING for a copy of the license.
__description__ = \
"""
pdb_subset.py
Takes a subset of residues/chains from a pdb file. All non-atom entries are
left intact.
"""
__author__ = "Michael J. Harms"
__date__ = "080121"
class PdbSubsetError(Exception):
"""
General error class for this module.
"""
pass
def pdbSubset(pdb,chain,residues,
atom_list=["ATOM ","ANISOU","HETATM","TER "]):
"""
Grabs a subset of a pdb file, spitting out the pdb, the chains, and the
actual min and max in the pdb file.
"""
if chain == "all":
chains = ()
chain_string = "all"
else:
chains = tuple(chain.split(","))
chain_string = "".join(chains)
seen_chains = []
out = []
for line in pdb:
# only look at records indicated by atom_list
if line[0:6] not in atom_list:
out.append(line)
continue
# Grab only residues belonging to specified chains. If chains is empty
# take every chain
if chains == () or line[21:22] in chains:
if line[21:22] not in seen_chains:
seen_chains.append(line[21:22])
# Grab only residues above minimum and below maximum
if residues[0] == 0 and residues[1] == 0:
out.append(line)
elif int(line[22:26]) >= residues[0] and \
int(line[22:26]) <= residues[1]:
out.append(line)
else:
continue
else:
continue
if len([l for l in out if l[0:6] in atom_list]) == 0:
err = "No residues in pdb meet subset criteria.\n"
raise PdbSubsetError(err)
seen_chains.sort()
input_chains = list(chains)
input_chains.sort()
if len(input_chains) > 0:
if seen_chains != input_chains:
missing_chains = ",".join([c for c in input_chains
if c not in seen_chains])
err = "pdb file did not have chain(s): %s.\n" % missing_chains
raise PdbSubsetError(err)
# Determine the actual min and max in the pdb file
residues = [0,0]
all_residues = [int(l[22:26]) for l in out if l[0:6] in atom_list]
residues[0] = min(all_residues)
residues[1] = max(all_residues)
return out, chain_string, residues
def main():
"""
If called from command line.
"""
import sys
from helper import cmdline
# Parse command line
cmdline.initializeParser(__description__,__date__)
cmdline.addOption(short_flag="c",
long_flag="chain",
action="store",
default="all",
help="chain to select (separte mulitiple by commas)",
nargs=1,
type=str)
cmdline.addOption(short_flag="r",
long_flag="residues",
default=[0,0],
action="store",
help="residues to select",
nargs=2,
type=int)
file_list, options = cmdline.parseCommandLine()
for pdb_file in file_list:
# Attempt to strip .pdb extension
try:
pdb_id = pdb_file[:pdb_file.index(".pdb")]
except IndexError:
pdb_id = pdb_file
# Read file
f = open(pdb_file,'r')
pdb = f.readlines()
f.close()
# Pop out subset
pdb, chain, residues = pdbSubset(pdb,options.chain,options.residues)
# Write to file
out_file = "%s_%s_%i-%i.pdb" % (pdb_id,chain,residues[0],residues[1])
g = open(out_file,'w')
g.writelines(pdb)
g.close()
if __name__ == "__main__":
main()