forked from enormandeau/Scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fasta2paml.py
executable file
·148 lines (123 loc) · 4.2 KB
/
fasta2paml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Preparing a file for codeml in PAML from a fasta alignment file
# Module metadata. These values are interpolated into the text printed by
# help(), and the version and revision date are also echoed by main().
__authors__ = "Eric Normandeau"
__program_name__ = "fasta2paml"
# Version components are kept as strings so they can be joined directly.
__version_info__ = ('0', '0', '1')
# Dotted version string built from the components above ("0.0.1").
__version__ = '.'.join(__version_info__)
__revision_date__ = "2010-04-20"
# Importing modules
import os
import sys
import getopt
import platform
from Bio import SeqIO
# Function definitions
def help():
    """Print the man-page style usage text of this program.

    The text is built from the module metadata (program name, authors,
    version and revision date). It contains ANSI escape sequences for bold
    and underlined text; when platform.system() reports 'Windows' those
    sequences are removed before printing.
    """
    name = __program_name__
    text = """
%s(1) User Commands %s(1)
\033[1mNAME\033[0m
\t%s - prepare file for PAML (codeml) from a fasta file
\033[1mSYNOPSIS\033[0m
\t\033[1mpython %s \033[0m[\033[4mOPTION\033[0m]... [\033[4mFILE\033[0m]...
\033[1mDESCRIPTION\033[0m
\tCreate a PAML (codeml) allele file from a fasta alignment file.
\t%s uses the Biopython library to parse a fasta file
\tcontaining multiple alignements, each with the same number of
\tsequences, and create the allele file needed by codeml from PAML.
\t\033[1m-h, --help\033[0m
\t\tDisplay the help of this program
\t\033[1m-i, --input\033[0m
\t\tInput file in fasta format
\t\033[1m-o, --output\033[0m
\t\tOutput file name
\t\033[1m-n, --numseq\033[0m
\t\tNumber of sequences per group
\033[1mAUTHORS\033[0m
\t%s
%s %s %s %s(1)
"""%(name, name, name, name, name, __authors__, name, __version__, __revision_date__, name)
    if platform.system() == 'Windows':
        # Strip the bold / reset / underline escape sequences on Windows.
        for escape_code in ("\033[1m", "\033[0m", "\033[4m"):
            text = text.replace(escape_code, "")
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(text)
def create_paml_files(in_file, out_file, numseq):
    """Write groups of N fasta sequences to a file in PAML (codeml) format.

    Records are read from ``in_file`` in order and collected into
    consecutive groups of ``numseq`` sequences. Every complete group is
    written to ``paml_ready_files/<out_file>`` as:

    * one header line: number of sequences, a tab, then the length of the
      first sequence of the group;
    * for each record, an ``alleleN`` name line followed by the sequence.

    A trailing group with fewer than ``numseq`` sequences is reported with
    a warning and is not written.

    Args:
        in_file: Path of the fasta file to read.
        out_file: Name of the result file, created inside the
            ``paml_ready_files`` folder of the current working directory.
        numseq: Number of sequences per group; must be at least 1.

    Raises:
        ValueError: If ``numseq`` is smaller than 1.
    """
    if numseq < 1:
        # A group size below 1 can never be filled; fail before touching disk.
        raise ValueError("numseq must be a positive integer")

    out_folder = "paml_ready_files"
    out_path = os.path.join(out_folder, out_file)
    if not os.path.isdir(out_folder):
        os.makedirs(out_folder)
        print("Created '%s' folder to put result files in" % out_folder)

    # Nested with-blocks close both files even if parsing or writing fails.
    with open(in_file) as fasta_handle:
        with open(out_path, "w") as paml_handle:
            group = []
            for record in SeqIO.parse(fasta_handle, 'fasta'):
                group.append(record)
                if len(group) < numseq:
                    continue
                # The group is complete: emit the header, then each allele.
                paml_handle.write("%d\t%d\n" % (len(group), len(group[0].seq)))
                for index, member in enumerate(group):
                    paml_handle.write("allele" + str(index + 1) + "\n")
                    # str(seq) replaces Seq.tostring(), which newer
                    # Biopython releases no longer provide.
                    paml_handle.write(str(member.seq) + "\n")
                group = []

    if group:
        # Leftover records did not fill a whole group and were not written.
        print("WARNING! Last group does not contain %s sequences." % numseq)
    else:
        print("All sequences have been treated successfully.")
# Main function
def _exit_with_error(*lines):
    """Print each message line, then terminate with a non-zero exit status."""
    for line in lines:
        print(line)
    sys.exit(1)


def main():
    """Parse the command line, validate the options and build the PAML file.

    Recognised options: -h/--help, -i/--input, -o/--output, -n/--numseq.
    With -h the help text is printed and the program exits with status 0.
    Any invalid or missing option prints an explanatory message and exits
    with status 1. Otherwise the version banner is printed and
    create_paml_files() is called with the validated values.
    """
    try:
        opts, _unused_args = getopt.getopt(
            sys.argv[1:], "hi:o:n:",
            ["help", "input=", "output=", "numseq="])
    except getopt.GetoptError:
        _exit_with_error("Input error. Use -h for help")

    # Every option starts unset so a missing one is detected explicitly.
    input_file = None
    output_file = None
    numseq = None
    for option, value in opts:
        if option in ('-h', '--help'):
            help()
            sys.exit(0)
        elif option in ('-i', '--input'):
            input_file = value
        elif option in ('-o', '--output'):
            output_file = value
        elif option in ('-n', '--numseq'):
            numseq = value

    # The input file must be given and must be openable for reading.
    input_ok = False
    if input_file is not None:
        try:
            with open(input_file):
                input_ok = True
        except (IOError, OSError):
            input_ok = False
    if not input_ok:
        _exit_with_error(
            "Input Error: No input file specified or file not found.",
            "Use -h for help.")

    if output_file is None:
        _exit_with_error(
            "Input Error: No output file specified.",
            "Use -h for help.")

    # A missing (None) or non-numeric value is treated like an invalid count.
    try:
        numseq = int(numseq)
    except (TypeError, ValueError):
        numseq = 0
    if numseq < 1:
        _exit_with_error(
            "Input Error: Enter number of sequences as a positive integer.")

    print("Using version: %s of %s" % (__version__, sys.argv[0]))
    print("Last revision: %s" % __revision_date__)
    print("")
    create_paml_files(input_file, output_file, numseq)
    print("")
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()