-
Notifications
You must be signed in to change notification settings - Fork 0
/
Prep_fasta_files_for_PAML.py
executable file
·69 lines (55 loc) · 1.97 KB
/
Prep_fasta_files_for_PAML.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
'''
Name: Sara K Nicholson
Title: FASTA FILE PREP FOR PAL2NAL+PAML
Description: Shorten sequence names, remove any gaps & translate sequences
'''
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
# CHANGE SEQ NAMES
# Read the input FASTA and rebuild every record with a shortened ID (the
# first 8 characters of the original ID) and fixed name/description labels.
# The sequence itself is carried over unchanged.
# NOTE(review): truncating IDs to 8 characters can make distinct records
# share an ID -- confirm the input IDs are unique in their first 8 characters.
seq_records = SeqIO.parse("/home/sara/Marilia/individuals/proteins/MSAs/hypermut_removed/nefW7.fa", "fasta")
fasta = []
for record in seq_records:
    record_final = SeqRecord(
        record.seq,
        id=record.id[0:8],
        name="NEF",
        description="WEEK7",
    )
    # Echo each renamed record so the run can be checked by eye.
    print(record_final)
    fasta.append(record_final)
# SAVE FILE
SeqIO.write(fasta, "/home/sara/Marilia/individuals/proteins/MSAs/hypermut_removed/Analyzed/nefW7_nc.fa", "fasta")
# REMOVE GAPS FROM SEQS
# Re-read the renamed FASTA and write each record back out, one at a time,
# with every "-" character removed from its sequence.
with open("/home/sara/Marilia/individuals/proteins/MSAs/hypermut_removed/Analyzed/nefW7_nc_ng.fa", "w") as ungapped_out:
    renamed_records = SeqIO.parse("/home/sara/Marilia/individuals/proteins/MSAs/hypermut_removed/Analyzed/nefW7_nc.fa", "fasta")
    for entry in renamed_records:
        ungapped_seq = entry.seq.replace("-", "")
        entry.seq = ungapped_seq
        SeqIO.write(entry, ungapped_out, "fasta")
# TRANSLATE SEQUENCES
def pad_seq(sequence):
    """Return ``sequence`` padded with trailing ``N`` to a multiple of 3.

    A sequence whose length is already a multiple of 3 (including an empty
    one) is returned as-is; otherwise one or two ``N`` characters are
    appended as a ``Seq``.
    """
    overhang = len(sequence) % 3
    if overhang == 0:
        return sequence
    filler = Seq('N' * (3 - overhang))
    return sequence + filler
# Read the gap-free FASTA and translate each sequence, keeping the record ID
# and attaching fixed name/description labels. Each sequence is used as read
# from the file (forward strand, starting at its first base).
seq_records = SeqIO.parse("/home/sara/Marilia/individuals/proteins/MSAs/hypermut_removed/Analyzed/nefW7_nc_ng.fa", "fasta")
fasta_aa = []
for record in seq_records:
    record_final = SeqRecord(
        # Pad with N to a multiple of 3 before translating (see pad_seq).
        pad_seq(record.seq).translate(),
        id=record.id,
        name="NEF",
        description="w7",
    )
    # Echo each translated record so the run can be checked by eye.
    print(record_final)
    fasta_aa.append(record_final)
# WRITE TRANSLATED SEQS TO FILE
SeqIO.write(fasta_aa, "/home/sara/Marilia/individuals/proteins/MSAs/hypermut_removed/Analyzed/nefW7_AA.fa", "fasta")