-
Notifications
You must be signed in to change notification settings - Fork 0
/
Find_Stop_Codons.py
executable file
·51 lines (42 loc) · 1.19 KB
/
Find_Stop_Codons.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
"""
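Description: Find sequences with stop codons in the open reading frame of a FASTA file,
then write a copy of a second FASTA file with those sequences removed.
Run on command line:
"python3 Find_Stop_Codons.py /path/to/orfs.fa /path/to/input.fa /path/to/output.fa"
Prints the IDs of the records in the first file that contain in-frame stop codons.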
Name: Sara Nicholson
Date: 27 April 2024
"""
import numpy
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import sys
import re
import os
def findStopCodons(orf):
    """Return the 1-based start positions of in-frame stop codons in ``orf``.

    The sequence is read in frame 0 (codons start at offsets 0, 3, 6, ...)
    and each codon is compared against TAA, TAG and TGA. Matching is
    case-insensitive, so lower-case (e.g. soft-masked) sequences are handled.

    Args:
        orf: Nucleotide sequence, either a ``str`` or an object whose
            ``str()`` is the sequence text (such as a Biopython ``Seq``).

    Returns:
        A list of 1-based positions of the first base of every stop codon,
        in ascending order; empty when none is found. Every codon is
        checked, including the last one. A trailing fragment shorter than
        three bases can never match.
    """
    stop_codons = ("TAA", "TAG", "TGA")
    # Normalise once so the per-codon check is a plain string comparison.
    sequence = str(orf).upper()
    return [
        start + 1
        for start in range(0, len(sequence), 3)
        if sequence[start:start + 3] in stop_codons
    ]
# The first command-line argument is the FASTA file to scan; fail with a
# readable message instead of an IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit(
        "usage: python3 Find_Stop_Codons.py <scan.fa> <input.fa> <output.fa>"
    )

# Collect the ID of every record whose sequence contains at least one
# in-frame stop codon, then report them.
records = SeqIO.parse(str(sys.argv[1]), "fasta")
stops = [record.id for record in records if findStopCodons(record.seq)]
print(stops)
#### update to remove stops from fasta
# This step needs the FASTA to filter (argument 2) and the output path
# (argument 3); fail with a readable message instead of an IndexError.
if len(sys.argv) < 4:
    sys.exit(
        "usage: python3 Find_Stop_Codons.py <scan.fa> <input.fa> <output.fa>"
    )

# A set keeps the per-record membership test O(1) for long ID lists.
flagged_ids = set(stops)
records = SeqIO.parse(str(sys.argv[2]), "fasta")
# Keep only the ID and sequence of each retained record. The description is
# set to "" explicitly: SeqRecord's default placeholder description
# ("<unknown description>") would otherwise be written into every header.
fasta = [
    SeqRecord(record.seq, id=record.id, description="")
    for record in records
    if record.id not in flagged_ids
]

# SAVE FILE
SeqIO.write(fasta, str(sys.argv[3]), "fasta")