Skip to content

Commit

Permalink
fix index count & add subset_pdb_to_fasta,_gui.py and executable
Browse files Browse the repository at this point in the history
  • Loading branch information
olgatsiouri1996 committed Nov 22, 2021
1 parent 5cd3908 commit 7bf1618
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 20 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# bioinfo_gui_scripts [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5703709.svg)](https://doi.org/10.5281/zenodo.5703709)
# bioinfo_gui_scripts
Python scripts that can be easily transformed into GUI programs for wet lab scientists to use (see the wiki page for documentation and dependencies)
## GUI standalone programs (.exe)
1. DSSP statistics GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4838997.svg)](https://doi.org/10.5281/zenodo.4838997)
Expand All @@ -19,3 +19,5 @@ python scripts that can be easily transformed to gui programs for wet lab scient
16. tabular file to single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5652249.svg)](https://doi.org/10.5281/zenodo.5652249)
17. fasta formatter GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5703665.svg)](https://doi.org/10.5281/zenodo.5703665)
18. chain pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5706468.svg)](https://doi.org/10.5281/zenodo.5706468)
19. subset pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5718967.svg)](https://doi.org/10.5281/zenodo.5718967)

39 changes: 26 additions & 13 deletions fasta_manipulation/trim_multifasta_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,34 @@
from gooey import *
from Bio import SeqIO
# input parameters
@Gooey(required_cols=5, program_name='trim multifasta', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
@Gooey(required_cols=3, program_name='trim multifasta', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
def main():
ap = GooeyParser()
ap.add_argument("-in", "--input", required=True, widget='FileChooser', help="input fasta file")
ap.add_argument("-start", "--start", required=True, type=int, help="region to start writing the fasta file(starts from 0)")
ap.add_argument("-stop", "--stop", required=True, type=int, help="region to stop writing the fasta file(it can also be a negative number to remove nucleotides from the end of the sequence)")
ap.add_argument("-out", "--output", required=True, widget='FileSaver', help="output fasta file")
args = vars(ap.parse_args())
# main
sequences = [] # setup an empty list
for record in SeqIO.parse(args['input'], "fasta"):
# add this record to the list
sequences.append(record[args['start']:args['stop']])
ap = GooeyParser()
ap.add_argument("-in", "--input", required=True, widget='FileChooser', help="input fasta file")
ap.add_argument("-start", "--start", required=False, default=1, type=int, help="region to start writing the fasta file")
ap.add_argument("-stop", "--stop", required=True, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)")
ap.add_argument("-out", "--output", required=True, widget='FileSaver', help="output fasta file")
args = vars(ap.parse_args())
# main
sequences = [] # setup an empty list
# Convert the 1-based -start given by the user into a 0-based slice index.
if args['start'] <= 0:
    print("-start parameter must be a positive integer")
    # `exit()` is injected by the `site` module and may be missing in frozen
    # executables, so terminate with SystemExit directly (same exit code).
    raise SystemExit(1)
seq_start = args['start'] - 1
# Python slices exclude their end index, so a positive 1-based *inclusive*
# -stop is already the correct slice end (subtracting 1 would drop the last
# requested position). A negative -stop keeps Python semantics and trims
# that many positions from the end of each sequence.
seq_end = args['stop']
# iterate for each record
for record in SeqIO.parse(args['input'], "fasta"):
# add this record to the list
sequences.append(record[seq_start:seq_end])

SeqIO.write(sequences, args['output'], "fasta")
# export to fasta
SeqIO.write(sequences, args['output'], "fasta")

if __name__ == '__main__':
main()
23 changes: 17 additions & 6 deletions fasta_manipulation/trim_singlefastas_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,31 @@
from gooey import *
from Bio import SeqIO
# input parameters
@Gooey(required_cols=3, program_name='trim multiple single-fasta files', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
@Gooey(required_cols=2, program_name='trim multiple single-fasta files', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
def main():
ap = GooeyParser()
ap.add_argument("-start", "--start_fasta", required=True, type=int, help="region to start writing the fasta file(min number 0)")
ap.add_argument("-stop", "--stop_fasta", required=True, type=int, help="region to stop writing the fasta file(negative number to remove nucleotides from the end of the sequence")
ap.add_argument("-start", "--start_fasta", required=False, default=1, type=int, help="region to start writing the fasta file")
ap.add_argument("-stop", "--stop", required=True, type=int, help="region to stop writing the fasta file(it can be both a positive and a negative number)")
ap.add_argument("-dir", "--directory", required=True, type=str, widget='DirChooser', help="directory to search for fasta files")
args = vars(ap.parse_args())
# main
# import each fasta file from a working directory of choice
# main
# Convert the 1-based start given by the user into a 0-based slice index.
# The option is declared as ("-start", "--start_fasta"), so argparse stores
# it under the key 'start_fasta'; reading args['start'] raises KeyError.
if args['start_fasta'] <= 0:
    print("-start parameter must be a positive integer")
    # `exit()` is injected by the `site` module and may be missing in frozen
    # executables, so terminate with SystemExit directly (same exit code).
    raise SystemExit(1)
seq_start = args['start_fasta'] - 1
# Python slices exclude their end index, so a positive 1-based *inclusive*
# -stop is already the correct slice end (subtracting 1 would drop the last
# requested position). A negative -stop keeps Python semantics and trims
# that many positions from the end of the sequence.
seq_end = args['stop']
# import each fasta file from a working directory of choice
for filename in sorted(os.listdir(os.chdir(args['directory']))):
if filename.endswith(".fa") or filename.endswith(".fasta"):
# read each file, trim and create SeqRecord to export
record = SeqIO.read(filename, "fasta")
sequence = record[args['start_fasta']:args['stop_fasta']]
sequence = record[seq_start:seq_end]
# export to fasta
SeqIO.write(sequence, "".join([filename.split(".")[0],"_","trimmed",".fasta"]), "fasta")

Expand Down
62 changes: 62 additions & 0 deletions pdb_corner/subset_pdb_to_fasta_gui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# python3
import os
from gooey import *
from Bio.PDB import *
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
# input parameters
@Gooey(required_cols=2, program_name='subset pdb to fasta', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
def main():
    """Write a residue range of one PDB chain to a single-record FASTA file.

    The chain sequence is built from the polypeptides that PPBuilder finds in
    the selected model/chain. Positions are 1-based: ``-start`` is the first
    amino acid written and a positive ``-end`` is the last amino acid written
    (inclusive). A negative ``-end`` is used as a Python slice end, i.e. it
    removes that many amino acids from the end of the chain. With ``-pro 2``
    the end is set to the last amino acid of the chain.

    The output file is created in the current working directory and is named
    ``<pdb_id>_<chain>_<start>_<end>.fasta``; the record id uses the same
    label.

    Exits with status 1 (after printing a message) when ``-start`` is not
    positive, when ``-end`` is missing for program 1, or when no amino acids
    are found in the selected chain.
    """
    ap = GooeyParser()
    ap.add_argument("-pdb", "--pdb", required=True, widget='FileChooser', help="input pdb file")
    ap.add_argument("-model", "--model",required=False, default=0, help="model from pdb file to select(integer). Default is 0(1 model only)")
    ap.add_argument("-chain", "--chain", required=True, help="chain from pdb file to select")
    ap.add_argument("-start", "--start", required=False, default=1, type=int, help="amino acid in chain to start writing the fasta file")
    ap.add_argument("-end", "--end", required=False, type=int, help="amino acid in chain to end writing the fasta file")
    ap.add_argument("-pro", "--program", required=False,default=1, type=int, help="program to choose 1) add both start and end location 2) the end location with be that of the latest amino acid in the chain. Default is 1")
    args = vars(ap.parse_args())

    # Validate the range arguments before doing any parsing work.
    # SystemExit is raised directly because `exit()` is injected by the
    # `site` module and may be missing in frozen executables.
    if args['start'] <= 0:
        print("-start parameter must be a positive integer")
        raise SystemExit(1)
    if args['program'] == 1 and args['end'] is None:
        # -end is optional on the command line but program 1 cannot run
        # without it (comparing None with an int would raise TypeError).
        print("-end parameter is required when program 1 is selected")
        raise SystemExit(1)

    # select chain
    parser = PDBParser()
    structure = parser.get_structure("name", args['pdb'])
    chain = structure[int(args['model'])][args['chain']]

    # retrieve the pdb id of the input file (file name up to the first dot)
    pdb_id = os.path.basename(args['pdb']).split(".")[0]

    # retrieve chain amino acids; build_peptides may yield several
    # polypeptides for one chain, so concatenate all of them instead of
    # keeping only the last one
    ppb = PPBuilder()
    aa_chain = "".join(str(pp.get_sequence()) for pp in ppb.build_peptides(chain))
    if not aa_chain:
        print("no amino acids found in the selected chain")
        raise SystemExit(1)

    # 1-based start -> 0-based slice index
    aa_start = args['start'] - 1
    if args['program'] == 1:
        # A positive 1-based inclusive end equals the exclusive slice end;
        # a negative end keeps Python semantics (trim from the chain end).
        end_label = args['end']
    else:
        # program 2: run through the last amino acid of the chain
        end_label = len(aa_chain)
    aa_end = end_label

    # subset based on aa in chain
    sub_seq = aa_chain[aa_start:aa_end]

    # export to fasta
    label = "_".join([str(pdb_id), str(args['chain']), str(args['start']), str(end_label)])
    record = SeqRecord(Seq(sub_seq), id=label, description="")
    SeqIO.write(record, "".join([label, ".fasta"]), "fasta")


if __name__ == '__main__':
    main()

0 comments on commit 7bf1618

Please sign in to comment.