Skip to content

Commit

Permalink
support of multiple width values 1 per fasta
Browse files Browse the repository at this point in the history
  • Loading branch information
olgatsiouri1996 committed Dec 23, 2021
1 parent c5fff5e commit 813a05a
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 5 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# bioinfo_gui_scripts [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5795514.svg)](https://doi.org/10.5281/zenodo.5795514)
# bioinfo_gui_scripts
Python scripts that can be easily transformed to GUI programs for wet lab scientists to use (see the wiki page for documentation and dependencies)
## GUI standalone programs (.exe)
1. DSSP statistics GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4838997.svg)](https://doi.org/10.5281/zenodo.4838997)
Expand All @@ -15,7 +15,7 @@ python scripts that can be easily transformed to gui programs for wet lab scient
12. tab to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5703366.svg)](https://doi.org/10.5281/zenodo.5703366)
13. single-fastas to tabular GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5672075.svg)](https://doi.org/10.5281/zenodo.5672075)
14. tabular file to single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5652249.svg)](https://doi.org/10.5281/zenodo.5652249)
15. fasta formatter GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5786883.svg)](https://doi.org/10.5281/zenodo.5786883)
15. fasta formatter GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5799390.svg)](https://doi.org/10.5281/zenodo.5799390)
16. chain pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5706468.svg)](https://doi.org/10.5281/zenodo.5706468)
17. subset pdb to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5725658.svg)](https://doi.org/10.5281/zenodo.5725658)
18. trim fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5794123.svg)](https://doi.org/10.5281/zenodo.5794123)
Expand Down
22 changes: 19 additions & 3 deletions fasta_manipulation/fasta_formatter_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@
import os
from gooey import *
from Bio import SeqIO
import pandas as pd
import sys
# input parameters
@Gooey(required_cols=0, program_name= 'fasta formatter', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
def main():
ap = GooeyParser(description="changes the width of sequences line in 1 or many FASTA files")
ap.add_argument("-in", "--input", required=False, widget="FileChooser", help="input fasta file")
ap.add_argument("-txt", "--txt", required=False, widget="FileChooser", help="input txt file with 2 columns 1) file name (without extension), 2) width")
ap.add_argument("-out", "--output", required=False, widget="FileSaver", help="output fasta file")
ap.add_argument("-dir", "--directory", required=False, type=str, widget='DirChooser', help="directory to search for fasta files")
ap.add_argument("-width", "--width", required=False, type=int, default=80, help="number of characters per line. Default 80")
ap.add_argument("-pro", "--program", required=False, type=int, default=1, help="program to choose. 1) one input/output fasta file, 2) many input/output fasta files. Default is 1")
ap.add_argument("-pro", "--program", required=False, type=int, default=1, help="program to choose. 1) one input/output fasta file, 2) many input/output fasta files, 3) .txt file with fasta file names and width for each file. Default is 1")
args = vars(ap.parse_args())
# main
# create function to split the input sequence based on a specific number of characters
Expand All @@ -24,16 +26,30 @@ def split_every_width(s,w): return [s[i:i+w] for i in range(0,len(s),w)]
print(">"+record.id)
print('\n'.join(split_every_width(str(record.seq), args['width']))) # add characters in new line after the number of characters surpasses the input width
sys.stdout.close()
else:
elif args['program'] == 2:
# import each fasta file from the working directory
for filename in sorted(os.listdir(os.chdir(args['directory']))):
for filename in sorted(os.listdir(str(os.getcwd()))):
if filename.endswith(".fa") or filename.endswith(".fasta"):
# export to new fasta files with the user imported width value
sys.stdout = open(''.join([filename.split(".")[0],"_","w",str(args['width']),".fasta"]), 'a')
for record in SeqIO.parse(filename,'fasta'):
print(">"+record.id)
print('\n'.join(split_every_width(str(record.seq), args['width']))) # add characters in new line after the number of characters surpasses the input width
sys.stdout.close()
else:
df = pd.read_csv(args['txt'], header=None, sep="\t")
# select ids and widths columns, convert to lists
headers = df.iloc[:,0].values.tolist()
widths = df.iloc[:,1].values.tolist()
# iterate over (file name, width) pairs and export each one to fasta
for (a,b) in zip(headers, widths):
# export to new fasta files with the user imported width value
sys.stdout = open(''.join([str(a),"_","w",str(b),".fasta"]), 'a')
for record in SeqIO.parse(''.join([str(a),".fasta"]),'fasta'):
print(">"+record.id)
print('\n'.join(split_every_width(str(record.seq), int(b)))) # add characters in new line after the number of characters surpasses the input width
sys.stdout.close()


if __name__ == '__main__':
main()

0 comments on commit 813a05a

Please sign in to comment.