Skip to content

Commit

Permalink
tabular and multi/single fasta conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
olgatsiouri1996 committed Nov 15, 2021
1 parent 43ef427 commit 17a8a48
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 12 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@ python scripts that can be easily transformed to gui programs for wet lab scient
2. DSSP statistics by chain GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4891916.svg)](https://doi.org/10.5281/zenodo.4891916)
3. DSSP tabular by chain GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4839987.svg)](https://doi.org/10.5281/zenodo.4839987)
4. add adapters by pair GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5201840.svg)](https://doi.org/10.5281/zenodo.5201840)
5. Extract or remove sequences from fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5213003.svg)](https://doi.org/10.5281/zenodo.5213003)
5. extract or remove sequences from fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5703246.svg)](https://doi.org/10.5281/zenodo.5703246)
6. FASTA subset by length GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5218645.svg)](https://doi.org/10.5281/zenodo.5218645)
7. select header or fasta by aa GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5218741.svg)](https://doi.org/10.5281/zenodo.5218741)
8. amino acids content multifasta calculator GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5275827.svg)](https://doi.org/10.5281/zenodo.5275827)
9. pdbs secondary structure statistics GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5348006.svg)](https://doi.org/10.5281/zenodo.5348006)
10. add adapters on single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5559117.svg)](https://doi.org/10.5281/zenodo.5559117)
11. Trim multi-fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5565197.svg)](https://doi.org/10.5281/zenodo.5565197)
12. Trim single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5565299.svg)](https://doi.org/10.5281/zenodo.5565299)
12. Trim single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5565299.svg)](https://doi.org/10.5281/zenodo.5565299)
13. fasta to tab GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5699003.svg)](https://doi.org/10.5281/zenodo.5699003)
14. tab to fasta GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5703366.svg)](https://doi.org/10.5281/zenodo.5703366)
15. single-fastas to tabular GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5672075.svg)](https://doi.org/10.5281/zenodo.5672075)
16. tabular file to single-fastas GUI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5652249.svg)](https://doi.org/10.5281/zenodo.5652249)
24 changes: 20 additions & 4 deletions fasta_manipulation/extract_or_remove_seqs_from_fasta_gui.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# python3
from gooey import *
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
# input parameters
@Gooey(required_cols=3, program_name= 'extract or remove sequences from fasta', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
@Gooey(required_cols=2, program_name= 'extract or remove sequences from fasta', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
def main():
ap = GooeyParser(description="use a txt file with fasta headers to extract or remove sequences from fasta file")
ap.add_argument("-in", "--input", required=True, widget='FileChooser', help="input fasta file")
ap.add_argument("-out", "--output", required=True, widget='FileSaver', help="output fasta file")
ap.add_argument("-pro", "--program",type=int, default=1, required=False, help="choose to exract or remove(1. extract, 2. remove. Defaults to 1)")
ap.add_argument("-out", "--output", required=False, widget='FileSaver', help="output fasta file")
ap.add_argument("-pro", "--program",type=int, default=1, required=False, help="choose to exract or remove( 1) extract, 2) remove, 3) extract and export as single-fasta files, 4) remove and export as single-fasta files. Defaults to 1)")
ap.add_argument("-headers", "--headers", required=True, widget='FileChooser', help="file with fasta headers to retrieve the output fasta sequences")
args = vars(ap.parse_args())
# main
Expand All @@ -27,12 +29,26 @@ def main():
if seq.id in wanted:
SeqIO.write([seq], f, "fasta")
# remove
elif program == 2:
if program == 2:
fasta_sequences = SeqIO.parse(open(args['input']),'fasta')
with open(args['output'], "w") as f:
for seq in fasta_sequences:
if seq.id not in wanted:
SeqIO.write([seq], f, "fasta")
# extract and export as single-fasta files
if program == 3:
fasta_sequences = SeqIO.parse(open(args['input']),'fasta')
for seq in fasta_sequences:
if seq.id in wanted:
record = SeqRecord(Seq(str(seq.seq)),id=str(seq.id),description="")
SeqIO.write(record, "".join([str(seq.id),".fasta"]), "fasta")
# remove and export as single-fasta files
if program == 4:
fasta_sequences = SeqIO.parse(open(args['input']),'fasta')
for seq in fasta_sequences:
if seq.id not in wanted:
record = SeqRecord(Seq(str(seq.seq)),id=str(seq.id),description="")
SeqIO.write(record, "".join([str(seq.id),".fasta"]), "fasta")

if __name__ == '__main__':
main()
33 changes: 33 additions & 0 deletions fasta_manipulation/singlefastas_to_tab_gui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# python3
from gooey import *
import os
from Bio import SeqIO
import pandas as pd
# input parameters
@Gooey(required_cols=2, program_name='single-fastas to tabular txt file', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
def main():
    """Gather the records of every FASTA file in a directory into one tabular file.

    Each ``.fa`` / ``.fasta`` file in the chosen directory is parsed in sorted
    filename order, and every record becomes one ``<id>\\t<sequence>`` row of
    the output file.

    Command-line / GUI arguments:
        -dir, --directory: directory searched (non-recursively) for FASTA files.
        -out, --output: tabular txt file that receives the rows.
    """
    # Local stdlib import: the table is written with csv instead of
    # DataFrame.to_csv(line_terminator=...), a keyword that pandas renamed to
    # `lineterminator` in 1.5 and removed in 2.0.
    import csv

    ap = GooeyParser()
    ap.add_argument("-dir", "--directory", required=True, type=str, widget='DirChooser', help="directory to search for fasta files")
    ap.add_argument("-out","--output", required=True, widget='FileSaver', help="output txt file")
    args = vars(ap.parse_args())
    # main
    directory = args['directory']
    rows = []
    # Read each fasta file through an explicit path rather than changing the
    # process working directory as a side effect.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith((".fa", ".fasta")):
            for record in SeqIO.parse(os.path.join(directory, filename), "fasta"):
                rows.append((record.id, str(record.seq)))
    # A relative output path is resolved against the input directory, which is
    # where the previous chdir-based code ended up writing it; an absolute
    # path is returned unchanged by os.path.join.
    output_path = os.path.join(directory, args['output'])
    # NOTE(review): append mode is kept from the original, so re-running on the
    # same output file adds the rows again - confirm whether 'w' is intended.
    with open(output_path, 'a') as f:
        csv.writer(f, delimiter="\t", lineterminator="\n").writerows(rows)


if __name__ == '__main__':
    main()
12 changes: 6 additions & 6 deletions fasta_manipulation/tab_to_fasta_gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
# input parameters
@Gooey(required_cols=2, program_name='tab to fasta', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
def main():
    """Convert a tab-separated file of identifiers and sequences into a FASTA file.

    Command-line / GUI arguments:
        -in, --input: tab-separated input file.
        -out, --output: FASTA file to write.
    """
    ap = GooeyParser(description="converts a tabular file with identifier and sequence, into a fasta file")
    # Each option is registered exactly once; registering the same option
    # string twice makes argparse raise a conflicting-option error.
    ap.add_argument("-in", "--input", required=True, widget='FileChooser', help="input tab seperated file")
    ap.add_argument("-out", "--output", required=True, widget='FileSaver', help="output fasta file")
    args = vars(ap.parse_args())
    # main
    # The value returned by SeqIO.convert was never used, so it is not bound.
    SeqIO.convert(args['input'], "tab", args['output'], "fasta")


if __name__ == '__main__':
    main()
25 changes: 25 additions & 0 deletions fasta_manipulation/tab_to_singlefastas_gui.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# python3
import itertools
from gooey import *
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import pandas as pd
# input arguments
@Gooey(required_cols=1, program_name='tabular to single-fasta files', header_bg_color= '#DCDCDC', terminal_font_color= '#DCDCDC', terminal_panel_color= '#DCDCDC')
def main():
    """Write every row of a two-column tabular file as its own FASTA file.

    The first column is used as the record identifier and the second as the
    sequence. Each record is written to ``<identifier>.fasta``, a relative
    path, so the files land in the current working directory.

    Command-line / GUI arguments:
        -in, --input: tab-separated txt file without a header row.
    """
    ap = GooeyParser(description="convert each row of a tabular file with the fasta headers and sequences in each row in single-fasta files")
    ap.add_argument("-in", "--input", required=True, widget="FileChooser", help="input txt file")
    args = vars(ap.parse_args())
    # main
    # Read every field as literal text: without dtype=str an identifier such
    # as "007" is parsed as the integer 7, and without keep_default_na=False a
    # field such as "NA" or "null" becomes NaN and is written out as "nan".
    df = pd.read_csv(args['input'], header=None, sep="\t", dtype=str, keep_default_na=False)
    # pair each identifier with its sequence and export one file per pair
    for seq_id, sequence in zip(df.iloc[:, 0], df.iloc[:, 1]):
        record = SeqRecord(Seq(str(sequence)), id=str(seq_id), description="")
        SeqIO.write(record, "".join([str(seq_id), ".fasta"]), "fasta")


if __name__ == '__main__':
    main()

0 comments on commit 17a8a48

Please sign in to comment.