diff --git a/CONTRIBUTORS.yaml b/CONTRIBUTORS.yaml index 1abba40f5ba3f5..533f40686afcb9 100644 --- a/CONTRIBUTORS.yaml +++ b/CONTRIBUTORS.yaml @@ -581,6 +581,10 @@ dbrites: name: Daniela Brites joined: 2022-03 +debroas: + name: Didier Debroas + joined: 2024-12 + dechendb: name: Dechen Bhuming joined: 2023-06 diff --git a/topics/microbiome/tutorials/metaplasmidome_query/faqs/index.md b/topics/microbiome/tutorials/metaplasmidome_query/faqs/index.md new file mode 100644 index 00000000000000..9ce3fe4fce824b --- /dev/null +++ b/topics/microbiome/tutorials/metaplasmidome_query/faqs/index.md @@ -0,0 +1,3 @@ +--- +layout: faq-page +--- diff --git a/topics/microbiome/tutorials/metaplasmidome_query/images/mapping_score_histogram.png b/topics/microbiome/tutorials/metaplasmidome_query/images/mapping_score_histogram.png new file mode 100644 index 00000000000000..0edd71108af867 Binary files /dev/null and b/topics/microbiome/tutorials/metaplasmidome_query/images/mapping_score_histogram.png differ diff --git a/topics/microbiome/tutorials/metaplasmidome_query/images/mapping_score_histogram_after_coverage_filtering.png b/topics/microbiome/tutorials/metaplasmidome_query/images/mapping_score_histogram_after_coverage_filtering.png new file mode 100644 index 00000000000000..3281c6a56893f4 Binary files /dev/null and b/topics/microbiome/tutorials/metaplasmidome_query/images/mapping_score_histogram_after_coverage_filtering.png differ diff --git a/topics/microbiome/tutorials/metaplasmidome_query/images/plasmid_coverage_histogram.png b/topics/microbiome/tutorials/metaplasmidome_query/images/plasmid_coverage_histogram.png new file mode 100644 index 00000000000000..a1ebfb4e701b0b Binary files /dev/null and b/topics/microbiome/tutorials/metaplasmidome_query/images/plasmid_coverage_histogram.png differ diff --git a/topics/microbiome/tutorials/metaplasmidome_query/tutorial.bib b/topics/microbiome/tutorials/metaplasmidome_query/tutorial.bib new file mode 100644 index 00000000000000..3dd7ec036dd139 
--- /dev/null +++ b/topics/microbiome/tutorials/metaplasmidome_query/tutorial.bib @@ -0,0 +1,186 @@ +@article{antipov2020metaviral, + title={Metaviral SPAdes: assembly of viruses from metagenomic data}, + author={Antipov, Dmitry and Raiko, Mikhail and Lapidus, Alla and Pevzner, Pavel A}, + journal={Bioinformatics}, + volume={36}, + number={14}, + pages={4126--4129}, + year={2020}, + publisher={Oxford University Press}, + doi={10.1093/bioinformatics/btaa490} +} + + +@article{danko2021global, + title={A global metagenomic map of urban microbiomes and antimicrobial resistance}, + author={Danko, David and Bezdan, Daniela and Afshin, Evan E and Ahsanuddin, Sofia and Bhattacharya, Chandrima and Butler, Daniel J and Chng, Kern Rei and Donnellan, Daisy and Hecht, Jochen and Jackson, Katelyn and others}, + journal={Cell}, + volume={184}, + number={13}, + pages={3376--3393}, + year={2021}, + publisher={Elsevier}, + doi={10.1016/j.cell.2021.05.002} +} + + +@dataset{debroas_2024_11124657, + author = {DEBROAS, Didier}, + title = {Plasmids Identified in Air Metagenomes}, + month = may, + year = 2024, + publisher = {Zenodo}, + doi = {10.5281/zenodo.11124657}, + url = {https://doi.org/10.5281/zenodo.11124657} +} + +@article{hennequin2022plasmidome, + title={Plasmidome analysis of a hospital effluent biofilm: Status of antibiotic resistance}, + author={Hennequin, Claire and Forestier, Christiane and Traore, Ousmane and Debroas, Didier and Bricheux, Genevi{\`e}ve}, + journal={Plasmid}, + volume={122}, + pages={102638}, + year={2022}, + publisher={Elsevier}, + doi={10.1016/j.plasmid.2022.102638} +} + + +@article{hilpert2021reconstruction, + title={Reconstruction of plasmids by shotgun sequencing from environmental DNA: which bioinformatic workflow?}, + author={Hilpert, C{\'e}cile and Bricheux, Genevi{\`e}ve and Debroas, Didier}, + journal={Briefings in bioinformatics}, + volume={22}, + number={3}, + pages={bbaa059}, + year={2021}, + publisher={Oxford University Press}, + 
doi={10.1093/bib/bbaa059} +} + +@article{kanehisa2016kegg, + title={KEGG as a reference resource for gene and protein annotation}, + author={Kanehisa, Minoru and Sato, Yoko and Kawashima, Masayuki and Furumichi, Miho and Tanabe, Mao}, + journal={Nucleic acids research}, + volume={44}, + number={D1}, + pages={D457--D462}, + year={2016}, + publisher={Oxford University Press}, + doi={10.1093/nar/gkv1070} +} + +@article{krawczyk2018plasflow, + title={PlasFlow: predicting plasmid sequences in metagenomic data using genome signatures}, + author={Krawczyk, Pawel S and Lipinski, Leszek and Dziembowski, Andrzej}, + journal={Nucleic acids research}, + volume={46}, + number={6}, + pages={e35--e35}, + year={2018}, + publisher={Oxford University Press}, + doi={10.1093/nar/gkx1321} +} + + +@article{li2015, + title = {{MEGAHIT}: an ultra-fast single-node solution for large and complex metagenomics assembly via succinct de {Bruijn} graph}, + volume = {31}, + issn = {1367-4803}, + shorttitle = {{MEGAHIT}}, + doi = {10.1093/bioinformatics/btv033}, + abstract = {Summary: MEGAHIT is a NGS de novo assembler for assembling large and complex metagenomics data in a time- and cost-efficient manner. It finished assembling a soil metagenomics dataset with 252 Gbps in 44.1 and 99.6 h on a single computing node with and without a graphics processing unit, respectively. MEGAHIT assembles the data as a whole, i.e. no pre-processing like partitioning and normalization was needed. 
When compared with previous methods on assembling the soil data, MEGAHIT generated a three-time larger assembly, with longer contig N50 and average contig length; furthermore, 55.8\% of the reads were aligned to the assembly, giving a fourfold improvement.Availability and implementation: The source code of MEGAHIT is freely available at https://github.com/voutcn/megahit under GPLv3 license.Contact:rb@l3-bioinfo.com or twlam@cs.hku.hkSupplementary information: Supplementary data are available at Bioinformatics online.}, + number = {10}, + journal = {Bioinformatics}, + author = {Li, Dinghua and Liu, Chi-Man and Luo, Ruibang and Sadakane, Kunihiko and Lam, Tak-Wah}, + month = may, + year = {2015}, + pages = {1674--1676}, +} + +@article{li2018minimap2, + title={Minimap2: pairwise alignment for nucleotide sequences}, + author={Li, Heng}, + journal={Bioinformatics}, + volume={34}, + number={18}, + pages={3094--3100}, + year={2018}, + publisher={Oxford University Press}, + doi={10.1093/bioinformatics/bty191}, +} + +@article{mirdita2019mmseqs2, + title={MMseqs2 desktop and local web server app for fast, interactive sequence searches}, + author={Mirdita, Milot and Steinegger, Martin and S{\"o}ding, Johannes}, + journal={Bioinformatics}, + volume={35}, + number={16}, + pages={2856--2858}, + year={2019}, + publisher={Oxford University Press}, + doi={10.1093/bioinformatics/bty1057} +} + +@article{mistry2021pfam, + title={Pfam: The protein families database in 2021}, + author={Mistry, Jaina and Chuguransky, Sara and Williams, Lowri and Qureshi, Matloob and Salazar, Gustavo A and Sonnhammer, Erik LL and Tosatto, Silvio CE and Paladin, Lisanna and Raj, Shriya and Richardson, Lorna J and others}, + journal={Nucleic acids research}, + volume={49}, + number={D1}, + pages={D412--D419}, + year={2021}, + publisher={Oxford University Press}, + doi={10.1093/nar/gkaa913} +} + + +@article{pellow2020plasclass, + title={PlasClass improves plasmid sequence classification}, + author={Pellow, 
David and Mizrahi, Itzik and Shamir, Ron}, + journal={PLoS computational biology}, + volume={16}, + number={4}, + pages={e1007781}, + year={2020}, + publisher={Public Library of Science San Francisco, CA USA}, + doi={10.1371/journal.pcbi.1007781} +} + + +@article{quast2012silva, + title={The SILVA ribosomal RNA gene database project: improved data processing and web-based tools}, + author={Quast, Christian and Pruesse, Elmar and Yilmaz, Pelin and Gerken, Jan and Schweer, Timmy and Yarza, Pablo and Peplies, J{\"o}rg and Gl{\"o}ckner, Frank Oliver}, + journal={Nucleic acids research}, + volume={41}, + number={D1}, + pages={D590--D596}, + year={2012}, + publisher={Oxford University Press}, + doi={10.1093/nar/gks1219} +} + + +@article{robertson2018mob, + title={MOB-suite: software tools for clustering, reconstruction and typing of plasmids from draft assemblies}, + author={Robertson, James and Nash, John HE}, + journal={Microbial genomics}, + volume={4}, + number={8}, + pages={e000206}, + year={2018}, + publisher={Microbiology Society}, + doi={10.1099/mgen.0.000206} +} + +@article{wu2013systematic, + title={Systematic identification of gene families for use as “markers” for phylogenetic and phylogeny-driven ecological studies of bacteria and archaea and their major subgroups}, + author={Wu, Dongying and Jospin, Guillaume and Eisen, Jonathan A}, + journal={PloS one}, + volume={8}, + number={10}, + pages={e77033}, + year={2013}, + publisher={Public Library of Science San Francisco, USA}, + doi={10.1371/journal.pone.0077033} +} diff --git a/topics/microbiome/tutorials/metaplasmidome_query/tutorial.md b/topics/microbiome/tutorials/metaplasmidome_query/tutorial.md new file mode 100644 index 00000000000000..386928eb639880 --- /dev/null +++ b/topics/microbiome/tutorials/metaplasmidome_query/tutorial.md @@ -0,0 +1,770 @@ +--- +layout: tutorial_hands_on + +title: Query an annotated mobile genetic element database to identify and annotate genetic elements (e.g. 
plasmids) in metagenomics data +zenodo_link: 'https://zenodo.org/records/14501567' +questions: +- How can we use an existing annotated database to identify and annotate genetic elements in metagenomics data? +objectives: +- Perform metagenomics read mapping against a mobile genetic element database. +- Evaluate the distribution of mapping scores to identify high-quality alignments. +- Evaluate plasmid coverage to determine effective filtering thresholds. +- Filter alignments based on plasmid coverage and mapping quality. +- Justify the filtering thresholds chosen for identifying plasmid sequences. +- Generate a curated table of plasmid sequences and convert it into a FASTA file for further analysis. +- Use tools to process sequences, ensuring data is sorted, deduplicated, and formatted correctly. +- Annotate features on the identified plasmids using mobile genetic element database annotations. +- Construct a final annotated dataset integrating genetic element information for downstream applications. +time_estimation: 1H +key_points: +- Leveraging annotated mobile genetic element databases, such as the metaplasmidome, is essential for identifying and characterizing plasmids and other genetic elements in metagenomic data. +- Effective filtering based on plasmid coverage and mapping quality is critical to ensure the reliability and accuracy of extracted sequences. +- Generating well-organized outputs, such as FASTA files and annotated tables, is vital for downstream analysis and collaborative research. +- Annotating genetic features on identified plasmids enhances understanding of their biological roles, such as antibiotic resistance or virulence. 
+contributions: + authorship: + - bebatut + - nagoue + - debroas +edam_ontology: +- topic_0622 # Genomics +- topic_3301 # Microbiology +- topic_0080 # Sequence analysis +- topic_0798 # Mobile genetic elements +level: Introductory + +--- + +Identifying and annotating mobile genetic elements (MGE) in metagenomic data can be tricky. To facilitate the process, we can use existing and custom annotated mobile genetic element databases. + +The *metaplasmidome* refers to the collection of all plasmids present within a given environment, typically identified through metagenomic sequencing. Plasmids are extrachromosomal genetic elements that often carry genes associated with antibiotic resistance, virulence, or metabolic functions, making them crucial for microbial adaptability. In the context of metagenomics, plasmids are identified alongside chromosomal DNA. + +A common step in metaplasmidome analysis is searching sequencing reads against known plasmid databases to detect plasmid sequences within a metagenome, allowing researchers to map the diversity and abundance of plasmids in various environments. + +In this tutorial, we use a metaplasmidome database built from public air metagenomes and query it with assembled air metagenome data. A similar approach can be used for other mobile genetic elements. + +> How was the air metaplasmidome database built? +> +> The air metaplasmidome is available from Zenodo ({% cite debroas_2024_11124657 %}) and was built from metagenomic data selected in Web of Science (Clarivate) in October 2022 using the keywords: `txid655179[Organism:noexp] AND metagenome [Filter]; AIR Metagenome; Air microbiome; Troposphere; Aerosol; Atmosphere`. Data were manually curated to remove sequencing data originating from metabarcoding (i.e., 16S). The assembled data supplied by the MetaSUB consortium ({% cite danko2021global %}), when available, were used for air metagenomes in built environments. +> +> Plasmid contents were predicted using the assembled data. 
Metagenomes sequenced with Illumina (paired-end Illumina reads) were assembled using MEGAHIT 1.2.9 with the meta-large option ({% cite li2015 %}) after cleaning the data with bbduk2 (qtrim=rl trimq=28 minlen=25 maq=20 ktrim=r k=25 mink=11 and a list of adapters to remove) from the [bbtools suite](https://jgi.doe.gov/data-and-tools/software-tools/bbtools/). +> +> Plasmids were predicted for each assembly using the PlasSuite scripts described in depth in Hilpert et al. {% cite hilpert2021reconstruction %} {% cite hennequin2022plasmidome %} and available on [GitHub](https://github.com/meb-team/PlasSuite/). Briefly, contigs were analyzed using both reference-based and reference-free approaches. The databases employed included those for chromosomes (archaea and bacteria) and plasmids from NCBI, as well as the MOB-suite tool ({% cite robertson2018mob %}), SILVA ({% cite quast2012silva %}) and phylogenetic markers harbored by chromosomes ({% cite wu2013systematic %}). Two reference-free methods were applied to contigs that were not affiliated with chromosomes (discarded) or plasmids (retained in the first step): PlasFlow ({% cite krawczyk2018plasflow %}) and PlasClass ({% cite pellow2020plasclass %}). Viruses were removed using [viralVerify](https://github.com/ablab/viralVerify) ({% cite antipov2020metaviral %}), which also provides a plasmid/non-plasmid classification in parallel. +> +> Eukaryotic contaminants were removed by aligning the sequences against NT databases and human chromosomes (GRCh38) with minimap2 with the -x asm5 option ({% cite li2018minimap2 %}). Contigs mapping with an identity of 95% and a coverage of 80% were removed. The final plasmidome set was clustered by mmseqs ({% cite mirdita2019mmseqs2 %}) with 80% of coverage and 90% of identity (--min-seq-id 0.90 -c 0.8 --cov-mode 1 --cluster-mode 2 --alignment-mode 3 --kmer-per-seq-scale 0.2). +{: .details} + + +> +> +> In this tutorial, we will cover: +> +> 1. 
TOC +> {:toc} +> +{: .agenda} + +# Galaxy and data preparation + +Any analysis should get its own Galaxy history. So let's start by creating a new history and import the data (plasmidome database and query metagenomes) into it. + +> Prepare Galaxy and data +> +> 1. Create a new history for this tutorial +> +> {% snippet faqs/galaxy/histories_create_new.md %} +> +> 2. Rename the history +> +> {% snippet faqs/galaxy/histories_rename.md %} +> +> 3. Import the metaplasmidome reference database from [Zenodo]({{ page.zenodo_link }}) or from +> the shared data library +> +> ``` +> https://zenodo.org/records/14501567/files/air_metaplasmidome.fasta.gz +> ``` +> +> {% snippet faqs/galaxy/datasets_import_via_link.md %} +> +> {% snippet faqs/galaxy/datasets_import_from_data_library.md %} +> +> 4. Rename `Air plasmidome database` +> +> {% snippet faqs/galaxy/datasets_rename.md %} +> +> 5. Import the reads to query against the reference database from [Zenodo]({{ page.zenodo_link }}) or from +> the shared data library +> +> ``` +> https://zenodo.org/records/14501567/files/air_metagenome_assemblies.fasta.gz +> ``` +> +> 6. Rename `Air metagenomes` +{: .hands_on} + + +> +> +> 1. How many reads are in the query dataset? +> 2. How many plasmid sequences are in the metaplasmidome database? +> +> > +> > +> > 1. 16,637 sequences +> > 2. 674,495 sequences +> {: .solution} +{: .question} + + +# Read mapping against the metaplasmidome database + +> Map reads against metaplasmidome database +> +> 1. 
{% tool [Map with minimap2](toolshed.g2.bx.psu.edu/repos/iuc/minimap2/minimap2/2.28+galaxy0) %} with the following parameters: +> - *"Will you select a reference genome from your history or use a built-in index?"*: `Use a genome from history and build index` +> - {% icon param-file %} *"Use the following dataset as the reference sequence"*: `Air plasmidome database` +> - *"Single or Paired-end reads"*: `Single` +> - {% icon param-file %} *"Select fastq dataset"*: `Air metagenomes` +> - *"Select a profile of preset options"*: `Long assembly to reference mapping (-k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 --min-occ-floor=100). Typically, the alignment will not extend to regions with 5% or higher sequence divergence. Only use this preset if the average divergence is far below 5%. (asm5)` +> - In *"Set advanced output options"*: +> - *"Select an output format"*: `PAF` +> +{: .hands_on} + +PAF is the default output format of minimap2. It is TAB-delimited with each line consisting of the following predefined fields: +1. Query sequence name +2. Query sequence length +3. Query start coordinate (0-based) +4. Query end coordinate (0-based) +5. `+` if query/target on the same strand and `-` if opposite +6. Target sequence name +7. Target sequence length +8. Target start coordinate on the original strand +9. Target end coordinate on the original strand +10. Number of matching bases in the mapping +11. Number of bases, including gaps, in the mapping +12. Mapping quality + +> +> +> 1. How many lines are in the file? +> 2. Can a query sequence be found several times in the target? +> 3. Are all alignments of good quality? +> +> > +> > +> > 1. 29,796 lines +> > 2. There are 16,637 query sequences. Several query sequences are found several times in the file: SRR17300492_75807 is found 16 times. It means they have been mapped to several locations or sequences in the target. +> > 3. The first alignment (SRR17300492_75807 mapping on SRR17300667-707) has a score of 60, the highest Phred score. 
The third alignment has a score of 0, so it is not good. +> > +> {: .solution} +> +{: .question} + +# Exploration of the mapping results + +Let's look at the distribution of the mapping score (column 12). + +> Plot score distribution +> +> 1. Change datatype to tabular +> +> {% snippet faqs/galaxy/datasets_change_datatype.md %} +> +> 2. {% tool [Cut columns from a table](Cut1) %} with: +> - *"Cut columns"*: `c12` +> - *"Delimited by"*: `tab` +> - {% icon param-file %} *"From"*: Output of **Map with minimap2** {% icon tool %} +> +> 3. {% tool [Histogram with ggplot2](toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_histogram/ggplot2_histogram/3.4.0+galaxy0) %} with the following parameters: +> - *"Input should have column headers - these will be the columns that are plotted * "*: Output of **Cut** {% icon tool %} +> - *"Label for x axis"*: `Score` +> - *"Label for y axis"*: `Distribution` +> - In *"Advanced Options"*: +> - *"Legend options"*: `Hide legend` +> +{: .hands_on} + +![Distribution of mapping quality score (on x-axis, value between 0 and 60) with density (y-axis)](./images/mapping_score_histogram.png) + +> +> +> Are all alignments of good quality? +> +> > +> > +> > There is a peak at 60 but many alignments have a score below 40. +> > +> {: .solution} +> +{: .question} + +We should remove alignments with a score below 40. Before that, let's look at the plasmid coverage. We first need to compute it, i.e. the ratio between the number of matching bases in the mapping (Column 10) and plasmid length (target sequence length - Column 7). + +> Compute plasmid coverage +> 1. {% tool [Cut columns from a table](Cut1) %} with: +> - *"Cut columns"*: `c1-c12` +> - *"Delimited by"*: `tab` +> - {% icon param-file %} *"From"*: Output of **Map with minimap2** {% icon tool %} +> +> 2. 
{% tool [Compute on rows](toolshed.g2.bx.psu.edu/repos/devteam/column_maker/Add_a_column1/2.1) %} with the following parameters: +> - *"Input file"*: Output of **Cut** {% icon tool %} +> - In *"Expressions"*: +> - *"Add expression"*: `float(c10)/float(c7)` +> +> 3. Rename `Mapping stats + plasmid coverage` +{: .hands_on} + +A new column has been added (column 13) with the plasmid coverage. Let's now plot its distribution. + +> Plot plasmid coverage distribution +> 1. {% tool [Cut columns from a table](Cut1) %} with: +> - *"Cut columns"*: `c13` +> - *"Delimited by"*: `tab` +> - {% icon param-file %} *"From"*: `Mapping stats + plasmid coverage` +> +> 2. {% tool [Histogram with ggplot2](toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_histogram/ggplot2_histogram/3.4.0+galaxy0) %} with the following parameters: +> - *"Input should have column headers - these will be the columns that are plotted * "*: Output of **Cut** {% icon tool %} +> - *"Label for x axis"*: `Plasmid coverage` +> - *"Label for y axis"*: `Distribution` +> - *"Bin width for plotting"*: `0.1` +> - In *"Advanced Options"*: +> - *"Legend options"*: `Hide legend` +> +{: .hands_on} + +![Distribution of plasmid coverage (on x-axis, value between 0 and 1) with density (y-axis)](./images/plasmid_coverage_histogram.png) + +> +> +> 1. What is the distribution of the plasmid coverage? +> 2. What could be a good threshold to filter? +> +> > +> > +> > 1. There are 2 peaks: one around 0 (i.e. no plasmid coverage) that slowly decreases until 0.8 and a peak at 1 (full plasmid coverage) +> > 2. 0.8 seems to be a breaking point and could be a good value to filter. +> > +> {: .solution} +> +{: .question} + +# Filtering + +We will now filter the alignment to keep only the ones with a plasmid coverage (column 13) of at least 0.8, i.e. a read mapping to a plasmid covering at least 80% of the plasmid. + +> Filter alignments based on plasmid coverage +> +> 1. 
{% tool [Filter data on any column using simple expressions](Filter1) %} with the following parameters: +> - {% icon param-file %} *"Filter"*: `Mapping stats + plasmid coverage` +> - *"With following condition"*: `c13>=0.8` +> +> 2. Rename `Alignments with plasmid coverage >= 0.8` +{: .hands_on} + +> +> +> 1. How many lines have been kept? +> 2. Which percentage of lines does that correspond to? +> 3. What does the distribution of the mapping score look like for these alignments? +> 4. Is there any extra filter we should do on the data? +> +> > +> > +> > 1. 5,577 (out of 29,796) +> > 2. 18.73% +> > 3. To plot the distribution of the mapping score for filtered alignments, we need to run the series of tools as done earlier: +> > +> > 1. {% tool [Cut columns from a table](Cut1) %} with: +> > - *"Cut columns"*: `c12` +> > - *"Delimited by"*: `tab` +> > - {% icon param-file %} *"From"*: `Alignments with plasmid coverage >= 0.8` +> > +> > 2. {% tool [Histogram with ggplot2](toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_histogram/ggplot2_histogram/3.4.0+galaxy0) %} with the following parameters: +> > - *"Input should have column headers - these will be the columns that are plotted * "*: Output of **Cut** {% icon tool %} +> > - *"Label for x axis"*: `Score` +> > - *"Label for y axis"*: `Distribution` +> > - In *"Advanced Options"*: +> > - *"Legend options"*: `Hide legend` +> > +> > ![Distribution of mapping quality score (on x-axis, value between 0 and 60) with density (y-axis)](./images/mapping_score_histogram_after_coverage_filtering.png) +> > +> > Most of the alignments are of good quality (peak at 60). There is also a peak at 0, meaning that some alignments are of bad quality +> > +> > 4. It might be good to add a filter on the score. +> {: .solution} +> +{: .question} + +After filtering on the plasmid coverage, we also add a filter on the score to be sure that we keep only the best alignments (i.e. a score - column 12 - of at least 40). 
+ +> Filter alignments based on score +> +> 1. {% tool [Filter data on any column using simple expressions](Filter1) %} with the following parameters: +> - {% icon param-file %} *"Filter"*: `Alignments with plasmid coverage >= 0.8` +> - *"With following condition"*: `c12>=40` +> +> 2. Rename `Alignments with plasmid coverage >= 0.8 and score >= 40` +{: .hands_on} + +> +> +> 1. How many lines have been kept? +> 2. Which percentage of lines does that correspond to? +> +> > +> > +> > 1. 5,249 (out of 5,577) +> > 2. 94.12% +> {: .solution} +> +{: .question} + +# Extract sequences mapping to plasmids + +Let's now extract the sequences mapping to plasmids (coverage higher than 80%). + +First, we need to extract the names of the reads (in `Air metagenomes`) mapping to plasmids in the metaplasmidome database. For that, we need to extract column 1 (query names, i.e. names of reads in `Air metagenomes`) and 6 (reference names, i.e. names of sequences in `Air plasmidome database`). + +> Get sequences matching to plasmids +> +> 1. {% tool [Cut](Cut1) %} with the following parameters: +> - *"Cut columns"*: `c1,c6` +> - {% icon param-file %} *"From"*: `Alignments with plasmid coverage >= 0.8 and score >= 40` +> +> 2. Rename to `Names for alignments with plasmid coverage >= 0.8 and score >= 40` +> +> 3. {% tool [Sort](sort1) %} with the following parameters: +> - {% icon param-file %} *"Sort Dataset"*: output of **Cut** {% icon tool %} +> - *"on column"*: `c2` +{: .hands_on} + +Second, we need to convert the `Air metagenomes` FASTA file to a tabular so we can join it with `Names for alignments with plasmid coverage >= 0.8 and score >= 40` + +> Get query sequence as a table +> +> 1. {% tool [Convert FASTA to Tabular](CONVERTER_fasta_to_tabular) %} with the following parameters: +> - {% icon param-file %} *"Fasta file"*: `Air metagenomes` +> +> 2. 
Rename to `Air metagenome as table` +{: .hands_on} + +Let's now join the 2 datasets on the names of the reads in `Air metagenomes` so column 1 in `Names for alignments with plasmid coverage >= 0.8 and score >= 40` and column 1 in `Air metagenome as table`. + +> Get sequences matching to plasmids +> +> 1. {% tool [Join two Datasets](join1) %} with the following parameters: +> - {% icon param-file %} *"Join"*: output of **Sort** {% icon tool %} +> - *"using column"*: `Column: 1` +> - {% icon param-file %} *"with"*: output of **Convert FASTA to Tabular** {% icon tool %} +> - *"and column"*: `Column: 1` +> - *"Fill empty columns"*: `No` +> +{: .hands_on} + +We now have a table with 4 columns: names of the reads in `Air metagenomes`, names of sequences in the reference database, names of the reads in `Air metagenomes`, sequence of reads in `Air metagenomes`. We will now create 2 outputs: +- A table with metaplasmidome database sequence name, metagenomic sequence names and the metagenomic sequences +- A FASTA file with metagenomic sequences + +## Create the table with air metagenomes identified as plasmids + +Let's remove the duplicated column (column 3) and reorganize the columns + +> Keep non duplicated columns +> +> 1. {% tool [Cut](Cut1) %} with the following parameters: +> - *"Cut columns"*: `c2,c1,c4` +> - {% icon param-file %} *"From"*: output of **Join two Datasets** {% icon tool %} +{: .hands_on} + +Let's now add column names to the generated table. + +> Add column names +> +> 1. {% tool [Add Header](toolshed.g2.bx.psu.edu/repos/estrain/add_column_headers/add_column_headers/0.1.3) %} with the following parameters: +> - *"List of Column headers"*: `Metaplasmidome database sequence name,Metagenomic sequence name,Metagenomic sequence` +> - {% icon param-file %} *"Data File (tab-delimted)"*: output of **Cut** {% icon tool %} +> +> +> 2. 
Rename the output to `Air metagenomes identified as plasmids` +{: .hands_on} + +## Create the FASTA file with air metagenomes identified as plasmids + +We can now generate a FASTA file with the identified sequences. + +From the output of **Join two Datasets** {% icon tool %}, let's keep only metagenomic sequence names and the metagenomic sequences and remove duplicated sequences + +> Keep unique metagenomic sequences +> +> 1. {% tool [Cut](Cut1) %} with the following parameters: +> - *"Cut columns"*: `c1,c4` +> - {% icon param-file %} *"From"*: output of **Join two Datasets** {% icon tool %} +> 2. {% tool [Sort](sort1) %} with the following parameters: +> - {% icon param-file %} *"Sort Dataset"*: output of **Cut** {% icon tool %} +> - *"on column"*: `Column: 1` +> - *"with flavor"*: `Alphabetical sort` +> 3. {% tool [Unique occurrences of each record](toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sorted_uniq/9.3+galaxy1) %} with the following parameters: +> - {% icon param-file %} *"File to scan for unique values"*: output of **Sort** {% icon tool %} +{: .hands_on} + +> +> +> How many lines have been kept? +> +> > +> > +> > 4,055 (over 5,249) +> > +> {: .solution} +> +{: .question} + +We have now a table with unique metagenomic sequences. Let's transform it into a FASTA file. + +> Convert to a FASTA file +> +> 1. {% tool [Tabular-to-FASTA](toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1) %} with the following parameters: +> - {% icon param-file %} *"Tab-delimited file"*: output of **Unique occurrences of each record** {% icon tool %} +> - *"Title column(s)"*: `Column: 1` +> - *"Sequence column"*: `Column: 2` +> +> 2. Rename the output to `Air metagenome sequences identified as plasmids` +{: .hands_on} + +# Annotate features on the identified plasmids + +Let's now annotate features on the identified plasmids. 
For that, we will use the annotation of the air metaplasmidome sequences that has been done with [Prokka](https://github.com/tseemann/prokka) and assume that the annotations are similar for the identified plasmids. + +We will import the GFF generated by Prokka. + +> Import the GFF with metaplasmidome reference database annotation +> +> 1. Import the metaplasmidome reference database annotation from [Zenodo]({{ page.zenodo_link }}) or from the shared data library +> +> ``` +> https://zenodo.org/records/14501567/files/air_metaplasmidome_annotations.gff +> ``` +> +> 2. Inspect it. +{: .hands_on} + +This file is a GFF: it describes genes and other features of DNA, RNA and protein sequences. It is a tab-delimited file with 9 fields per line: + +1. **seqid**: The name of the sequence where the feature is located. +2. **source**: The algorithm or procedure that generated the feature. +3. **type**: The feature type name, like "gene" or "exon". +4. **start**: Genomic start of the feature, with a 1-base offset. +5. **end**: Genomic end of the feature, with a 1-base offset. +6. **score**: Numeric value that generally indicates the confidence of the source in the annotated feature. +7. **strand**: Single character that indicates the strand of the feature. +8. **phase**: Phase of CDS features. +9. **attributes**: A list of tag-value pairs separated by a semicolon with additional information about the feature. + +The **seqid** corresponds here to the ID of the sequences in the metaplasmidome reference database. So to filter, we need to compare `Metaplasmidome database sequence name` in `Air metagenomes identified as plasmids` +to **seqid** by joining the 2 datasets on column 1. + +The file has ~85 million lines but many are comments (lines starting with `##`). + +> +> +> How many features are in the GFF file? 
+> +> > +> > +> > 2,951,015 features +> > +> > To get this number, we run {% tool [Count GFF Features](toolshed.g2.bx.psu.edu/repos/devteam/count_gff_features/count_gff_features/0.2) %} with the following parameters: +> > - {% icon param-file %} *"GFF Dataset to Filter"*: imported GFF +> > +> {: .solution} +> +{: .question} + +Let's now filter the GFF to keep only information related to the sequences matching to plasmids. +For that, we join the GFF on the SeqID column (column 1) with the `Air metagenomes identified as plasmids` file on the `Metaplasmidome database sequence name` (Column 1) + +> Extract information about the sequences matching to plasmids +> +> 1. {% tool [Join two Datasets](join1) %} with the following parameters: +> - {% icon param-file %} *"Join"*: imported GFF +> - *"using column"*: `Column: 1` +> - {% icon param-file %} *"with"*: `Air metagenomes identified as plasmids` +> - *"and column"*: `Column: 1` +> - *"Fill empty columns"*: `No` +> +> 2. Inspect the generated file +{: .hands_on} + +> +> +> 1. How many lines have been kept? +> 2. Why are there more lines than in the `Air metagenomes identified as plasmids` file? +> 3. What are the columns? +> 4. Which columns should we keep if we want to keep the Metagenomic sequence name, the feature and the attributes? +> +> > +> > +> > 1. 26k+ lines +> > 2. Some sequences (e.g. SRR17300493-2380) have several lines: several features (CDS, rRNA, etc) annotated on it +> > 3. The 9 columns of the GFF file + the 3 columns of the `Air metagenomes identified as plasmids` file +> > 4. Columns to keep: +> > 1. Metagenomic sequence name (Column 11) +> > 2. Feature (Column 3) +> > 3. Attributes (Column 9) +> {: .solution} +> +{: .question} + +Let's now cut the columns. + +> Cut and filter +> +> 1. {% tool [Cut](Cut1) %} with the following parameters: +> - *"Cut columns"*: `c11,c3,c9` +> - {% icon param-file %} *"From"*: output of **Join two Datasets** {% icon tool %} +> +> 2. 
Inspect the generated file +{: .hands_on} + +> +> +> What are the different identified features? +> +> > +> > +> > Using {% tool [Group data by a column](Grouping1) %} to group and count on the 2nd column, we find 26,329 CDS and 307 tRNA. +> > +> {: .solution} +> +{: .question} + +## Extract the CDS + +Let's filter to keep only the CDS and extract the gene names. + +> Keep CDS +> +> 1. {% tool [Filter](Filter1) %} with the following parameters: +> - {% icon param-file %} *"Filter"*: output of **Cut** {% icon tool %} +> - *"With following condition"*: `c2=='CDS'` +> +> 2. Inspect the generated file +{: .hands_on} + +We now have a file with 26,329 lines. The 2nd column is not useful so we will remove it. Column 3 (**attributes** in GFF) lists tag-value pairs separated by a semicolon with additional information about the feature. +The first lines seem to be hypothetical proteins. If we scroll down, we can find some annotated genes (with `gene` keyword). + +It would be good to create a tabular file with: +1. Metagenomic sequence name +2. Gene ID +3. Gene name +4. Gene product + +As not all genes are annotated (with `gene` keyword), we first need to split the file between hypothetical proteins and annotated genes and process the two generated files independently. + +> Split annotated and hypothetical CDS +> +> 1. {% tool [Cut](Cut1) %} with the following parameters: +> - *"Cut columns"*: `c1,c3` +> - {% icon param-file %} *"From"*: output of **Filter** {% icon tool %} +> +> 2. {% tool [Filter by keywords and/or numerical value](toolshed.g2.bx.psu.edu/repos/proteore/proteore_filter_keywords_values/MQoutputfilter/2021.04.19.1) %} with the following parameters: +> - {% icon param-file %} *"Input file"*: output of last **Cut** {% icon tool %} +> - *"Does file contain header? "*: `No` +> - In *Filter by keywords*: +> - *"Column number on which to apply the filter"*: `c2` +> - In `Enter keywords`: +> - `copy/paste` +> - *"Copy/paste keywords to find (keep or discard)"*: `gene=` +> +> 3. 
Inspect the generated files +{: .hands_on} + +> +> +> 1. How many lines have been kept (non-hypothetical proteins) and how many have been discarded (hypothetical proteins)? +> 2. Which information do we have for the 2nd identified gene? +> +> > +> > +> > 1. There are: +> > - 25,001 hypothetical protein CDS (lines in the discarded lines file) +> > - 1,329 annotated CDS (lines in the other file) +> > +> > 2. For the 2nd gene, the column 2 is `ID=BAFOEJEB_01977;eC_number=1.8.5.7;Name=yqjG_1;dbxref=COG:COG0435;gene=yqjG_1;inference=ab initio prediction:Prodigal:2.6,similar to AA sequence:UniProtKB:P42620;locus_tag=BAFOEJEB_01977;product=Glutathionyl-hydroquinone reductase YqjG`, which means: +> > - Gene name is `yqjG_1` +> > - The product is `Glutathionyl-hydroquinone reductase YqjG` +> > +> {: .solution} +> +{: .question} + +Let's extract gene ID, gene name and the product in different columns from the annotated gene output and add a header to the file. + +> Prepare the annotated CDS file +> +> 1. Change annotated CDS output of **Filter by keywords** {% icon tool %} datatype to tabular +> +> {% snippet faqs/galaxy/datasets_change_datatype.md %} +> +> 2. {% tool [Replace Text in a specific column](toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/9.3+galaxy1) %} with the following parameters: +> - {% icon param-file %} *"File to process"*: output of **Filter by keywords** {% icon tool %} +> - In *Replacement*: +> - *"in column"*: `Column: 2` +> - *"Find pattern"*: `ID=([^;]*);.*;gene=([^;]*).*;product=([^;]*).*` +> - *"Replace with"*: `\\1\t\\2\t\\3` +> +> 3. 
{% tool [Add Header](toolshed.g2.bx.psu.edu/repos/estrain/add_column_headers/add_column_headers/0.1.3) %} with the following parameters: +> - *"List of Column headers"*: `Metagenomic sequence name,Gene ID,Gene name,Gene product` +> - {% icon param-file %} *"Data File (tab-delimited)"*: output of **Replace Text** {% icon tool %} +> +{: .hands_on} + +Let's now prepare the hypothetical protein CDS file. + +> Prepare the hypothetical CDS gene file +> +> 1. Change hypothetical CDS gene output of **Filter by keywords** {% icon tool %} datatype to tabular +> +> 2. {% tool [Replace Text in a specific column](toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/9.3+galaxy1) %} with the following parameters: +> - {% icon param-file %} *"File to process"*: output of **Filter by keywords** {% icon tool %} +> - In *Replacement*: +> - *"in column"*: `Column: 2` +> - *"Find pattern"*: `ID=([^;]*);.*` +> - *"Replace with"*: `\\1\t\t` +> +{: .hands_on} + +We can now merge both files. + +> Concatenate the annotated and hypothetical protein gene files +> +> 1. {% tool [Concatenate datasets](cat1) %} with the following parameters: +> - {% icon param-file %} *"Concatenate Dataset"*: output of **Add Header** {% icon tool %} +> - In *Dataset*: +> - {% icon param-file %} *"Select"*: output of last **Replace Text** {% icon tool %} +> +> 2. Rename the output to `CDS in metagenomes identified as plasmids` +{: .hands_on} + +## Add KO and PFAM annotation + +Let's expand the annotation with **[KO](https://www.genome.jp/kegg/ko.html) (KEGG Orthology)** ({% cite kanehisa2016kegg %}), a database of molecular functions +represented in terms of functional orthologs, and **[PFAM](http://pfam.xfam.org/)** ({% cite mistry2021pfam %}), a large collection of protein families. The files are available on Zenodo. + +> Import KO and PFAM annotations +> +> 1. 
Import the KO and PFAM annotations from [Zenodo]({{ page.zenodo_link }}) or from the shared data library +> +> ``` +> https://zenodo.org/records/14501567/files/KOsignificatifNR.tsv +> https://zenodo.org/records/14501567/files/pfamsignificatifNR.tsv +> ``` +> +> 2. Inspect both files +{: .hands_on} + +Both are tabular files with several columns including the gene ID, KO/PFAM ID, and some extra annotation. +Let's now join the files with `CDS in metagenomes identified as plasmids` to extend the annotations using the gene ID. + +> Join with KO and PFAM annotation files +> +> 1. {% tool [Join two Datasets](join1) %} with the following parameters: +> - {% icon param-file %} *"Join"*: `CDS in metagenomes identified as plasmids` +> - *"using column"*: `Column: 2` +> - {% icon param-file %} *"with"*: imported KO file +> - *"and column"*: `Column: 2` +> - *"Keep lines of first input that do not join with second input"*: `Yes` +> +> 2. Inspect the generated file +> +> 3. {% tool [Cut](Cut1) %} with the following parameters: +> - *"Cut columns"*: `c1-c4,c7,c11` +> - {% icon param-file %} *"From"*: output of **Join** {% icon tool %} +> +> 4. {% tool [Join two Datasets](join1) %} with the following parameters: +> - {% icon param-file %} *"Join"*: output of **Cut** {% icon tool %} +> - *"using column"*: `Column: 2` +> - {% icon param-file %} *"with"*: imported PFAM file +> - *"and column"*: `Column: 1` +> - *"Keep lines of first input that do not join with second input"*: `Yes` +> +> 5. Inspect the generated file +> +> 6. {% tool [Cut](Cut1) %} with the following parameters: +> - *"Cut columns"*: `c1-c6,c9,c10,c25` +> - {% icon param-file %} *"From"*: output of last **Join** {% icon tool %} +> +> 7. 
{% tool [Select last lines from a dataset (tail)](toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_tail_tool/9.3+galaxy1) %} with the following parameters: +> - {% icon param-file %} *"Text file"*: output of last **Cut** {% icon tool %} +> - *"Operation"*: `Keep everything from lines on` +> - *"Number of lines"*: `2` +> +> 8. {% tool [Add Header](toolshed.g2.bx.psu.edu/repos/estrain/add_column_headers/add_column_headers/0.1.3) %} with the following parameters: +> - *"List of Column headers"*: `Metagenomic sequence name,Gene ID,Gene name,Gene product,KO ID,KO annotation,PFAM name,PFAM ID,PFAM annotation` +> - {% icon param-file %} *"Data File (tab-delimited)"*: output of **Select last** {% icon tool %} +> +> 9. Rename the output to `CDS in metagenomes identified as plasmids + KO + PFAM` +{: .hands_on} + +Let's look at the KO and PFAM statistics. + +> +> +> 1. How many genes have been extended with KO information? +> 2. How many different KO are found? +> 3. Which KO is found the most often? +> 4. How many genes have been extended with PFAM information? +> 5. How many different PFAM are found? +> 6. Which PFAM is found the most often? +> +> > +> > +> > 1. Using {% tool [Filter](Filter1) %} with `c5!='.'`, we find that 426 CDS (1.62%) have been extended with KO information. +> > 2. 277 KO ({% tool [Group data by a column](Grouping1) %} to group and count on 5th column) +> > 3. K14572 (Ribosome biogenesis in eukaryotes) is found 9 times ({% tool [Sort](sort1) %} by descending order on the **Group data by a column** output on column 2) +> > 4. 4,817 CDS (18.29%) have been extended with PFAM information ({% tool [Filter](Filter1) %} with `c8!='.'` on `CDS in metagenomes identified as plasmids + KO + PFAM`) +> > 5. 410 PFAM ({% tool [Group data by a column](Grouping1) %} to group and count on 8th column) +> > 6. 
PF00961.22 (Cytochrome c oxidase subunit 1) is found 89 times ({% tool [Sort](sort1) %} by descending order) +> > +> {: .solution} +> +{: .question} + +Let's extract an overview of the annotations per metagenomic sequence by grouping on the 1st column and counting the number of distinct values of gene ID, gene name, KO ID and PFAM ID. + +> Annotation per metagenomic sequence +> +> 1. {% tool [Group data by a column](Grouping1) %} with the following parameters: +> - {% icon param-file %} *"Select data"*: `CDS in metagenomes identified as plasmids + KO + PFAM` +> - *"Group by column"*: `Column: 1` +> - In *"Operation"*: +> - In *"1: Operation"*: +> - *"Type"*: `Count distinct` +> - *"On column"*: `Column: 2` +> - In *"2: Operation"*: +> - *"Type"*: `Count distinct` +> - *"On column"*: `Column: 3` +> - In *"3: Operation"*: +> - *"Type"*: `Count distinct` +> - *"On column"*: `Column: 5` +> - In *"4: Operation"*: +> - *"Type"*: `Count distinct` +> - *"On column"*: `Column: 8` +> +> 2. {% tool [Select last lines from a dataset (tail)](toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_tail_tool/9.3+galaxy1) %} with the following parameters: +> - {% icon param-file %} *"Text file"*: output of **Group data by a column** {% icon tool %} +> - *"Operation"*: `Keep everything from lines on` +> - *"Number of lines"*: `2` +> +> 3. {% tool [Add Header](toolshed.g2.bx.psu.edu/repos/estrain/add_column_headers/add_column_headers/0.1.3) %} with the following parameters: +> - *"List of Column headers"*: `Metagenomic sequence name,Number of CDS,Number of annotated CDS,Number of associated KO,Number of associated PFAM` +> - {% icon param-file %} *"Data File (tab-delimited)"*: output of **Select last** {% icon tool %} +> +> 4. 
Rename `CDS annotation overview per metagenomic sequences` +{: .hands_on} + +# Conclusion + diff --git a/topics/microbiome/tutorials/metaplasmidome_query/workflows/index.md b/topics/microbiome/tutorials/metaplasmidome_query/workflows/index.md new file mode 100644 index 00000000000000..e092e0ae66ddd4 --- /dev/null +++ b/topics/microbiome/tutorials/metaplasmidome_query/workflows/index.md @@ -0,0 +1,3 @@ +--- +layout: workflow-list +--- diff --git a/topics/microbiome/tutorials/metaplasmidome_query/workflows/metaplasmidome-tests.yml b/topics/microbiome/tutorials/metaplasmidome_query/workflows/metaplasmidome-tests.yml new file mode 100644 index 00000000000000..ad212c6129d280 --- /dev/null +++ b/topics/microbiome/tutorials/metaplasmidome_query/workflows/metaplasmidome-tests.yml @@ -0,0 +1,73 @@ +- doc: Test outline for Query-a-metaplasmidome-database-to-identify-and-annotate-plasmids-in-metagenomes + job: + Metaplasmidome sequences: + class: File + path: https://zenodo.org/records/14501567/files/air_metaplasmidome.fasta.gz + filetype: fasta.gz + Raw metagenomics data: + class: File + path: https://zenodo.org/records/14501567/files/air_metagenome_assemblies.fasta.gz + filetype: fasta.gz + Metaplasmidome predicted CDS: + class: File + path: https://zenodo.org/records/14501567/files/air_metaplasmidome_annotations.gff + filetype: gff3 + PFAM: + class: File + path: https://zenodo.org/records/14501567/files/pfamsignificatifNR.tsv + filetype: tabular + KEGG Ortogolog: + class: File + path: https://zenodo.org/records/14501567/files/KOsignificatifNR.tsv + filetype: tabular + outputs: + Metagenome sequences identified as plasmids: + asserts: + has_text: + text: ">SRR17300466_100021" + Metagenomes identified as plasmids: + asserts: + has_text: + text: "SRR17300667-707" + has_text: + text: "SRR17300492_244288" + has_n_lines: + n: 5250 + has_n_columns: + n: 3 + CDS in metagenomes identified as plasmids: + asserts: + has_text: + text: "SRR17300466_10187" + has_text: + text: 
"BAFOEJEB_01977" + has_text: + text: "serS_2" + has_n_lines: + n: 26330 + has_n_columns: + n: 4 + CDS in metagenomes identified as plasmids + KO + PFAM: + asserts: + has_text: + text: "SRR17300466_10187" + has_text: + text: "BAFOEJEB_01977" + has_text: + text: "serS_2" + has_text: + text: "K07393" + has_text: + text: "PF00115.23" + has_n_lines: + n: 26330 + has_n_columns: + n: 9 + CDS annotation overview per metagenomic sequences: + asserts: + has_text: + text: "SRR17300466_100021" + has_n_lines: + n: 4034 + has_n_columns: + n: 5 diff --git a/topics/microbiome/tutorials/metaplasmidome_query/workflows/metaplasmidome.ga b/topics/microbiome/tutorials/metaplasmidome_query/workflows/metaplasmidome.ga new file mode 100644 index 00000000000000..00d49d0395aabf --- /dev/null +++ b/topics/microbiome/tutorials/metaplasmidome_query/workflows/metaplasmidome.ga @@ -0,0 +1,2180 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "This workflow query metagenomic raw data against a metaplasmidome database to identify plasmids and annotate them with genes, KO, PFAM", + "comments": [], + "creator": [ + { + "class": "Person", + "identifier": "0000-0001-9852-1987", + "name": "B\u00e9r\u00e9nice Batut" + }, + { + "class": "Person", + "identifier": "0000-0003-2750-1473", + "name": "Nadia Gou\u00e9" + } + ], + "format-version": "0.1", + "license": "MIT", + "name": "Query a metaplasmidome database to identify and annotate plasmids in metagenomes", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "FASTA sequences of the metaplasmidome database", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "FASTA sequences of the metaplasmidome database", + "name": "Metaplasmidome sequences" + } + ], + "label": 
"Metaplasmidome sequences", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 0, + "top": 296.94122404660857 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "36672496-e380-4208-9c5b-24de5376b583", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "FASTA files of the raw metagenomics data", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "FASTA files of the raw metagenomics data", + "name": "Raw metagenomics data" + } + ], + "label": "Raw metagenomics data", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 0, + "top": 419.94122404660857 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "d5c8b2ed-bba0-40b9-8951-89f4407a4aa6", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "GFF file with predicted CDS", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "GFF file with predicted CDS", + "name": "Metaplasmidome predicted CDS" + } + ], + "label": "Metaplasmidome predicted CDS", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 2520.084604995301, + "top": 503.94122404660857 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "8af11be5-af1a-455c-a40a-cb881cdcbccd", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "Tabular file with non-redundant KEGG Orthologs and CDS IDs", + "content_id": null, + "errors": null, + "id": 3, + "input_connections": {}, + "inputs": [ + { + "description": "Tabular file with non-redundant KEGG Orthologs and CDS IDs", + "name": "KEGG Ortogolog" + } + ], + "label": "KEGG Ortogolog", + "name": "Input dataset", + "outputs": [], + 
"position": { + "left": 4760.159856005782, + "top": 219.94122404660857 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "7b7694c5-3182-4387-8ad5-b799e414659b", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "Tabular file with non-redundant PFAM and CDS IDs", + "content_id": null, + "errors": null, + "id": 4, + "input_connections": {}, + "inputs": [ + { + "description": "Tabular file with non-redundant PFAM and CDS IDs", + "name": "PFAM" + } + ], + "label": "PFAM", + "name": "Input dataset", + "outputs": [], + "position": { + "left": 5320.178657115849, + "top": 374.94122404660857 + }, + "tool_id": null, + "tool_state": "{\"optional\": false, \"tag\": null}", + "tool_version": null, + "type": "data_input", + "uuid": "795a188d-6bce-4d08-873e-7224a06fca74", + "when": null, + "workflow_outputs": [] + }, + "5": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/minimap2/minimap2/2.28+galaxy0", + "errors": null, + "id": 5, + "input_connections": { + "fastq_input|fastq_input1": { + "id": 1, + "output_name": "output" + }, + "reference_source|ref_file": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Map with minimap2", + "name": "fastq_input" + }, + { + "description": "runtime parameter for tool Map with minimap2", + "name": "reference_source" + } + ], + "label": null, + "name": "Map with minimap2", + "outputs": [ + { + "name": "alignment_output", + "type": "bam" + } + ], + "position": { + "left": 280.00940055503344, + "top": 279.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionalignment_output": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "alignment_output" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/minimap2/minimap2/2.28+galaxy0", + "tool_shed_repository": { + "changeset_revision": "5cc34c3f440d", + "name": 
"minimap2", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"alignment_options\": {\"splicing\": {\"splice_mode\": \"preset\", \"__current_case__\": 0}, \"A\": null, \"B\": null, \"O\": null, \"O2\": null, \"E\": null, \"E2\": null, \"z\": null, \"z2\": null, \"s\": null, \"no_end_flt\": true}, \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"fastq_input\": {\"fastq_input_selector\": \"single\", \"__current_case__\": 0, \"fastq_input1\": {\"__class__\": \"ConnectedValue\"}, \"analysis_type_selector\": \"asm5\"}, \"indexing_options\": {\"H\": false, \"k\": null, \"w\": null, \"I\": null}, \"io_options\": {\"output_format\": \"paf\", \"Q\": false, \"L\": false, \"K\": null, \"cs\": null, \"c\": false, \"eqx\": false, \"Y\": false}, \"mapping_options\": {\"N\": null, \"F\": null, \"f\": null, \"kmer_ocurrence_interval\": {\"interval\": \"\", \"__current_case__\": 1}, \"min_occ_floor\": null, \"q_occ_frac\": \"0.01\", \"g\": null, \"r\": null, \"n\": null, \"m\": null, \"max_chain_skip\": null, \"max_chain_iter\": null, \"X\": false, \"p\": null, \"mask_len\": null}, \"reference_source\": {\"reference_source_selector\": \"history\", \"__current_case__\": 1, \"ref_file\": {\"__class__\": \"ConnectedValue\"}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.28+galaxy0", + "type": "tool", + "uuid": "f5bfdf13-1595-4329-871e-e33296d5eaf8", + "when": null, + "workflow_outputs": [] + }, + "6": { + "annotation": "", + "content_id": "CONVERTER_fasta_to_tabular", + "errors": null, + "id": 6, + "input_connections": { + "input": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Convert FASTA to Tabular", + "outputs": [ + { + "name": "output", + "type": "tabular" + } + ], + "position": { + "left": 280.00940055503344, + "top": 594.9412240466086 + }, + "post_job_actions": { + "HideDatasetActionoutput": { + 
"action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output" + } + }, + "tool_id": "CONVERTER_fasta_to_tabular", + "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.1", + "type": "tool", + "uuid": "57338f7f-1f62-45e1-a325-fc6f5970c12a", + "when": null, + "workflow_outputs": [] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/count_gff_features/count_gff_features/0.2", + "errors": null, + "id": 7, + "input_connections": { + "input": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Count GFF Features", + "outputs": [ + { + "name": "output", + "type": "txt" + } + ], + "position": { + "left": 2800.0940055503347, + "top": 519.9412240466086 + }, + "post_job_actions": { + "HideDatasetActionoutput": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/count_gff_features/count_gff_features/0.2", + "tool_shed_repository": { + "changeset_revision": "188392a0d0a8", + "name": "count_gff_features", + "owner": "devteam", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"gff3\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.2", + "type": "tool", + "uuid": "e31fb584-bb1d-4be8-a62f-7e0e1dd4f99f", + "when": null, + "workflow_outputs": [] + }, + "8": { + "annotation": "", + "content_id": "Cut1", + "errors": null, + "id": 8, + "input_connections": { + "input": { + "id": 5, + "output_name": "alignment_output" + } + }, + "inputs": [], + "label": null, + "name": "Cut", + 
"outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 560.0166625976562, + "top": 189.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Cut1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"columnList\": \"c1-c12\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.2", + "type": "tool", + "uuid": "f4c47e71-a7f5-48de-8621-be8e3e3a05b2", + "when": null, + "workflow_outputs": [] + }, + "9": { + "annotation": "", + "content_id": "Cut1", + "errors": null, + "id": 9, + "input_connections": { + "input": { + "id": 5, + "output_name": "alignment_output" + } + }, + "inputs": [], + "label": null, + "name": "Cut", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 560.0188011100669, + "top": 324.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Cut1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"columnList\": \"c12\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.2", + "type": "tool", + "uuid": "1470405e-27e7-4328-a74a-7275d068d214", + "when": null, + "workflow_outputs": [] + }, + "10": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/column_maker/Add_a_column1/2.1", + "errors": null, + "id": 10, + "input_connections": { + "input": { + "id": 8, + "output_name": "out_file1" + } + }, + "inputs": 
[], + "label": null, + "name": "Compute", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 840.0282016651004, + "top": 189.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/column_maker/Add_a_column1/2.1", + "tool_shed_repository": { + "changeset_revision": "aff5135563c6", + "name": "column_maker", + "owner": "devteam", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"avoid_scientific_notation\": false, \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"error_handling\": {\"auto_col_types\": true, \"fail_on_non_existent_columns\": true, \"non_computable\": {\"action\": \"--fail-on-non-computable\", \"__current_case__\": 0}}, \"input\": {\"__class__\": \"ConnectedValue\"}, \"ops\": {\"header_lines_select\": \"no\", \"__current_case__\": 0, \"expressions\": [{\"__index__\": 0, \"cond\": \"c10/c7\", \"add_column\": {\"mode\": \"\", \"__current_case__\": 0, \"pos\": \"\"}}]}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.1", + "type": "tool", + "uuid": "fb64d4fd-2bf9-4966-b131-77d30265b2f7", + "when": null, + "workflow_outputs": [] + }, + "11": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_histogram/ggplot2_histogram/3.4.0+galaxy0", + "errors": null, + "id": 11, + "input_connections": { + "input1": { + "id": 9, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Histogram with ggplot2", + "outputs": [ + { + "name": "output1", + "type": "png" + } + ], + "position": { + "left": 840.0282016651004, + "top": 323.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionoutput1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": 
"output1" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_histogram/ggplot2_histogram/3.4.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "b0d96516e6a5", + "name": "ggplot2_histogram", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"adv\": {\"density\": \"counts\", \"facet\": \"facet\", \"coloring\": {\"colorscheme\": \"Default\", \"__current_case__\": 0}, \"transform\": \"none\", \"scaling\": {\"plot_scaling\": \"Automatic\", \"__current_case__\": 0}, \"theme\": \"bw\", \"legend\": \"no\"}, \"binwidth\": \"0.5\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"input1\": {\"__class__\": \"ConnectedValue\"}, \"out\": {\"unit_output_dim\": \"in\", \"width_output_dim\": \"7.0\", \"height_output_dim\": \"7.0\", \"dpi_output_dim\": \"300.0\", \"additional_output_format\": \"none\"}, \"size\": \"1.0\", \"title\": \"\", \"xlab\": \"Score\", \"ylab\": \"Distribution\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.4.0+galaxy0", + "type": "tool", + "uuid": "038166d9-27c3-4d1c-aac6-9d3996f6cf8f", + "when": null, + "workflow_outputs": [] + }, + "12": { + "annotation": "", + "content_id": "Cut1", + "errors": null, + "id": 12, + "input_connections": { + "input": { + "id": 10, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Cut", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 1120.0376022201337, + "top": 74.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Cut1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"columnList\": \"c13\", \"delimiter\": \"T\", \"input\": {\"__class__\": 
\"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.2", + "type": "tool", + "uuid": "6b82f850-f67a-4bae-b12d-f455d342c295", + "when": null, + "workflow_outputs": [] + }, + "13": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/histogram/histogram_rpy/1.0.4", + "errors": null, + "id": 13, + "input_connections": { + "input": { + "id": 10, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Histogram", + "outputs": [ + { + "name": "out_file1", + "type": "pdf" + } + ], + "position": { + "left": 1120.0376022201337, + "top": 208.94122404660857 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/histogram/histogram_rpy/1.0.4", + "tool_shed_repository": { + "changeset_revision": "6f134426c2b0", + "name": "histogram", + "owner": "devteam", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"tabular\", \"breaks\": \"0\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"density\": true, \"frequency\": false, \"input\": {\"__class__\": \"ConnectedValue\"}, \"numerical_column\": \"13\", \"title\": \"Histogram\", \"xlab\": \"Plasmid coverage\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.4", + "type": "tool", + "uuid": "a6e67071-29e0-4fd0-927d-8f1a96c77cc0", + "when": null, + "workflow_outputs": [] + }, + "14": { + "annotation": "", + "content_id": "Filter1", + "errors": null, + "id": 14, + "input_connections": { + "input": { + "id": 10, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Filter", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 1120.0376022201337, + "top": 342.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + 
"tool_id": "Filter1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"cond\": \"c13>=0.8\", \"header_lines\": \"0\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.1.1", + "type": "tool", + "uuid": "6007ea0f-97bb-4c1a-bf75-f1e295b52451", + "when": null, + "workflow_outputs": [] + }, + "15": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_histogram/ggplot2_histogram/3.4.0+galaxy0", + "errors": null, + "id": 15, + "input_connections": { + "input1": { + "id": 12, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Histogram with ggplot2", + "outputs": [ + { + "name": "output1", + "type": "png" + } + ], + "position": { + "left": 1413.6394515545703, + "top": 0 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_histogram/ggplot2_histogram/3.4.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "b0d96516e6a5", + "name": "ggplot2_histogram", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"adv\": {\"density\": \"counts\", \"facet\": \"facet\", \"coloring\": {\"colorscheme\": \"Default\", \"__current_case__\": 0}, \"transform\": \"none\", \"scaling\": {\"plot_scaling\": \"Automatic\", \"__current_case__\": 0}, \"theme\": \"bw\", \"legend\": \"no\"}, \"binwidth\": \"0.1\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"input1\": {\"__class__\": \"ConnectedValue\"}, \"out\": {\"unit_output_dim\": \"in\", \"width_output_dim\": \"7.0\", \"height_output_dim\": \"7.0\", \"dpi_output_dim\": \"300.0\", \"additional_output_format\": \"none\"}, \"size\": \"1.0\", \"title\": \"\", \"xlab\": \"Plasmid coverage\", \"ylab\": \"Distribution\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + 
"tool_version": "3.4.0+galaxy0", + "type": "tool", + "uuid": "5a96b6fc-c459-42bf-82c9-2ac5e5ce6fe9", + "when": null, + "workflow_outputs": [] + }, + "16": { + "annotation": "", + "content_id": "Filter1", + "errors": null, + "id": 16, + "input_connections": { + "input": { + "id": 14, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Filter", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 1400.0470027751671, + "top": 342.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Filter1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"cond\": \"c12>=40\", \"header_lines\": \"0\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.1.1", + "type": "tool", + "uuid": "f20fcc2a-9666-45c4-bf3d-959c1835eeb5", + "when": null, + "workflow_outputs": [] + }, + "17": { + "annotation": "", + "content_id": "Cut1", + "errors": null, + "id": 17, + "input_connections": { + "input": { + "id": 14, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Cut", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 1400.0470027751671, + "top": 477.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Cut1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"columnList\": \"c12\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + 
"tool_version": "1.0.2", + "type": "tool", + "uuid": "9762e6a6-d2e6-4f86-96f0-ffff9a92c7b9", + "when": null, + "workflow_outputs": [] + }, + "18": { + "annotation": "", + "content_id": "Cut1", + "errors": null, + "id": 18, + "input_connections": { + "input": { + "id": 16, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Cut", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 1680.0564033302005, + "top": 342.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Cut1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"columnList\": \"c1,c6\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.2", + "type": "tool", + "uuid": "02b58e05-4e1a-40a1-99b6-f839d0155227", + "when": null, + "workflow_outputs": [] + }, + "19": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_histogram/ggplot2_histogram/3.4.0+galaxy0", + "errors": null, + "id": 19, + "input_connections": { + "input1": { + "id": 17, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Histogram with ggplot2", + "outputs": [ + { + "name": "output1", + "type": "png" + } + ], + "position": { + "left": 1680.047227633093, + "top": 476.9450002240956 + }, + "post_job_actions": { + "HideDatasetActionoutput1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output1" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/ggplot2_histogram/ggplot2_histogram/3.4.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "b0d96516e6a5", + "name": "ggplot2_histogram", + "owner": "iuc", + "tool_shed": 
"toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"adv\": {\"density\": \"counts\", \"facet\": \"none\", \"coloring\": {\"colorscheme\": \"Default\", \"__current_case__\": 0}, \"transform\": \"none\", \"scaling\": {\"plot_scaling\": \"Automatic\", \"__current_case__\": 0}, \"theme\": \"bw\", \"legend\": \"no\"}, \"binwidth\": \"0.5\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"input1\": {\"__class__\": \"ConnectedValue\"}, \"out\": {\"unit_output_dim\": \"in\", \"width_output_dim\": \"7.0\", \"height_output_dim\": \"7.0\", \"dpi_output_dim\": \"300.0\", \"additional_output_format\": \"none\"}, \"size\": \"1.0\", \"title\": \"\", \"xlab\": \"Score\", \"ylab\": \"Distribution\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "3.4.0+galaxy0", + "type": "tool", + "uuid": "68ecb6fc-7850-41d7-9b03-9636e8a8cb44", + "when": null, + "workflow_outputs": [] + }, + "20": { + "annotation": "", + "content_id": "join1", + "errors": null, + "id": 20, + "input_connections": { + "input1": { + "id": 18, + "output_name": "out_file1" + }, + "input2": { + "id": 6, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Join two Datasets", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 1960.065803885234, + "top": 344.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "join1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"field1\": \"1\", \"field2\": \"1\", \"fill_empty_columns\": {\"fill_empty_columns_switch\": \"no_fill\", \"__current_case__\": 0}, \"header\": \"\", \"input1\": {\"__class__\": \"ConnectedValue\"}, \"input2\": {\"__class__\": \"ConnectedValue\"}, \"partial\": 
\"\", \"unmatched\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.1.3", + "type": "tool", + "uuid": "33032111-1cbb-494b-8c47-b78c6fc97a26", + "when": null, + "workflow_outputs": [] + }, + "21": { + "annotation": "", + "content_id": "Cut1", + "errors": null, + "id": 21, + "input_connections": { + "input": { + "id": 20, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Cut", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 2240.0752044402675, + "top": 369.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Cut1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"columnList\": \"c2,c1,c4\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.2", + "type": "tool", + "uuid": "f8db4d39-1930-409e-9fca-695aa85ebe81", + "when": null, + "workflow_outputs": [] + }, + "22": { + "annotation": "", + "content_id": "Cut1", + "errors": null, + "id": 22, + "input_connections": { + "input": { + "id": 20, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Cut", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 2240.0752044402675, + "top": 683.9412240466086 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Cut1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"columnList\": \"c1,c4\", \"delimiter\": \"T\", \"input\": {\"__class__\": 
\"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.2", + "type": "tool", + "uuid": "0537bdb2-9e59-445f-819b-736158f3d944", + "when": null, + "workflow_outputs": [] + }, + "23": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/estrain/add_column_headers/add_column_headers/0.1.3", + "errors": null, + "id": 23, + "input_connections": { + "datatable": { + "id": 21, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Add Header", + "outputs": [ + { + "name": "Data Table", + "type": "tabular" + } + ], + "position": { + "left": 2553.662374965172, + "top": 237.18001003407176 + }, + "post_job_actions": { + "RenameDatasetActionData Table": { + "action_arguments": { + "newname": "Metagenomes identified as plasmids" + }, + "action_type": "RenameDatasetAction", + "output_name": "Data Table" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/estrain/add_column_headers/add_column_headers/0.1.3", + "tool_shed_repository": { + "changeset_revision": "ff2acdb98a74", + "name": "add_column_headers", + "owner": "estrain", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"datatable\": {\"__class__\": \"ConnectedValue\"}, \"header\": \"Metaplasmidome database sequence name,Metagenomic sequence name,Metagenomic sequence\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.1.3", + "type": "tool", + "uuid": "82d46177-4eab-4399-b328-baab91477429", + "when": null, + "workflow_outputs": [ + { + "label": "Metagenomes identified as plasmids", + "output_name": "Data Table", + "uuid": "a183cd1f-a2f6-459c-a1f7-49ac50ab0df3" + } + ] + }, + "24": { + "annotation": "", + "content_id": "sort1", + "errors": null, + "id": 24, + "input_connections": { + "input": { + "id": 22, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Sort", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 
2520.084604995301, + "top": 683.9412240466086 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "sort1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"column\": \"1\", \"column_set\": [], \"header_lines\": \"0\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"order\": \"DESC\", \"style\": \"alpha\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.2.0", + "type": "tool", + "uuid": "315b7ab3-1226-488e-8a29-b46fab625995", + "when": null, + "workflow_outputs": [] + }, + "25": { + "annotation": "", + "content_id": "join1", + "errors": null, + "id": 25, + "input_connections": { + "input1": { + "id": 2, + "output_name": "output" + }, + "input2": { + "id": 23, + "output_name": "Data Table" + } + }, + "inputs": [], + "label": null, + "name": "Join two Datasets", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 2800.0940055503347, + "top": 334.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "join1", + "tool_state": "{\"__input_ext\": \"gff3\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"field1\": \"1\", \"field2\": \"1\", \"fill_empty_columns\": {\"fill_empty_columns_switch\": \"no_fill\", \"__current_case__\": 0}, \"header\": \"\", \"input1\": {\"__class__\": \"ConnectedValue\"}, \"input2\": {\"__class__\": \"ConnectedValue\"}, \"partial\": \"\", \"unmatched\": \"\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.1.3", + "type": "tool", + "uuid": "3e313ef2-5928-4188-9738-53772566d835", + "when": null, + "workflow_outputs": [] + }, + "26": { + 
"annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sorted_uniq/9.3+galaxy1", + "errors": null, + "id": 26, + "input_connections": { + "infile": { + "id": 24, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Unique", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 2800.0940055503347, + "top": 673.9412240466086 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_sorted_uniq/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"adv_opts\": {\"adv_opts_selector\": \"basic\", \"__current_case__\": 0}, \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"ignore_case\": false, \"infile\": {\"__class__\": \"ConnectedValue\"}, \"is_numeric\": false, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "22ab4118-7d73-42e7-8b1e-ca93191a6d25", + "when": null, + "workflow_outputs": [] + }, + "27": { + "annotation": "", + "content_id": "Cut1", + "errors": null, + "id": 27, + "input_connections": { + "input": { + "id": 25, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Cut", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 3078.6761300637186, + "top": 355.6441194223201 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Cut1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": 
\"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"columnList\": \"c11,c3,c9\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.2", + "type": "tool", + "uuid": "3b79d13f-cd93-47a0-aba6-5d7954a19cdc", + "when": null, + "workflow_outputs": [] + }, + "28": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", + "errors": null, + "id": 28, + "input_connections": { + "input": { + "id": 26, + "output_name": "outfile" + } + }, + "inputs": [], + "label": null, + "name": "Tabular-to-FASTA", + "outputs": [ + { + "name": "output", + "type": "fasta" + } + ], + "position": { + "left": 3080.086245631013, + "top": 695.9318338936986 + }, + "post_job_actions": { + "RenameDatasetActionoutput": { + "action_arguments": { + "newname": "Metagenome sequences identified as plasmids" + }, + "action_type": "RenameDatasetAction", + "output_name": "output" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/tabular_to_fasta/tab2fasta/1.1.1", + "tool_shed_repository": { + "changeset_revision": "0a7799698fe5", + "name": "tabular_to_fasta", + "owner": "devteam", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"seq_col\": \"2\", \"title_col\": [\"1\"], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.1.1", + "type": "tool", + "uuid": "96f96ec6-ed14-4c3e-b529-d1d3e7112e4a", + "when": null, + "workflow_outputs": [ + { + "label": "Metagenome sequences identified as plasmids", + "output_name": "output", + "uuid": "7c9e9f30-c340-4738-9f3b-66cbe5bde2c3" + } + ] + }, + "29": { + "annotation": "", + "content_id": "Grouping1", + "errors": null, + "id": 29, + "input_connections": { + "input1": { + "id": 
27, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Group", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 3360.1180413272446, + "top": 225.9298554256009 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Grouping1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"groupcol\": \"2\", \"ignorecase\": false, \"ignorelines\": null, \"input1\": {\"__class__\": \"ConnectedValue\"}, \"operations\": [{\"__index__\": 0, \"optype\": \"length\", \"opcol\": \"3\", \"opround\": \"no\", \"opdefault\": null}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.1.4", + "type": "tool", + "uuid": "bbb86e7d-dae2-4e9a-bba4-014321316098", + "when": null, + "workflow_outputs": [] + }, + "30": { + "annotation": "", + "content_id": "Filter1", + "errors": null, + "id": 30, + "input_connections": { + "input": { + "id": 27, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Filter", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 3360.1180413272446, + "top": 359.9464161550109 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Filter1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"cond\": \"c2=='CDS'\", \"header_lines\": \"0\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.1.1", + "type": "tool", + "uuid": "77a50316-87e9-4e37-bec1-43001637161a", + "when": null, + "workflow_outputs": [] + }, 
+ "31": { + "annotation": "", + "content_id": "Cut1", + "errors": null, + "id": 31, + "input_connections": { + "input": { + "id": 30, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Cut", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 3640.122253785648, + "top": 359.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Cut1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"columnList\": \"c1,c3\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.2", + "type": "tool", + "uuid": "5c061df8-6f0b-4d5d-bb0c-951bfac6b225", + "when": null, + "workflow_outputs": [] + }, + "32": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/proteore/proteore_filter_keywords_values/MQoutputfilter/2021.04.19.1", + "errors": null, + "id": 32, + "input_connections": { + "input1": { + "id": 31, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Filter by keywords and/or numerical value", + "outputs": [ + { + "name": "discarded_lines", + "type": "tsv" + }, + { + "name": "kept_lines", + "type": "tsv" + } + ], + "position": { + "left": 3920.1316543406815, + "top": 314.94122404660857 + }, + "post_job_actions": { + "ChangeDatatypeActiondiscarded_lines": { + "action_arguments": { + "newtype": "tabular" + }, + "action_type": "ChangeDatatypeAction", + "output_name": "discarded_lines" + }, + "ChangeDatatypeActionkept_lines": { + "action_arguments": { + "newtype": "tabular" + }, + "action_type": "ChangeDatatypeAction", + "output_name": "kept_lines" + }, + "HideDatasetActiondiscarded_lines": { + "action_arguments": {}, + "action_type": 
"HideDatasetAction", + "output_name": "discarded_lines" + }, + "HideDatasetActionkept_lines": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "kept_lines" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/proteore/proteore_filter_keywords_values/MQoutputfilter/2021.04.19.1", + "tool_shed_repository": { + "changeset_revision": "98cb671a92eb", + "name": "proteore_filter_keywords_values", + "owner": "proteore", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"header\": false, \"input1\": {\"__class__\": \"ConnectedValue\"}, \"keyword\": [{\"__index__\": 0, \"ncol\": \"c2\", \"match\": false, \"k\": {\"kw\": \"text\", \"__current_case__\": 0, \"txt\": \"gene=\"}}], \"operation\": \"keep\", \"operator\": \"OR\", \"sort\": {\"sort_bool\": false, \"__current_case__\": 1}, \"value\": [], \"values_range\": [], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2021.04.19.1", + "type": "tool", + "uuid": "05a159bb-75ef-41a1-9f80-346b76c56968", + "when": null, + "workflow_outputs": [] + }, + "33": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/9.3+galaxy1", + "errors": null, + "id": 33, + "input_connections": { + "infile": { + "id": 32, + "output_name": "kept_lines" + } + }, + "inputs": [], + "label": null, + "name": "Replace Text", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 4200.141478385132, + "top": 394.9384208278514 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": 
"text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"infile\": {\"__class__\": \"ConnectedValue\"}, \"replacements\": [{\"__index__\": 0, \"column\": \"2\", \"find_pattern\": \"ID=([^;]*);.*;gene=([^;]*).*;product=([^;]*).*\", \"replace_pattern\": \"\\\\\\\\1\\\\t\\\\\\\\2\\\\t\\\\\\\\3\"}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "db145538-9dc2-4811-970e-0a414c8979e7", + "when": null, + "workflow_outputs": [] + }, + "34": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/9.3+galaxy1", + "errors": null, + "id": 34, + "input_connections": { + "infile": { + "id": 32, + "output_name": "discarded_lines" + } + }, + "inputs": [], + "label": null, + "name": "Replace Text", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 4480.150455450748, + "top": 260.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_replace_in_column/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"infile\": {\"__class__\": \"ConnectedValue\"}, \"replacements\": [{\"__index__\": 0, \"column\": \"2\", \"find_pattern\": \"ID=([^;]*);.*\", \"replace_pattern\": \"\\\\\\\\1\\\\t\\\\t\"}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": 
"tool", + "uuid": "1f9ecccb-6f55-4835-b08e-9c644bc9f3d5", + "when": null, + "workflow_outputs": [] + }, + "35": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/estrain/add_column_headers/add_column_headers/0.1.3", + "errors": null, + "id": 35, + "input_connections": { + "datatable": { + "id": 33, + "output_name": "outfile" + } + }, + "inputs": [], + "label": null, + "name": "Add Header", + "outputs": [ + { + "name": "Data Table", + "type": "tabular" + } + ], + "position": { + "left": 4480.149290737761, + "top": 394.9384208278514 + }, + "post_job_actions": { + "HideDatasetActionData Table": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "Data Table" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/estrain/add_column_headers/add_column_headers/0.1.3", + "tool_shed_repository": { + "changeset_revision": "ff2acdb98a74", + "name": "add_column_headers", + "owner": "estrain", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"datatable\": {\"__class__\": \"ConnectedValue\"}, \"header\": \"Metagenomic sequence name,Gene ID,Gene name,Gene product\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.1.3", + "type": "tool", + "uuid": "5ed1ab17-1209-42a2-9488-04eb9c2060eb", + "when": null, + "workflow_outputs": [] + }, + "36": { + "annotation": "", + "content_id": "cat1", + "errors": null, + "id": 36, + "input_connections": { + "input1": { + "id": 35, + "output_name": "Data Table" + }, + "queries_0|input2": { + "id": 34, + "output_name": "outfile" + } + }, + "inputs": [], + "label": null, + "name": "Concatenate datasets", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 4760.159856005782, + "top": 342.94122404660857 + }, + "post_job_actions": { + "RenameDatasetActionout_file1": { + "action_arguments": { + 
"newname": "CDS in metagenomes identified as plasmids" + }, + "action_type": "RenameDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "cat1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"input1\": {\"__class__\": \"ConnectedValue\"}, \"queries\": [{\"__index__\": 0, \"input2\": {\"__class__\": \"ConnectedValue\"}}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.0", + "type": "tool", + "uuid": "4507b53a-2503-42a2-bb39-2bf800a22745", + "when": null, + "workflow_outputs": [ + { + "label": "CDS in metagenomes identified as plasmids", + "output_name": "out_file1", + "uuid": "dd7ee5f7-b8a4-45a7-9525-8ae626364e20" + } + ] + }, + "37": { + "annotation": "", + "content_id": "join1", + "errors": null, + "id": 37, + "input_connections": { + "input1": { + "id": 36, + "output_name": "out_file1" + }, + "input2": { + "id": 3, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Join two Datasets", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 5040.1692565608155, + "top": 215.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "join1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"field1\": \"2\", \"field2\": \"2\", \"fill_empty_columns\": {\"fill_empty_columns_switch\": \"no_fill\", \"__current_case__\": 0}, \"header\": \"\", \"input1\": {\"__class__\": \"ConnectedValue\"}, \"input2\": {\"__class__\": \"ConnectedValue\"}, \"partial\": \"\", \"unmatched\": \"-u\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.1.3", + "type": "tool", + "uuid": "adb8b38c-3d54-40c7-9877-0f650bb22643", + "when": 
null, + "workflow_outputs": [] + }, + "38": { + "annotation": "", + "content_id": "Cut1", + "errors": null, + "id": 38, + "input_connections": { + "input": { + "id": 37, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Cut", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 5320.178657115849, + "top": 240.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Cut1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"columnList\": \"c1-c4,c7,c11\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.2", + "type": "tool", + "uuid": "d300b735-1699-4f4c-be74-7dd143d91fad", + "when": null, + "workflow_outputs": [] + }, + "39": { + "annotation": "", + "content_id": "join1", + "errors": null, + "id": 39, + "input_connections": { + "input1": { + "id": 38, + "output_name": "out_file1" + }, + "input2": { + "id": 4, + "output_name": "output" + } + }, + "inputs": [], + "label": null, + "name": "Join two Datasets", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 5600.188057670883, + "top": 242.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "join1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"field1\": \"2\", \"field2\": \"1\", \"fill_empty_columns\": {\"fill_empty_columns_switch\": \"no_fill\", \"__current_case__\": 0}, \"header\": \"\", \"input1\": {\"__class__\": \"ConnectedValue\"}, 
\"input2\": {\"__class__\": \"ConnectedValue\"}, \"partial\": \"\", \"unmatched\": \"-u\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.1.3", + "type": "tool", + "uuid": "65d5da91-f2de-4737-b176-6192b9ec03b1", + "when": null, + "workflow_outputs": [] + }, + "40": { + "annotation": "", + "content_id": "Cut1", + "errors": null, + "id": 40, + "input_connections": { + "input": { + "id": 39, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Cut", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 5880.197458225916, + "top": 267.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Cut1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"columnList\": \"c1-c6,c9,c10,c25\", \"delimiter\": \"T\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.2", + "type": "tool", + "uuid": "7623f3cc-ded7-4323-94f6-1a7ffbd56deb", + "when": null, + "workflow_outputs": [] + }, + "41": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_tail_tool/9.3+galaxy1", + "errors": null, + "id": 41, + "input_connections": { + "infile": { + "id": 40, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Select last", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 6160.20685878095, + "top": 267.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_tail_tool/9.3+galaxy1", + 
"tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"complement\": \"+\", \"infile\": {\"__class__\": \"ConnectedValue\"}, \"num_lines\": \"2\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "ca65183a-6acc-4619-81c2-a398c2ca8fa9", + "when": null, + "workflow_outputs": [] + }, + "42": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/estrain/add_column_headers/add_column_headers/0.1.3", + "errors": null, + "id": 42, + "input_connections": { + "datatable": { + "id": 41, + "output_name": "outfile" + } + }, + "inputs": [], + "label": null, + "name": "Add Header", + "outputs": [ + { + "name": "Data Table", + "type": "tabular" + } + ], + "position": { + "left": 6440.2171365860595, + "top": 267.952424902099 + }, + "post_job_actions": { + "RenameDatasetActionData Table": { + "action_arguments": { + "newname": "CDS in metagenomes identified as plasmids + KO + PFAM" + }, + "action_type": "RenameDatasetAction", + "output_name": "Data Table" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/estrain/add_column_headers/add_column_headers/0.1.3", + "tool_shed_repository": { + "changeset_revision": "ff2acdb98a74", + "name": "add_column_headers", + "owner": "estrain", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"datatable\": {\"__class__\": \"ConnectedValue\"}, \"header\": \"Metagenomic sequence name,Gene ID,Gene name,Gene product,KO ID,KO annotation,PFAM name,PFAM ID,PFAM annotation\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.1.3", + "type": "tool", + "uuid": 
"7cf75682-f7c3-4de9-a650-4c7fba5ac08e", + "when": null, + "workflow_outputs": [ + { + "label": "CDS in metagenomes identified as plasmids + KO + PFAM", + "output_name": "Data Table", + "uuid": "8d4adc03-cab4-444a-8122-b891f3eca2be" + } + ] + }, + "43": { + "annotation": "", + "content_id": "Filter1", + "errors": null, + "id": 43, + "input_connections": { + "input": { + "id": 42, + "output_name": "Data Table" + } + }, + "inputs": [], + "label": null, + "name": "Filter", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 6720.23564160672, + "top": 133.9324190446071 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Filter1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"cond\": \"c8!='.'\", \"header_lines\": \"0\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.1.1", + "type": "tool", + "uuid": "d2119c3e-478e-4164-93c7-72daf3966b58", + "when": null, + "workflow_outputs": [] + }, + "44": { + "annotation": "", + "content_id": "Filter1", + "errors": null, + "id": 44, + "input_connections": { + "input": { + "id": 42, + "output_name": "Data Table" + } + }, + "inputs": [], + "label": null, + "name": "Filter", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 6720.225659891017, + "top": 267.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Filter1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"cond\": \"c5!='.'\", \"header_lines\": \"0\", \"input\": 
{\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.1.1", + "type": "tool", + "uuid": "69cbd130-4f13-45dc-bfa6-4d4d59ea1420", + "when": null, + "workflow_outputs": [] + }, + "45": { + "annotation": "", + "content_id": "Grouping1", + "errors": null, + "id": 45, + "input_connections": { + "input1": { + "id": 42, + "output_name": "Data Table" + } + }, + "inputs": [], + "label": null, + "name": "Group", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 6720.225659891017, + "top": 401.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Grouping1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"groupcol\": \"1\", \"ignorecase\": false, \"ignorelines\": null, \"input1\": {\"__class__\": \"ConnectedValue\"}, \"operations\": [{\"__index__\": 0, \"optype\": \"unique\", \"opcol\": \"2\", \"opround\": \"no\", \"opdefault\": null}, {\"__index__\": 1, \"optype\": \"unique\", \"opcol\": \"3\", \"opround\": \"no\", \"opdefault\": null}, {\"__index__\": 2, \"optype\": \"unique\", \"opcol\": \"5\", \"opround\": \"no\", \"opdefault\": null}, {\"__index__\": 3, \"optype\": \"unique\", \"opcol\": \"8\", \"opround\": \"no\", \"opdefault\": null}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.1.4", + "type": "tool", + "uuid": "947912ee-f064-4963-a5f0-b0d8a2eceaca", + "when": null, + "workflow_outputs": [] + }, + "46": { + "annotation": "", + "content_id": "Grouping1", + "errors": null, + "id": 46, + "input_connections": { + "input1": { + "id": 43, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Group", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + 
"position": { + "left": 7000.235060446051, + "top": 133.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Grouping1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"groupcol\": \"8\", \"ignorecase\": false, \"ignorelines\": null, \"input1\": {\"__class__\": \"ConnectedValue\"}, \"operations\": [{\"__index__\": 0, \"optype\": \"length\", \"opcol\": \"8\", \"opround\": \"no\", \"opdefault\": null}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.1.4", + "type": "tool", + "uuid": "e9e0be8b-b45c-4d4d-94c8-0349576fe8db", + "when": null, + "workflow_outputs": [] + }, + "47": { + "annotation": "", + "content_id": "Grouping1", + "errors": null, + "id": 47, + "input_connections": { + "input1": { + "id": 44, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Group", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 7000.243410194123, + "top": 267.94897977401706 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + } + }, + "tool_id": "Grouping1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"groupcol\": \"5\", \"ignorecase\": false, \"ignorelines\": null, \"input1\": {\"__class__\": \"ConnectedValue\"}, \"operations\": [{\"__index__\": 0, \"optype\": \"length\", \"opcol\": \"5\", \"opround\": \"no\", \"opdefault\": null}], \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "2.1.4", + "type": "tool", + "uuid": "15ac58ba-0a36-4bc2-a428-2d414eb8b5b5", + "when": null, + "workflow_outputs": [] + }, + "48": { + 
"annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_tail_tool/9.3+galaxy1", + "errors": null, + "id": 48, + "input_connections": { + "infile": { + "id": 45, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Select last", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 7000.235060446051, + "top": 401.94122404660857 + }, + "post_job_actions": { + "HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/text_processing/tp_tail_tool/9.3+galaxy1", + "tool_shed_repository": { + "changeset_revision": "86755160afbf", + "name": "text_processing", + "owner": "bgruening", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"complement\": \"+\", \"infile\": {\"__class__\": \"ConnectedValue\"}, \"num_lines\": \"2\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "9.3+galaxy1", + "type": "tool", + "uuid": "eccd8c32-1a57-451a-88b0-6aeccaddcb2a", + "when": null, + "workflow_outputs": [] + }, + "49": { + "annotation": "", + "content_id": "sort1", + "errors": null, + "id": 49, + "input_connections": { + "input": { + "id": 46, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Sort", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 7280.244461001084, + "top": 152.94122404660857 + }, + "post_job_actions": {}, + "tool_id": "sort1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"column\": \"2\", \"column_set\": [], \"header_lines\": \"0\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"order\": \"DESC\", \"style\": 
\"num\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.2.0", + "type": "tool", + "uuid": "93cd4df6-8637-4584-a1d4-b81b594b7fa9", + "when": null, + "workflow_outputs": [] + }, + "50": { + "annotation": "", + "content_id": "sort1", + "errors": null, + "id": 50, + "input_connections": { + "input": { + "id": 47, + "output_name": "out_file1" + } + }, + "inputs": [], + "label": null, + "name": "Sort", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 7280.244461001084, + "top": 286.94122404660857 + }, + "post_job_actions": {}, + "tool_id": "sort1", + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"column\": \"2\", \"column_set\": [], \"header_lines\": \"0\", \"input\": {\"__class__\": \"ConnectedValue\"}, \"order\": \"DESC\", \"style\": \"num\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.2.0", + "type": "tool", + "uuid": "a2cf6302-6786-4301-be91-13ceaec27ac5", + "when": null, + "workflow_outputs": [] + }, + "51": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/estrain/add_column_headers/add_column_headers/0.1.3", + "errors": null, + "id": 51, + "input_connections": { + "datatable": { + "id": 48, + "output_name": "outfile" + } + }, + "inputs": [], + "label": null, + "name": "Add Header", + "outputs": [ + { + "name": "Data Table", + "type": "tabular" + } + ], + "position": { + "left": 7280.244461001084, + "top": 420.94122404660857 + }, + "post_job_actions": { + "RenameDatasetActionData Table": { + "action_arguments": { + "newname": "CDS annotation overview per metagenomic sequences" + }, + "action_type": "RenameDatasetAction", + "output_name": "Data Table" + } + }, + "tool_id": "toolshed.g2.bx.psu.edu/repos/estrain/add_column_headers/add_column_headers/0.1.3", + "tool_shed_repository": { + "changeset_revision": "ff2acdb98a74", + "name": 
"add_column_headers", + "owner": "estrain", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/shared/ifbstor1/galaxy/mutable-config/tool-data/shared/ucsc/chrom/?.len\", \"datatable\": {\"__class__\": \"ConnectedValue\"}, \"header\": \"Metagenomic sequence name,Number of CDS,Number of annotated CDS,Number of associated KO,Number of associated PFAM\", \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "0.1.3", + "type": "tool", + "uuid": "04899a31-a9ab-4c55-a835-dd6160184b33", + "when": null, + "workflow_outputs": [ + { + "label": "CDS annotation overview per metagenomic sequences", + "output_name": "Data Table", + "uuid": "39e36bb3-4f8d-46e1-8d26-24619c7ad36d" + } + ] + } + }, + "tags": [ + "metagenomics", + "metaplasmidome", + "name:microGalaxy" + ], + "uuid": "8fc35d7e-0cf4-4365-b57a-75d247588f7e", + "version": 13 +} \ No newline at end of file