forked from geraldinepascal/FROGS-wrappers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
affiliation_postprocess.xml
154 lines (117 loc) · 6.2 KB
/
affiliation_postprocess.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
<?xml version="1.0"?>
<!--
# Copyright (C) 2015 INRA
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
-->
<tool id="FROGS_affiliation_postprocess" name="FROGS Affiliation postprocess" version="3.1">
<description>Optional step to resolve inclusive amplicon ambiguities and to aggregate OTUs based on alignment metrics</description>
<!-- Runtime dependency: the "frogs" package, pinned to release 3.1.0. -->
<requirements>
    <requirement version="3.1.0" type="package">frogs</requirement>
</requirements>
<!-- Any non-zero exit status, negative or positive, is treated as a fatal error.
     "fatal" is the level Galaxy assumes when none is given; it is spelled out here. -->
<stdio>
    <exit_code range=":-1" level="fatal" />
    <exit_code range="1:" level="fatal" />
</stdio>
<command><![CDATA[
    ## Cheetah template for the affiliation_postprocess.py command line.
    ## Galaxy joins the rendered lines into a single shell command.
    affiliation_postprocess.py
        ## The reference FASTA is passed only when the user flags the amplicon as
        ## hyper variable in length. Its path is read from the "path" field of the
        ## selected data-table entry.
        #if $is_HVL.HVL_amplicon
            #set $reference_filename = str( $is_HVL.ref_file.fields.path )
            --reference '${reference_filename}'
        #end if
        ## Aggregation thresholds (percentages, validated as floats by the form).
        --identity $identity
        --coverage $coverage
        ## File paths are single-quoted so that whitespace or shell metacharacters
        ## in a path cannot split or alter the command line.
        --input-biom '$biom_in'
        --input-fasta '$fasta_in'
        --output-biom '$biom_out'
        --output-fasta '$fasta_out'
        --output-compo '$compo_out'
]]></command>
<inputs>
    <!-- Input datasets: affiliated abundance table and the OTU seed sequences. -->
    <param name="biom_in" type="data" format="biom1"
           label="Abundance file of affiliated OTUs"
           help="Abundances of affiliated OTUs (format: BIOM)." />
    <param name="fasta_in" type="data" format="fasta"
           label="OTU seed sequences"
           help="OTU sequences (format: fasta)." />

    <!-- Optional reference selection: the database list is shown only when the
         checkbox is ticked. -->
    <conditional name="is_HVL">
        <param name="HVL_amplicon" type="boolean"
               label="Is this an hyper variable in length amplicon ?"
               help="Multi-affiliation tag may be resolved by selecting the shortest amplicon reference. For this you need the reference fasta file of your kind of amplicon." />
        <when value="true">
            <!-- Choices come from the HVL_db data table; validation fails when
                 that table offers no entry. -->
            <param name="ref_file" type="select"
                   label="Using reference database"
                   help="Select reference from the list">
                <options from_data_table="HVL_db" />
                <validator type="no_options" message="A built-in database is not available" />
                <!-- Explicit column mapping, currently disabled:
                     <column name="name" index="0"/>
                     <column name="value" index="1"/>
                -->
            </param>
        </when>
        <when value="false" />
    </conditional>

    <!-- Aggregation thresholds, both expressed as percentages in [0, 100]. -->
    <param name="identity" type="float" value="99.0" min="0.0" max="100.0"
           label="minimum identity for aggregation"
           help="OTUs will be aggregated if they share the same taxonomy with at least X% identity." />
    <param name="coverage" type="float" value="99.0" min="0.0" max="100.0"
           label="minimum coverage for aggregation"
           help="OTUs will be aggregated if they share the same taxonomy with at least X% alignment coverage." />
</inputs>
<!-- Result datasets: abundance table (BIOM), seed sequences (FASTA) and the
     aggregation composition table (tabular).
     NOTE(review): the command section already passes these datasets to the script
     through its output-biom, output-fasta and output-compo options, so
     from_work_dir looks redundant here; confirm before removing it. -->
<outputs>
    <data name="biom_out" format="biom1"
          from_work_dir="affiliation.biom"
          label="${tool.name}: affiliation.biom" />
    <data name="fasta_out" format="fasta"
          from_work_dir="sequences.fasta"
          label="${tool.name}: sequences.fasta" />
    <data name="compo_out" format="tabular"
          from_work_dir="aggregation_composition.txt"
          label="${tool.name}: aggregation_composition.txt" />
</outputs>
<!-- Functional test: runs with the length-variable amplicon option enabled and
     the default identity/coverage thresholds; only the composition output is
     compared against a stored reference file. -->
<tests>
    <test>
        <param value="references/06-affiliation.biom" name="biom_in" />
        <param value="references/04-filters.fasta" name="fasta_in" />
        <conditional name="is_HVL">
            <param value="true" name="HVL_amplicon" />
            <param value="Unite_extract_ITS1_test" name="ref_file" />
        </conditional>
        <output file="references/07-affiliation_postprocessed.compo.tsv" name="compo_out" />
    </test>
</tests>
<help>
.. image:: static/images/frogs_images/FROGS_logo.png
:height: 144
:width: 110
.. class:: infomark page-header h2
What it does
Resolves multi-hit ambiguities when exact amplicon lengths are available, and aggregates OTUs sharing the same taxonomy based on alignment metric thresholds.
.. class:: infomark page-header h2
Inputs/outputs
.. class:: h3
Inputs
**Abundance file**:
The abundance of each OTU in each sample (format `BIOM <http://biom-format.org/>`_) with taxonomic affiliations metadata.
**Sequence file**:
The sequences (format `FASTA <https://en.wikipedia.org/wiki/FASTA_format>`_) of each OTU seed.
**Reference file** (optional):
The exact amplicon reference sequences (format `FASTA <https://en.wikipedia.org/wiki/FASTA_format>`_).
.. class:: h3
Outputs
**Abundance file**:
The abundance file of OTUs and aggregated OTUs, with their affiliation (format `BIOM <http://biom-format.org/>`_) and with potentially fewer ambiguities.
**Sequence file**:
The sequences (format `FASTA <https://en.wikipedia.org/wiki/FASTA_format>`_) of each aggregated OTU seed.
**Composition file**:
The aggregation composition file (format text) describing the composition of each resulting OTU.
.. class:: infomark page-header h2
How it works
If a reference fasta file is provided, then for each OTU with a multi-affiliation, among the different possible affiliations, we only keep the affiliation of the sequence with the shortest length. The aim is to resolve ambiguities due to potential inclusive sequences such as ITS.
The second step aggregates OTUs that share the same taxonomy, as inferred from alignment metrics.
We start with the most abundant OTU. If an OTU shares at least one affiliation with another OTU with at least I% of identity and C% of alignment coverage, then the OTUs are aggregated together (the different affiliations, which then generate the multi-affiliation tag, are merged, and abundance counts are summed). The seed of the most abundant OTU is kept.
----
**Contact**
Support: please contact your Galaxy support team first.
Contacts: frogs@inra.fr
Repository: https://github.com/geraldinepascal/FROGS
website: http://frogs.toulouse.inra.fr/
Please cite the **FROGS article**: *Escudie F., et al. Bioinformatics, 2018. FROGS: Find, Rapidly, OTUs with Galaxy Solution.*
</help>
<!-- DOI citation for the tool. -->
<citations>
    <citation type="doi">10.1093/bioinformatics/btx791</citation>
</citations>
</tool>