Skip to content

Commit

Permalink
Merge pull request #466 from LilyAnderssonLee/add_db_type
Browse files Browse the repository at this point in the history
Add the column db_type to database sheet
  • Loading branch information
LilyAnderssonLee authored Jun 25, 2024
2 parents 6b47739 + 2534c08 commit 2a679b6
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 24 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,10 @@ jobs:
if [[ "${{ matrix.tags }}" == "test_motus" ]]; then
wget https://raw.githubusercontent.com/motu-tool/mOTUs/master/motus/downloadDB.py
python downloadDB.py --no-download-progress
echo 'tool,db_name,db_params,db_path' > 'database_motus.csv'
echo "motus,db_mOTU,,db_mOTU" >> 'database_motus.csv'
echo 'tool,db_name,db_params,db_type,db_path' > 'database_motus.csv'
echo "motus,db1_mOTU,,short,db_mOTU" >> 'database_motus.csv'
echo "motus,db2_mOTU,,long,db_mOTU" >> 'database_motus.csv'
echo "motus,db3_mOTU,,short;long,db_mOTU" >> 'database_motus.csv'
nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --databases ./database_motus.csv --outdir ./results_${{ matrix.tags }};
else
nextflow run ${GITHUB_WORKSPACE} -profile docker,${{ matrix.tags }} --outdir ./results_${{ matrix.tags }};
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Added`

- [#417](https://github.com/nf-core/taxprofiler/pull/417) - Added reference-free metagenome estimation with Nonpareil (added by @jfy133)
- [#466](https://github.com/nf-core/taxprofiler/pull/466) - Input database sheets now require a `db_type` column to distinguish between short- and long-read databases

## v1.1.8dev - Augmented Akita Patch []

Expand Down
6 changes: 6 additions & 0 deletions assets/schema_database.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@
"errorMessage": "Invalid database db_params entry. No quotes allowed.",
"meta": ["db_params"]
},
"db_type": {
"type": "string",
"enum": ["short", "long", "short;long"],
"default": "short;long",
"meta": ["db_type"]
},
"db_path": {
"type": "string",
"exists": true,
Expand Down
59 changes: 40 additions & 19 deletions subworkflows/local/profiling.nf
Original file line number Diff line number Diff line change
Expand Up @@ -60,26 +60,47 @@ workflow PROFILING {
COMBINE READS WITH POSSIBLE DATABASES
*/

// e.g. output [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':true], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]
// Separate default 'short;long' (when necessary) databases when short/long specified in database sheet
ch_dbs = databases
.map{
meta_db, db ->
[ [meta_db.db_type.split(";")].flatten(), meta_db, db]
}
.transpose(by: 0)
.map{
type, meta_db, db ->
[[type: type], meta_db.subMap(meta_db.keySet() - 'db_type') + [type: type], db]
}

// Join short and long reads with their corresponding short/long database
// Note that databases left at the default `short;long` type will match both read types.
// E.g. if there are no 'long' reads, the 'long' database channel element generated above
// will have nothing to join to and will be discarded
// Final output: [DUMP: reads_plus_db] [['id':'2612', 'run_accession':'combined', 'instrument_platform':'ILLUMINA', 'single_end':1], <reads_path>/2612.merged.fastq.gz, ['tool':'malt', 'db_name':'mal95', 'db_params':'"-id 90"'], <db_path>/malt90]

ch_input_for_profiling = reads
.map {
meta, reads ->
[meta + [id: "${meta.id}${meta.single_end ? '_se' : '_pe'}"], reads]
}
.combine(databases)
.branch {
centrifuge: it[2]['tool'] == 'centrifuge'
diamond: it[2]['tool'] == 'diamond'
kaiju: it[2]['tool'] == 'kaiju'
kraken2: it[2]['tool'] == 'kraken2' || it[2]['tool'] == 'bracken' // to reuse the kraken module to produce the input data for bracken
krakenuniq: it[2]['tool'] == 'krakenuniq'
malt: it[2]['tool'] == 'malt'
metaphlan: it[2]['tool'] == 'metaphlan'
motus: it[2]['tool'] == 'motus'
kmcp: it[2]['tool'] == 'kmcp'
ganon: it[2]['tool'] == 'ganon'
unknown: true
}
.map{
meta, reads ->
[[type: meta.type], meta, reads]
}
.combine(ch_dbs, by: 0)
.map{
db_type, meta, reads, db_meta, db ->
[ meta, reads, db_meta, db ]
}
.branch { meta, reads, db_meta, db ->
centrifuge: db_meta.tool == 'centrifuge'
diamond: db_meta.tool == 'diamond'
kaiju: db_meta.tool == 'kaiju'
kraken2: db_meta.tool == 'kraken2' || db_meta.tool == 'bracken' // to reuse the kraken module to produce the input data for bracken
krakenuniq: db_meta.tool == 'krakenuniq'
malt: db_meta.tool == 'malt'
metaphlan: db_meta.tool == 'metaphlan'
motus: db_meta.tool == 'motus'
kmcp: db_meta.tool == 'kmcp'
ganon: db_meta.tool == 'ganon'
unknown: true
}

/*
PREPARE PROFILER INPUT CHANNELS & RUN PROFILING
Expand Down
9 changes: 6 additions & 3 deletions workflows/taxprofiler.nf
Original file line number Diff line number Diff line change
Expand Up @@ -135,13 +135,13 @@ workflow TAXPROFILER {
}
.branch { meta, run_accession, instrument_platform, fastq_1, fastq_2, fasta ->
fastq: meta.single_end || fastq_2
return [ meta, fastq_2 ? [ fastq_1, fastq_2 ] : [ fastq_1 ] ]
return [ meta + [ type: "short" ], fastq_2 ? [ fastq_1, fastq_2 ] : [ fastq_1 ] ]
nanopore: instrument_platform == 'OXFORD_NANOPORE'
meta.single_end = true
return [ meta, [ fastq_1 ] ]
return [ meta + [ type: "long" ], [ fastq_1 ] ]
fasta: meta.is_fasta
meta.single_end = true
return [ meta, [ fasta ] ]
return [ meta + [ type: "short" ], [ fasta ] ]
}

// Merge ch_input.fastq and ch_input.nanopore into a single channel
Expand All @@ -150,6 +150,9 @@ workflow TAXPROFILER {
// Validate and decompress databases
ch_dbs_for_untar = databases
.branch { db_meta, db_path ->
if ( !db_meta.db_type ) {
db_meta = db_meta + [ db_type: "short;long" ]
}
untar: db_path.name.endsWith( ".tar.gz" )
skip: true
}
Expand Down

0 comments on commit 2a679b6

Please sign in to comment.