From 9f98ccd3a5fb484eba86430535de8d1c6d0d085f Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 27 Jul 2023 14:00:01 +0200 Subject: [PATCH 1/3] Update CENTRIFUGE module and not by default generate SAM alignments --- CHANGELOG.md | 2 ++ conf/modules.config | 2 +- modules.json | 2 +- modules/nf-core/centrifuge/centrifuge/main.nf | 7 ++----- modules/nf-core/centrifuge/centrifuge/meta.yml | 9 +++++++-- subworkflows/local/profiling.nf | 2 +- 6 files changed, 14 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cef7ac5..a18e7c89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Deprecated` +- [#335](https://github.com/nf-core/taxprofiler/pull/335) Updated Centrifuge module to not generate (undocumented) SAM alignments by default if --save_centrifuge_reads supplied, to due Centrifuge bug modifying profile header. SAM alignments can still be generated if `--out-fmt` supplied in `database.csv` (♥ to @LilyAnderssonLee for reporting, fix by @jfy133) + ## v1.0.1 - Dodgy Dachshund Patch [2023-05-15] ### `Added` diff --git a/conf/modules.config b/conf/modules.config index 8a290a6f..10f7bfac 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -559,7 +559,7 @@ process { publishDir = [ path: { "${params.outdir}/centrifuge/${meta.db_name}/" }, mode: params.publish_dir_mode, - pattern: '*.{txt,sam,gz}' + pattern: '*.{txt,sam,tab,gz}' ] } diff --git a/modules.json b/modules.json index cd870598..d5379c1e 100644 --- a/modules.json +++ b/modules.json @@ -42,7 +42,7 @@ }, "centrifuge/centrifuge": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "7b0b4276fbad744a69c35d3cbf9d55fdfc9491bd", "installed_by": ["modules"] }, "centrifuge/kreport": { diff --git a/modules/nf-core/centrifuge/centrifuge/main.nf b/modules/nf-core/centrifuge/centrifuge/main.nf index 26c91ee1..6e45ba4e 100644 --- 
a/modules/nf-core/centrifuge/centrifuge/main.nf +++ b/modules/nf-core/centrifuge/centrifuge/main.nf @@ -5,19 +5,18 @@ process CENTRIFUGE_CENTRIFUGE { conda "bioconda::centrifuge=1.0.4_beta" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/centrifuge:1.0.4_beta--h9a82719_6' : - 'quay.io/biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }" + 'biocontainers/centrifuge:1.0.4_beta--h9a82719_6' }" input: tuple val(meta), path(reads) path db val save_unaligned val save_aligned - val sam_format output: tuple val(meta), path('*report.txt') , emit: report tuple val(meta), path('*results.txt') , emit: results - tuple val(meta), path('*.sam') , optional: true, emit: sam + tuple val(meta), path('*.{sam,tab}') , optional: true, emit: sam tuple val(meta), path('*.mapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_mapped tuple val(meta), path('*.unmapped.fastq{,.1,.2}.gz') , optional: true, emit: fastq_unmapped path "versions.yml" , emit: versions @@ -38,7 +37,6 @@ process CENTRIFUGE_CENTRIFUGE { unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' aligned = save_aligned ? "--al-conc-gz ${prefix}.mapped.fastq.gz" : '' } - def sam_output = sam_format ? 
"--out-fmt 'sam'" : '' """ ## we add "-no-name ._" to ensure silly Mac OSX metafiles files aren't included db_name=`find -L ${db} -name "*.1.cf" -not -name "._*" | sed 's/\\.1.cf\$//'` @@ -50,7 +48,6 @@ process CENTRIFUGE_CENTRIFUGE { -S ${prefix}.results.txt \\ $unaligned \\ $aligned \\ - $sam_output \\ $args cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/centrifuge/centrifuge/meta.yml b/modules/nf-core/centrifuge/centrifuge/meta.yml index a252c00c..b51e2522 100644 --- a/modules/nf-core/centrifuge/centrifuge/meta.yml +++ b/modules/nf-core/centrifuge/centrifuge/meta.yml @@ -27,10 +27,10 @@ input: type: directory description: Path to directory containing centrifuge database files - save_unaligned: - type: value + type: boolean description: If true unmapped fastq files are saved - save_aligned: - type: value + type: boolean description: If true mapped fastq files are saved output: - meta: @@ -48,6 +48,11 @@ output: description: | File containing classification results pattern: "*.{results.txt}" + - sam: + type: file + description: | + Optional output file containing read alignments (SAM format )or a table of per-read hit information (TAB)s + pattern: "*.{sam,tab}" - fastq_unmapped: type: file description: Unmapped fastq files diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index de11bf47..dd7c844d 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -230,7 +230,7 @@ workflow PROFILING { db: it[3] } - CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads, params.centrifuge_save_reads ) + CENTRIFUGE_CENTRIFUGE ( ch_input_for_centrifuge.reads, ch_input_for_centrifuge.db, params.centrifuge_save_reads, params.centrifuge_save_reads ) ch_versions = ch_versions.mix( CENTRIFUGE_CENTRIFUGE.out.versions.first() ) ch_raw_classifications = ch_raw_classifications.mix( CENTRIFUGE_CENTRIFUGE.out.results ) From 
34273d2656c5dc6fdc933bdacd27ce8cb2631669 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 27 Jul 2023 14:01:01 +0200 Subject: [PATCH 2/3] Fix PR number --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a18e7c89..748b0022 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,7 +45,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Deprecated` -- [#335](https://github.com/nf-core/taxprofiler/pull/335) Updated Centrifuge module to not generate (undocumented) SAM alignments by default if --save_centrifuge_reads supplied, to due Centrifuge bug modifying profile header. SAM alignments can still be generated if `--out-fmt` supplied in `database.csv` (♥ to @LilyAnderssonLee for reporting, fix by @jfy133) +- [#338](https://github.com/nf-core/taxprofiler/pull/338) Updated Centrifuge module to not generate (undocumented) SAM alignments by default if --save_centrifuge_reads supplied, to due Centrifuge bug modifying profile header. 
SAM alignments can still be generated if `--out-fmt` supplied in `database.csv` (♥ to @LilyAnderssonLee for reporting, fix by @jfy133) ## v1.0.1 - Dodgy Dachshund Patch [2023-05-15] From 9f0255b7cc5245c3eb0aa1c355ace2cba0f00438 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Thu, 27 Jul 2023 14:55:31 +0200 Subject: [PATCH 3/3] Update CHANGELOG --- CHANGELOG.md | 3 ++- subworkflows/local/profiling.nf | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 748b0022..bb8e08e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#313](https://github.com/nf-core/taxprofiler/pull/304) Fix pipeline not providing error when database sheet does not have a header (♥ to @noah472 for reporting, fix by @jfy133) - [#330](https://github.com/nf-core/taxprofiler/pull/330) Added better tagging to allow disambiguation of Kraken2 steps of Kraken2 vs Bracken (♥ to @MajoroMask for requesting, added by @jfy133) - [#334](https://github.com/nf-core/taxprofiler/pull/334) Increase the memory of the FALCO process to 4GB (fix by @LilyAnderssonLee) +- [#338](https://github.com/nf-core/taxprofiler/pull/338) Fixed wrong 'out' file going to the `centrifuge kreport` module (♥ to @LilyAnderssonLee for reporting, fix by @jfy133) ### `Dependencies` @@ -45,7 +46,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Deprecated` -- [#338](https://github.com/nf-core/taxprofiler/pull/338) Updated Centrifuge module to not generate (undocumented) SAM alignments by default if --save_centrifuge_reads supplied, to due Centrifuge bug modifying profile header. 
SAM alignments can still be generated if `--out-fmt` supplied in `database.csv` (♥ to @LilyAnderssonLee for reporting, fix by @jfy133) +- [#338](https://github.com/nf-core/taxprofiler/pull/338) Updated Centrifuge module to not generate (undocumented) SAM alignments by default if --save_centrifuge_reads supplied, due to a Centrifuge bug modifying profile header. SAM alignments can still be generated if `--out-fmt` supplied in `database.csv` (♥ to @LilyAnderssonLee for reporting, fix by @jfy133) ## v1.0.1 - Dodgy Dachshund Patch [2023-05-15] diff --git a/subworkflows/local/profiling.nf b/subworkflows/local/profiling.nf index dd7c844d..e6bbfc51 100644 --- a/subworkflows/local/profiling.nf +++ b/subworkflows/local/profiling.nf @@ -239,7 +239,11 @@ workflow PROFILING { .filter { meta, db -> meta.tool == 'centrifuge' } .map { meta, db -> [meta.db_name, meta, db] } - ch_input_for_centrifuge_kreport = CENTRIFUGE_CENTRIFUGE.out.report + // We must combine the _results_ file to get correct output - sending the report file will + // weirdly still produce valid-looking output, however the numbers are nonsense. + // Unfortunately the Centrifuge documentation for this was unclear as to _which_ outfile + // goes into it. + ch_input_for_centrifuge_kreport = CENTRIFUGE_CENTRIFUGE.out.results .map { meta, profile -> [meta.db_name, meta, profile] } .combine(ch_database_for_centrifugekreport, by: 0) .multiMap {