From 9a79d311ca121b1899dd29176bb83f7f6deb8ab4 Mon Sep 17 00:00:00 2001
From: aidaanva
Date: Fri, 28 Jul 2023 11:43:49 +0200
Subject: [PATCH] updated input, output and test after review

topas/gencons: add optional indel VCF and reference index inputs, an
optional gzipped VCF output, and two extra test workflows.

- New inputs: vcf_indels (optional), fai (optional), vcf_output (boolean).
- New optional output channel `vcf` (*.vcf.gz), produced when vcf_output is true.
- gzip is called with -n so archives carry no name/timestamp and md5sums stay stable.
- Each input tuple binds its own meta variable (meta, meta2, meta3, meta4) so the
  task prefix always comes from the SNP VCF and never from an empty optional input.
- The -fai flag built from the `fai` input is now actually passed to GenConsensus.

---
Reviewer notes (this section is ignored by `git am`):
- This copy of the patch had all newlines and repeated whitespace collapsed.
  Line structure was rebuilt from the hunk headers; indentation, column
  alignment and the position of blank context lines are reconstructed from
  nf-core conventions and may not match the target files byte-for-byte.
  Apply with `git apply --ignore-whitespace` (or `-3`) if it does not apply cleanly.
- The `index` lines are kept from the original patch and no longer describe the
  post-image of main.nf / test.yml after the two fixes above.
- md5sums in test.yml are carried over unchanged; re-run the module tests to
  confirm them, in particular for test.vcf.gz and for the run that now passes -fai.
- The author e-mail address is missing from the From: header in this copy.

 modules/nf-core/topas/gencons/main.nf         | 26 ++++++-
 modules/nf-core/topas/gencons/meta.yml        | 23 +++++-
 tests/modules/nf-core/topas/gencons/main.nf   | 74 ++++++++++++++++++-
 .../nf-core/topas/gencons/nextflow.config     |  8 +-
 tests/modules/nf-core/topas/gencons/test.yml  | 34 ++++++++-
 5 files changed, 152 insertions(+), 13 deletions(-)

diff --git a/modules/nf-core/topas/gencons/main.nf b/modules/nf-core/topas/gencons/main.nf
index 6e444db4b9c..d8816a25d1a 100644
--- a/modules/nf-core/topas/gencons/main.nf
+++ b/modules/nf-core/topas/gencons/main.nf
@@ -9,13 +9,17 @@ process TOPAS_GENCONS {
 
     input:
     tuple val(meta), path(vcf)
-    tuple val(meta), path(reference)
+    tuple val(meta2), path(vcf_indels)
+    tuple val(meta3), path(reference)
+    tuple val(meta4), path(fai)
+    val(vcf_output)
 
     output:
     tuple val(meta), path("*.fasta.gz"), emit: fasta
-    tuple val(meta), path("*.ccf")     , emit: ccf
-    tuple val(meta), path("*.log")     , emit: log
-    path "versions.yml"                , emit: versions
+    tuple val(meta), path("*.vcf.gz")  , emit: vcf     , optional: true
+    tuple val(meta), path("*.ccf")     , emit: ccf
+    tuple val(meta), path("*.log")     , emit: log
+    path "versions.yml"                , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -23,6 +27,10 @@ process TOPAS_GENCONS {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
+    // Optional inputs are passed as [] when absent (falsy in Groovy), so their flag is left out entirely.
+    def optionalvcfindels = vcf_indels ? "-indels ${vcf_indels}" : ''
+    def optionalfai = fai ? "-fai ${fai}" : ''
+    def vcfoutput = vcf_output ? "-vcf_out ${prefix}.vcf" : ""
     def VERSION = '1.0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
 
     """
@@ -32,9 +40,17 @@ process TOPAS_GENCONS {
         $args \\
         -o ${prefix}.fasta \\
         -snps $vcf \\
+        $optionalvcfindels \\
+        $optionalfai \\
+        $vcfoutput \\
         -ref $reference
 
-    gzip ${prefix}.fasta
+    gzip -n ${prefix}.fasta
+
+    # The VCF only exists when -vcf_out was requested, so compress it only if present.
+    if [[ -f ${prefix}.vcf ]];then
+        gzip -n ${prefix}.vcf
+    fi
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
diff --git a/modules/nf-core/topas/gencons/meta.yml b/modules/nf-core/topas/gencons/meta.yml
index 65760ab21aa..a50705a5a52 100644
--- a/modules/nf-core/topas/gencons/meta.yml
+++ b/modules/nf-core/topas/gencons/meta.yml
@@ -1,5 +1,5 @@
 name: "topas_gencons"
-description: Create fasta consensus with TOPAS toolkit
+description: Create fasta consensus with TOPAS toolkit with options to penalize substitutions for typical DNA damage present in ancient DNA
 keywords:
   - consensus
   - fasta
@@ -22,7 +22,12 @@ input:
 
   - vcf:
       type: file
-      description: compressed vcf file containing the called snps
+      description: Gzipped compressed vcf file generated with GATK UnifiedGenotyper containing the called snps
+      pattern: "*.vcf.gz"
+
+  - vcf_indels:
+      type: file
+      description: Optional gzipped compressed vcf file generated with GATK UnifiedGenotyper containing the called indels
       pattern: "*.vcf.gz"
 
   - reference:
@@ -30,6 +35,15 @@ input:
       description: Fasta file of reference genome
       pattern: "*.fasta"
 
+  - fai:
+      type: file
+      description: Optional index for the fasta file of reference genome
+      pattern: "*.fai"
+
+  - vcf_output:
+      type: boolean
+      description: Boolean value to indicate if a compressed vcf file with the consensus calls included as SNPs should be produced
+
 output:
   - meta:
       type: map
@@ -47,6 +61,11 @@ output:
       description: Gzipped consensus fasta file with bases under threshold replaced with Ns
       pattern: "*.fasta.gz"
 
+  - vcf:
+      type: file
+      description: Gzipped vcf file with updated calls for the SNPs used in the consensus generation and for bases under threshold replaced with Ns
+      pattern: "*.vcf.gz"
+
   - ccf:
       type: file
       description: Statistics file containing information about the consensus calls in the fasta file
diff --git a/tests/modules/nf-core/topas/gencons/main.nf b/tests/modules/nf-core/topas/gencons/main.nf
index c78c29f03b3..e3a2c716032 100644
--- a/tests/modules/nf-core/topas/gencons/main.nf
+++ b/tests/modules/nf-core/topas/gencons/main.nf
@@ -2,7 +2,8 @@
 
 nextflow.enable.dsl = 2
 
-include { GATK_UNIFIEDGENOTYPER } from '../../../../modules/nf-core/gatk/unifiedgenotyper/main.nf'
+include { GATK_UNIFIEDGENOTYPER as GATK_UNIFIEDGENOTYPERSNPS } from '../../../../modules/nf-core/gatk/unifiedgenotyper/main.nf'
+include { GATK_UNIFIEDGENOTYPER as GATK_UNIFIEDGENOTYPERINDELS } from '../../../../modules/nf-core/gatk/unifiedgenotyper/main.nf'
 include { TOPAS_GENCONS } from '../../../../../modules/nf-core/topas/gencons/main.nf'
 
 workflow test_topas_gencons {
@@ -24,13 +25,78 @@ workflow test_topas_gencons {
         file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
     ]
 
-    GATK_UNIFIEDGENOTYPER ( input_gatk, fasta, fai, dict, [[],[]], [[],[]], [[],[]], [[],[]])
+    GATK_UNIFIEDGENOTYPERSNPS ( input_gatk, fasta, fai, dict, [[],[]], [[],[]], [[],[]], [[],[]])
 
-    gencons_vcf = GATK_UNIFIEDGENOTYPER.out.vcf
+    gencons_vcf = GATK_UNIFIEDGENOTYPERSNPS.out.vcf
+    gencons_vcf_indels = [[],[]]
     gencons_fasta =[ [ id:'test' ], // meta map
         file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
     ]
+    gencons_vcf_output = false
+    TOPAS_GENCONS ( gencons_vcf, gencons_vcf_indels, gencons_fasta, [[],[]], gencons_vcf_output)
+}
+
+workflow test_topas_gencons_fai {
+
+    input_gatk = [ [ id:'test' ], // meta map
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+    ]
+    fasta = [
+        [id: 'test'],
+        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    ]
+    fai = [
+        [id: 'test'],
+        file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+    ]
+    dict = [
+        [id: 'test'],
+        file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
+    ]
+
+    GATK_UNIFIEDGENOTYPERSNPS ( input_gatk, fasta, fai, dict, [[],[]], [[],[]], [[],[]], [[],[]])
+
+    gencons_vcf = GATK_UNIFIEDGENOTYPERSNPS.out.vcf
+    gencons_vcf_indels = [[],[]]
+    gencons_fasta =[ [ id:'test' ], // meta map
+        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    ]
+    gencons_vcf_output = false
+
+    TOPAS_GENCONS ( gencons_vcf, gencons_vcf_indels, gencons_fasta, fai, gencons_vcf_output)
+}
+
+workflow test_topas_gencons_indels {
+
+    input_gatk = [ [ id:'test' ], // meta map
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+    ]
+    fasta = [
+        [id: 'test'],
+        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    ]
+    fai = [
+        [id: 'test'],
+        file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true)
+    ]
+    dict = [
+        [id: 'test'],
+        file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true)
+    ]
+
+    GATK_UNIFIEDGENOTYPERSNPS ( input_gatk, fasta, fai, dict, [[],[]], [[],[]], [[],[]], [[],[]])
+    GATK_UNIFIEDGENOTYPERINDELS ( input_gatk, fasta, fai, dict, [[],[]], [[],[]], [[],[]], [[],[]])
+
+
+    gencons_vcf = GATK_UNIFIEDGENOTYPERSNPS.out.vcf
+    gencons_vcf_indels = GATK_UNIFIEDGENOTYPERINDELS.out.vcf
+    gencons_fasta =[ [ id:'test' ], // meta map
+        file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+    ]
+    gencons_vcf_output = true
 
-    TOPAS_GENCONS ( gencons_vcf, gencons_fasta )
+    TOPAS_GENCONS ( gencons_vcf, gencons_vcf_indels, gencons_fasta, [[],[]], gencons_vcf_output)
 }
 
diff --git a/tests/modules/nf-core/topas/gencons/nextflow.config b/tests/modules/nf-core/topas/gencons/nextflow.config
index d8f372f684c..2206b4cd9dc 100644
--- a/tests/modules/nf-core/topas/gencons/nextflow.config
+++ b/tests/modules/nf-core/topas/gencons/nextflow.config
@@ -2,8 +2,14 @@ process {
 
     publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
 
-    withName: GATK_UNIFIEDGENOTYPER {
+    withName: GATK_UNIFIEDGENOTYPERSNPS {
         ext.args = "--genotype_likelihoods_model SNP -stand_call_conf 30 --sample_ploidy 2 -dcov 250 --output_mode EMIT_ALL_SITES"
+        ext.prefix = { "${meta.id}_snps" }
+    }
+
+    withName: GATK_UNIFIEDGENOTYPERINDELS {
+        ext.args = "--genotype_likelihoods_model INDEL -stand_call_conf 30 --sample_ploidy 2 -dcov 250 --output_mode EMIT_ALL_SITES"
+        ext.prefix = { "${meta.id}_indels" }
     }
 
 }
diff --git a/tests/modules/nf-core/topas/gencons/test.yml b/tests/modules/nf-core/topas/gencons/test.yml
index 0514104ca74..da8c1e1bdf1 100644
--- a/tests/modules/nf-core/topas/gencons/test.yml
+++ b/tests/modules/nf-core/topas/gencons/test.yml
@@ -1,14 +1,46 @@
 - name: topas gencons test_topas_gencons
   command: nextflow run ./tests/modules/nf-core/topas/gencons -entry test_topas_gencons -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/topas/gencons/nextflow.config
+  tags:
+    - topas
+    - topas/gencons
+  files:
+    - path: output/topas/test.fasta.ccf
+      md5sum: 794d4231ee35302a9816df2c1f595041
+    - path: output/topas/test.fasta.gz
+      md5sum: 1f219a39d28eed85be8a48cb167d5879
+    - path: output/topas/test.fasta.log
+      contains:
+        - "Use -? for help"
+    - path: output/topas/versions.yml
+
+- name: topas gencons test_topas_gencons_fai
+  command: nextflow run ./tests/modules/nf-core/topas/gencons -entry test_topas_gencons_fai -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/topas/gencons/nextflow.config
   tags:
     - topas
     - topas/gencons
   files:
     - path: output/topas/test.fasta.ccf
-      md5sum: ed5d5767d014d76d5ba904017d937cd5
+      md5sum: 794d4231ee35302a9816df2c1f595041
     - path: output/topas/test.fasta.gz
       md5sum: 1f219a39d28eed85be8a48cb167d5879
     - path: output/topas/test.fasta.log
       contains:
         - "Use -? for help"
     - path: output/topas/versions.yml
+
+- name: topas gencons test_topas_gencons_indels
+  command: nextflow run ./tests/modules/nf-core/topas/gencons -entry test_topas_gencons_indels -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/topas/gencons/nextflow.config
+  tags:
+    - topas
+    - topas/gencons
+  files:
+    - path: output/topas/test.fasta.ccf
+      md5sum: f7337d6e701ab7c5b3c8654742e5116a
+    - path: output/topas/test.fasta.gz
+      md5sum: 1f219a39d28eed85be8a48cb167d5879
+    - path: output/topas/test.fasta.log
+      contains:
+        - "Use -? for help"
+    - path: output/topas/test.vcf.gz
+      md5sum: 91806432f141a2a7a071283aa962488d
+    - path: output/topas/versions.yml