Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TOPAS/GenConS #3662

Merged
merged 16 commits into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions modules/nf-core/topas/gencons/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Runs `topas GenConS` on a SNP VCF and a reference fasta, then gzips the
// resulting consensus fasta (and the consensus VCF, when one was requested).
//
// Inputs:
//   vcf         - SNP calls, passed to the tool via -snps
//   vcf_indels  - optional indel calls (-indels); pass [[],[]] to omit
//   reference   - reference genome fasta (-ref)
//   fai         - optional fasta index (-fai); pass [[],[]] to omit
//   vcf_output  - when truthy, also request a VCF via -vcf_out
process TOPAS_GENCONS {
    tag "$meta.id"
    label 'process_single'

    conda "bioconda::topas=1.0.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/topas:1.0.1--hdfd78af_1':
        'biocontainers/topas:1.0.1--hdfd78af_1' }"

    input:
    tuple val(meta), path(vcf)
    tuple val(meta2), path(vcf_indels)
    tuple val(meta3), path(reference)
    tuple val(meta4), path(fai)
    val(vcf_output)

    output:
    tuple val(meta), path("*.fasta.gz"), emit: fasta
    tuple val(meta), path("*.vcf.gz")  , emit: vcf     , optional: true
    tuple val(meta), path("*.ccf")     , emit: ccf
    tuple val(meta), path("*.log")     , emit: log
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Optional flags expand to an empty string when the matching input is empty/false.
    def optionalvcfindels = vcf_indels ? "-indels ${vcf_indels}" : ''
    def optionalfai = fai ? "-fai ${fai}" : ''
    def vcfoutput = vcf_output ? "-vcf_out ${prefix}.vcf" : ""
    def VERSION = '1.0.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.

    // $optionalfai was previously computed but never passed to the tool, so a
    // supplied index was silently ignored; it is now part of the command line.
    """
    topas \\
        GenConS \\
        $args \\
        -o ${prefix}.fasta \\
        -snps $vcf \\
        $optionalvcfindels \\
        $optionalfai \\
        $vcfoutput \\
        -ref $reference

    gzip -n ${prefix}.fasta

    if [[ -f ${prefix}.vcf ]];then
        gzip -n ${prefix}.vcf
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        topas: $VERSION
    END_VERSIONS
    """
}
80 changes: 80 additions & 0 deletions modules/nf-core/topas/gencons/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
name: "topas_gencons"
description: Create fasta consensus with TOPAS toolkit with options to penalize substitutions for typical DNA damage present in ancient DNA
keywords:
  - consensus
  - fasta
  - ancient DNA
tools:
  - "topas":
      description: "This toolkit allows the efficient manipulation of sequence data in various ways. It is organized into modules: The FASTA processing modules, the FASTQ processing modules, the GFF processing modules and the VCF processing modules."
      homepage: "https://github.com/subwaystation/TOPAS"
      documentation: "https://github.com/subwaystation/TOPAS/wiki/Overview-Modules"
      tool_dev_url: "https://github.com/subwaystation/TOPAS"
      doi: "10.1038/s41598-017-17723-1"
      # A real YAML list, not a string holding a list literal
      licence: ["CC-BY"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]

  - vcf:
      type: file
      description: Gzipped compressed vcf file generated with GATK UnifiedGenotyper containing the called snps
      pattern: "*.vcf.gz"

  - meta2:
      type: map
      description: |
        Groovy Map containing information about the indels vcf file
        e.g. [ id:'test' ]

  - vcf_indels:
      type: file
      description: Optional gzipped compressed vcf file generated with GATK UnifiedGenotyper containing the called indels
      pattern: "*.vcf.gz"

  - meta3:
      type: map
      description: |
        Groovy Map containing information about the reference genome
        e.g. [ id:'test' ]

  - reference:
      type: file
      description: Fasta file of reference genome
      pattern: "*.fasta"

  - meta4:
      type: map
      description: |
        Groovy Map containing information about the reference genome index
        e.g. [ id:'test' ]

  - fai:
      type: file
      description: Optional index for the fasta file of reference genome
      pattern: "*.fai"

  - vcf_output:
      type: boolean
      description: Boolean value to indicate if a compressed vcf file with the consensus calls included as SNPs should be produced

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]

  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

  - fasta:
      type: file
      description: Gzipped consensus fasta file with bases under threshold replaced with Ns
      pattern: "*.fasta.gz"

  - vcf:
      type: file
      description: Gzipped vcf file with updated calls for the SNPs used in the consensus generation and for bases under threshold replaced with Ns
      pattern: "*.vcf.gz"

  - ccf:
      type: file
      description: Statistics file containing information about the consensus calls in the fasta file
      pattern: "*.ccf"

  - log:
      type: file
      description: Log file
      pattern: "*.log"

authors:
  - "@aidaanva"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3988,6 +3988,10 @@ tiddit/sv:
- modules/nf-core/tiddit/sv/**
- tests/modules/nf-core/tiddit/sv/**

# Paths registered under the topas/gencons tag: the module sources and its tests
topas/gencons:
- modules/nf-core/topas/gencons/**
- tests/modules/nf-core/topas/gencons/**

transdecoder/longorf:
- modules/nf-core/transdecoder/longorf/**
- tests/modules/nf-core/transdecoder/longorf/**
Expand Down
102 changes: 102 additions & 0 deletions tests/modules/nf-core/topas/gencons/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { GATK_UNIFIEDGENOTYPER as GATK_UNIFIEDGENOTYPERSNPS } from '../../../../modules/nf-core/gatk/unifiedgenotyper/main.nf'
include { GATK_UNIFIEDGENOTYPER as GATK_UNIFIEDGENOTYPERINDELS } from '../../../../modules/nf-core/gatk/unifiedgenotyper/main.nf'
include { TOPAS_GENCONS } from '../../../../../modules/nf-core/topas/gencons/main.nf'

// SNP-only consensus: no indel VCF, no fasta index, no VCF output requested.
workflow test_topas_gencons {

    // Inputs for the upstream GATK UnifiedGenotyper SNP calling
    bam_tuple = [
        [ id:'test' ], // meta map
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
    ]
    ref_fasta = [ [ id:'test' ], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
    ref_fai   = [ [ id:'test' ], file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ]
    ref_dict  = [ [ id:'test' ], file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ]

    // Placeholder tuple for every optional [ meta, file ] input left unset
    empty_tuple = [ [], [] ]

    GATK_UNIFIEDGENOTYPERSNPS (
        bam_tuple,
        ref_fasta,
        ref_fai,
        ref_dict,
        empty_tuple,
        empty_tuple,
        empty_tuple,
        empty_tuple
    )

    TOPAS_GENCONS (
        GATK_UNIFIEDGENOTYPERSNPS.out.vcf, // SNP calls
        empty_tuple,                       // no indel calls
        ref_fasta,                         // same reference used for calling
        empty_tuple,                       // no fasta index
        false                              // do not write a consensus VCF
    )
}

// SNP-only consensus with the reference fasta index supplied to TOPAS_GENCONS.
workflow test_topas_gencons_fai {

    // Inputs for the upstream GATK UnifiedGenotyper SNP calling
    bam_tuple = [
        [ id:'test' ], // meta map
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
    ]
    ref_fasta = [ [ id:'test' ], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
    ref_fai   = [ [ id:'test' ], file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ]
    ref_dict  = [ [ id:'test' ], file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ]

    // Placeholder tuple for every optional [ meta, file ] input left unset
    empty_tuple = [ [], [] ]

    GATK_UNIFIEDGENOTYPERSNPS (
        bam_tuple,
        ref_fasta,
        ref_fai,
        ref_dict,
        empty_tuple,
        empty_tuple,
        empty_tuple,
        empty_tuple
    )

    TOPAS_GENCONS (
        GATK_UNIFIEDGENOTYPERSNPS.out.vcf, // SNP calls
        empty_tuple,                       // no indel calls
        ref_fasta,                         // same reference used for calling
        ref_fai,                           // fasta index provided
        false                              // do not write a consensus VCF
    )
}

// Consensus from SNP and indel calls, with the consensus VCF output enabled.
workflow test_topas_gencons_indels {

    // Inputs shared by both upstream GATK UnifiedGenotyper runs
    bam_tuple = [
        [ id:'test' ], // meta map
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
    ]
    ref_fasta = [ [ id:'test' ], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
    ref_fai   = [ [ id:'test' ], file(params.test_data['sarscov2']['genome']['genome_fasta_fai'], checkIfExists: true) ]
    ref_dict  = [ [ id:'test' ], file(params.test_data['sarscov2']['genome']['genome_dict'], checkIfExists: true) ]

    // Placeholder tuple for every optional [ meta, file ] input left unset
    empty_tuple = [ [], [] ]

    GATK_UNIFIEDGENOTYPERSNPS (
        bam_tuple,
        ref_fasta,
        ref_fai,
        ref_dict,
        empty_tuple,
        empty_tuple,
        empty_tuple,
        empty_tuple
    )
    GATK_UNIFIEDGENOTYPERINDELS (
        bam_tuple,
        ref_fasta,
        ref_fai,
        ref_dict,
        empty_tuple,
        empty_tuple,
        empty_tuple,
        empty_tuple
    )

    TOPAS_GENCONS (
        GATK_UNIFIEDGENOTYPERSNPS.out.vcf,   // SNP calls
        GATK_UNIFIEDGENOTYPERINDELS.out.vcf, // indel calls
        ref_fasta,                           // same reference used for calling
        empty_tuple,                         // no fasta index
        true                                 // also write the consensus VCF
    )
}
15 changes: 15 additions & 0 deletions tests/modules/nf-core/topas/gencons/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
process {

    // Publish into <outdir>/<x>, where <x> is the lower-cased text before the
    // first '_' in the last ':'-separated segment of the process name.
    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }

    // SNP calling run; outputs are prefixed "<id>_snps"
    withName: 'GATK_UNIFIEDGENOTYPERSNPS' {
        ext.prefix = { "${meta.id}_snps" }
        ext.args   = "--genotype_likelihoods_model SNP -stand_call_conf 30 --sample_ploidy 2 -dcov 250 --output_mode EMIT_ALL_SITES"
    }

    // Indel calling run; outputs are prefixed "<id>_indels"
    withName: 'GATK_UNIFIEDGENOTYPERINDELS' {
        ext.prefix = { "${meta.id}_indels" }
        ext.args   = "--genotype_likelihoods_model INDEL -stand_call_conf 30 --sample_ploidy 2 -dcov 250 --output_mode EMIT_ALL_SITES"
    }

}
54 changes: 54 additions & 0 deletions tests/modules/nf-core/topas/gencons/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Entry workflow test_topas_gencons: SNP calls only, no consensus VCF expected.
- name: topas gencons test_topas_gencons
  command: >-
    nextflow run ./tests/modules/nf-core/topas/gencons
    -entry test_topas_gencons
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/topas/gencons/nextflow.config
  tags:
    - topas/gencons
    - topas
  files:
    - path: output/gatk/test_snps.vcf.gz
    - path: output/gatk/versions.yml
    - path: output/topas/test.fasta.ccf
      md5sum: "794d4231ee35302a9816df2c1f595041"
    - path: output/topas/test.fasta.gz
      md5sum: "1f219a39d28eed85be8a48cb167d5879"
    - path: output/topas/test.fasta.log
      contains:
        - "Parameters chosen: "
    - path: output/topas/versions.yml

# Entry workflow test_topas_gencons_fai: SNP calls plus the reference fasta index.
- name: topas gencons test_topas_gencons_fai
  command: >-
    nextflow run ./tests/modules/nf-core/topas/gencons
    -entry test_topas_gencons_fai
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/topas/gencons/nextflow.config
  tags:
    - topas/gencons
    - topas
  files:
    - path: output/gatk/test_snps.vcf.gz
    - path: output/gatk/versions.yml
    - path: output/topas/test.fasta.ccf
      md5sum: "794d4231ee35302a9816df2c1f595041"
    - path: output/topas/test.fasta.gz
      md5sum: "1f219a39d28eed85be8a48cb167d5879"
    - path: output/topas/test.fasta.log
      contains:
        - "Parameters chosen: "
    - path: output/topas/versions.yml

# Entry workflow test_topas_gencons_indels: SNP and indel calls, consensus VCF expected.
- name: topas gencons test_topas_gencons_indels
  command: >-
    nextflow run ./tests/modules/nf-core/topas/gencons
    -entry test_topas_gencons_indels
    -c ./tests/config/nextflow.config
    -c ./tests/modules/nf-core/topas/gencons/nextflow.config
  tags:
    - topas/gencons
    - topas
  files:
    - path: output/gatk/test_indels.vcf.gz
    - path: output/gatk/test_snps.vcf.gz
    - path: output/gatk/versions.yml
    - path: output/topas/test.fasta.ccf
      md5sum: "f7337d6e701ab7c5b3c8654742e5116a"
    - path: output/topas/test.fasta.gz
      md5sum: "1f219a39d28eed85be8a48cb167d5879"
    - path: output/topas/test.fasta.log
      contains:
        - "Parameters chosen: "
    - path: output/topas/test.vcf.gz
      contains:
        - "##FILTER="
    - path: output/topas/versions.yml
Loading