versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/bcftools/pluginsplit/meta.yml b/modules/nf-core/bcftools/pluginsplit/meta.yml
new file mode 100644
index 00000000..64bfce0a
--- /dev/null
+++ b/modules/nf-core/bcftools/pluginsplit/meta.yml
@@ -0,0 +1,74 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+name: "bcftools_pluginsplit"
+description: Split VCF by sample, creating single- or multi-sample VCFs.
+keywords:
+ - split
+ - vcf
+ - genomics
+tools:
+ - pluginsplit:
+ description: |
+ Split VCF by sample, creating single- or multi-sample VCFs.
+ homepage: http://samtools.github.io/bcftools/bcftools.html
+ documentation: http://www.htslib.org/doc/bcftools.html
+ doi: 10.1093/bioinformatics/btp352
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - vcf:
+ type: file
+ description: The VCF file to split
+ pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}"
+ - tbi:
+ type: file
+ description: OPTIONAL - The index of the input VCF/BCF
+ pattern: "*.tbi"
+ - samples:
+ type: file
+ description: |
+ A tab-separated file determining which samples should be in which output file
+ column 1: The sample name(s) in the input file
+ column 2: The sample name(s) to use in the output file (use `-` to keep the original name)
+ column 3: The name of the output file
+ Either this or a groups file should be given
+ pattern: "*"
+ - groups:
+ type: file
+ description: |
+ A tab-separated file determining which samples should be in which output file(s)
+ column 1: The sample name(s) in the input file
+ column 2: The sample name(s) to use in the output file (use `-` to keep the original name)
+ column 3: The name of the output file(s)
+ Either this or a samples file should be given
+ pattern: "*"
+ - regions:
+ type: file
+ description: A BED file containing regions to use
+ pattern: "*.bed"
+ - targets:
+ type: file
+ description: A BED file containing regions to use (but streams rather than index-jumps)
+ pattern: "*.bed"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - vcf:
+ type: file
+ description: The resulting VCF files from the split
+ pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}"
+authors:
+ - "@nvnieuwk"
+maintainers:
+ - "@nvnieuwk"
diff --git a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test
new file mode 100644
index 00000000..e3160851
--- /dev/null
+++ b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test
@@ -0,0 +1,192 @@
+nextflow_process {
+
+ name "Test Process BCFTOOLS_PLUGINSPLIT"
+ script "../main.nf"
+ process "BCFTOOLS_PLUGINSPLIT"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "bcftools"
+ tag "bcftools/pluginsplit"
+
+ test("homo_sapiens - [ vcf, tbi ], samples, [], [], []") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour")
+ .collectFile(name:"samples.txt", newLine:true)
+ input[2] = []
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = []
+ input[2] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour")
+ .collectFile(name:"samples.txt", newLine:true)
+ input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ // Snapshot the whole output map (process.out has no `match` channel); regenerate the stored snapshot, which previously recorded null.
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi") {
+
+ config "./nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = []
+ input[2] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour")
+ .collectFile(name:"samples.txt", newLine:true)
+ input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.vcf,
+ process.out.tbi.get(0).get(1).find { file(it).name.matches("normal.vcf.gz.tbi|tumour.vcf.gz.tbi") }, // sample is spelled "tumour" in the groups file above
+ ).match() } // .match() performs the comparison; a bare snapshot(...) object is always truthy
+ )
+ }
+
+ }
+
+ test("homo_sapiens - [ vcf, tbi ], samples, [], [], [] -stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour")
+ .collectFile(name:"samples.txt", newLine:true)
+ input[2] = []
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets -stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = []
+ input[2] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour")
+ .collectFile(name:"samples.txt", newLine:true)
+ input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi -stub") {
+
+ config "./nextflow.config"
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/paired_mutect2_calls/test_test2_paired_mutect2_calls.vcf.gz.tbi', checkIfExists: true)
+ ]
+ input[1] = []
+ input[2] = Channel.of("normal\t-\tnormal", "tumour\t-\ttumour")
+ .collectFile(name:"samples.txt", newLine:true)
+ input[3] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ input[4] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+}
diff --git a/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap
new file mode 100644
index 00000000..66c3c1dd
--- /dev/null
+++ b/modules/nf-core/bcftools/pluginsplit/tests/main.nf.test.snap
@@ -0,0 +1,240 @@
+{
+ "homo_sapiens - [ vcf, tbi ], samples, [], [], [] -stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "normal.vcf:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "tumour.vcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "normal.vcf:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "tumour.vcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-09T15:56:42.307673651"
+ },
+ "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets": {
+ "content": null,
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-09T15:56:21.498991402"
+ },
+ "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets - tbi -stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "15000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "40001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "15000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "40001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "15000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "40001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "15000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "40001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-09T15:57:04.483688966"
+ },
+ "homo_sapiens - [ vcf, tbi ], samples, [], [], []": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "normal.vcf:md5,69cfc4bf92bf3e2847081a2026a4d3bb",
+ "tumour.vcf:md5,08fa5c8d5561c2a8d7c300cb0eea1042"
+ ]
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "normal.vcf:md5,69cfc4bf92bf3e2847081a2026a4d3bb",
+ "tumour.vcf:md5,08fa5c8d5561c2a8d7c300cb0eea1042"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-09T15:56:10.033818589"
+ },
+ "homo_sapiens - [ vcf, tbi ], [], groups, regions, targets -stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "15000.vcf:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "40001.vcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc"
+ ],
+ "csi": [
+
+ ],
+ "tbi": [
+
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "15000.vcf:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "40001.vcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,8625f8c08503e47d029d48430c0bfccc"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-09T15:56:53.641165787"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/bcftools/pluginsplit/tests/nextflow.config b/modules/nf-core/bcftools/pluginsplit/tests/nextflow.config
new file mode 100644
index 00000000..9b9a4783
--- /dev/null
+++ b/modules/nf-core/bcftools/pluginsplit/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: 'BCFTOOLS_PLUGINSPLIT' {
+ ext.args = '--write-index=tbi --output-type z'
+ }
+}
diff --git a/modules/nf-core/bcftools/pluginsplit/tests/tags.yml b/modules/nf-core/bcftools/pluginsplit/tests/tags.yml
new file mode 100644
index 00000000..2f29ef18
--- /dev/null
+++ b/modules/nf-core/bcftools/pluginsplit/tests/tags.yml
@@ -0,0 +1,2 @@
+bcftools/pluginsplit:
+ - "modules/nf-core/bcftools/pluginsplit/**"
diff --git a/modules/nf-core/bcftools/sort/tests/main.nf.test b/modules/nf-core/bcftools/sort/tests/main.nf.test
index 1207a739..b9bdd76a 100644
--- a/modules/nf-core/bcftools/sort/tests/main.nf.test
+++ b/modules/nf-core/bcftools/sort/tests/main.nf.test
@@ -15,7 +15,7 @@ nextflow_process {
"""
input[0] = [
[ id:'test' ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true)
]
"""
}
@@ -39,7 +39,7 @@ nextflow_process {
"""
input[0] = [
[ id:'test' ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true)
]
"""
}
@@ -69,7 +69,7 @@ nextflow_process {
"""
input[0] = [
[ id:'test' ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true)
]
"""
}
@@ -99,7 +99,7 @@ nextflow_process {
"""
input[0] = [
[ id:'test' ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true)
]
"""
}
@@ -127,7 +127,7 @@ nextflow_process {
"""
input[0] = [
[ id:'test' ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true)
]
"""
}
@@ -152,7 +152,7 @@ nextflow_process {
"""
input[0] = [
[ id:'test' ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true)
]
"""
}
@@ -178,7 +178,7 @@ nextflow_process {
"""
input[0] = [
[ id:'test' ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true)
]
"""
}
@@ -204,7 +204,7 @@ nextflow_process {
"""
input[0] = [
[ id:'test' ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true)
]
"""
}
diff --git a/modules/nf-core/bcftools/stats/environment.yml b/modules/nf-core/bcftools/stats/environment.yml
new file mode 100644
index 00000000..128fe204
--- /dev/null
+++ b/modules/nf-core/bcftools/stats/environment.yml
@@ -0,0 +1,8 @@
+name: bcftools_stats
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::bcftools=1.20
+ - bioconda::htslib=1.20
diff --git a/modules/nf-core/bcftools/stats/main.nf b/modules/nf-core/bcftools/stats/main.nf
new file mode 100644
index 00000000..20e5da77
--- /dev/null
+++ b/modules/nf-core/bcftools/stats/main.nf
@@ -0,0 +1,60 @@
+process BCFTOOLS_STATS { // Runs `bcftools stats` on one input file and emits the text report plus versions.yml
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0':
+ 'biocontainers/bcftools:1.20--h8b25389_0' }"
+
+ input:
+ tuple val(meta), path(vcf), path(tbi) // tbi is staged next to the VCF but never referenced in the command line
+ tuple val(meta2), path(regions) // optional: forwarded as --regions-file when non-empty
+ tuple val(meta3), path(targets) // optional: forwarded as --targets-file when non-empty
+ tuple val(meta4), path(samples) // optional: forwarded as --samples-file when non-empty
+ tuple val(meta5), path(exons) // optional: forwarded as --exons when non-empty
+ tuple val(meta6), path(fasta) // optional: forwarded as --fasta-ref when non-empty
+
+ output:
+ tuple val(meta), path("*stats.txt"), emit: stats // picks up ${prefix}.bcftools_stats.txt written by script/stub
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '' // extra command-line options from config; empty by default
+ def prefix = task.ext.prefix ?: "${meta.id}" // output basename; falls back to meta.id
+ def regions_file = regions ? "--regions-file ${regions}" : "" // each flag below is emitted only when its input is truthy
+ def targets_file = targets ? "--targets-file ${targets}" : ""
+ def samples_file = samples ? "--samples-file ${samples}" : ""
+ def reference_fasta = fasta ? "--fasta-ref ${fasta}" : ""
+ def exons_file = exons ? "--exons ${exons}" : ""
+ """
+ bcftools stats \\
+ $args \\
+ $regions_file \\
+ $targets_file \\
+ $samples_file \\
+ $reference_fasta \\
+ $exons_file \\
+ $vcf > ${prefix}.bcftools_stats.txt
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ // Stub run: touch an empty report with the same name as the real output, then record the tool version.
+ """
+ touch ${prefix}.bcftools_stats.txt
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/bcftools/stats/meta.yml b/modules/nf-core/bcftools/stats/meta.yml
new file mode 100644
index 00000000..7ea2103e
--- /dev/null
+++ b/modules/nf-core/bcftools/stats/meta.yml
@@ -0,0 +1,77 @@
+name: bcftools_stats
+description: Generates stats from VCF files
+keywords:
+ - variant calling
+ - stats
+ - VCF
+tools:
+ - stats:
+ description: |
+ Parses VCF or BCF and produces text file stats which is suitable for
+ machine processing and can be plotted using plot-vcfstats.
+ homepage: http://samtools.github.io/bcftools/bcftools.html
+ documentation: http://www.htslib.org/doc/bcftools.html
+ doi: 10.1093/bioinformatics/btp352
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - vcf:
+ type: file
+ description: VCF input file
+ pattern: "*.{vcf}"
+ - tbi:
+ type: file
+ description: |
+ The tab index for the VCF file to be inspected. Optional: only required when parameter regions is chosen.
+ pattern: "*.tbi"
+ - regions:
+ type: file
+ description: |
+ Optionally, restrict the operation to regions listed in this file. (VCF, BED or tab-delimited)
+ - targets:
+ type: file
+ description: |
+ Optionally, restrict the operation to regions listed in this file (doesn't rely upon tbi index files)
+ - samples:
+ type: file
+ description: |
+ Optional, file of sample names to be included or excluded.
+ e.g. 'file.tsv'
+ - exons:
+ type: file
+ description: |
+ Tab-delimited file with exons for indel frameshifts (chr,beg,end; 1-based, inclusive, optionally bgzip compressed).
+ e.g. 'exons.tsv.gz'
+ - fasta:
+ type: file
+ description: |
+ Faidx indexed reference sequence file to determine INDEL context.
+ e.g. 'reference.fa'
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - stats:
+ type: file
+ description: Text output file containing stats
+ pattern: "*_{stats.txt}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@joseespinosa"
+ - "@drpatelh"
+ - "@SusiJo"
+ - "@TCLamnidis"
+maintainers:
+ - "@joseespinosa"
+ - "@drpatelh"
+ - "@SusiJo"
+ - "@TCLamnidis"
diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test b/modules/nf-core/bcftools/stats/tests/main.nf.test
new file mode 100644
index 00000000..be618b0b
--- /dev/null
+++ b/modules/nf-core/bcftools/stats/tests/main.nf.test
@@ -0,0 +1,182 @@
+nextflow_process {
+
+ name "Test Process BCFTOOLS_STATS"
+ script "../main.nf"
+ process "BCFTOOLS_STATS"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "bcftools"
+ tag "bcftools/stats"
+
+ test("sarscov2 - vcf_gz") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ []]
+ input[1] = [ [], [] ]
+ input[2] = [ [], [] ]
+ input[3] = [ [], [] ]
+ input[4] = [ [], [] ]
+ input[5] = [ [], [] ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("versions") },
+ { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() },
+ )
+ }
+
+ }
+
+ test("sarscov2 - vcf_gz - regions") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'regions_test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)]
+ input[1] = [ [id:'regions_test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) ]
+ input[2] = [ [], [] ]
+ input[3] = [ [], [] ]
+ input[4] = [ [], [] ]
+ input[5] = [ [], [] ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("regions_versions") },
+ { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() },
+ )
+ }
+
+ }
+
+ test("sarscov2 - vcf_gz - targets") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'targets_test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ [] ]
+ input[1] = [ [], [] ]
+ input[2] = [ [id:'targets_test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true)
+ ]
+ input[3] = [ [], [] ]
+ input[4] = [ [], [] ]
+ input[5] = [ [], [] ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("targets_versions") },
+ { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() },
+ )
+ }
+
+ }
+
+ test("sarscov2 - vcf_gz - exons") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'exon_test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ [] ]
+ input[1] = [ [], [] ]
+ input[2] = [ [], [] ]
+ input[3] = [ [], [] ]
+ input[4] = [ [id: "exon_test"],
+ file(params.modules_testdata_base_path + 'delete_me/bcftools/stats/exons.tsv.gz', checkIfExists: true) ]
+ input[5] = [ [], [] ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("exon_versions") },
+ { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() },
+ )
+ }
+
+ }
+
+ test("sarscov2 - vcf_gz - reference") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'ref_test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ [] ]
+ input[1] = [ [], [] ]
+ input[2] = [ [], [] ]
+ input[3] = [ [], [] ]
+ input[4] = [ [], [] ]
+ input[5] = [ [id: 'ref_test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("ref_versions") },
+ { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() },
+ )
+ }
+
+ }
+
+
+ test("sarscov2 - vcf_gz - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ []]
+ input[1] = [ [], [] ]
+ input[2] = [ [], [] ]
+ input[3] = [ [], [] ]
+ input[4] = [ [], [] ]
+ input[5] = [ [], [] ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test.snap b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap
new file mode 100644
index 00000000..cd8cff6d
--- /dev/null
+++ b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap
@@ -0,0 +1,180 @@
+{
+ "sarscov2 - vcf_gz - reference": {
+ "content": [
+ [
+ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
+ "# The command line was:\tbcftools stats --fasta-ref genome.fasta test.vcf.gz",
+ "#",
+ "# Definition of sets:",
+ "# ID\t[2]id\t[3]tab-separated file names",
+ "ID\t0\ttest.vcf.gz"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-31T18:14:35.506777837"
+ },
+ "sarscov2 - vcf_gz - exons": {
+ "content": [
+ [
+ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
+ "# The command line was:\tbcftools stats --exons exons.tsv.gz test.vcf.gz",
+ "#",
+ "# Definition of sets:",
+ "# ID\t[2]id\t[3]tab-separated file names",
+ "ID\t0\ttest.vcf.gz"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-31T18:14:30.57486244"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-31T18:16:27.637515559"
+ },
+ "sarscov2 - vcf_gz - targets": {
+ "content": [
+ [
+ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
+ "# The command line was:\tbcftools stats --targets-file test2.targets.tsv.gz test.vcf.gz",
+ "#",
+ "# Definition of sets:",
+ "# ID\t[2]id\t[3]tab-separated file names",
+ "ID\t0\ttest.vcf.gz"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-31T18:14:25.732997442"
+ },
+ "regions_versions": {
+ "content": [
+ [
+ "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-31T18:16:32.559884458"
+ },
+ "targets_versions": {
+ "content": [
+ [
+ "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-31T18:16:37.512009805"
+ },
+ "sarscov2 - vcf_gz - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a"
+ ],
+ "stats": [
+ [
+ {
+ "id": "test"
+ },
+ "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-03T11:57:09.614976125"
+ },
+ "exon_versions": {
+ "content": [
+ [
+ "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-31T18:16:42.347397266"
+ },
+ "ref_versions": {
+ "content": [
+ [
+ "versions.yml:md5,17cdf9d1ad31f6b1f5935dfcc9fe7b9a"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-31T18:16:47.26823622"
+ },
+ "sarscov2 - vcf_gz": {
+ "content": [
+ [
+ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
+ "# The command line was:\tbcftools stats test.vcf.gz",
+ "#",
+ "# Definition of sets:",
+ "# ID\t[2]id\t[3]tab-separated file names",
+ "ID\t0\ttest.vcf.gz"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-31T18:16:27.670416598"
+ },
+ "sarscov2 - vcf_gz - regions": {
+ "content": [
+ [
+ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
+ "# The command line was:\tbcftools stats --regions-file test3.vcf.gz test.vcf.gz",
+ "#",
+ "# Definition of sets:",
+ "# ID\t[2]id\t[3]tab-separated file names",
+ "ID\t0\ttest.vcf.gz"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-31T18:14:20.759094062"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/bcftools/stats/tests/tags.yml b/modules/nf-core/bcftools/stats/tests/tags.yml
new file mode 100644
index 00000000..53c12d92
--- /dev/null
+++ b/modules/nf-core/bcftools/stats/tests/tags.yml
@@ -0,0 +1,2 @@
+bcftools/stats:
+ - "modules/nf-core/bcftools/stats/**"
diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test b/modules/nf-core/bcftools/view/tests/main.nf.test
index 14a7283d..1e60c50d 100644
--- a/modules/nf-core/bcftools/view/tests/main.nf.test
+++ b/modules/nf-core/bcftools/view/tests/main.nf.test
@@ -18,8 +18,8 @@ nextflow_process {
"""
input[0] = [
[ id:'out', single_end:false ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
]
input[1] = []
input[2] = []
@@ -49,8 +49,8 @@ nextflow_process {
"""
input[0] = [
[ id:'out', single_end:false ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
]
input[1] = []
input[2] = []
@@ -83,8 +83,8 @@ nextflow_process {
"""
input[0] = [
[ id:'out', single_end:false ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
]
input[1] = []
input[2] = []
@@ -117,8 +117,8 @@ nextflow_process {
"""
input[0] = [
[ id:'out', single_end:false ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
]
input[1] = []
input[2] = []
@@ -151,11 +151,11 @@ nextflow_process {
"""
input[0] = [
[ id:'out', single_end:false ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
]
- input[1] = file(params.test_data['sarscov2']['illumina']['test3_vcf_gz'], checkIfExists: true)
- input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_targets_tsv_gz'], checkIfExists: true)
+ input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true)
+ input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true)
input[3] = []
"""
}
@@ -183,8 +183,8 @@ nextflow_process {
"""
input[0] = [
[ id:'out', single_end:false ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
]
input[1] = []
input[2] = []
@@ -215,8 +215,8 @@ nextflow_process {
"""
input[0] = [
[ id:'out', single_end:false ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
]
input[1] = []
input[2] = []
@@ -245,8 +245,8 @@ nextflow_process {
"""
input[0] = [
[ id:'out', single_end:false ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
]
input[1] = []
input[2] = []
@@ -275,8 +275,8 @@ nextflow_process {
"""
input[0] = [
[ id:'out', single_end:false ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
]
input[1] = []
input[2] = []
diff --git a/modules/nf-core/bedtools/merge/environment.yml b/modules/nf-core/bedtools/merge/environment.yml
new file mode 100644
index 00000000..99707878
--- /dev/null
+++ b/modules/nf-core/bedtools/merge/environment.yml
@@ -0,0 +1,7 @@
+name: bedtools_merge
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::bedtools=2.31.1
diff --git a/modules/nf-core/bedtools/merge/main.nf b/modules/nf-core/bedtools/merge/main.nf
new file mode 100644
index 00000000..5310647d
--- /dev/null
+++ b/modules/nf-core/bedtools/merge/main.nf
@@ -0,0 +1,47 @@
+process BEDTOOLS_MERGE {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' :
+ 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }"
+
+ input:
+ tuple val(meta), path(bed)
+
+ output:
+ tuple val(meta), path('*.bed'), emit: bed
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ bedtools \\
+ merge \\
+ -i $bed \\
+ $args \\
+ > ${prefix}.bed
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.bed
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/bedtools/merge/meta.yml b/modules/nf-core/bedtools/merge/meta.yml
new file mode 100644
index 00000000..d7463e3d
--- /dev/null
+++ b/modules/nf-core/bedtools/merge/meta.yml
@@ -0,0 +1,45 @@
+name: bedtools_merge
+description: combines overlapping or “book-ended” features in an interval file into a single feature which spans all of the combined features.
+keywords:
+ - bed
+ - merge
+ - bedtools
+ - overlapped bed
+tools:
+ - bedtools:
+ description: |
+ A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types.
+ documentation: https://bedtools.readthedocs.io/en/latest/content/tools/merge.html
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - bed:
+ type: file
+ description: Input BED file
+ pattern: "*.{bed}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - bed:
+ type: file
+ description: Overlapped bed file with combined features
+ pattern: "*.{bed}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@edmundmiller"
+ - "@sruthipsuresh"
+ - "@drpatelh"
+maintainers:
+ - "@edmundmiller"
+ - "@sruthipsuresh"
+ - "@drpatelh"
diff --git a/modules/nf-core/bedtools/merge/tests/main.nf.test b/modules/nf-core/bedtools/merge/tests/main.nf.test
new file mode 100644
index 00000000..95dba8e5
--- /dev/null
+++ b/modules/nf-core/bedtools/merge/tests/main.nf.test
@@ -0,0 +1,34 @@
+nextflow_process {
+
+ name "Test Process BEDTOOLS_MERGE"
+ script "../main.nf"
+ config "./nextflow.config"
+ process "BEDTOOLS_MERGE"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "bedtools"
+ tag "bedtools/merge"
+
+ test("test_bedtools_merge") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test'], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/bedtools/merge/tests/main.nf.test.snap b/modules/nf-core/bedtools/merge/tests/main.nf.test.snap
new file mode 100644
index 00000000..ee6c4e63
--- /dev/null
+++ b/modules/nf-core/bedtools/merge/tests/main.nf.test.snap
@@ -0,0 +1,35 @@
+{
+ "test_bedtools_merge": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_out.bed:md5,0cf6ed2b6f470cd44a247da74ca4fe4e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,2d134badb4cd1e4e903696c7967f28d6"
+ ],
+ "bed": [
+ [
+ {
+ "id": "test"
+ },
+ "test_out.bed:md5,0cf6ed2b6f470cd44a247da74ca4fe4e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,2d134badb4cd1e4e903696c7967f28d6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T17:07:09.721153"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/bedtools/merge/tests/nextflow.config b/modules/nf-core/bedtools/merge/tests/nextflow.config
new file mode 100644
index 00000000..16444e98
--- /dev/null
+++ b/modules/nf-core/bedtools/merge/tests/nextflow.config
@@ -0,0 +1,7 @@
+process {
+
+ withName: BEDTOOLS_MERGE {
+ ext.prefix = { "${meta.id}_out" }
+ }
+
+}
diff --git a/modules/nf-core/bedtools/merge/tests/tags.yml b/modules/nf-core/bedtools/merge/tests/tags.yml
new file mode 100644
index 00000000..60c8cad1
--- /dev/null
+++ b/modules/nf-core/bedtools/merge/tests/tags.yml
@@ -0,0 +1,2 @@
+bedtools/merge:
+ - "modules/nf-core/bedtools/merge/**"
diff --git a/modules/nf-core/bedtools/sort/environment.yml b/modules/nf-core/bedtools/sort/environment.yml
new file mode 100644
index 00000000..87b2e425
--- /dev/null
+++ b/modules/nf-core/bedtools/sort/environment.yml
@@ -0,0 +1,7 @@
+name: bedtools_sort
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::bedtools=2.31.1
diff --git a/modules/nf-core/bedtools/sort/main.nf b/modules/nf-core/bedtools/sort/main.nf
new file mode 100644
index 00000000..b833150a
--- /dev/null
+++ b/modules/nf-core/bedtools/sort/main.nf
@@ -0,0 +1,54 @@
+process BEDTOOLS_SORT {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' :
+ 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }"
+
+ input:
+ tuple val(meta), path(intervals)
+ path genome_file
+
+ output:
+ tuple val(meta), path("*.${extension}"), emit: sorted
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def genome_cmd = genome_file ? "-g $genome_file" : ""
+ extension = task.ext.suffix ?: intervals.extension
+ if ("$intervals" == "${prefix}.${extension}") {
+ error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ }
+ """
+ bedtools \\
+ sort \\
+ -i $intervals \\
+ $genome_cmd \\
+ $args \\
+ > ${prefix}.${extension}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ extension = task.ext.suffix ?: intervals.extension
+ """
+ touch ${prefix}.${extension}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bedtools: \$(bedtools --version | sed -e "s/bedtools v//g")
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/bedtools/sort/meta.yml b/modules/nf-core/bedtools/sort/meta.yml
new file mode 100644
index 00000000..7c915f5f
--- /dev/null
+++ b/modules/nf-core/bedtools/sort/meta.yml
@@ -0,0 +1,54 @@
+name: bedtools_sort
+description: Sorts a feature file by chromosome and other criteria.
+keywords:
+ - bed
+ - sort
+ - bedtools
+ - chromosome
+tools:
+ - bedtools:
+ description: |
+ A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types.
+ documentation: https://bedtools.readthedocs.io/en/latest/content/tools/sort.html
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - intervals:
+ type: file
+ description: BED/BEDGRAPH
+ pattern: "*.{bed,bedGraph}"
+ - genome_file:
+ type: file
+ description: |
+ Optional reference genome 2 column file that defines the expected chromosome order.
+ pattern: "*.{fai,txt,chromsizes}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - sorted:
+ type: file
+ description: Sorted output file
+ pattern: "*.${extension}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@edmundmiller"
+ - "@sruthipsuresh"
+ - "@drpatelh"
+ - "@chris-cheshire"
+ - "@adamrtalbot"
+maintainers:
+ - "@edmundmiller"
+ - "@sruthipsuresh"
+ - "@drpatelh"
+ - "@chris-cheshire"
+ - "@adamrtalbot"
diff --git a/modules/nf-core/bedtools/sort/tests/main.nf.test b/modules/nf-core/bedtools/sort/tests/main.nf.test
new file mode 100644
index 00000000..b1f36dd9
--- /dev/null
+++ b/modules/nf-core/bedtools/sort/tests/main.nf.test
@@ -0,0 +1,58 @@
+nextflow_process {
+
+ name "Test Process BEDTOOLS_SORT"
+ script "../main.nf"
+ config "./nextflow.config"
+ process "BEDTOOLS_SORT"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "bedtools"
+ tag "bedtools/sort"
+
+ test("test_bedtools_sort") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true)
+ ]
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+
+ test("test_bedtools_sort_with_genome") {
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true)
+ ]
+ input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/bedtools/sort/tests/main.nf.test.snap b/modules/nf-core/bedtools/sort/tests/main.nf.test.snap
new file mode 100644
index 00000000..f10e8b98
--- /dev/null
+++ b/modules/nf-core/bedtools/sort/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+ "test_bedtools_sort_with_genome": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8"
+ ],
+ "sorted": [
+ [
+ {
+ "id": "test"
+ },
+ "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-19T10:13:11.830452"
+ },
+ "test_bedtools_sort": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8"
+ ],
+ "sorted": [
+ [
+ {
+ "id": "test"
+ },
+ "test_out.testtext:md5,fe4053cf4de3aebbdfc3be2efb125a74"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,cdbae2c7ebc41e534aaf0835779061f8"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-19T10:16:40.535947"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/bedtools/sort/tests/nextflow.config b/modules/nf-core/bedtools/sort/tests/nextflow.config
new file mode 100644
index 00000000..f203c99c
--- /dev/null
+++ b/modules/nf-core/bedtools/sort/tests/nextflow.config
@@ -0,0 +1,8 @@
+process {
+
+ withName: BEDTOOLS_SORT {
+ ext.prefix = { "${meta.id}_out" }
+ ext.suffix = "testtext"
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/bedtools/sort/tests/tags.yml b/modules/nf-core/bedtools/sort/tests/tags.yml
new file mode 100644
index 00000000..47c85eea
--- /dev/null
+++ b/modules/nf-core/bedtools/sort/tests/tags.yml
@@ -0,0 +1,2 @@
+bedtools/sort:
+ - "modules/nf-core/bedtools/sort/**"
diff --git a/modules/nf-core/cadd/cadd.diff b/modules/nf-core/cadd/cadd.diff
new file mode 100644
index 00000000..2ee51723
--- /dev/null
+++ b/modules/nf-core/cadd/cadd.diff
@@ -0,0 +1,22 @@
+Changes in module 'nf-core/cadd'
+--- modules/nf-core/cadd/main.nf
++++ modules/nf-core/cadd/main.nf
+@@ -7,13 +7,14 @@
+
+ containerOptions {
+ (workflow.containerEngine == 'singularity') ?
+- "-B ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations" :
+- "-v ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations"
++ "-B ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations -B ${prescored_dir}:/opt/CADD-scripts-1.6.post1/data/prescored" :
++ "-v ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations -v ${prescored_dir}:/opt/CADD-scripts-1.6.post1/data/prescored"
+ }
+
+ input:
+ tuple val(meta), path(vcf)
+ path(annotation_dir)
++ path(prescored_dir)
+
+ output:
+ tuple val(meta), path("*.tsv.gz"), emit: tsv
+
+************************************************************
diff --git a/modules/nf-core/cadd/environment.yml b/modules/nf-core/cadd/environment.yml
new file mode 100644
index 00000000..26f170e9
--- /dev/null
+++ b/modules/nf-core/cadd/environment.yml
@@ -0,0 +1,9 @@
+name: cadd
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::cadd-scripts=1.6.post1
+ - conda-forge::conda=4.14.0
+ - conda-forge::mamba=1.4.0
diff --git a/modules/nf-core/cadd/main.nf b/modules/nf-core/cadd/main.nf
new file mode 100644
index 00000000..52490c64
--- /dev/null
+++ b/modules/nf-core/cadd/main.nf
@@ -0,0 +1,54 @@
+process CADD {
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container 'docker.io/biocontainers/cadd-scripts-with-envs:1.6.post1_cv1'
+
+ containerOptions {
+ (workflow.containerEngine == 'singularity') ?
+ "-B ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations -B ${prescored_dir}:/opt/CADD-scripts-1.6.post1/data/prescored" :
+ "-v ${annotation_dir}:/opt/CADD-scripts-1.6.post1/data/annotations -v ${prescored_dir}:/opt/CADD-scripts-1.6.post1/data/prescored"
+ }
+
+ input:
+ tuple val(meta), path(vcf)
+ path(annotation_dir)
+ path(prescored_dir)
+
+ output:
+ tuple val(meta), path("*.tsv.gz"), emit: tsv
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def VERSION = "1.6.post1" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+ """
+ cadd.sh \\
+ -o ${prefix}.tsv.gz \\
+ $args \\
+ $vcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ cadd: $VERSION
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def VERSION = "1.6.post1" // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+ """
+ touch ${prefix}.tsv.gz
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ cadd: $VERSION
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/cadd/meta.yml b/modules/nf-core/cadd/meta.yml
new file mode 100644
index 00000000..230ed9c0
--- /dev/null
+++ b/modules/nf-core/cadd/meta.yml
@@ -0,0 +1,49 @@
+name: "cadd"
+description: CADD is a tool for scoring the deleteriousness of single nucleotide variants as well as insertion/deletions variants in the human genome.
+keywords:
+ - cadd
+ - annotate
+ - variants
+tools:
+ - "cadd":
+ description: "CADD scripts release for offline scoring"
+ homepage: "https://cadd.gs.washington.edu/"
+ documentation: "https://github.com/kircherlab/CADD-scripts/blob/master/README.md"
+ tool_dev_url: "https://github.com/kircherlab/CADD-scripts/"
+ doi: "10.1093/nar/gky1016"
+ licence:
+ - Restricted. Free for non-commercial users.
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - vcf:
+ type: file
+ description: Input file for annotation in vcf or vcf.gz format
+ pattern: "*.{vcf,vcf.gz}"
+ - annotation_dir:
+ type: file
+ description: |
+ Path to folder containing the vcf files with precomputed CADD scores.
+ This folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation.
+ pattern: "*.{vcf,vcf.gz}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - tsv:
+ type: file
+ description: Annotated tsv file
+ pattern: "*.{tsv,tsv.gz}"
+authors:
+ - "@ramprasadn"
+maintainers:
+ - "@ramprasadn"
diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf
index f132b2ad..b68e5f91 100644
--- a/modules/nf-core/cat/fastq/main.nf
+++ b/modules/nf-core/cat/fastq/main.nf
@@ -53,9 +53,9 @@ process CAT_FASTQ {
def prefix = task.ext.prefix ?: "${meta.id}"
def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()]
if (meta.single_end) {
- if (readList.size > 1) {
+ if (readList.size >= 1) {
"""
- touch ${prefix}.merged.fastq.gz
+ echo '' | gzip > ${prefix}.merged.fastq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
@@ -64,10 +64,10 @@ process CAT_FASTQ {
"""
}
} else {
- if (readList.size > 2) {
+ if (readList.size >= 2) {
"""
- touch ${prefix}_1.merged.fastq.gz
- touch ${prefix}_2.merged.fastq.gz
+ echo '' | gzip > ${prefix}_1.merged.fastq.gz
+ echo '' | gzip > ${prefix}_2.merged.fastq.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test
index a71dcb8d..f88a78b6 100644
--- a/modules/nf-core/cat/fastq/tests/main.nf.test
+++ b/modules/nf-core/cat/fastq/tests/main.nf.test
@@ -13,9 +13,6 @@ nextflow_process {
test("test_cat_fastq_single_end") {
when {
- params {
- outdir = "$outputDir"
- }
process {
"""
input[0] = Channel.of([
@@ -38,9 +35,6 @@ nextflow_process {
test("test_cat_fastq_paired_end") {
when {
- params {
- outdir = "$outputDir"
- }
process {
"""
input[0] = Channel.of([
@@ -65,9 +59,6 @@ nextflow_process {
test("test_cat_fastq_single_end_same_name") {
when {
- params {
- outdir = "$outputDir"
- }
process {
"""
input[0] = Channel.of([
@@ -90,9 +81,6 @@ nextflow_process {
test("test_cat_fastq_paired_end_same_name") {
when {
- params {
- outdir = "$outputDir"
- }
process {
"""
input[0] = Channel.of([
@@ -117,9 +105,129 @@ nextflow_process {
test("test_cat_fastq_single_end_single_file") {
when {
- params {
- outdir = "$outputDir"
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_cat_fastq_single_end - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_cat_fastq_paired_end - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_cat_fastq_single_end_same_name - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)]
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_cat_fastq_paired_end_same_name - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
+ ])
+ """
}
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_cat_fastq_single_end_single_file - stub") {
+
+ options "-stub"
+
+ when {
process {
"""
input[0] = Channel.of([
diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap
index 43dfe28f..aec119a9 100644
--- a/modules/nf-core/cat/fastq/tests/main.nf.test.snap
+++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap
@@ -28,6 +28,10 @@
]
}
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
"timestamp": "2024-01-17T17:30:39.816981"
},
"test_cat_fastq_single_end_same_name": {
@@ -59,6 +63,10 @@
]
}
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
"timestamp": "2024-01-17T17:32:35.229332"
},
"test_cat_fastq_single_end_single_file": {
@@ -90,6 +98,10 @@
]
}
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
"timestamp": "2024-01-17T17:34:00.058829"
},
"test_cat_fastq_paired_end_same_name": {
@@ -127,8 +139,123 @@
]
}
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
"timestamp": "2024-01-17T17:33:33.031555"
},
+ "test_cat_fastq_single_end - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-05T12:07:28.244999"
+ },
+ "test_cat_fastq_paired_end_same_name - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-05T12:07:57.070911"
+ },
+ "test_cat_fastq_single_end_same_name - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-05T12:07:46.796254"
+ },
"test_cat_fastq_paired_end": {
"content": [
{
@@ -164,6 +291,86 @@
]
}
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
"timestamp": "2024-01-17T17:32:02.270935"
+ },
+ "test_cat_fastq_paired_end - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940",
+ "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-05T12:07:37.807553"
+ },
+ "test_cat_fastq_single_end_single_file - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-05T12:14:51.861264"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/deepvariant/README.md b/modules/nf-core/deepvariant/README.md
index ca112a7d..9d1ceb34 100644
--- a/modules/nf-core/deepvariant/README.md
+++ b/modules/nf-core/deepvariant/README.md
@@ -1,6 +1,8 @@
# Conda is not supported at the moment
-The [bioconda](https://bioconda.github.io/recipes/deepvariant/README.html) recipe is not fully working as expected
+The [bioconda](https://bioconda.github.io/recipes/deepvariant/README.html) recipe is not fully working as expected.
+
+See https://github.com/bioconda/bioconda-recipes/issues/30310 and https://github.com/nf-core/modules/issues/1754 for more information.
Hence, we are using the docker container provided by the authors of the tool:
diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf
index 507b6c11..8d3d0911 100644
--- a/modules/nf-core/deepvariant/main.nf
+++ b/modules/nf-core/deepvariant/main.nf
@@ -2,14 +2,17 @@ process DEEPVARIANT {
tag "$meta.id"
label 'process_high'
- //Conda is not supported at the moment
- container "nf-core/deepvariant:1.5.0"
+ // FIXME Conda is not supported at the moment
+ // BUG https://github.com/nf-core/modules/issues/1754
+ // BUG https://github.com/bioconda/bioconda-recipes/issues/30310
+ container "nf-core/deepvariant:1.6.1"
input:
tuple val(meta), path(input), path(index), path(intervals)
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
tuple val(meta4), path(gzi)
+ tuple val(meta5), path(par_bed)
output:
tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf
@@ -29,6 +32,10 @@ process DEEPVARIANT {
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"
def regions = intervals ? "--regions=${intervals}" : ""
+ def par_regions = par_bed ? "--par_regions_bed=${par_bed}" : ""
+ // WARN https://github.com/nf-core/modules/pull/5801#issuecomment-2194293755
+ // FIXME Revert this on next version bump
+ def VERSION = '1.6.1'
"""
/opt/deepvariant/bin/run_deepvariant \\
@@ -38,12 +45,13 @@ process DEEPVARIANT {
--output_gvcf=${prefix}.g.vcf.gz \\
${args} \\
${regions} \\
- --intermediate_results_dir=. \\
+ ${par_regions} \\
+ --intermediate_results_dir=tmp \\
--num_shards=${task.cpus}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- deepvariant: \$(echo \$(/opt/deepvariant/bin/run_deepvariant --version) | sed 's/^.*version //; s/ .*\$//' )
+ deepvariant: $VERSION
END_VERSIONS
"""
@@ -53,6 +61,9 @@ process DEEPVARIANT {
error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead."
}
prefix = task.ext.prefix ?: "${meta.id}"
+ // WARN https://github.com/nf-core/modules/pull/5801#issuecomment-2194293755
+ // FIXME Revert this on next version bump
+ def VERSION = '1.6.1'
"""
touch ${prefix}.vcf.gz
touch ${prefix}.vcf.gz.tbi
@@ -61,7 +72,7 @@ process DEEPVARIANT {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- deepvariant: \$(echo \$(/opt/deepvariant/bin/run_deepvariant --version) | sed 's/^.*version //; s/ .*\$//' )
+ deepvariant: $VERSION
END_VERSIONS
"""
}
diff --git a/modules/nf-core/deepvariant/meta.yml b/modules/nf-core/deepvariant/meta.yml
index a50dc57d..2327dd5f 100644
--- a/modules/nf-core/deepvariant/meta.yml
+++ b/modules/nf-core/deepvariant/meta.yml
@@ -57,6 +57,15 @@ input:
type: file
description: GZI index of reference fasta file
pattern: "*.gzi"
+ - meta5:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'genome' ]
+ - par_bed:
+ type: file
+ description: BED file containing the pseudoautosomal regions (PAR)
+ pattern: "*.bed"
output:
- meta:
type: map
diff --git a/modules/nf-core/deepvariant/tests/main.nf.test b/modules/nf-core/deepvariant/tests/main.nf.test
index 91612c1e..17765233 100644
--- a/modules/nf-core/deepvariant/tests/main.nf.test
+++ b/modules/nf-core/deepvariant/tests/main.nf.test
@@ -31,6 +31,9 @@ nextflow_process {
input[3] = [
[],[]
]
+ input[4] = [
+ [],[]
+ ]
"""
}
}
@@ -66,6 +69,48 @@ nextflow_process {
input[3] = [
[],[]
]
+ input[4] = [
+ [],[]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("homo_sapiens - [cram, crai, genome_bed] - fasta - fai - par_bed") {
+ config "./nextflow-non-autosomal-calling.config"
+ tag "test"
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [],[]
+ ]
+ input[4] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true)
+ ]
"""
}
}
@@ -102,6 +147,9 @@ nextflow_process {
[ id:'genome'],
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.gz.gzi', checkIfExists: true)
]
+ input[4] = [
+ [],[]
+ ]
"""
}
}
diff --git a/modules/nf-core/deepvariant/tests/main.nf.test.snap b/modules/nf-core/deepvariant/tests/main.nf.test.snap
index 6ad76ae4..04f87774 100644
--- a/modules/nf-core/deepvariant/tests/main.nf.test.snap
+++ b/modules/nf-core/deepvariant/tests/main.nf.test.snap
@@ -1,269 +1,358 @@
{
- "homo_sapiens - [bam, bai] - fasta_gz - fasta_gz_fai": {
- "content": [
- {
- "0": [
- [
+ "homo_sapiens - [bam, bai] - fasta_gz - fasta_gz_fai": {
+ "content": [
{
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4"
- ]
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d"
+ ]
+ ],
+ "4": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ],
+ "gvcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a"
+ ]
+ ],
+ "gvcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d"
+ ]
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0"
+ ]
+ ],
+ "vcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ]
+ }
],
- "1": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217"
- ]
- ],
- "2": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952"
- ]
- ],
- "3": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a"
- ]
- ],
- "4": [
- "versions.yml:md5,4678f778b58276933b165fe3e84afc6a"
- ],
- "gvcf": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952"
- ]
- ],
- "gvcf_tbi": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a"
- ]
- ],
- "vcf": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4"
- ]
- ],
- "vcf_tbi": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217"
- ]
- ],
- "versions": [
- "versions.yml:md5,4678f778b58276933b165fe3e84afc6a"
- ]
- }
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-01T12:09:40.987117305"
},
- "timestamp": "2024-03-20T13:54:42.757335334"
- },
- "homo_sapiens - [bam, bai] - fasta - fai": {
- "content": [
- {
- "0": [
- [
+ "homo_sapiens - [bam, bai] - fasta - fai": {
+ "content": [
{
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4"
- ]
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d"
+ ]
+ ],
+ "4": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ],
+ "gvcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a"
+ ]
+ ],
+ "gvcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d"
+ ]
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0"
+ ]
+ ],
+ "vcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ]
+ }
],
- "1": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217"
- ]
- ],
- "2": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952"
- ]
- ],
- "3": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a"
- ]
- ],
- "4": [
- "versions.yml:md5,4678f778b58276933b165fe3e84afc6a"
- ],
- "gvcf": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952"
- ]
- ],
- "gvcf_tbi": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a"
- ]
- ],
- "vcf": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4"
- ]
- ],
- "vcf_tbi": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217"
- ]
- ],
- "versions": [
- "versions.yml:md5,4678f778b58276933b165fe3e84afc6a"
- ]
- }
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-01T12:08:47.058887374"
},
- "timestamp": "2024-03-20T13:54:18.409489045"
- },
- "homo_sapiens - [cram, crai, genome_bed] - fasta - fai": {
- "content": [
- {
- "0": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4"
- ]
- ],
- "1": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217"
- ]
- ],
- "2": [
- [
+ "homo_sapiens - [cram, crai, genome_bed] - fasta - fai": {
+ "content": [
{
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952"
- ]
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d"
+ ]
+ ],
+ "4": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ],
+ "gvcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,0a629e1745926cfcedf4b169046a921a"
+ ]
+ ],
+ "gvcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,49503913c28ec70a6f4aa52f6b357b4d"
+ ]
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,8b8ab4a675f01e437aa72e1438a717d0"
+ ]
+ ],
+ "vcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0000833138104e87b05eaa906821eb21"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ]
+ }
],
- "3": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a"
- ]
- ],
- "4": [
- "versions.yml:md5,4678f778b58276933b165fe3e84afc6a"
- ],
- "gvcf": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz:md5,8d6ac08997bfd848a0a4d9d295e76952"
- ]
- ],
- "gvcf_tbi": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.g.vcf.gz.tbi:md5,37e2d8f4cca0a21113cede608f54885a"
- ]
- ],
- "vcf": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz:md5,7cba1516f7cf0888765d5ee8caf275f4"
- ]
- ],
- "vcf_tbi": [
- [
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-01T12:09:13.952808655"
+ },
+ "homo_sapiens - [cram, crai, genome_bed] - fasta - fai - par_bed": {
+ "content": [
{
- "id": "test",
- "single_end": false
- },
- "test_out.vcf.gz.tbi:md5,02a78562bc83520a51010a01fb06f217"
- ]
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,d2e26d65dbbcea9b087ed191b5c9841c"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0801296d0356415bbf1ef8deb4ec84c3"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,4fcaa9a8b55730d191382160c2b5bb0a"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,f468e846904733b3231ecf00ef7cd4a2"
+ ]
+ ],
+ "4": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ],
+ "gvcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz:md5,4fcaa9a8b55730d191382160c2b5bb0a"
+ ]
+ ],
+ "gvcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.g.vcf.gz.tbi:md5,f468e846904733b3231ecf00ef7cd4a2"
+ ]
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz:md5,d2e26d65dbbcea9b087ed191b5c9841c"
+ ]
+ ],
+ "vcf_tbi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_out.vcf.gz.tbi:md5,0801296d0356415bbf1ef8deb4ec84c3"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,efbdcb1ad96e0209b31bcafedd0017a8"
+ ]
+ }
],
- "versions": [
- "versions.yml:md5,4678f778b58276933b165fe3e84afc6a"
- ]
- }
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-03-20T13:54:30.523871801"
- }
+ "meta": {
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-07-23T14:29:24.939680679"
+ }
}
diff --git a/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config b/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config
new file mode 100644
index 00000000..4be8986b
--- /dev/null
+++ b/modules/nf-core/deepvariant/tests/nextflow-non-autosomal-calling.config
@@ -0,0 +1,8 @@
+process {
+
+ withName: DEEPVARIANT {
+ ext.args = '--model_type=WGS --haploid_contigs chr22'
+ ext.prefix = { "${meta.id}_out" }
+ }
+
+}
diff --git a/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff b/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff
new file mode 100644
index 00000000..8626a663
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff
@@ -0,0 +1,25 @@
+Changes in module 'nf-core/ensemblvep/vep'
+--- modules/nf-core/ensemblvep/vep/main.nf
++++ modules/nf-core/ensemblvep/vep/main.nf
+@@ -4,8 +4,8 @@
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+- 'https://depot.galaxyproject.org/singularity/ensembl-vep:111.0--pl5321h2a3209d_0' :
+- 'biocontainers/ensembl-vep:111.0--pl5321h2a3209d_0' }"
++ 'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' :
++ 'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }"
+
+ input:
+ tuple val(meta), path(vcf), path(custom_extra_files)
+
+--- modules/nf-core/ensemblvep/vep/environment.yml
++++ modules/nf-core/ensemblvep/vep/environment.yml
+@@ -4,4 +4,4 @@
+ - bioconda
+ - defaults
+ dependencies:
+- - bioconda::ensembl-vep=111.0
++ - bioconda::ensembl-vep=110.0
+
+************************************************************
diff --git a/modules/nf-core/ensemblvep/vep/environment.yml b/modules/nf-core/ensemblvep/vep/environment.yml
new file mode 100644
index 00000000..7a127746
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/environment.yml
@@ -0,0 +1,7 @@
+name: ensemblvep_vep
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::ensembl-vep=110.0
diff --git a/modules/nf-core/ensemblvep/vep/main.nf b/modules/nf-core/ensemblvep/vep/main.nf
index da0e3646..c3e5d53e 100644
--- a/modules/nf-core/ensemblvep/vep/main.nf
+++ b/modules/nf-core/ensemblvep/vep/main.nf
@@ -2,7 +2,7 @@ process ENSEMBLVEP_VEP {
tag "$meta.id"
label 'process_medium'
- conda "bioconda::ensembl-vep=110.0"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ensembl-vep:110.0--pl5321h2a3209d_0' :
'biocontainers/ensembl-vep:110.0--pl5321h2a3209d_0' }"
@@ -20,7 +20,7 @@ process ENSEMBLVEP_VEP {
tuple val(meta), path("*.vcf.gz") , optional:true, emit: vcf
tuple val(meta), path("*.tab.gz") , optional:true, emit: tab
tuple val(meta), path("*.json.gz") , optional:true, emit: json
- path "*.summary.html" , emit: report
+ path "*.html" , optional:true, emit: report
path "versions.yml" , emit: versions
when:
@@ -45,8 +45,7 @@ process ENSEMBLVEP_VEP {
--cache \\
--cache_version $cache_version \\
--dir_cache $dir_cache \\
- --fork $task.cpus \\
- --stats_file ${prefix}.summary.html \\
+ --fork $task.cpus
cat <<-END_VERSIONS > versions.yml
@@ -58,10 +57,10 @@ process ENSEMBLVEP_VEP {
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
- touch ${prefix}.vcf.gz
- touch ${prefix}.tab.gz
- touch ${prefix}.json.gz
- touch ${prefix}.summary.html
+ echo "" | gzip > ${prefix}.vcf.gz
+ echo "" | gzip > ${prefix}.tab.gz
+ echo "" | gzip > ${prefix}.json.gz
+ touch ${prefix}_summary.html
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/nf-core/ensemblvep/vep/meta.yml b/modules/nf-core/ensemblvep/vep/meta.yml
index 7783847d..d8ff8d14 100644
--- a/modules/nf-core/ensemblvep/vep/meta.yml
+++ b/modules/nf-core/ensemblvep/vep/meta.yml
@@ -1,4 +1,4 @@
-name: ENSEMBLVEP_VEP
+name: ensemblvep_vep
description: Ensembl Variant Effect Predictor (VEP). The output-file-format is controlled through `task.ext.args`.
keywords:
- annotation
@@ -86,3 +86,7 @@ authors:
- "@maxulysse"
- "@matthdsm"
- "@nvnieuwk"
+maintainers:
+ - "@maxulysse"
+ - "@matthdsm"
+ - "@nvnieuwk"
diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test b/modules/nf-core/ensemblvep/vep/tests/main.nf.test
new file mode 100644
index 00000000..4aff84a3
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test
@@ -0,0 +1,114 @@
+nextflow_process {
+
+ name "Test Process ENSEMBLVEP_VEP"
+ script "../main.nf"
+ process "ENSEMBLVEP_VEP"
+ config "./nextflow.config"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "ensemblvep"
+ tag "ensemblvep/vep"
+ tag "ensemblvep/download"
+
+ test("test_ensemblvep_vep_fasta_vcf") {
+ config "./vcf.config"
+
+ setup {
+ run("ENSEMBLVEP_DOWNLOAD") {
+ script "../../download/main.nf"
+
+ process {
+ """
+ input[0] = Channel.of([
+ [id:"111_WBcel235"],
+ params.vep_genome,
+ params.vep_species,
+ params.vep_cache_version
+ ])
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
+ []
+ ])
+ input[1] = params.vep_genome
+ input[2] = params.vep_species
+ input[3] = params.vep_cache_version
+ input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] }
+ input[5] = Channel.value([
+ [id:"fasta"],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[6] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match() },
+ { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.2") }
+ )
+ }
+
+ }
+
+ test("test_ensemblvep_vep_fasta_tab_gz") {
+ config "./tab.gz.config"
+
+ setup {
+ run("ENSEMBLVEP_DOWNLOAD") {
+ script "../../download/main.nf"
+
+ process {
+ """
+ input[0] = Channel.of([
+ [id:"111_WBcel235"],
+ params.vep_genome,
+ params.vep_species,
+ params.vep_cache_version
+ ])
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true),
+ []
+ ])
+ input[1] = params.vep_genome
+ input[2] = params.vep_species
+ input[3] = params.vep_cache_version
+ input[4] = ENSEMBLVEP_DOWNLOAD.out.cache.map{ meta, cache -> [cache] }
+ input[5] = Channel.value([
+ [id:"fasta"],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ input[6] = []
+ """
+ }
+ }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.versions).match() },
+                { assert path(process.out.tab.get(0).get(1)).linesGzip.contains("## ENSEMBL VARIANT EFFECT PREDICTOR v110.0") } // header must match the VEP release pinned in main.nf / environment.yml (110.0)
+            )
+        }
+ }
+}
diff --git a/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap
new file mode 100644
index 00000000..f937b299
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/tests/main.nf.test.snap
@@ -0,0 +1,26 @@
+{
+ "test_ensemblvep_vep_fasta_tab_gz": {
+ "content": [
+ [
+ "versions.yml:md5,bd2ba1b4741a7d0a224160b50859f4ba"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.03.0"
+ },
+ "timestamp": "2024-04-15T17:35:20.694114"
+ },
+ "test_ensemblvep_vep_fasta_vcf": {
+ "content": [
+ [
+ "versions.yml:md5,bd2ba1b4741a7d0a224160b50859f4ba"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.03.0"
+ },
+ "timestamp": "2024-04-15T17:34:41.093843"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/ensemblvep/vep/tests/nextflow.config b/modules/nf-core/ensemblvep/vep/tests/nextflow.config
new file mode 100644
index 00000000..882bce41
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/tests/nextflow.config
@@ -0,0 +1,12 @@
+params {
+ vep_cache_version = "111"
+ vep_genome = "WBcel235"
+ vep_species = "caenorhabditis_elegans"
+}
+
+process {
+ withName: ENSEMBLVEP_DOWNLOAD {
+ ext.args = '--AUTO c --CONVERT --NO_BIOPERL --NO_HTSLIB --NO_TEST --NO_UPDATE'
+ ext.prefix = { "${params.vep_cache_version}_${params.vep_genome}" }
+ }
+}
diff --git a/modules/nf-core/ensemblvep/vep/tests/tab.gz.config b/modules/nf-core/ensemblvep/vep/tests/tab.gz.config
new file mode 100644
index 00000000..40eb03e5
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/tests/tab.gz.config
@@ -0,0 +1,5 @@
+process {
+ withName: ENSEMBLVEP_VEP {
+ ext.args = '--tab --compress_output bgzip'
+ }
+}
diff --git a/modules/nf-core/ensemblvep/vep/tests/tags.yml b/modules/nf-core/ensemblvep/vep/tests/tags.yml
new file mode 100644
index 00000000..4aa4aa45
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/tests/tags.yml
@@ -0,0 +1,2 @@
+ensemblvep/vep:
+ - "modules/nf-core/ensemblvep/vep/**"
diff --git a/modules/nf-core/ensemblvep/vep/tests/vcf.config b/modules/nf-core/ensemblvep/vep/tests/vcf.config
new file mode 100644
index 00000000..ad8955a3
--- /dev/null
+++ b/modules/nf-core/ensemblvep/vep/tests/vcf.config
@@ -0,0 +1,5 @@
+process {
+ withName: ENSEMBLVEP_VEP {
+ ext.args = '--vcf'
+ }
+}
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
deleted file mode 100644
index 4fc19b74..00000000
--- a/modules/nf-core/fastp/main.nf
+++ /dev/null
@@ -1,120 +0,0 @@
-process FASTP {
- tag "$meta.id"
- label 'process_medium'
-
- conda "${moduleDir}/environment.yml"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' :
- 'biocontainers/fastp:0.23.4--h5f740d0_0' }"
-
- input:
- tuple val(meta), path(reads)
- path adapter_fasta
- val save_trimmed_fail
- val save_merged
-
- output:
- tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads
- tuple val(meta), path('*.json') , emit: json
- tuple val(meta), path('*.html') , emit: html
- tuple val(meta), path('*.log') , emit: log
- path "versions.yml" , emit: versions
- tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail
- tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
- def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
- def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
- // Added soft-links to original fastqs for consistent naming in MultiQC
- // Use single ended for interleaved. Add --interleaved_in in config.
- if ( task.ext.args?.contains('--interleaved_in') ) {
- """
- [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
-
- fastp \\
- --stdout \\
- --in1 ${prefix}.fastq.gz \\
- --thread $task.cpus \\
- --json ${prefix}.fastp.json \\
- --html ${prefix}.fastp.html \\
- $adapter_list \\
- $fail_fastq \\
- $args \\
- 2> >(tee ${prefix}.fastp.log >&2) \\
- | gzip -c > ${prefix}.fastp.fastq.gz
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
- END_VERSIONS
- """
- } else if (meta.single_end) {
- """
- [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
-
- fastp \\
- --in1 ${prefix}.fastq.gz \\
- --out1 ${prefix}.fastp.fastq.gz \\
- --thread $task.cpus \\
- --json ${prefix}.fastp.json \\
- --html ${prefix}.fastp.html \\
- $adapter_list \\
- $fail_fastq \\
- $args \\
- 2> >(tee ${prefix}.fastp.log >&2)
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
- END_VERSIONS
- """
- } else {
- def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
- """
- [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz
- [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz
- fastp \\
- --in1 ${prefix}_1.fastq.gz \\
- --in2 ${prefix}_2.fastq.gz \\
- --out1 ${prefix}_1.fastp.fastq.gz \\
- --out2 ${prefix}_2.fastp.fastq.gz \\
- --json ${prefix}.fastp.json \\
- --html ${prefix}.fastp.html \\
- $adapter_list \\
- $fail_fastq \\
- $merge_fastq \\
- --thread $task.cpus \\
- --detect_adapter_for_pe \\
- $args \\
- 2> >(tee ${prefix}.fastp.log >&2)
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
- END_VERSIONS
- """
- }
-
- stub:
- def prefix = task.ext.prefix ?: "${meta.id}"
- def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end
- def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz"
- def touch_merged = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : ""
- """
- touch $touch_reads
- touch "${prefix}.fastp.json"
- touch "${prefix}.fastp.html"
- touch "${prefix}.fastp.log"
- $touch_merged
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
- END_VERSIONS
- """
-}
diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml
deleted file mode 100644
index c22a16ab..00000000
--- a/modules/nf-core/fastp/meta.yml
+++ /dev/null
@@ -1,75 +0,0 @@
-name: fastp
-description: Perform adapter/quality trimming on sequencing reads
-keywords:
- - trimming
- - quality control
- - fastq
-tools:
- - fastp:
- description: |
- A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance.
- documentation: https://github.com/OpenGene/fastp
- doi: 10.1093/bioinformatics/bty560
- licence: ["MIT"]
-input:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads.
- e.g. [ id:'test', single_end:false ]
- - reads:
- type: file
- description: |
- List of input FastQ files of size 1 and 2 for single-end and paired-end data,
- respectively. If you wish to run interleaved paired-end data, supply as single-end data
- but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
- - adapter_fasta:
- type: file
- description: File in FASTA format containing possible adapters to remove.
- pattern: "*.{fasta,fna,fas,fa}"
- - save_trimmed_fail:
- type: boolean
- description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`
- - save_merged:
- type: boolean
- description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz`
-output:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [ id:'test', single_end:false ]
- - reads:
- type: file
- description: The trimmed/modified/unmerged fastq reads
- pattern: "*fastp.fastq.gz"
- - json:
- type: file
- description: Results in JSON format
- pattern: "*.json"
- - html:
- type: file
- description: Results in HTML format
- pattern: "*.html"
- - log:
- type: file
- description: fastq log file
- pattern: "*.log"
- - versions:
- type: file
- description: File containing software versions
- pattern: "versions.yml"
- - reads_fail:
- type: file
- description: Reads the failed the preprocessing
- pattern: "*fail.fastq.gz"
- - reads_merged:
- type: file
- description: Reads that were successfully merged
- pattern: "*.{merged.fastq.gz}"
-authors:
- - "@drpatelh"
- - "@kevinmenden"
-maintainers:
- - "@drpatelh"
- - "@kevinmenden"
diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test
deleted file mode 100644
index 6f1f4897..00000000
--- a/modules/nf-core/fastp/tests/main.nf.test
+++ /dev/null
@@ -1,725 +0,0 @@
-nextflow_process {
-
- name "Test Process FASTP"
- script "../main.nf"
- process "FASTP"
- tag "modules"
- tag "modules_nfcore"
- tag "fastp"
-
- test("test_fastp_single_end") {
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:true ],
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ "Q20 bases:12.922000 K (92.984097%)",
- "single end (151 cycles)" ]
- def log_text = [ "Q20 bases: 12922(92.9841%)",
- "reads passed filter: 99" ]
- def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
- "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
- "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
- { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { assert snapshot(process.out.json).match("test_fastp_single_end_json") },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { file(it[1]).getName() } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_single_end-_match")
- },
- { assert snapshot(process.out.versions).match("versions_single_end") }
- )
- }
- }
-
- test("test_fastp_single_end-stub") {
-
- options '-stub'
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:true ],
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
-
- assertAll(
- { assert process.success },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { file(it[1]).getName() } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_single_end-for_stub_match")
- },
- { assert snapshot(process.out.versions).match("versions_single_end_stub") }
- )
- }
- }
-
- test("test_fastp_paired_end") {
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:false ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
- file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
- "The input has little adapter percentage (~0.000000%), probably it's trimmed before."]
- def log_text = [ "No adapter detected for read1",
- "Q30 bases: 12281(88.3716%)"]
- def json_text = ['"passed_filter_reads": 198']
- def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
- "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
- "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
- { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
- }
- },
- { read2_lines.each { read2_line ->
- { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { json_text.each { json_part ->
- { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
- }
- },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_paired_end_match")
- },
- { assert snapshot(process.out.versions).match("versions_paired_end") }
- )
- }
- }
-
- test("test_fastp_paired_end-stub") {
-
- options '-stub'
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:false ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
- file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- assertAll(
- { assert process.success },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_paired_end-for_stub_match")
- },
- { assert snapshot(process.out.versions).match("versions_paired_end-stub") }
- )
- }
- }
-
- test("fastp test_fastp_interleaved") {
-
- config './nextflow.interleaved.config'
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:true ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
- "paired end (151 cycles + 151 cycles)"]
- def log_text = [ "Q20 bases: 12922(92.9841%)",
- "reads passed filter: 162"]
- def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
- "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
- "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
- { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { file(it[1]).getName() } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_interleaved-_match")
- },
- { assert snapshot(process.out.versions).match("versions_interleaved") }
- )
- }
- }
-
- test("fastp test_fastp_interleaved-stub") {
-
- options '-stub'
-
- config './nextflow.interleaved.config'
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:true ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- assertAll(
- { assert process.success },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { file(it[1]).getName() } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_interleaved-for_stub_match")
- },
- { assert snapshot(process.out.versions).match("versions_interleaved-stub") }
- )
- }
- }
-
- test("test_fastp_single_end_trim_fail") {
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = true
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:true ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ "Q20 bases: | 12.922000 K (92.984097%)",
- "single end (151 cycles)"]
- def log_text = [ "Q20 bases: 12922(92.9841%)",
- "reads passed filter: 99" ]
- def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
- "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
- "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
- { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
- }
- },
- { failed_read_lines.each { failed_read_line ->
- { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- { assert snapshot(process.out.versions).match("versions_single_end_trim_fail") }
- )
- }
- }
-
- test("test_fastp_paired_end_trim_fail") {
-
- config './nextflow.save_failed.config'
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = true
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:false ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
- file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
- "The input has little adapter percentage (~0.000000%), probably it's trimmed before."]
- def log_text = [ "No adapter detected for read1",
- "Q30 bases: 12281(88.3716%)"]
- def json_text = ['"passed_filter_reads": 162']
- def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
- "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
- "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
- { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
- }
- },
- { read2_lines.each { read2_line ->
- { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
- }
- },
- { failed_read2_lines.each { failed_read2_line ->
- { assert path(process.out.reads_fail.get(0).get(1).get(2)).linesGzip.contains(failed_read2_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { json_text.each { json_part ->
- { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
- }
- },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- { assert snapshot(process.out.versions).match("versions_paired_end_trim_fail") }
- )
- }
- }
-
- test("test_fastp_paired_end_merged") {
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = true
- input[0] = Channel.of([
- [ id:'test', single_end:false ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
- file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ ""]
- def log_text = [ "Merged and filtered:",
- "total reads: 75",
- "total bases: 13683"]
- def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683']
- def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1",
- "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC",
- "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE
- { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
- }
- },
- { read2_lines.each { read2_line ->
- { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
- }
- },
- { read_merged_lines.each { read_merged_line ->
- { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { json_text.each { json_part ->
- { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
- }
- },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_paired_end_merged_match")
- },
- { assert snapshot(process.out.versions).match("versions_paired_end_merged") }
- )
- }
- }
-
- test("test_fastp_paired_end_merged-stub") {
-
- options '-stub'
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = true
-
- input[0] = Channel.of([
- [ id:'test', single_end:false ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
- file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- assertAll(
- { assert process.success },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_paired_end_merged-for_stub_match")
- },
- { assert snapshot(process.out.versions).match("versions_paired_end_merged_stub") }
- )
- }
- }
-
- test("test_fastp_paired_end_merged_adapterlist") {
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ])
- save_trimmed_fail = false
- save_merged = true
-
- input[0] = Channel.of([
- [ id:'test', single_end:false ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
- file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ ""]
- def log_text = [ "Merged and filtered:",
- "total reads: 75",
- "total bases: 13683"]
- def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"]
- def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1",
- "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC",
- "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE
- { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
- }
- },
- { read2_lines.each { read2_line ->
- { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
- }
- },
- { read_merged_lines.each { read_merged_line ->
- { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { json_text.each { json_part ->
- { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
- }
- },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- { assert snapshot(process.out.versions).match("versions_paired_end_merged_adapterlist") }
- )
- }
- }
-}
diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap
deleted file mode 100644
index 3e876288..00000000
--- a/modules/nf-core/fastp/tests/main.nf.test.snap
+++ /dev/null
@@ -1,330 +0,0 @@
-{
- "fastp test_fastp_interleaved_json": {
- "content": [
- [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test.fastp.json:md5,b24e0624df5cc0b11cd5ba21b726fb22"
- ]
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-03-18T16:19:15.063001"
- },
- "test_fastp_paired_end_merged-for_stub_match": {
- "content": [
- [
- [
- "test_1.fastp.fastq.gz",
- "test_2.fastp.fastq.gz"
- ],
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "test.merged.fastq.gz",
- "{id=test, single_end=false}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-01-17T18:10:13.467574"
- },
- "versions_interleaved": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:56:24.615634793"
- },
- "test_fastp_single_end_json": {
- "content": [
- [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc"
- ]
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-03-18T16:18:43.526412"
- },
- "versions_paired_end": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:55:42.333545689"
- },
- "test_fastp_paired_end_match": {
- "content": [
- [
- [
- "test_1.fastp.fastq.gz",
- "test_2.fastp.fastq.gz"
- ],
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "{id=test, single_end=false}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T12:03:06.431833729"
- },
- "test_fastp_interleaved-_match": {
- "content": [
- [
- "test.fastp.fastq.gz",
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "{id=test, single_end=true}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-03-18T16:19:15.111894"
- },
- "test_fastp_paired_end_merged_match": {
- "content": [
- [
- [
- "test_1.fastp.fastq.gz",
- "test_2.fastp.fastq.gz"
- ],
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "test.merged.fastq.gz",
- "{id=test, single_end=false}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T12:08:44.496251446"
- },
- "versions_single_end_stub": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:55:27.354051299"
- },
- "versions_interleaved-stub": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:56:46.535528418"
- },
- "versions_single_end_trim_fail": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:59:03.724591407"
- },
- "test_fastp_paired_end-for_stub_match": {
- "content": [
- [
- [
- "test_1.fastp.fastq.gz",
- "test_2.fastp.fastq.gz"
- ],
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "{id=test, single_end=false}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-01-17T18:07:15.398827"
- },
- "versions_paired_end-stub": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:56:06.50017282"
- },
- "versions_single_end": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:55:07.67921647"
- },
- "versions_paired_end_merged_stub": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:59:47.350653154"
- },
- "test_fastp_interleaved-for_stub_match": {
- "content": [
- [
- "test.fastp.fastq.gz",
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "{id=test, single_end=true}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-01-17T18:08:06.127974"
- },
- "versions_paired_end_trim_fail": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:59:18.140484878"
- },
- "test_fastp_single_end-for_stub_match": {
- "content": [
- [
- "test.fastp.fastq.gz",
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "{id=test, single_end=true}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-01-17T18:06:00.244202"
- },
- "test_fastp_single_end-_match": {
- "content": [
- [
- "test.fastp.fastq.gz",
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "{id=test, single_end=true}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-03-18T16:18:43.580336"
- },
- "versions_paired_end_merged_adapterlist": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T12:05:37.845370554"
- },
- "versions_paired_end_merged": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:59:32.860543858"
- },
- "test_fastp_single_end_trim_fail_json": {
- "content": [
- [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5"
- ]
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-01-17T18:08:41.942317"
- }
-}
\ No newline at end of file
diff --git a/modules/nf-core/fastp/tests/nextflow.interleaved.config b/modules/nf-core/fastp/tests/nextflow.interleaved.config
deleted file mode 100644
index 4be8dbd2..00000000
--- a/modules/nf-core/fastp/tests/nextflow.interleaved.config
+++ /dev/null
@@ -1,5 +0,0 @@
-process {
- withName: FASTP {
- ext.args = "--interleaved_in -e 30"
- }
-}
diff --git a/modules/nf-core/fastp/tests/nextflow.save_failed.config b/modules/nf-core/fastp/tests/nextflow.save_failed.config
deleted file mode 100644
index 53b61b0c..00000000
--- a/modules/nf-core/fastp/tests/nextflow.save_failed.config
+++ /dev/null
@@ -1,5 +0,0 @@
-process {
- withName: FASTP {
- ext.args = "-e 30"
- }
-}
diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml
deleted file mode 100644
index c1afcce7..00000000
--- a/modules/nf-core/fastp/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-fastp:
- - modules/nf-core/fastp/**
diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/gawk/environment.yml
similarity index 62%
rename from modules/nf-core/fastp/environment.yml
rename to modules/nf-core/gawk/environment.yml
index 70389e66..3d98a08b 100644
--- a/modules/nf-core/fastp/environment.yml
+++ b/modules/nf-core/gawk/environment.yml
@@ -1,7 +1,7 @@
-name: fastp
+name: gawk
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- - bioconda::fastp=0.23.4
+ - conda-forge::gawk=5.3.0
diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf
new file mode 100644
index 00000000..ca468929
--- /dev/null
+++ b/modules/nf-core/gawk/main.nf
@@ -0,0 +1,55 @@
+process GAWK {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/gawk:5.3.0' :
+ 'biocontainers/gawk:5.3.0' }"
+
+ input:
+ tuple val(meta), path(input)
+ path(program_file)
+
+ output:
+ tuple val(meta), path("${prefix}.${suffix}"), emit: output
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '' // args is used for the main arguments of the tool
+ def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given
+ prefix = task.ext.prefix ?: "${meta.id}"
+ suffix = task.ext.suffix ?: "${input.getExtension()}"
+
+ program = program_file ? "-f ${program_file}" : "${args2}"
+
+ """
+ awk \\
+ ${args} \\
+ ${program} \\
+ ${input} \\
+ > ${prefix}.${suffix}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+ END_VERSIONS
+ """
+
+ stub:
+ prefix = task.ext.prefix ?: "${meta.id}"
+ suffix = task.ext.suffix ?: "${input.getExtension()}"
+ def create_cmd = suffix.endsWith("gz") ? "echo '' | gzip >" : "touch"
+
+ """
+ ${create_cmd} ${prefix}.${suffix}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/gawk/meta.yml b/modules/nf-core/gawk/meta.yml
new file mode 100644
index 00000000..2b6033b0
--- /dev/null
+++ b/modules/nf-core/gawk/meta.yml
@@ -0,0 +1,50 @@
+name: "gawk"
+description: |
+ If you are like many computer users, you would frequently like to make changes in various text files
+ wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest.
+ The job is easy with awk, especially the GNU implementation gawk.
+keywords:
+ - gawk
+ - awk
+ - txt
+ - text
+ - file parsing
+tools:
+ - "gawk":
+ description: "GNU awk"
+ homepage: "https://www.gnu.org/software/gawk/"
+ documentation: "https://www.gnu.org/software/gawk/manual/"
+ tool_dev_url: "https://www.gnu.org/prep/ftp.html"
+ licence: ["GPL v3"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - input:
+ type: file
+ description: The input file - Specify the logic that needs to be executed on this file on the `ext.args2` or in the program file
+ pattern: "*"
+ - program_file:
+ type: file
+ description: Optional file containing logic for awk to execute. If you don't wish to use a file, you can use `ext.args2` to specify the logic.
+ pattern: "*"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - output:
+ type: file
+ description: The output file - specify the name of this file using `ext.prefix` and the extension using `ext.suffix`
+ pattern: "*"
+authors:
+ - "@nvnieuwk"
+maintainers:
+ - "@nvnieuwk"
diff --git a/modules/nf-core/gawk/tests/main.nf.test b/modules/nf-core/gawk/tests/main.nf.test
new file mode 100644
index 00000000..fce82ca9
--- /dev/null
+++ b/modules/nf-core/gawk/tests/main.nf.test
@@ -0,0 +1,56 @@
+nextflow_process {
+
+ name "Test Process GAWK"
+ script "../main.nf"
+ process "GAWK"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "gawk"
+
+ test("convert fasta to bed") {
+ config "./nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("convert fasta to bed with program file") {
+ config "./nextflow_with_program_file.config"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[1] = Channel.of('BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}').collectFile(name:"program.txt")
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gawk/tests/main.nf.test.snap b/modules/nf-core/gawk/tests/main.nf.test.snap
new file mode 100644
index 00000000..4f3a759c
--- /dev/null
+++ b/modules/nf-core/gawk/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+ "convert fasta to bed with program file": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a"
+ ],
+ "output": [
+ [
+ {
+ "id": "test"
+ },
+ "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.03.0"
+ },
+ "timestamp": "2024-05-17T15:20:02.495430346"
+ },
+ "convert fasta to bed": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a"
+ ],
+ "output": [
+ [
+ {
+ "id": "test"
+ },
+ "test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,842acc9870dc8ac280954047cb2aa23a"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.03.0"
+ },
+ "timestamp": "2024-05-17T15:19:53.291809648"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gawk/tests/nextflow.config b/modules/nf-core/gawk/tests/nextflow.config
new file mode 100644
index 00000000..6e5d43a3
--- /dev/null
+++ b/modules/nf-core/gawk/tests/nextflow.config
@@ -0,0 +1,6 @@
+process {
+ withName: GAWK { // settings applied only to the GAWK process
+ ext.suffix = "bed" // extension of the output file (the test snapshot expects test.bed)
+ ext.args2 = '\'BEGIN {FS="\t"}; {print \$1 FS "0" FS \$2}\'' // inline awk program: tab as FS, prints column 1, "0" and column 2
+ }
+}
diff --git a/modules/nf-core/gawk/tests/nextflow_with_program_file.config b/modules/nf-core/gawk/tests/nextflow_with_program_file.config
new file mode 100644
index 00000000..693ad419
--- /dev/null
+++ b/modules/nf-core/gawk/tests/nextflow_with_program_file.config
@@ -0,0 +1,5 @@
+process {
+ withName: GAWK { // settings applied only to the GAWK process
+ ext.suffix = "bed" // output extension only; no ext.args2 because the test passes the awk program as a file
+ }
+}
diff --git a/modules/nf-core/gawk/tests/tags.yml b/modules/nf-core/gawk/tests/tags.yml
new file mode 100644
index 00000000..72e4531d
--- /dev/null
+++ b/modules/nf-core/gawk/tests/tags.yml
@@ -0,0 +1,2 @@
+gawk:
+ - "modules/nf-core/gawk/**"
diff --git a/modules/nf-core/genmod/annotate/environment.yml b/modules/nf-core/genmod/annotate/environment.yml
new file mode 100644
index 00000000..ac8140fd
--- /dev/null
+++ b/modules/nf-core/genmod/annotate/environment.yml
@@ -0,0 +1,7 @@
+name: genmod_annotate
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::genmod=3.8.2
diff --git a/modules/nf-core/genmod/annotate/main.nf b/modules/nf-core/genmod/annotate/main.nf
new file mode 100644
index 00000000..eb161187
--- /dev/null
+++ b/modules/nf-core/genmod/annotate/main.nf
@@ -0,0 +1,46 @@
+process GENMOD_ANNOTATE { // Runs `genmod annotate` on one VCF; emits <prefix>_annotate.vcf and versions.yml
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/genmod:3.8.2--pyhdfd78af_0':
+ 'biocontainers/genmod:3.8.2--pyhdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(input_vcf) // meta.id drives the tag and the default prefix; input_vcf is the file handed to genmod
+
+ output:
+ tuple val(meta), path("*_annotate.vcf"), emit: vcf // matches the --outfile name built in script/stub
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when // run unless ext.when is set to a falsy value
+
+ script:
+ def args = task.ext.args ?: '' // extra genmod options from config; empty string when unset
+ def prefix = task.ext.prefix ?: "${meta.id}" // output name prefix, defaults to the sample id
+ """
+ genmod \\
+ annotate \\
+ $args \\
+ --outfile ${prefix}_annotate.vcf \\
+ $input_vcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' )
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}" // same prefix rule as the script block
+ """
+ touch ${prefix}_annotate.vcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' )
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/genmod/annotate/meta.yml b/modules/nf-core/genmod/annotate/meta.yml
new file mode 100644
index 00000000..3c876ac5
--- /dev/null
+++ b/modules/nf-core/genmod/annotate/meta.yml
@@ -0,0 +1,41 @@
+name: "genmod_annotate"
+description: Annotate variants in a VCF file with regions, frequencies and CADD scores
+keywords:
+ - annotate
+ - genmod
+ - ranking
+tools:
+ - "genmod":
+ description: "Annotate genetic inheritance models in variant files"
+ homepage: "https://github.com/Clinical-Genomics/genmod"
+ documentation: "https://github.com/Clinical-Genomics/genmod"
+ tool_dev_url: "https://github.com/moonso"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - input_vcf:
+ type: file
+ description: VCF file to annotate (plain or gzipped)
+ pattern: "*.{vcf,vcf.gz}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - vcf:
+ type: file
+ description: Annotated VCF file, named <prefix>_annotate.vcf
+ pattern: "*.{vcf}"
+authors:
+ - "@ramprasadn"
+maintainers:
+ - "@ramprasadn"
diff --git a/modules/nf-core/genmod/annotate/tests/main.nf.test b/modules/nf-core/genmod/annotate/tests/main.nf.test
new file mode 100644
index 00000000..d17ebc9e
--- /dev/null
+++ b/modules/nf-core/genmod/annotate/tests/main.nf.test
@@ -0,0 +1,34 @@
+nextflow_process { // nf-test suite for GENMOD_ANNOTATE
+
+ name "Test Process GENMOD_ANNOTATE"
+ script "../main.nf"
+ process "GENMOD_ANNOTATE"
+ config "./nextflow.config" // supplies ext.args (--annotate_regions)
+ tag "modules"
+ tag "modules_nfcore"
+ tag "genmod"
+ tag "genmod/annotate"
+
+ test("genmod_annotate") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/genmod.vcf.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ {assert process.success},
+ {assert path(process.out.vcf.get(0).get(1)).readLines().contains("##fileformat=VCFv4.2")} // output must contain the VCF 4.2 header line
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/genmod/annotate/tests/nextflow.config b/modules/nf-core/genmod/annotate/tests/nextflow.config
new file mode 100644
index 00000000..a1860460
--- /dev/null
+++ b/modules/nf-core/genmod/annotate/tests/nextflow.config
@@ -0,0 +1,5 @@
+process{
+ withName: GENMOD_ANNOTATE { // settings applied only to GENMOD_ANNOTATE
+ ext.args = " --annotate_regions " // inserted verbatim into the `genmod annotate` command line as $args
+ }
+}
diff --git a/modules/nf-core/genmod/annotate/tests/tags.yml b/modules/nf-core/genmod/annotate/tests/tags.yml
new file mode 100644
index 00000000..88bb35a1
--- /dev/null
+++ b/modules/nf-core/genmod/annotate/tests/tags.yml
@@ -0,0 +1,2 @@
+genmod/annotate:
+ - modules/nf-core/genmod/annotate/**
diff --git a/modules/nf-core/genmod/compound/environment.yml b/modules/nf-core/genmod/compound/environment.yml
new file mode 100644
index 00000000..1e2561fd
--- /dev/null
+++ b/modules/nf-core/genmod/compound/environment.yml
@@ -0,0 +1,7 @@
+name: genmod_compound
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::genmod=3.8.2
diff --git a/modules/nf-core/genmod/compound/main.nf b/modules/nf-core/genmod/compound/main.nf
new file mode 100644
index 00000000..1731b722
--- /dev/null
+++ b/modules/nf-core/genmod/compound/main.nf
@@ -0,0 +1,47 @@
+process GENMOD_COMPOUND { // Runs `genmod compound` on one VCF; emits <prefix>_compound.vcf and versions.yml
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/genmod:3.8.2--pyhdfd78af_0':
+ 'biocontainers/genmod:3.8.2--pyhdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(input_vcf) // meta.id drives the tag and the default prefix; input_vcf is the file handed to genmod
+
+ output:
+ tuple val(meta), path("*_compound.vcf"), emit: vcf // matches the --outfile name built in script/stub
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when // run unless ext.when is set to a falsy value
+
+ script:
+ def args = task.ext.args ?: '' // extra genmod options from config; empty string when unset
+ def prefix = task.ext.prefix ?: "${meta.id}" // output name prefix, defaults to the sample id
+ """
+ genmod \\
+ compound \\
+ $args \\
+ --processes ${task.cpus} \\
+ --outfile ${prefix}_compound.vcf \\
+ $input_vcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' )
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}" // same prefix rule as the script block
+ """
+ touch ${prefix}_compound.vcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' )
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/genmod/compound/meta.yml b/modules/nf-core/genmod/compound/meta.yml
new file mode 100644
index 00000000..aa5f7da5
--- /dev/null
+++ b/modules/nf-core/genmod/compound/meta.yml
@@ -0,0 +1,41 @@
+name: "genmod_compound"
+description: Score compounds
+keywords:
+ - compound
+ - genmod
+ - ranking
+tools:
+ - "genmod":
+ description: "Annotate genetic inheritance models in variant files"
+ homepage: "https://github.com/Clinical-Genomics/genmod"
+ documentation: "https://github.com/Clinical-Genomics/genmod"
+ tool_dev_url: "https://github.com/moonso"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - input_vcf:
+ type: file
+ description: VCF file
+ pattern: "*.{vcf}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - vcf:
+ type: file
+ description: Output VCF file
+ pattern: "*.{vcf}"
+authors:
+ - "@ramprasadn"
+maintainers:
+ - "@ramprasadn"
diff --git a/modules/nf-core/genmod/compound/tests/main.nf.test b/modules/nf-core/genmod/compound/tests/main.nf.test
new file mode 100644
index 00000000..43ed9f0d
--- /dev/null
+++ b/modules/nf-core/genmod/compound/tests/main.nf.test
@@ -0,0 +1,33 @@
+nextflow_process { // nf-test suite for GENMOD_COMPOUND
+
+ name "Test Process GENMOD_COMPOUND"
+ script "../main.nf"
+ process "GENMOD_COMPOUND"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "genmod"
+ tag "genmod/compound"
+
+ test("genmod_compound") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/genmod_compound.vcf', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ {assert process.success},
+ {assert path(process.out.vcf.get(0).get(1)).readLines().contains("##fileformat=VCFv4.2")} // output must contain the VCF 4.2 header line
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/genmod/compound/tests/tags.yml b/modules/nf-core/genmod/compound/tests/tags.yml
new file mode 100644
index 00000000..870b35e8
--- /dev/null
+++ b/modules/nf-core/genmod/compound/tests/tags.yml
@@ -0,0 +1,2 @@
+genmod/compound:
+ - modules/nf-core/genmod/compound/**
diff --git a/modules/nf-core/genmod/models/environment.yml b/modules/nf-core/genmod/models/environment.yml
new file mode 100644
index 00000000..62a746c7
--- /dev/null
+++ b/modules/nf-core/genmod/models/environment.yml
@@ -0,0 +1,7 @@
+name: genmod_models
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::genmod=3.8.2
diff --git a/modules/nf-core/genmod/models/main.nf b/modules/nf-core/genmod/models/main.nf
new file mode 100644
index 00000000..0504574a
--- /dev/null
+++ b/modules/nf-core/genmod/models/main.nf
@@ -0,0 +1,53 @@
+process GENMOD_MODELS { // Runs `genmod models` on one VCF; emits <prefix>_models.vcf and versions.yml
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/genmod:3.8.2--pyhdfd78af_0':
+ 'biocontainers/genmod:3.8.2--pyhdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(input_vcf) // meta.id drives the tag and the default prefix; input_vcf is the file handed to genmod
+ path (fam) // family file; when empty, --family_file is left off the command
+ path (reduced_penetrance) // when empty, --reduced_penetrance is left off the command
+
+ output:
+ tuple val(meta), path("*_models.vcf"), emit: vcf // matches the --outfile name built in script/stub
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when // run unless ext.when is set to a falsy value
+
+ script:
+ def args = task.ext.args ?: '' // extra genmod options from config; empty string when unset
+ def prefix = task.ext.prefix ?: "${meta.id}" // output name prefix, defaults to the sample id
+ def family_file = fam ? "--family_file ${fam}" : "" // flag is only emitted when a file was provided
+ def pen_file = reduced_penetrance ? "--reduced_penetrance ${reduced_penetrance}" : "" // flag is only emitted when a file was provided
+ """
+ genmod \\
+ models \\
+ $args \\
+ $pen_file \\
+ $family_file \\
+ --processes ${task.cpus} \\
+ --outfile ${prefix}_models.vcf \\
+ $input_vcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' )
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}" // same prefix rule as the script block
+ """
+ touch ${prefix}_models.vcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' )
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/genmod/models/meta.yml b/modules/nf-core/genmod/models/meta.yml
new file mode 100644
index 00000000..dd9001e0
--- /dev/null
+++ b/modules/nf-core/genmod/models/meta.yml
@@ -0,0 +1,49 @@
+name: "genmod_models"
+description: Annotate models of inheritance
+keywords:
+ - models
+ - genmod
+ - ranking
+tools:
+ - "genmod":
+ description: "Annotate genetic inheritance models in variant files"
+ homepage: "https://github.com/Clinical-Genomics/genmod"
+ documentation: "https://github.com/Clinical-Genomics/genmod"
+ tool_dev_url: "https://github.com/moonso"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - input_vcf:
+ type: file
+ description: VCF file (plain or gzipped)
+ pattern: "*.{vcf,vcf.gz}"
+ - fam:
+ type: file
+ description: Optional ped file, passed to genmod as --family_file
+ pattern: "*.{ped}"
+ - reduced_penetrance:
+ type: file
+ description: Optional file with gene ids that have reduced penetrance, passed to genmod as --reduced_penetrance
+ pattern: "*.{tsv}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - vcf:
+ type: file
+ description: Output VCF file, named <prefix>_models.vcf
+ pattern: "*.{vcf}"
+authors:
+ - "@ramprasadn"
+maintainers:
+ - "@ramprasadn"
diff --git a/modules/nf-core/genmod/models/tests/main.nf.test b/modules/nf-core/genmod/models/tests/main.nf.test
new file mode 100644
index 00000000..9eba8dd5
--- /dev/null
+++ b/modules/nf-core/genmod/models/tests/main.nf.test
@@ -0,0 +1,35 @@
+nextflow_process { // nf-test suite for GENMOD_MODELS
+
+ name "Test Process GENMOD_MODELS"
+ script "../main.nf"
+ process "GENMOD_MODELS"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "genmod"
+ tag "genmod/models"
+
+ test("genmod_models") { // family file given, reduced-penetrance file left empty
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/test_annotate.vcf.gz', checkIfExists: true)
+ ]
+ input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/ped/justhusky.ped', checkIfExists: true)
+ input[2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ {assert process.success},
+ {assert path(process.out.vcf.get(0).get(1)).readLines().contains("##fileformat=VCFv4.2")} // output must contain the VCF 4.2 header line
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/genmod/models/tests/tags.yml b/modules/nf-core/genmod/models/tests/tags.yml
new file mode 100644
index 00000000..72b3b6bb
--- /dev/null
+++ b/modules/nf-core/genmod/models/tests/tags.yml
@@ -0,0 +1,2 @@
+genmod/models:
+ - modules/nf-core/genmod/models/**
diff --git a/modules/nf-core/genmod/score/environment.yml b/modules/nf-core/genmod/score/environment.yml
new file mode 100644
index 00000000..bacc4cec
--- /dev/null
+++ b/modules/nf-core/genmod/score/environment.yml
@@ -0,0 +1,7 @@
+name: genmod_score
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::genmod=3.8.2
diff --git a/modules/nf-core/genmod/score/main.nf b/modules/nf-core/genmod/score/main.nf
new file mode 100644
index 00000000..15be5f74
--- /dev/null
+++ b/modules/nf-core/genmod/score/main.nf
@@ -0,0 +1,52 @@
+process GENMOD_SCORE { // Runs `genmod score` on one VCF; emits <prefix>_score.vcf and versions.yml
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/genmod:3.8.2--pyhdfd78af_0':
+ 'biocontainers/genmod:3.8.2--pyhdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(input_vcf) // meta.id drives the tag and the default prefix; input_vcf is the file handed to genmod
+ path (fam) // family file; when empty, --family_file is left off the command
+ path (score_config) // when empty, --score_config is left off the command
+
+ output:
+ tuple val(meta), path("*_score.vcf"), emit: vcf // matches the --outfile name built in script/stub
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when // run unless ext.when is set to a falsy value
+
+ script:
+ def args = task.ext.args ?: '' // extra genmod options from config; empty string when unset
+ def prefix = task.ext.prefix ?: "${meta.id}" // output name prefix, defaults to the sample id
+ def family_file = fam ? "--family_file ${fam}" : "" // flag is only emitted when a file was provided
+ def config_file = score_config ? "--score_config ${score_config}" : "" // flag is only emitted when a file was provided
+ """
+ genmod \\
+ score \\
+ $args \\
+ $family_file \\
+ $config_file \\
+ --outfile ${prefix}_score.vcf \\
+ $input_vcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' )
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}" // same prefix rule as the script block
+ """
+ touch ${prefix}_score.vcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ genmod: \$(echo \$(genmod --version 2>&1) | sed 's/^.*genmod version: //' )
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/genmod/score/meta.yml b/modules/nf-core/genmod/score/meta.yml
new file mode 100644
index 00000000..8998b00c
--- /dev/null
+++ b/modules/nf-core/genmod/score/meta.yml
@@ -0,0 +1,49 @@
+name: "genmod_score"
+description: Score the variants of a vcf based on their annotation
+keywords:
+ - score
+ - ranking
+ - genmod
+tools:
+ - "genmod":
+ description: "Annotate genetic inheritance models in variant files"
+ homepage: "https://github.com/Clinical-Genomics/genmod"
+ documentation: "https://github.com/Clinical-Genomics/genmod"
+ tool_dev_url: "https://github.com/moonso"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - input_vcf:
+ type: file
+ description: VCF file (plain or gzipped)
+ pattern: "*.{vcf,vcf.gz}"
+ - fam:
+ type: file
+ description: Optional ped file, passed to genmod as --family_file
+ pattern: "*.{ped}"
+ - score_config:
+ type: file
+ description: Optional rank model config file, passed to genmod as --score_config
+ pattern: "*.{ini}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - vcf:
+ type: file
+ description: Output VCF file, named <prefix>_score.vcf
+ pattern: "*.{vcf}"
+authors:
+ - "@ramprasadn"
+maintainers:
+ - "@ramprasadn"
diff --git a/modules/nf-core/genmod/score/tests/main.nf.test b/modules/nf-core/genmod/score/tests/main.nf.test
new file mode 100644
index 00000000..030ef6ba
--- /dev/null
+++ b/modules/nf-core/genmod/score/tests/main.nf.test
@@ -0,0 +1,35 @@
+nextflow_process { // nf-test suite for GENMOD_SCORE
+
+ name "Test Process GENMOD_SCORE"
+ script "../main.nf"
+ process "GENMOD_SCORE"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "genmod"
+ tag "genmod/score"
+
+ test("genmod_score") { // both the family file and the rank model config are supplied
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/test_models.vcf.gz', checkIfExists: true)
+ ]
+ input[1] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/vcf/ped/justhusky.ped', checkIfExists: true)
+ input[2] = file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/genmod/svrank_model_-v1.8-.ini', checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ {assert process.success},
+ {assert path(process.out.vcf.get(0).get(1)).readLines().contains("##fileformat=VCFv4.2")} // output must contain the VCF 4.2 header line
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/genmod/score/tests/tags.yml b/modules/nf-core/genmod/score/tests/tags.yml
new file mode 100644
index 00000000..cef831ed
--- /dev/null
+++ b/modules/nf-core/genmod/score/tests/tags.yml
@@ -0,0 +1,2 @@
+genmod/score:
+ - modules/nf-core/genmod/score/**
diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml
index 25910b34..dfc02a7b 100644
--- a/modules/nf-core/gunzip/environment.yml
+++ b/modules/nf-core/gunzip/environment.yml
@@ -4,4 +4,6 @@ channels:
- bioconda
- defaults
dependencies:
- - conda-forge::sed=4.7
+ - conda-forge::grep=3.11
+ - conda-forge::sed=4.8
+ - conda-forge::tar=1.34
diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf
index 468a6f28..5e67e3b9 100644
--- a/modules/nf-core/gunzip/main.nf
+++ b/modules/nf-core/gunzip/main.nf
@@ -4,8 +4,8 @@ process GUNZIP {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
- 'nf-core/ubuntu:20.04' }"
+ 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' :
+ 'nf-core/ubuntu:22.04' }"
input:
tuple val(meta), path(archive)
@@ -18,8 +18,11 @@ process GUNZIP {
task.ext.when == null || task.ext.when
script:
- def args = task.ext.args ?: ''
- gunzip = archive.toString() - '.gz'
+ def args = task.ext.args ?: ''
+ def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1]
+ def name = archive.toString() - '.gz' - ".$extension"
+ def prefix = task.ext.prefix ?: name
+ gunzip = prefix + ".$extension"
"""
# Not calling gunzip itself because it creates files
# with the original group ownership rather than the
@@ -37,7 +40,11 @@ process GUNZIP {
"""
stub:
- gunzip = archive.toString() - '.gz'
+ def args = task.ext.args ?: ''
+ def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1]
+ def name = archive.toString() - '.gz' - ".$extension"
+ def prefix = task.ext.prefix ?: name
+ gunzip = prefix + ".$extension"
"""
touch $gunzip
cat <<-END_VERSIONS > versions.yml
diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml
index 231034f2..f32973a0 100644
--- a/modules/nf-core/gunzip/meta.yml
+++ b/modules/nf-core/gunzip/meta.yml
@@ -37,3 +37,4 @@ maintainers:
- "@joseespinosa"
- "@drpatelh"
- "@jfy133"
+ - "@gallvp"
diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test
index 6406008e..776211ad 100644
--- a/modules/nf-core/gunzip/tests/main.nf.test
+++ b/modules/nf-core/gunzip/tests/main.nf.test
@@ -33,4 +33,89 @@ nextflow_process {
}
+ test("Should run without failures - prefix") { // ext.prefix from ./nextflow.config replaces the archive's base name; the extension is kept
+
+ config './nextflow.config' // sets ext.prefix = "${meta.id}.xyz"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id: 'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ )
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() } // snapshot expects the output to be named test.xyz.fastq
+ )
+ }
+
+ }
+
+ test("Should run without failures - stub") { // no ext.prefix configured, so the output name is derived from the archive name
+
+ options '-stub' // run the stub block, which only touches the output file
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ )
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() } // snapshot expects the output to be named test_1.fastq
+ )
+ }
+
+ }
+
+ test("Should run without failures - prefix - stub") { // stub run combined with the ext.prefix override
+
+ options '-stub' // run the stub block, which only touches the output file
+ config './nextflow.config' // sets ext.prefix = "${meta.id}.xyz"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id: 'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ )
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() } // snapshot expects the output to be named test.xyz.fastq
+ )
+ }
+
+ }
+ }
+
}
diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap
index 720fd9ff..069967e7 100644
--- a/modules/nf-core/gunzip/tests/main.nf.test.snap
+++ b/modules/nf-core/gunzip/tests/main.nf.test.snap
@@ -1,4 +1,70 @@
{
+ "Should run without failures - prefix - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+ ],
+ "gunzip": [
+ [
+ {
+ "id": "test"
+ },
+ "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-25T11:35:10.861293"
+ },
+ "Should run without failures - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ [
+
+ ],
+ "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+ ],
+ "gunzip": [
+ [
+ [
+
+ ],
+ "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-25T11:35:05.857145"
+ },
"Should run without failures": {
"content": [
{
@@ -26,6 +92,43 @@
]
}
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
"timestamp": "2023-10-17T15:35:37.690477896"
+ },
+ "Should run without failures - prefix": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+ ],
+ "gunzip": [
+ [
+ {
+ "id": "test"
+ },
+ "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-25T11:33:32.921739"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config
new file mode 100644
index 00000000..dec77642
--- /dev/null
+++ b/modules/nf-core/gunzip/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: GUNZIP { // settings applied only to the GUNZIP process
+ ext.prefix = { "${meta.id}.xyz" } // closure, so meta.id is resolved per task; the module appends ".<extension>" to this prefix
+ }
+}
diff --git a/modules/nf-core/hifiasm/hifiasm.diff b/modules/nf-core/hifiasm/hifiasm.diff
index 650cf697..03790816 100644
--- a/modules/nf-core/hifiasm/hifiasm.diff
+++ b/modules/nf-core/hifiasm/hifiasm.diff
@@ -8,7 +8,7 @@ Changes in module 'nf-core/hifiasm'
- tuple val(meta), path(reads)
- path paternal_kmer_dump
- path maternal_kmer_dump
-+ tuple val(meta), path(reads), path(paternal_kmer_dump), path(maternal_kmer_dump)
++ tuple val(meta), path(reads, stageAs: "?/*"), path(paternal_kmer_dump), path(maternal_kmer_dump)
path hic_read1
path hic_read2
diff --git a/modules/nf-core/hifiasm/main.nf b/modules/nf-core/hifiasm/main.nf
index 040d8369..a1bf30a0 100644
--- a/modules/nf-core/hifiasm/main.nf
+++ b/modules/nf-core/hifiasm/main.nf
@@ -8,7 +8,7 @@ process HIFIASM {
'biocontainers/hifiasm:0.19.8--h43eeafb_0' }"
input:
- tuple val(meta), path(reads), path(paternal_kmer_dump), path(maternal_kmer_dump)
+ tuple val(meta), path(reads, stageAs: "?/*"), path(paternal_kmer_dump), path(maternal_kmer_dump)
path hic_read1
path hic_read2
diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf
index 66745efc..d82dc14d 100644
--- a/modules/nf-core/minimap2/align/main.nf
+++ b/modules/nf-core/minimap2/align/main.nf
@@ -28,17 +28,25 @@ process MINIMAP2_ALIGN {
script:
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
+ def args3 = task.ext.args3 ?: ''
+ def args4 = task.ext.args4 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam"
def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf"
def cigar_paf = cigar_paf_format && !bam_format ? "-c" : ''
def set_cigar_bam = cigar_bam && bam_format ? "-L" : ''
+ def bam_input = "${reads.extension}".matches('sam|bam|cram')
+ def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : ''
+ def query = bam_input ? "-" : reads
+ def target = reference ?: (bam_input ? error("BAM input requires reference") : reads)
+
"""
+ $samtools_reset_fastq \\
minimap2 \\
$args \\
-t $task.cpus \\
- ${reference ?: reads} \\
- $reads \\
+ $target \\
+ $query \\
$cigar_paf \\
$set_cigar_bam \\
$bam_output
@@ -55,6 +63,9 @@ process MINIMAP2_ALIGN {
def prefix = task.ext.prefix ?: "${meta.id}"
def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf"
def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : ""
+ def bam_input = "${reads.extension}".matches('sam|bam|cram')
+ def target = reference ?: (bam_input ? error("BAM input requires reference") : reads)
+
"""
touch $output_file
${bam_index}
diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test
index c93f4364..4072c171 100644
--- a/modules/nf-core/minimap2/align/tests/main.nf.test
+++ b/modules/nf-core/minimap2/align/tests/main.nf.test
@@ -34,7 +34,8 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(
- file(process.out.bam[0][1]).name,
+ bam(process.out.bam[0][1]).getHeader(),
+ bam(process.out.bam[0][1]).getReadsMD5(),
process.out.versions
).match() }
)
@@ -67,7 +68,8 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(
- file(process.out.bam[0][1]).name,
+ bam(process.out.bam[0][1]).getHeader(),
+ bam(process.out.bam[0][1]).getReadsMD5(),
file(process.out.index[0][1]).name,
process.out.versions
).match() }
@@ -104,7 +106,8 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(
- file(process.out.bam[0][1]).name,
+ bam(process.out.bam[0][1]).getHeader(),
+ bam(process.out.bam[0][1]).getReadsMD5(),
process.out.versions
).match() }
)
@@ -137,7 +140,8 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(
- file(process.out.bam[0][1]).name,
+ bam(process.out.bam[0][1]).getHeader(),
+ bam(process.out.bam[0][1]).getReadsMD5(),
process.out.versions
).match() }
)
@@ -145,6 +149,104 @@ nextflow_process {
}
+ test("sarscov2 - bam, fasta, true, [], false, false") { // reads are given as BAM, which main.nf pipes through samtools reset | samtools fastq into minimap2
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = []
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot( // snapshot the BAM header and reads checksum rather than the file name
+ bam(process.out.bam[0][1]).getHeader(),
+ bam(process.out.bam[0][1]).getReadsMD5(),
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam, fasta, true, 'bai', false, false") { // BAM reads as input, plus a request for a 'bai' index of the output
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = 'bai'
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot( // BAM header and reads checksum, plus the index file name
+ bam(process.out.bam[0][1]).getHeader(),
+ bam(process.out.bam[0][1]).getReadsMD5(),
+ file(process.out.index[0][1]).name,
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam, [], true, false, false") { // negative test: BAM reads with an empty file slot in input[1]
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ []
+ ]
+ input[2] = true
+ input[3] = []
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.failed } // main.nf calls error("BAM input requires reference") when reads are sam/bam/cram and no reference is set
+ )
+ }
+
+ }
+
test("sarscov2 - fastq, fasta, true, [], false, false - stub") {
options "-stub"
@@ -241,4 +343,99 @@ nextflow_process {
}
+ // Stub run (-stub) with a BAM reads file, the sarscov2 FASTA reference and input[3] = [].
+ // The whole process.out map is snapshotted.
+ test("sarscov2 - bam, fasta, true, [], false, false - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = []
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ // Stub run (-stub) with a BAM reads file, the sarscov2 FASTA reference and input[3] = 'bai'.
+ // The whole process.out map is snapshotted.
+ test("sarscov2 - bam, fasta, true, 'bai', false, false - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = 'bai'
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ // Stub-mode negative test: input[1] carries an empty list where the reference file would be,
+ // and the only assertion is that the process fails even under -stub.
+ test("sarscov2 - bam, [], true, false, false - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ []
+ ]
+ input[2] = true
+ input[3] = []
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.failed }
+ )
+ }
+
+ }
+
}
\ No newline at end of file
diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap
index ecb3e4ee..12264a85 100644
--- a/modules/nf-core/minimap2/align/tests/main.nf.test.snap
+++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap
@@ -1,4 +1,83 @@
{
+ "sarscov2 - bam, fasta, true, 'bai', false, false": {
+ "content": [
+ [
+ "@HD\tVN:1.6\tSO:coordinate",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index"
+ ],
+ "5d426b9a5f5b2c54f1d7f1e4c238ae94",
+ "test.bam.bai",
+ [
+ "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-25T09:03:00.827260362"
+ },
+ "sarscov2 - bam, fasta, true, 'bai', false, false - stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "index": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "paf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-23T11:21:37.92353539"
+ },
"sarscov2 - fastq, fasta, true, 'bai', false, false - stub": {
"content": [
{
@@ -154,7 +233,13 @@
},
"sarscov2 - [fastq1, fastq2], fasta, true, false, false": {
"content": [
- "test.bam",
+ [
+ "@HD\tVN:1.6\tSO:coordinate",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam"
+ ],
+ "1bc392244f228bf52cf0b5a8f6a654c9",
[
"versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
]
@@ -163,11 +248,17 @@
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
- "timestamp": "2024-06-03T12:21:31.582849048"
+ "timestamp": "2024-07-23T11:18:18.964586894"
},
"sarscov2 - fastq, fasta, true, [], false, false": {
"content": [
- "test.bam",
+ [
+ "@HD\tVN:1.6\tSO:coordinate",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam"
+ ],
+ "f194745c0ccfcb2a9c0aee094a08750",
[
"versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
]
@@ -176,11 +267,17 @@
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
- "timestamp": "2024-06-03T12:29:11.968586416"
+ "timestamp": "2024-07-23T11:17:48.667488325"
},
"sarscov2 - fastq, fasta, true, 'bai', false, false": {
"content": [
- "test.bam",
+ [
+ "@HD\tVN:1.6\tSO:coordinate",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index"
+ ],
+ "f194745c0ccfcb2a9c0aee094a08750",
"test.bam.bai",
[
"versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
@@ -190,11 +287,182 @@
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
- "timestamp": "2024-06-03T12:21:20.362186362"
+ "timestamp": "2024-07-23T11:18:02.517416733"
+ },
+ "sarscov2 - bam, fasta, true, [], false, false": {
+ "content": [
+ [
+ "@HD\tVN:1.6\tSO:coordinate",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam"
+ ],
+ "5d426b9a5f5b2c54f1d7f1e4c238ae94",
+ [
+ "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-25T09:02:49.64829488"
+ },
+ "sarscov2 - bam, fasta, true, [], false, false - stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "index": [
+
+ ],
+ "paf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-23T11:21:22.162291795"
},
"sarscov2 - fastq, [], true, false, false": {
"content": [
- "test.bam",
+ [
+ "@HD\tVN:1.6\tSO:coordinate",
+ "@SQ\tSN:ERR5069949.2151832\tLN:150",
+ "@SQ\tSN:ERR5069949.576388\tLN:77",
+ "@SQ\tSN:ERR5069949.501486\tLN:146",
+ "@SQ\tSN:ERR5069949.1331889\tLN:132",
+ "@SQ\tSN:ERR5069949.2161340\tLN:80",
+ "@SQ\tSN:ERR5069949.973930\tLN:79",
+ "@SQ\tSN:ERR5069949.2417063\tLN:150",
+ "@SQ\tSN:ERR5069949.376959\tLN:151",
+ "@SQ\tSN:ERR5069949.1088785\tLN:149",
+ "@SQ\tSN:ERR5069949.1066259\tLN:147",
+ "@SQ\tSN:ERR5069949.2832676\tLN:139",
+ "@SQ\tSN:ERR5069949.2953930\tLN:151",
+ "@SQ\tSN:ERR5069949.324865\tLN:151",
+ "@SQ\tSN:ERR5069949.2185111\tLN:150",
+ "@SQ\tSN:ERR5069949.937422\tLN:151",
+ "@SQ\tSN:ERR5069949.2431709\tLN:150",
+ "@SQ\tSN:ERR5069949.1246538\tLN:148",
+ "@SQ\tSN:ERR5069949.1189252\tLN:98",
+ "@SQ\tSN:ERR5069949.2216307\tLN:147",
+ "@SQ\tSN:ERR5069949.3273002\tLN:148",
+ "@SQ\tSN:ERR5069949.3277445\tLN:151",
+ "@SQ\tSN:ERR5069949.3022231\tLN:147",
+ "@SQ\tSN:ERR5069949.184542\tLN:151",
+ "@SQ\tSN:ERR5069949.540529\tLN:149",
+ "@SQ\tSN:ERR5069949.686090\tLN:150",
+ "@SQ\tSN:ERR5069949.2787556\tLN:106",
+ "@SQ\tSN:ERR5069949.2650879\tLN:150",
+ "@SQ\tSN:ERR5069949.2064910\tLN:149",
+ "@SQ\tSN:ERR5069949.2328704\tLN:150",
+ "@SQ\tSN:ERR5069949.1067032\tLN:150",
+ "@SQ\tSN:ERR5069949.3338256\tLN:151",
+ "@SQ\tSN:ERR5069949.1412839\tLN:147",
+ "@SQ\tSN:ERR5069949.1538968\tLN:150",
+ "@SQ\tSN:ERR5069949.147998\tLN:94",
+ "@SQ\tSN:ERR5069949.366975\tLN:106",
+ "@SQ\tSN:ERR5069949.1372331\tLN:151",
+ "@SQ\tSN:ERR5069949.1709367\tLN:129",
+ "@SQ\tSN:ERR5069949.2388984\tLN:150",
+ "@SQ\tSN:ERR5069949.1132353\tLN:150",
+ "@SQ\tSN:ERR5069949.1151736\tLN:151",
+ "@SQ\tSN:ERR5069949.479807\tLN:150",
+ "@SQ\tSN:ERR5069949.2176303\tLN:151",
+ "@SQ\tSN:ERR5069949.2772897\tLN:151",
+ "@SQ\tSN:ERR5069949.1020777\tLN:122",
+ "@SQ\tSN:ERR5069949.465452\tLN:151",
+ "@SQ\tSN:ERR5069949.1704586\tLN:149",
+ "@SQ\tSN:ERR5069949.1258508\tLN:151",
+ "@SQ\tSN:ERR5069949.986441\tLN:119",
+ "@SQ\tSN:ERR5069949.2674295\tLN:148",
+ "@SQ\tSN:ERR5069949.885966\tLN:79",
+ "@SQ\tSN:ERR5069949.2342766\tLN:151",
+ "@SQ\tSN:ERR5069949.3122970\tLN:127",
+ "@SQ\tSN:ERR5069949.3279513\tLN:72",
+ "@SQ\tSN:ERR5069949.309410\tLN:151",
+ "@SQ\tSN:ERR5069949.532979\tLN:149",
+ "@SQ\tSN:ERR5069949.2888794\tLN:151",
+ "@SQ\tSN:ERR5069949.2205229\tLN:150",
+ "@SQ\tSN:ERR5069949.786562\tLN:151",
+ "@SQ\tSN:ERR5069949.919671\tLN:151",
+ "@SQ\tSN:ERR5069949.1328186\tLN:151",
+ "@SQ\tSN:ERR5069949.870926\tLN:149",
+ "@SQ\tSN:ERR5069949.2257580\tLN:151",
+ "@SQ\tSN:ERR5069949.3249622\tLN:77",
+ "@SQ\tSN:ERR5069949.611123\tLN:125",
+ "@SQ\tSN:ERR5069949.651338\tLN:142",
+ "@SQ\tSN:ERR5069949.169513\tLN:92",
+ "@SQ\tSN:ERR5069949.155944\tLN:150",
+ "@SQ\tSN:ERR5069949.2033605\tLN:150",
+ "@SQ\tSN:ERR5069949.2730382\tLN:142",
+ "@SQ\tSN:ERR5069949.2125592\tLN:150",
+ "@SQ\tSN:ERR5069949.1062611\tLN:151",
+ "@SQ\tSN:ERR5069949.1778133\tLN:151",
+ "@SQ\tSN:ERR5069949.3057020\tLN:95",
+ "@SQ\tSN:ERR5069949.2972968\tLN:141",
+ "@SQ\tSN:ERR5069949.2734474\tLN:149",
+ "@SQ\tSN:ERR5069949.856527\tLN:151",
+ "@SQ\tSN:ERR5069949.2098070\tLN:151",
+ "@SQ\tSN:ERR5069949.1552198\tLN:150",
+ "@SQ\tSN:ERR5069949.2385514\tLN:150",
+ "@SQ\tSN:ERR5069949.2270078\tLN:151",
+ "@SQ\tSN:ERR5069949.114870\tLN:150",
+ "@SQ\tSN:ERR5069949.2668880\tLN:147",
+ "@SQ\tSN:ERR5069949.257821\tLN:139",
+ "@SQ\tSN:ERR5069949.2243023\tLN:150",
+ "@SQ\tSN:ERR5069949.2605155\tLN:146",
+ "@SQ\tSN:ERR5069949.1340552\tLN:151",
+ "@SQ\tSN:ERR5069949.1561137\tLN:150",
+ "@SQ\tSN:ERR5069949.2361683\tLN:149",
+ "@SQ\tSN:ERR5069949.2521353\tLN:150",
+ "@SQ\tSN:ERR5069949.1261808\tLN:149",
+ "@SQ\tSN:ERR5069949.2734873\tLN:98",
+ "@SQ\tSN:ERR5069949.3017828\tLN:107",
+ "@SQ\tSN:ERR5069949.573706\tLN:150",
+ "@SQ\tSN:ERR5069949.1980512\tLN:151",
+ "@SQ\tSN:ERR5069949.1014693\tLN:150",
+ "@SQ\tSN:ERR5069949.3184655\tLN:150",
+ "@SQ\tSN:ERR5069949.29668\tLN:89",
+ "@SQ\tSN:ERR5069949.3258358\tLN:151",
+ "@SQ\tSN:ERR5069949.1476386\tLN:151",
+ "@SQ\tSN:ERR5069949.2415814\tLN:150",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam"
+ ],
+ "16c1c651f8ec67383bcdee3c55aed94f",
[
"versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
]
@@ -203,6 +471,6 @@
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
- "timestamp": "2024-06-03T12:21:37.233709954"
+ "timestamp": "2024-07-23T11:18:34.246998277"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/fastq/environment.yml b/modules/nf-core/samtools/fastq/environment.yml
index 1d1bc98f..4455904e 100644
--- a/modules/nf-core/samtools/fastq/environment.yml
+++ b/modules/nf-core/samtools/fastq/environment.yml
@@ -1,10 +1,8 @@
name: samtools_fastq
-
channels:
- conda-forge
- bioconda
- defaults
-
dependencies:
- - bioconda::htslib=1.20
- bioconda::samtools=1.20
+ - bioconda::htslib=1.20
diff --git a/modules/nf-core/samtools/fastq/main.nf b/modules/nf-core/samtools/fastq/main.nf
index 66192d2a..6796c02b 100644
--- a/modules/nf-core/samtools/fastq/main.nf
+++ b/modules/nf-core/samtools/fastq/main.nf
@@ -23,21 +23,17 @@ process SAMTOOLS_FASTQ {
script:
def args = task.ext.args ?: ''
- def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fastq" :
meta.single_end ? "-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" :
"-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz"
"""
- samtools reset \\
- --threads ${task.cpus-1} \\
+ samtools \\
+ fastq \\
$args \\
- $input \\
- | \\
- samtools fastq \\
- $args2 \\
--threads ${task.cpus-1} \\
-0 ${prefix}_other.fastq.gz \\
+ $input \\
$output
cat <<-END_VERSIONS > versions.yml
diff --git a/modules/nf-core/samtools/fastq/samtools-fastq.diff b/modules/nf-core/samtools/fastq/samtools-fastq.diff
deleted file mode 100644
index 000ce76a..00000000
--- a/modules/nf-core/samtools/fastq/samtools-fastq.diff
+++ /dev/null
@@ -1,45 +0,0 @@
-Changes in module 'nf-core/samtools/fastq'
---- modules/nf-core/samtools/fastq/main.nf
-+++ modules/nf-core/samtools/fastq/main.nf
-@@ -23,17 +23,21 @@
-
- script:
- def args = task.ext.args ?: ''
-+ def args2 = task.ext.args2 ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
- def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fastq" :
- meta.single_end ? "-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" :
- "-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz"
- """
-- samtools \\
-- fastq \\
-+ samtools reset \\
-+ --threads ${task.cpus-1} \\
- $args \\
-+ $input \\
-+ | \\
-+ samtools fastq \\
-+ $args2 \\
- --threads ${task.cpus-1} \\
- -0 ${prefix}_other.fastq.gz \\
-- $input \\
- $output
-
- cat <<-END_VERSIONS > versions.yml
-
---- modules/nf-core/samtools/fastq/environment.yml
-+++ modules/nf-core/samtools/fastq/environment.yml
-@@ -1,8 +1,10 @@
- name: samtools_fastq
-+
- channels:
- - conda-forge
- - bioconda
- - defaults
-+
- dependencies:
-+ - bioconda::htslib=1.20
- - bioconda::samtools=1.20
-- - bioconda::htslib=1.20
-
-************************************************************
diff --git a/modules/nf-core/samtools/import/environment.yml b/modules/nf-core/samtools/import/environment.yml
new file mode 100644
index 00000000..38f1ebab
--- /dev/null
+++ b/modules/nf-core/samtools/import/environment.yml
@@ -0,0 +1,8 @@
+name: samtools_import
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::samtools=1.20
+ - bioconda::htslib=1.20
diff --git a/modules/nf-core/samtools/import/main.nf b/modules/nf-core/samtools/import/main.nf
new file mode 100644
index 00000000..954a8052
--- /dev/null
+++ b/modules/nf-core/samtools/import/main.nf
@@ -0,0 +1,72 @@
+// Convert FASTQ reads into an unaligned SAM/BAM/CRAM file with `samtools import`.
+//
+// Inputs:
+//   meta  - sample map; `meta.id` is the default output prefix and `meta.single_end` selects the read-layout flags
+//   reads - one FASTQ path or a list of FASTQ paths
+// Outputs (each optional; the file written is `${prefix}.${suffix}`, with the suffix derived from `task.ext.args`):
+//   sam / bam / cram - converted reads
+//   versions         - versions.yml recording the samtools version
+process SAMTOOLS_IMPORT {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0':
+        'biocontainers/samtools:1.20--h50ea8bc_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.sam") , emit: sam, optional: true
+    tuple val(meta), path("*.bam") , emit: bam, optional: true
+    tuple val(meta), path("*.cram"), emit: cram, optional: true
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // The output extension follows an explicit `--output-fmt` in args; BAM is the fallback.
+    def suffix = args.contains("--output-fmt sam") ? "sam" :
+                    args.contains("--output-fmt bam") ? "bam" :
+                    args.contains("--output-fmt cram") ? "cram" :
+                    "bam"
+    // NOTE(review): for several single-end files this yields "f1 -0f2 -0f3" (the first file carries no flag) — confirm this is intended.
+    def input = reads instanceof List && meta.single_end ? reads.join(" -0") :              // multiple single-end files
+                reads instanceof List && !meta.single_end ? "-1 ${reads[0]} -2 ${reads[1]}": // paired end file
+                meta.single_end ? "-0 $reads" :                                              // single single-end file
+                "-s $reads"                                                                  // interleave paired-end file
+    """
+    samtools \\
+        import \\
+        $input \\
+        $args \\
+        -@ $task.cpus \\
+        -o ${prefix}.${suffix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Mirror the script's extension choice so a stub run populates the same output channel as a real run.
+    def suffix = args.contains("--output-fmt sam") ? "sam" :
+                    args.contains("--output-fmt cram") ? "cram" :
+                    "bam"
+    """
+    touch ${prefix}.${suffix}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/samtools/import/meta.yml b/modules/nf-core/samtools/import/meta.yml
new file mode 100644
index 00000000..9002e092
--- /dev/null
+++ b/modules/nf-core/samtools/import/meta.yml
@@ -0,0 +1,56 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+name: "samtools_import"
+description: converts FASTQ files to unmapped SAM/BAM/CRAM
+keywords:
+ - import
+ - fastq
+ - bam
+ - sam
+ - cram
+tools:
+ - samtools:
+ description: |
+ SAMtools is a set of utilities for interacting with and post-processing
+ short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+ These files are generated as output by short read aligners like BWA.
+ homepage: http://www.htslib.org/
+ documentation: http://www.htslib.org/doc/samtools.html
+ doi: 10.1093/bioinformatics/btp352
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test', single_end:false ]`
+ - reads:
+ type: file
+ description: fastq data to be converted to SAM/BAM/CRAM
+ pattern: "*.{fastq,fq,fastq.gz,fq.gz}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test', single_end:false ]`
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - sam:
+ type: file
+ description: SAM file
+ pattern: "*.sam"
+ - bam:
+ type: file
+ description: Unaligned BAM file
+ pattern: "*.bam"
+ - cram:
+ type: file
+ description: Unaligned CRAM file
+ pattern: "*.cram"
+authors:
+ - "@matthdsm"
+maintainers:
+ - "@matthdsm"
diff --git a/modules/nf-core/samtools/import/tests/main.nf.test b/modules/nf-core/samtools/import/tests/main.nf.test
new file mode 100644
index 00000000..d029ca70
--- /dev/null
+++ b/modules/nf-core/samtools/import/tests/main.nf.test
@@ -0,0 +1,89 @@
+// nf-test suite for SAMTOOLS_IMPORT: single-end, paired-end and interleaved FASTQ input.
+// Each test snapshots only the meta map and the BAM file name (not the file checksum).
+// Two test names end in a space; the name is the snapshot key, so keep names and snapshot file in sync.
+nextflow_process {
+
+    name "Test Process SAMTOOLS_IMPORT"
+    script "../main.nf"
+    process "SAMTOOLS_IMPORT"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "samtools"
+    tag "samtools/import"
+
+    test("samtools_import_single ") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:true ], // meta map
+                    file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                {assert process.success},
+                {assert snapshot(process.out.bam.collect { it.collect { it instanceof Map ? it : file(it).name } }).match()}
+            )
+        }
+    }
+
+    test("samtools_import_paired ") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    // both mates go in ONE list element so the tuple matches `tuple val(meta), path(reads)`
+                    [
+                        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+                        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+                    ]
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                {assert process.success},
+                {assert snapshot(process.out.bam.collect { it.collect { it instanceof Map ? it : file(it).name } }).match()}
+            )
+        }
+    }
+
+    test("samtools_import_interleaved") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = Channel.of([
+                    [ id:'test', single_end:false ], // meta map
+                    file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true)
+                ])
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                {assert process.success},
+                {assert snapshot(process.out.bam.collect { it.collect { it instanceof Map ? it : file(it).name } }).match()}
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/samtools/import/tests/main.nf.test.snap b/modules/nf-core/samtools/import/tests/main.nf.test.snap
new file mode 100644
index 00000000..eb730a06
--- /dev/null
+++ b/modules/nf-core/samtools/import/tests/main.nf.test.snap
@@ -0,0 +1,103 @@
+{
+ "samtools_import_single ": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-05-31T11:38:44.388259606"
+ },
+ "samtools_import_interleaved": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-05-31T11:38:56.393371331"
+ },
+ "samtools_import_paired ": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-05-31T11:38:50.437197406"
+ },
+ "samtools_import_interleaved ": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,fad91b070f51c77d7abe22cd31243710"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,a529fc2aa6485db14986c95c53638b11"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,fad91b070f51c77d7abe22cd31243710"
+ ]
+ ],
+ "cram": [
+
+ ],
+ "sam": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,a529fc2aa6485db14986c95c53638b11"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-05-30T12:12:43.491200967"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/import/tests/tags.yml b/modules/nf-core/samtools/import/tests/tags.yml
new file mode 100644
index 00000000..89c89128
--- /dev/null
+++ b/modules/nf-core/samtools/import/tests/tags.yml
@@ -0,0 +1,2 @@
+samtools/import:
+ - modules/nf-core/samtools/import/**
diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml
index 369b58a2..260d516b 100644
--- a/modules/nf-core/samtools/index/environment.yml
+++ b/modules/nf-core/samtools/index/environment.yml
@@ -1,10 +1,8 @@
name: samtools_index
-
channels:
- conda-forge
- bioconda
- defaults
-
dependencies:
- - bioconda::htslib=1.20
- bioconda::samtools=1.20
+ - bioconda::htslib=1.20
diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf
index b523c21b..e002585b 100644
--- a/modules/nf-core/samtools/index/main.nf
+++ b/modules/nf-core/samtools/index/main.nf
@@ -35,10 +35,11 @@ process SAMTOOLS_INDEX {
"""
stub:
+ def args = task.ext.args ?: ''
+ def extension = file(input).getExtension() == 'cram' ?
+ "crai" : args.contains("-c") ? "csi" : "bai"
"""
- touch ${input}.bai
- touch ${input}.crai
- touch ${input}.csi
+ touch ${input}.${extension}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test
index bb7756d1..ca34fb5c 100644
--- a/modules/nf-core/samtools/index/tests/main.nf.test
+++ b/modules/nf-core/samtools/index/tests/main.nf.test
@@ -9,11 +9,7 @@ nextflow_process {
tag "samtools/index"
test("bai") {
-
when {
- params {
- outdir = "$outputDir"
- }
process {
"""
input[0] = Channel.of([
@@ -27,18 +23,13 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
- { assert snapshot(process.out.bai).match("bai") },
- { assert snapshot(process.out.versions).match("bai_versions") }
+ { assert snapshot(process.out).match() }
)
}
}
test("crai") {
-
when {
- params {
- outdir = "$outputDir"
- }
process {
"""
input[0] = Channel.of([
@@ -52,20 +43,83 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
- { assert snapshot(process.out.crai).match("crai") },
- { assert snapshot(process.out.versions).match("crai_versions") }
+ { assert snapshot(process.out).match() }
)
}
}
test("csi") {
-
config "./csi.nextflow.config"
when {
- params {
- outdir = "$outputDir"
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.csi[0][1]).name,
+ process.out.versions
+ ).match() }
+ )
+ }
+ }
+
+ // Stub run (-stub) on a sorted paired-end BAM; the whole process.out map is snapshotted.
+ test("bai - stub") {
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("crai - stub") {
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true)
+ ])
+ """
}
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("csi - stub") {
+ options "-stub"
+ config "./csi.nextflow.config"
+
+ when {
process {
"""
input[0] = Channel.of([
@@ -79,8 +133,7 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
- { assert path(process.out.csi.get(0).get(1)).exists() },
- { assert snapshot(process.out.versions).match("csi_versions") }
+ { assert snapshot(process.out).match() }
)
}
}
diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap
index 52756e85..799d199c 100644
--- a/modules/nf-core/samtools/index/tests/main.nf.test.snap
+++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap
@@ -1,74 +1,250 @@
{
- "crai_versions": {
+ "csi - stub": {
"content": [
- [
- "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
- ]
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
+ ],
+ "bai": [
+
+ ],
+ "crai": [
+
+ ],
+ "csi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
+ ]
+ }
],
"meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.3"
},
- "timestamp": "2024-05-28T15:42:04.203740976"
+ "timestamp": "2024-07-22T16:51:53.9057"
},
- "csi_versions": {
+ "crai - stub": {
"content": [
- [
- "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
- ]
+ {
+ "0": [
+
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
+ ],
+ "bai": [
+
+ ],
+ "crai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "csi": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
+ ]
+ }
],
"meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.3"
},
- "timestamp": "2024-05-28T15:42:09.57475878"
+ "timestamp": "2024-07-22T16:51:45.931558"
},
- "crai": {
+ "bai - stub": {
"content": [
- [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029"
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
+ ],
+ "bai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "crai": [
+
+ ],
+ "csi": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
]
- ]
+ }
],
"meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.04.3"
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.3"
},
- "timestamp": "2024-02-12T18:41:38.446424"
+ "timestamp": "2024-07-22T16:51:34.807525"
},
- "bai": {
+ "csi": {
"content": [
+ "test.paired_end.sorted.bam.csi",
[
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4"
- ]
+ "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
]
],
"meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.04.3"
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.3"
},
- "timestamp": "2024-02-12T18:40:46.579747"
+ "timestamp": "2024-07-22T16:52:55.688799"
},
- "bai_versions": {
+ "crai": {
"content": [
- [
- "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
- ]
+ {
+ "0": [
+
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
+ ],
+ "bai": [
+
+ ],
+ "crai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029"
+ ]
+ ],
+ "csi": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-07-22T16:51:17.609533"
+ },
+ "bai": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
+ ],
+ "bai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4"
+ ]
+ ],
+ "crai": [
+
+ ],
+ "csi": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,802c9776d9c5e95314e888cf18e96d77"
+ ]
+ }
],
"meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.3"
},
- "timestamp": "2024-05-28T15:41:57.929287369"
+ "timestamp": "2024-07-22T16:51:04.16585"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml
index af1f2cf6..36a12eab 100644
--- a/modules/nf-core/samtools/sort/environment.yml
+++ b/modules/nf-core/samtools/sort/environment.yml
@@ -1,10 +1,8 @@
name: samtools_sort
-
channels:
- conda-forge
- bioconda
- defaults
-
dependencies:
- - bioconda::htslib=1.20
- bioconda::samtools=1.20
+ - bioconda::htslib=1.20
diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf
index 596c6f7e..8e019099 100644
--- a/modules/nf-core/samtools/sort/main.nf
+++ b/modules/nf-core/samtools/sort/main.nf
@@ -50,10 +50,20 @@ process SAMTOOLS_SORT {
"""
stub:
+ def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
+ def extension = args.contains("--output-fmt sam") ? "sam" :
+ args.contains("--output-fmt cram") ? "cram" :
+ "bam"
"""
- touch ${prefix}.bam
- touch ${prefix}.bam.csi
+ touch ${prefix}.${extension}
+ if [ "${extension}" == "bam" ];
+ then
+ touch ${prefix}.${extension}.csi
+ elif [ "${extension}" == "cram" ];
+ then
+ touch ${prefix}.${extension}.crai
+ fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test
index fb38ed9b..c2ea9c72 100644
--- a/modules/nf-core/samtools/sort/tests/main.nf.test
+++ b/modules/nf-core/samtools/sort/tests/main.nf.test
@@ -32,16 +32,16 @@ nextflow_process {
{ assert process.success },
{ assert snapshot(
process.out.bam,
- process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }
- ).match("test_bam")
- }
+ process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } },
+ process.out.versions
+ ).match()}
)
}
}
test("cram") {
- config "./nextflow.config"
+ config "./nextflow_cram.config"
when {
process {
@@ -62,23 +62,20 @@ nextflow_process {
assertAll (
{ assert process.success },
{ assert snapshot(
- process.out.bam,
- process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }
- ).match("test_cram")
- }
+ process.out.cram.collect { it.collect { it instanceof Map ? it : file(it).name } },
+ process.out.crai.collect { it.collect { it instanceof Map ? it : file(it).name } },
+ process.out.versions
+ ).match()}
)
}
}
- test("bam_stub") {
+ test("bam - stub") {
- config "./nextflow.config"
options "-stub"
+ config "./nextflow.config"
when {
- params {
- outdir = "$outputDir"
- }
process {
"""
input[0] = Channel.of([
@@ -96,8 +93,35 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
- { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") },
- { assert snapshot(process.out.versions).match("bam_stub_versions") }
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("cram - stub") {
+
+ options "-stub"
+ config "./nextflow_cram.config"
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true)
+ ])
+ input[1] = Channel.of([
+ [ id:'fasta' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
)
}
}
diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap
index 5a27de1d..da38d5d1 100644
--- a/modules/nf-core/samtools/sort/tests/main.nf.test.snap
+++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap
@@ -7,54 +7,159 @@
"id": "test",
"single_end": false
},
- "test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400"
+ "test.sorted.cram"
]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.cram.crai"
+ ]
+ ],
+ [
+ "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062"
]
],
"meta": {
- "nf-test": "0.8.4",
- "nextflow": "24.04.2"
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.3"
},
- "timestamp": "2024-05-31T08:13:54.512837189"
+ "timestamp": "2024-07-22T17:19:37.196205"
},
- "bam_stub_bam": {
+ "bam - stub": {
"content": [
- "test.sorted.bam"
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+ "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "crai": [
+
+ ],
+ "cram": [
+
+ ],
+ "csi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062"
+ ]
+ }
],
"meta": {
- "nf-test": "0.8.4",
- "nextflow": "24.04.2"
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.3"
},
- "timestamp": "2024-05-31T07:29:00.761845507"
+ "timestamp": "2024-07-22T15:54:46.580756"
},
- "test_cram": {
+ "cram - stub": {
"content": [
- [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.sorted.bam:md5,22b2093be34a7637f5fbc84272b89d06"
- ]
- ],
- [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.sorted.bam.csi"
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+
+ ],
+ "4": [
+ "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062"
+ ],
+ "bam": [
+
+ ],
+ "crai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "cram": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "csi": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062"
]
- ]
+ }
],
"meta": {
- "nf-test": "0.8.4",
- "nextflow": "24.04.2"
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.3"
},
- "timestamp": "2024-05-31T09:16:51.924951855"
+ "timestamp": "2024-07-22T15:57:30.505698"
},
- "test_bam": {
+ "bam": {
"content": [
[
[
@@ -73,42 +178,15 @@
},
"test.sorted.bam.csi"
]
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "24.04.2"
- },
- "timestamp": "2024-05-31T08:28:12.15952312"
- },
- "bam_stub_versions": {
- "content": [
+ ],
[
"versions.yml:md5,7a360de20e1d7a6f15a5e8fbe0a9c062"
]
],
"meta": {
- "nf-test": "0.8.4",
- "nextflow": "24.04.2"
- },
- "timestamp": "2024-05-31T07:29:00.765038811"
- },
- "bam": {
- "content": [
- [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.sorted.bam:md5,21c992d59615936b99f2ad008aa54400"
- ]
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "24.04.2"
+ "nf-test": "0.9.0",
+ "nextflow": "24.04.3"
},
- "timestamp": "2024-05-31T08:13:48.538030517"
+ "timestamp": "2024-07-22T15:54:25.872954"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/sort/tests/nextflow_cram.config b/modules/nf-core/samtools/sort/tests/nextflow_cram.config
new file mode 100644
index 00000000..3a8c0188
--- /dev/null
+++ b/modules/nf-core/samtools/sort/tests/nextflow_cram.config
@@ -0,0 +1,8 @@
+process {
+
+ withName: SAMTOOLS_SORT {
+ ext.prefix = { "${meta.id}.sorted" }
+ ext.args = "--write-index --output-fmt cram"
+ }
+
+}
diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml
index 38185dd3..150c3777 100644
--- a/modules/nf-core/samtools/view/environment.yml
+++ b/modules/nf-core/samtools/view/environment.yml
@@ -1,10 +1,8 @@
name: samtools_view
-
channels:
- conda-forge
- bioconda
- defaults
-
dependencies:
- - bioconda::htslib=1.20
- bioconda::samtools=1.20
+ - bioconda::htslib=1.20
diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf
index 38df8576..dc611448 100644
--- a/modules/nf-core/samtools/view/main.nf
+++ b/modules/nf-core/samtools/view/main.nf
@@ -13,13 +13,15 @@ process SAMTOOLS_VIEW {
path qname
output:
- tuple val(meta), path("*.bam"), emit: bam, optional: true
- tuple val(meta), path("*.cram"), emit: cram, optional: true
- tuple val(meta), path("*.sam"), emit: sam, optional: true
- tuple val(meta), path("*.bai"), emit: bai, optional: true
- tuple val(meta), path("*.csi"), emit: csi, optional: true
- tuple val(meta), path("*.crai"), emit: crai, optional: true
- path "versions.yml", emit: versions
+ tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true
+ tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true
+ tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true
+ tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true
+ tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true
+ tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true
+ tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true
+ tuple val(meta), path("${prefix}.unselected.${file_type}.{bai,csi,crai}"), emit: unselected_index, optional: true
+ path "versions.yml", emit: versions
when:
task.ext.when == null || task.ext.when
@@ -27,13 +29,13 @@ process SAMTOOLS_VIEW {
script:
def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
+ prefix = task.ext.prefix ?: "${meta.id}"
def reference = fasta ? "--reference ${fasta}" : ""
- def readnames = qname ? "--qname-file ${qname}": ""
- def file_type = args.contains("--output-fmt sam") ? "sam" :
- args.contains("--output-fmt bam") ? "bam" :
- args.contains("--output-fmt cram") ? "cram" :
- input.getExtension()
+ file_type = args.contains("--output-fmt sam") ? "sam" :
+ args.contains("--output-fmt bam") ? "bam" :
+ args.contains("--output-fmt cram") ? "cram" :
+ input.getExtension()
+ readnames = qname ? "--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": ""
if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
samtools \\
@@ -54,14 +56,14 @@ process SAMTOOLS_VIEW {
stub:
def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
- def file_type = args.contains("--output-fmt sam") ? "sam" :
- args.contains("--output-fmt bam") ? "bam" :
- args.contains("--output-fmt cram") ? "cram" :
- input.getExtension()
+ prefix = task.ext.prefix ?: "${meta.id}"
+ file_type = args.contains("--output-fmt sam") ? "sam" :
+ args.contains("--output-fmt bam") ? "bam" :
+ args.contains("--output-fmt cram") ? "cram" :
+ input.getExtension()
if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
- def index = args.contains("--write-index") ? "touch ${prefix}.csi" : ""
+ def index = args.contains("--write-index") ? "touch ${prefix}.${file_type}.csi" : ""
"""
touch ${prefix}.${file_type}
diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml
index 3dadafae..27be60d0 100644
--- a/modules/nf-core/samtools/view/meta.yml
+++ b/modules/nf-core/samtools/view/meta.yml
@@ -73,6 +73,15 @@ output:
type: file
description: optional CRAM file index
pattern: "*.{crai}"
+ # unselected and unselected_index are created when passing a qname
+ - unselected:
+ type: file
+ description: optional file with unselected alignments
+ pattern: "*.unselected.{bam,cram,sam}"
+ - unselected_index:
+ type: file
+ description: index for the "unselected" file
+ pattern: "*.unselected.*.{bai,csi,crai}"
- versions:
type: file
description: File containing software versions
diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test
index 45a0defb..37b81a91 100644
--- a/modules/nf-core/samtools/view/tests/main.nf.test
+++ b/modules/nf-core/samtools/view/tests/main.nf.test
@@ -172,6 +172,8 @@ nextflow_process {
{ assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") },
{ assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") },
{ assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") },
+ { assert snapshot(file(process.out.unselected[0][1]).name).match("cram_to_bam_index_qname_unselected") },
+ { assert snapshot(file(process.out.unselected_index[0][1]).name).match("cram_to_bam_index_qname_unselected_csi") },
{ assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") }
)
}
diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap
index eb0c577c..6bcce9fe 100644
--- a/modules/nf-core/samtools/view/tests/main.nf.test.snap
+++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap
@@ -355,6 +355,26 @@
},
"timestamp": "2024-02-12T19:38:23.322874"
},
+ "cram_to_bam_index_qname_unselected": {
+ "content": [
+ "test.unselected.bam"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-12T19:38:23.322874"
+ },
+ "cram_to_bam_index_qname_unselected_csi": {
+ "content": [
+ "test.unselected.bam.csi"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-12T19:38:23.328458"
+ },
"bam_versions": {
"content": [
[
@@ -477,7 +497,7 @@
},
"bam_stub_csi": {
"content": [
- "test.csi"
+ "test.bam.csi"
],
"meta": {
"nf-test": "0.8.4",
diff --git a/modules/nf-core/sniffles/main.nf b/modules/nf-core/sniffles/main.nf
index 83f63586..41bc76c1 100644
--- a/modules/nf-core/sniffles/main.nf
+++ b/modules/nf-core/sniffles/main.nf
@@ -49,7 +49,6 @@ process SNIFFLES {
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
-
"""
touch ${prefix}.vcf.gz
touch ${prefix}.snf
@@ -59,6 +58,4 @@ process SNIFFLES {
sniffles: \$(sniffles --help 2>&1 | grep Version |sed 's/^.*Version //')
END_VERSIONS
"""
-
}
-
diff --git a/modules/nf-core/sniffles/meta.yml b/modules/nf-core/sniffles/meta.yml
index 333d5de1..c8b2da7d 100644
--- a/modules/nf-core/sniffles/meta.yml
+++ b/modules/nf-core/sniffles/meta.yml
@@ -3,7 +3,7 @@ description: structural-variant calling with sniffles
keywords:
- sniffles
- structural-variant calling
- - long-read sequencing
+ - long-read
tools:
- sniffles:
description: a fast structural variant caller for long-read sequencing
diff --git a/modules/nf-core/sniffles/sniffles.diff b/modules/nf-core/sniffles/sniffles.diff
index 9b8461a9..9b748dd3 100644
--- a/modules/nf-core/sniffles/sniffles.diff
+++ b/modules/nf-core/sniffles/sniffles.diff
@@ -1,15 +1,7 @@
Changes in module 'nf-core/sniffles'
--- modules/nf-core/sniffles/meta.yml
+++ modules/nf-core/sniffles/meta.yml
-@@ -3,6 +3,7 @@
- keywords:
- - sniffles
- - structural-variant calling
-+ - long-read sequencing
- tools:
- - sniffles:
- description: a fast structural variant caller for long-read sequencing
-@@ -16,10 +17,10 @@
+@@ -17,10 +17,10 @@
description: |
Groovy Map containing sample information
e.g. [ id:'test' ]
@@ -23,7 +15,7 @@ Changes in module 'nf-core/sniffles'
- bai:
type: file
description: Index of BAM file
-@@ -33,6 +34,15 @@
+@@ -34,6 +34,15 @@
type: file
description: |
Reference database in FASTA format
@@ -39,7 +31,7 @@ Changes in module 'nf-core/sniffles'
output:
- meta:
type: map
-@@ -45,8 +55,8 @@
+@@ -46,8 +55,8 @@
pattern: "*.vcf.gz"
- snf:
type: file
@@ -76,7 +68,7 @@ Changes in module 'nf-core/sniffles'
when:
task.ext.when == null || task.ext.when
-@@ -23,18 +26,39 @@
+@@ -23,14 +26,33 @@
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
@@ -99,26 +91,20 @@ Changes in module 'nf-core/sniffles'
+ $snf \\
$args
+
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- sniffles: \$(sniffles --help 2>&1 | grep Version |sed 's/^.*Version //')
- END_VERSIONS
- """
++ cat <<-END_VERSIONS > versions.yml
++ "${task.process}":
++ sniffles: \$(sniffles --help 2>&1 | grep Version |sed 's/^.*Version //')
++ END_VERSIONS
++ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
-+
+ """
+ touch ${prefix}.vcf.gz
+ touch ${prefix}.snf
+
-+ cat <<-END_VERSIONS > versions.yml
-+ "${task.process}":
-+ sniffles: \$(sniffles --help 2>&1 | grep Version |sed 's/^.*Version //')
-+ END_VERSIONS
-+ """
-+
- }
-
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sniffles: \$(sniffles --help 2>&1 | grep Version |sed 's/^.*Version //')
************************************************************
diff --git a/modules/nf-core/somalier/extract/environment.yml b/modules/nf-core/somalier/extract/environment.yml
index e0dd11c9..fe32ebda 100644
--- a/modules/nf-core/somalier/extract/environment.yml
+++ b/modules/nf-core/somalier/extract/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::somalier=0.2.15
+ - bioconda::somalier=0.2.18
diff --git a/modules/nf-core/somalier/extract/main.nf b/modules/nf-core/somalier/extract/main.nf
index 8dc3c31c..4a20f6b3 100644
--- a/modules/nf-core/somalier/extract/main.nf
+++ b/modules/nf-core/somalier/extract/main.nf
@@ -5,14 +5,14 @@ process SOMALIER_EXTRACT {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/somalier:0.2.15--h37c5b7d_0':
- 'biocontainers/somalier:0.2.15--h37c5b7d_0' }"
+ 'https://depot.galaxyproject.org/singularity/somalier:0.2.18--hb57907c_0':
+ 'biocontainers/somalier:0.2.18--hb57907c_0' }"
input:
tuple val(meta), path(input), path(input_index)
- path(fasta)
- path(fai)
- path(sites)
+ tuple val(meta2), path(fasta)
+ tuple val(meta3), path(fai)
+ tuple val(meta4), path(sites)
output:
tuple val(meta), path("*.somalier") , emit: extract
diff --git a/modules/nf-core/somalier/extract/meta.yml b/modules/nf-core/somalier/extract/meta.yml
index 7aa426ab..aabaf5d6 100644
--- a/modules/nf-core/somalier/extract/meta.yml
+++ b/modules/nf-core/somalier/extract/meta.yml
@@ -19,7 +19,7 @@ tools:
documentation: "https://github.com/brentp/somalier/blob/master/README.md"
tool_dev_url: "https://github.com/brentp/somalier"
doi: "10.1186/s13073-020-00761-2"
- licence: "MIT License"
+ licence: ["MIT"]
input:
- meta:
type: map
@@ -32,14 +32,29 @@ input:
- input_index:
type: file
description: index file of the input data, e.g., bam.bai, cram.crai
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'hg38' ]
- fasta:
type: file
description: The reference fasta file
pattern: "*.{fasta,fna,fas,fa}"
+ - meta3:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'hg38' ]
- fai:
type: file
description: FASTA index file
pattern: "*.fai"
+ - meta4:
+ type: map
+ description: |
+ Groovy Map containing sites information
+ e.g. [ id:'hg38' ]
- sites:
type: file
description: sites file in VCF format which can be taken from https://github.com/brentp/somalier
diff --git a/modules/nf-core/somalier/extract/somalier-extract.diff b/modules/nf-core/somalier/extract/somalier-extract.diff
new file mode 100644
index 00000000..59f472ab
--- /dev/null
+++ b/modules/nf-core/somalier/extract/somalier-extract.diff
@@ -0,0 +1,25 @@
+Changes in module 'nf-core/somalier/extract'
+--- modules/nf-core/somalier/extract/main.nf
++++ modules/nf-core/somalier/extract/main.nf
+@@ -5,8 +5,8 @@
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+- 'https://depot.galaxyproject.org/singularity/somalier:0.2.19--h0c29559_0':
+- 'biocontainers/somalier:0.2.19--h0c29559_0' }"
++ 'https://depot.galaxyproject.org/singularity/somalier:0.2.18--hb57907c_0':
++ 'biocontainers/somalier:0.2.18--hb57907c_0' }"
+
+ input:
+ tuple val(meta), path(input), path(input_index)
+
+--- modules/nf-core/somalier/extract/environment.yml
++++ modules/nf-core/somalier/extract/environment.yml
+@@ -4,4 +4,4 @@
+ - bioconda
+ - defaults
+ dependencies:
+- - bioconda::somalier=0.2.19
++ - bioconda::somalier=0.2.18
+
+************************************************************
diff --git a/modules/nf-core/somalier/extract/tests/main.nf.test b/modules/nf-core/somalier/extract/tests/main.nf.test
new file mode 100644
index 00000000..dfc7e8a5
--- /dev/null
+++ b/modules/nf-core/somalier/extract/tests/main.nf.test
@@ -0,0 +1,84 @@
+nextflow_process {
+
+ name "Test Process SOMALIER_EXTRACT"
+ script "../main.nf"
+ process "SOMALIER_EXTRACT"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "somalier"
+ tag "somalier/extract"
+
+ test("homo_sapiens - [ bam, bai ], fasta, fai, sites") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'delete_me/somalier/sites_chr21.hg38.vcf.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("homo_sapiens - [ bam, bai ], fasta, fai, sites -stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'delete_me/somalier/sites_chr21.hg38.vcf.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/somalier/extract/tests/main.nf.test.snap b/modules/nf-core/somalier/extract/tests/main.nf.test.snap
new file mode 100644
index 00000000..83df72a4
--- /dev/null
+++ b/modules/nf-core/somalier/extract/tests/main.nf.test.snap
@@ -0,0 +1,72 @@
+{
+ "homo_sapiens - [ bam, bai ], fasta, fai, sites": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "normal.somalier:md5,21f0b980edd42ddaa8ab964959c1de02"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,df4e49e9520f49a862026da1aa81dde0"
+ ],
+ "extract": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "normal.somalier:md5,21f0b980edd42ddaa8ab964959c1de02"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,df4e49e9520f49a862026da1aa81dde0"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-01T13:25:23.384476049"
+ },
+ "homo_sapiens - [ bam, bai ], fasta, fai, sites -stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.somalier:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,df4e49e9520f49a862026da1aa81dde0"
+ ],
+ "extract": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.somalier:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,df4e49e9520f49a862026da1aa81dde0"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-01T13:25:35.741711687"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/somalier/extract/tests/tags.yml b/modules/nf-core/somalier/extract/tests/tags.yml
new file mode 100644
index 00000000..836e3524
--- /dev/null
+++ b/modules/nf-core/somalier/extract/tests/tags.yml
@@ -0,0 +1,2 @@
+somalier/extract:
+ - "modules/nf-core/somalier/extract/**"
diff --git a/modules/nf-core/somalier/relate/environment.yml b/modules/nf-core/somalier/relate/environment.yml
index b8b70f67..3a730471 100644
--- a/modules/nf-core/somalier/relate/environment.yml
+++ b/modules/nf-core/somalier/relate/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::somalier=0.2.15
+ - bioconda::somalier=0.2.18
diff --git a/modules/nf-core/somalier/relate/main.nf b/modules/nf-core/somalier/relate/main.nf
index 45a4c1e8..fb9d3742 100644
--- a/modules/nf-core/somalier/relate/main.nf
+++ b/modules/nf-core/somalier/relate/main.nf
@@ -5,8 +5,8 @@ process SOMALIER_RELATE {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/somalier:0.2.15--h37c5b7d_0':
- 'biocontainers/somalier:0.2.15--h37c5b7d_0' }"
+ 'https://depot.galaxyproject.org/singularity/somalier:0.2.18--hb57907c_0':
+ 'biocontainers/somalier:0.2.18--hb57907c_0' }"
input:
tuple val(meta), path(extract), path(ped)
diff --git a/modules/nf-core/somalier/relate/meta.yml b/modules/nf-core/somalier/relate/meta.yml
index d1d81c87..42638f4f 100644
--- a/modules/nf-core/somalier/relate/meta.yml
+++ b/modules/nf-core/somalier/relate/meta.yml
@@ -19,7 +19,7 @@ tools:
documentation: "https://github.com/brentp/somalier/blob/master/README.md"
tool_dev_url: "https://github.com/brentp/somalier"
doi: "10.1186/s13073-020-00761-2"
- licence: "MIT License"
+ licence: ["MIT"]
input:
- meta:
type: map
@@ -27,7 +27,7 @@ input:
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- extract:
- type: file(s)
+ type: file
description: extract file(s) from Somalier extract
pattern: "*.somalier"
- ped:
diff --git a/modules/nf-core/somalier/relate/somalier-relate.diff b/modules/nf-core/somalier/relate/somalier-relate.diff
new file mode 100644
index 00000000..f583d612
--- /dev/null
+++ b/modules/nf-core/somalier/relate/somalier-relate.diff
@@ -0,0 +1,25 @@
+Changes in module 'nf-core/somalier/relate'
+--- modules/nf-core/somalier/relate/main.nf
++++ modules/nf-core/somalier/relate/main.nf
+@@ -5,8 +5,8 @@
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+- 'https://depot.galaxyproject.org/singularity/somalier:0.2.19--h0c29559_0':
+- 'biocontainers/somalier:0.2.19--h0c29559_0' }"
++ 'https://depot.galaxyproject.org/singularity/somalier:0.2.18--hb57907c_0':
++ 'biocontainers/somalier:0.2.18--hb57907c_0' }"
+
+ input:
+ tuple val(meta), path(extract), path(ped)
+
+--- modules/nf-core/somalier/relate/environment.yml
++++ modules/nf-core/somalier/relate/environment.yml
+@@ -4,4 +4,4 @@
+ - bioconda
+ - defaults
+ dependencies:
+- - bioconda::somalier=0.2.19
++ - bioconda::somalier=0.2.18
+
+************************************************************
diff --git a/modules/nf-core/somalier/relate/tests/main.nf.test b/modules/nf-core/somalier/relate/tests/main.nf.test
new file mode 100644
index 00000000..5f17456d
--- /dev/null
+++ b/modules/nf-core/somalier/relate/tests/main.nf.test
@@ -0,0 +1,124 @@
+nextflow_process {
+
+ name "Test Process SOMALIER_RELATE"
+ script "../main.nf"
+ process "SOMALIER_RELATE"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "somalier"
+ tag "somalier/relate"
+
+ test("[ delete_me, [] ], []") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'cohort', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'delete_me/somalier/normal.somalier', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'delete_me/somalier/tumour.somalier', checkIfExists: true)
+ ],
+ []
+ ]
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("[ delete_me, ped ], groups") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'cohort', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'delete_me/somalier/normal.somalier', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'delete_me/somalier/tumour.somalier', checkIfExists: true)
+ ],
+ file(params.modules_testdata_base_path + 'delete_me/somalier/family.ped', checkIfExists: true)
+ ]
+ input[1] = file(params.modules_testdata_base_path + 'delete_me/somalier/groups.txt', checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("[ delete_me, [] ], [] -stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'cohort', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'delete_me/somalier/normal.somalier', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'delete_me/somalier/tumour.somalier', checkIfExists: true)
+ ],
+ []
+ ]
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("[ delete_me, ped ], groups -stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'cohort', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'delete_me/somalier/normal.somalier', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'delete_me/somalier/tumour.somalier', checkIfExists: true)
+ ],
+ file(params.modules_testdata_base_path + 'delete_me/somalier/family.ped', checkIfExists: true)
+ ]
+ input[1] = file(params.modules_testdata_base_path + 'delete_me/somalier/groups.txt', checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/somalier/relate/tests/main.nf.test.snap b/modules/nf-core/somalier/relate/tests/main.nf.test.snap
new file mode 100644
index 00000000..54a73033
--- /dev/null
+++ b/modules/nf-core/somalier/relate/tests/main.nf.test.snap
@@ -0,0 +1,286 @@
+{
+ "[ delete_me, [] ], []": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.html:md5,03cac9b2c67a8a06f63e07f83ee11e18"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.pairs.tsv:md5,54d1e9fca1bf9d747d4254c6fa98edcf"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.samples.tsv:md5,97257d88886db1325c4d7d10cefa7169"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,59d805a9f89558414535c136c814bea6"
+ ],
+ "html": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.html:md5,03cac9b2c67a8a06f63e07f83ee11e18"
+ ]
+ ],
+ "pairs_tsv": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.pairs.tsv:md5,54d1e9fca1bf9d747d4254c6fa98edcf"
+ ]
+ ],
+ "samples_tsv": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.samples.tsv:md5,97257d88886db1325c4d7d10cefa7169"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,59d805a9f89558414535c136c814bea6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-02T05:29:21.162582556"
+ },
+ "[ delete_me, [] ], [] -stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,59d805a9f89558414535c136c814bea6"
+ ],
+ "html": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "pairs_tsv": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "samples_tsv": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,59d805a9f89558414535c136c814bea6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-02T05:29:43.887124223"
+ },
+ "[ delete_me, ped ], groups -stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,59d805a9f89558414535c136c814bea6"
+ ],
+ "html": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "pairs_tsv": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "samples_tsv": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,59d805a9f89558414535c136c814bea6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-02T05:29:55.034913513"
+ },
+ "[ delete_me, ped ], groups": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.html:md5,0d573016c9279ccdfdcfd4eb01d73b89"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.pairs.tsv:md5,8655714f1e5359329188e9f501168131"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.samples.tsv:md5,38ef93340e55fbeef47640abda9e48b0"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,59d805a9f89558414535c136c814bea6"
+ ],
+ "html": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.html:md5,0d573016c9279ccdfdcfd4eb01d73b89"
+ ]
+ ],
+ "pairs_tsv": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.pairs.tsv:md5,8655714f1e5359329188e9f501168131"
+ ]
+ ],
+ "samples_tsv": [
+ [
+ {
+ "id": "cohort",
+ "single_end": false
+ },
+ "cohort.samples.tsv:md5,38ef93340e55fbeef47640abda9e48b0"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,59d805a9f89558414535c136c814bea6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-02T05:29:32.451456985"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/somalier/relate/tests/tags.yml b/modules/nf-core/somalier/relate/tests/tags.yml
new file mode 100644
index 00000000..c80de730
--- /dev/null
+++ b/modules/nf-core/somalier/relate/tests/tags.yml
@@ -0,0 +1,2 @@
+somalier/relate:
+ - "modules/nf-core/somalier/relate/**"
diff --git a/modules/nf-core/splitubam/environment.yml b/modules/nf-core/splitubam/environment.yml
new file mode 100644
index 00000000..ef128202
--- /dev/null
+++ b/modules/nf-core/splitubam/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "splitubam"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::splitubam=0.1.1"
diff --git a/modules/nf-core/splitubam/main.nf b/modules/nf-core/splitubam/main.nf
new file mode 100644
index 00000000..3d413d69
--- /dev/null
+++ b/modules/nf-core/splitubam/main.nf
@@ -0,0 +1,53 @@
+process SPLITUBAM { // Runs the `splitubam` CLI on one BAM and emits the resulting *.bam files plus versions.yml
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/splitubam:0.1.1--hc9368f3_0':
+ 'biocontainers/splitubam:0.1.1--hc9368f3_0' }"
+
+ input:
+ tuple val(meta), path(bam) // meta map plus the BAM file handed to splitubam
+
+ output:
+ tuple val(meta), path("*.bam"), emit: bam // files matching *.bam, paired with the input meta map
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '' // extra CLI options from ext.args; empty string when unset
+ """
+ splitubam \\
+ $args \\
+ --threads $task.cpus \\
+ $bam
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ splitubam: \$(splitubam --version | sed 's/splitubam //')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: '' // same ext.args as the real script; only inspected for `--split N` here
+ def match = (args =~ /--split\s+(\d+)/) // search ext.args for `--split <digits>`
+ def create_cmd = "" // accumulates one `touch` command per placeholder file
+ if (match) {
+ def n_splits = match[0][1].toInteger() // first capture group of the first match = N
+ (1..n_splits).each { i ->
+ def formattedIteration = String.format('%03d', i) // zero-padded to three digits: 001, 002, ...
+ create_cmd += "touch ${formattedIteration}.${bam}.bam\n" // placeholder named <NNN>.<input file name>.bam
+ }
+ } else { error("No `--split N` detected in args") } // without N the stub cannot decide how many files to create
+ """
+ $create_cmd
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ splitubam: \$(splitubam --version | sed 's/splitubam //')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/splitubam/meta.yml b/modules/nf-core/splitubam/meta.yml
new file mode 100644
index 00000000..3d11eaae
--- /dev/null
+++ b/modules/nf-core/splitubam/meta.yml
@@ -0,0 +1,46 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "splitubam"
+description: split one ubam into multiple, per line, fast
+keywords:
+ - long-read
+ - bam
+ - genomics
+tools:
+ - "splitubam":
+ description: "Split one ubam into multiple, per line, fast"
+ homepage: "https://github.com/fellen31/splitubam"
+ documentation: "https://github.com/fellen31/splitubam"
+ tool_dev_url: "https://github.com/fellen31/splitubam"
+ licence: ["MIT"]
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1', single_end:true ]`
+ - bam:
+ type: file
+ description: (u)BAM file
+ pattern: "*.{bam}"
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1', single_end:false ]`
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - bam:
+ type: file
+ description: Split (u)BAM file
+ pattern: "*.{bam}"
+
+authors:
+ - "@fellen31"
+maintainers:
+ - "@fellen31"
diff --git a/modules/nf-core/splitubam/tests/main.nf.test b/modules/nf-core/splitubam/tests/main.nf.test
new file mode 100644
index 00000000..270df28b
--- /dev/null
+++ b/modules/nf-core/splitubam/tests/main.nf.test
@@ -0,0 +1,67 @@
+
+nextflow_process { // nf-test spec for the SPLITUBAM process: one real run and one stub run
+
+ name "Test Process SPLITUBAM"
+ script "../main.nf"
+ process "SPLITUBAM"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "splitubam"
+
+ test("sarscov2 - bam") {
+
+ config "./nextflow.config" // sets ext.args = '--split 2' for SPLITUBAM
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot( // snapshot header and reads MD5 of each of the two output BAMs, then the versions channel
+ bam(process.out.bam.get(0).get(1).get(0)).getHeader(),
+ bam(process.out.bam.get(0).get(1).get(0)).getReadsMD5(),
+ bam(process.out.bam.get(0).get(1).get(1)).getHeader(),
+ bam(process.out.bam.get(0).get(1).get(1)).getReadsMD5(),
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam - stub") {
+
+ options "-stub" // execute the process's stub: section instead of script:
+ config "./nextflow.config" // the stub needs `--split N` in ext.args, otherwise it raises an error
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() } // all output channels are snapshotted as-is
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/splitubam/tests/main.nf.test.snap b/modules/nf-core/splitubam/tests/main.nf.test.snap
new file mode 100644
index 00000000..df573efa
--- /dev/null
+++ b/modules/nf-core/splitubam/tests/main.nf.test.snap
@@ -0,0 +1,73 @@
+{
+ "sarscov2 - bam - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "001.test.paired_end.bam.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "002.test.paired_end.bam.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,78006e47ec8ddb5d6f098dcef4a3e099"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "001.test.paired_end.bam.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "002.test.paired_end.bam.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,78006e47ec8ddb5d6f098dcef4a3e099"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-23T13:07:19.115592832"
+ },
+ "sarscov2 - bam": {
+ "content": [
+ [
+ "@HD\tVN:1.6\tSO:unsorted",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@RG\tID:1\tLB:lib1\tPL:ILLUMINA\tSM:test\tPU:barcode1",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.17-r941\tCL:minimap2 -ax sr tests/data/fasta/sarscov2/GCA_011545545.1_ASM1154554v1_genomic.fna tests/data/fastq/dna/sarscov2_1.fastq.gz tests/data/fastq/dna/sarscov2_2.fastq.gz",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.11\tCL:samtools view -Sb sarscov2_aln.sam",
+ "@PG\tID:splitbam\tPN:splitbam\tVN:0.1.0\tCL:splitubam --split 2 --threads 2 test.paired_end.bam"
+ ],
+ "4933fd727ab0ca4e215dddee7de73a2c",
+ [
+ "@HD\tVN:1.6\tSO:unsorted",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@RG\tID:1\tLB:lib1\tPL:ILLUMINA\tSM:test\tPU:barcode1",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.17-r941\tCL:minimap2 -ax sr tests/data/fasta/sarscov2/GCA_011545545.1_ASM1154554v1_genomic.fna tests/data/fastq/dna/sarscov2_1.fastq.gz tests/data/fastq/dna/sarscov2_2.fastq.gz",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.11\tCL:samtools view -Sb sarscov2_aln.sam",
+ "@PG\tID:splitbam\tPN:splitbam\tVN:0.1.0\tCL:splitubam --split 2 --threads 2 test.paired_end.bam"
+ ],
+ "1a9e3bfa97c43dcbeba1ed01e51a6a54",
+ [
+ "versions.yml:md5,e5c9bb35328e8dcde2e934d9e6729fa6"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-23T13:07:07.013916943"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/splitubam/tests/nextflow.config b/modules/nf-core/splitubam/tests/nextflow.config
new file mode 100644
index 00000000..191f4bfb
--- /dev/null
+++ b/modules/nf-core/splitubam/tests/nextflow.config
@@ -0,0 +1,5 @@
+process { // test-only configuration for the splitubam module
+ withName: 'SPLITUBAM' {
+ ext.args = '--split 2' // the module's stub parses N from `--split N`; the tests expect two output BAMs
+ }
+}
diff --git a/modules/nf-core/splitubam/tests/tags.yml b/modules/nf-core/splitubam/tests/tags.yml
new file mode 100644
index 00000000..3a60af57
--- /dev/null
+++ b/modules/nf-core/splitubam/tests/tags.yml
@@ -0,0 +1,2 @@
+splitubam:
+ - "modules/nf-core/splitubam/**"
diff --git a/modules/nf-core/stranger/environment.yml b/modules/nf-core/stranger/environment.yml
new file mode 100644
index 00000000..530ce375
--- /dev/null
+++ b/modules/nf-core/stranger/environment.yml
@@ -0,0 +1,7 @@
+name: stranger
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::stranger=0.9.1
diff --git a/modules/nf-core/stranger/main.nf b/modules/nf-core/stranger/main.nf
new file mode 100644
index 00000000..923483f4
--- /dev/null
+++ b/modules/nf-core/stranger/main.nf
@@ -0,0 +1,47 @@
+process STRANGER { // Runs `stranger` on a VCF and writes its stdout as a gzip-compressed ${prefix}.vcf.gz
+ tag "$meta.id"
+ label 'process_low'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/stranger:0.9.1--pyh7e72e81_0':
+ 'biocontainers/stranger:0.9.1--pyh7e72e81_0' }"
+
+ input:
+ tuple val(meta), path(vcf) // meta map plus the VCF passed to stranger
+ tuple val(meta2), path(variant_catalog) // meta map plus repeats file; forwarded as --repeats-file when truthy
+
+ output:
+ tuple val(meta), path("*.gz"), emit: vcf // files matching *.gz, paired with the first meta map
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '' // extra CLI options from ext.args; empty string when unset
+ def prefix = task.ext.prefix ?: "${meta.id}" // output basename; falls back to meta.id
+ def options_variant_catalog = variant_catalog ? "--repeats-file $variant_catalog" : "" // flag is omitted when no catalog is supplied
+ """
+ stranger \\
+ $args \\
+ $vcf \\
+ $options_variant_catalog | gzip --no-name > ${prefix}.vcf.gz
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ stranger: \$( stranger --version )
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}" // same naming rule as the real script
+ """
+ echo "" | gzip > ${prefix}.vcf.gz
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ stranger: \$( stranger --version )
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/stranger/meta.yml b/modules/nf-core/stranger/meta.yml
new file mode 100644
index 00000000..5e0bc0bb
--- /dev/null
+++ b/modules/nf-core/stranger/meta.yml
@@ -0,0 +1,52 @@
+name: stranger
+description: Annotates output files from ExpansionHunter with the pathologic implications of the repeat sizes.
+keywords:
+ - STR
+ - repeat_expansions
+ - annotate
+ - vcf
+tools:
+ - stranger:
+ description: Annotate VCF files with STR variants
+ homepage: https://github.com/moonso/stranger
+ documentation: https://github.com/moonso/stranger
+ tool_dev_url: https://github.com/moonso/stranger
+ doi: "10.5281/zenodo.4548873"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - vcf:
+ type: file
+ description: VCF with repeat expansions
+ pattern: "*.{vcf.gz,vcf}"
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
+ - variant_catalog:
+ type: file
+ description: JSON file with repeat expansion sites to genotype
+ pattern: "*.{json}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - vcf:
+ type: file
+ description: annotated VCF with keys STR_STATUS, NormalMax and PathologicMin
+ pattern: "*.{vcf.gz}"
+authors:
+ - "@ljmesi"
+maintainers:
+ - "@ljmesi"
diff --git a/modules/nf-core/stranger/tests/main.nf.test b/modules/nf-core/stranger/tests/main.nf.test
new file mode 100644
index 00000000..13c61f43
--- /dev/null
+++ b/modules/nf-core/stranger/tests/main.nf.test
@@ -0,0 +1,82 @@
+nextflow_process { // nf-test spec for the STRANGER process: one real run and one stub run
+
+ name "Test Process STRANGER"
+ script "../main.nf"
+ process "STRANGER"
+ config "./nextflow.config" // sets ext.prefix = "stranger" for STRANGER
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "stranger"
+ tag "expansionhunter" // the setup block below runs the expansionhunter module
+
+ setup { // runs EXPANSIONHUNTER first; its `vcf` output is used as input[0] in both tests
+ run("EXPANSIONHUNTER") {
+ script "../../expansionhunter/main.nf"
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true)
+ ]
+ input[1] = [
+ [id:'fasta'],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [id:'fai'],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [id:'catalogue'],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/loci/repeat_expansions.json', checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+
+ test("homo_sapiens - vcf, repeat_catalogue") {
+
+ when {
+ process {
+ """
+ input[0] = EXPANSIONHUNTER.out.vcf
+ input[1] = [
+ [id:'catalogue'],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/loci/repeat_expansions.json', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assert snapshot(process.out).match() // all output channels are snapshotted as-is
+ }
+
+ }
+
+ test("homo_sapiens - vcf, repeat_catalogue - stub") {
+
+ options "-stub" // execute stub: sections instead of script:
+
+ when {
+ process {
+ """
+ input[0] = EXPANSIONHUNTER.out.vcf
+ input[1] = [
+ [id:'catalogue'],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/loci/repeat_expansions.json', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assert snapshot(process.out).match() // all output channels are snapshotted as-is
+ }
+ }
+}
diff --git a/modules/nf-core/stranger/tests/main.nf.test.snap b/modules/nf-core/stranger/tests/main.nf.test.snap
new file mode 100644
index 00000000..3faa4e58
--- /dev/null
+++ b/modules/nf-core/stranger/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+ "homo_sapiens - vcf, repeat_catalogue - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "stranger.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,68446c9e91e20825f7430db64e85aa7c"
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "stranger.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,68446c9e91e20825f7430db64e85aa7c"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-03T08:09:27.725425094"
+ },
+ "homo_sapiens - vcf, repeat_catalogue": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "stranger.vcf.gz:md5,e933cd71d9f9f146265b8e8dd90b712c"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,68446c9e91e20825f7430db64e85aa7c"
+ ],
+ "vcf": [
+ [
+ {
+ "id": "test"
+ },
+ "stranger.vcf.gz:md5,e933cd71d9f9f146265b8e8dd90b712c"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,68446c9e91e20825f7430db64e85aa7c"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-03T08:09:14.472394433"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/stranger/tests/nextflow.config b/modules/nf-core/stranger/tests/nextflow.config
new file mode 100644
index 00000000..98459330
--- /dev/null
+++ b/modules/nf-core/stranger/tests/nextflow.config
@@ -0,0 +1,5 @@
+process { // test-only configuration for the stranger module
+ withName: 'STRANGER' {
+ ext.prefix = "stranger" // the module names its output ${prefix}.vcf.gz, i.e. stranger.vcf.gz in the snapshots
+ }
+ }
diff --git a/modules/nf-core/stranger/tests/tags.yml b/modules/nf-core/stranger/tests/tags.yml
new file mode 100644
index 00000000..6d11ef42
--- /dev/null
+++ b/modules/nf-core/stranger/tests/tags.yml
@@ -0,0 +1,2 @@
+stranger:
+ - "modules/nf-core/stranger/**"
diff --git a/modules/nf-core/tabix/bgzip/environment.yml b/modules/nf-core/tabix/bgzip/environment.yml
new file mode 100644
index 00000000..56cc0fb1
--- /dev/null
+++ b/modules/nf-core/tabix/bgzip/environment.yml
@@ -0,0 +1,8 @@
+name: tabix_bgzip
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::tabix=1.11
+ - bioconda::htslib=1.20
diff --git a/modules/nf-core/tabix/bgzip/main.nf b/modules/nf-core/tabix/bgzip/main.nf
new file mode 100644
index 00000000..67991c74
--- /dev/null
+++ b/modules/nf-core/tabix/bgzip/main.nf
@@ -0,0 +1,55 @@
+process TABIX_BGZIP { // Compresses a plain file with bgzip, or decompresses a .gz/.bgz/.bgzf file, based on the input extension
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' :
+ 'biocontainers/htslib:1.20--h5efdd21_2' }"
+
+ input:
+ tuple val(meta), path(input) // meta map plus the file to compress or decompress
+
+ output:
+ tuple val(meta), path("${output}") , emit: output // name is computed in script:/stub: below
+ tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true // only present when an index was written next to the output
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '' // extra bgzip options from ext.args; may get `-I <name>` appended below
+ prefix = task.ext.prefix ?: "${meta.id}" // output basename; falls back to meta.id
+ in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) // a compressed-looking extension selects decompression
+ extension = in_bgzip ? input.getBaseName().tokenize(".")[-1] : input.getExtension() // for compressed input, the extension before the final one
+ output = in_bgzip ? "${prefix}.${extension}" : "${prefix}.${extension}.gz" // no `def`: also referenced by the output: block
+ command = in_bgzip ? '-d' : '' // -d makes bgzip decompress
+ // Name the index after the output unless a name was requested. `=~` searches within args; String.matches() only succeeded when the flag was the sole argument.
+ if (((args =~ "(^| )-i\\b") || (args =~ "(^| )--index(\$| )")) && !(args =~ "(^| )-I\\b") && !(args =~ "(^| )--index-name\\b")) {
+ args = args + " -I ${output}.gzi"
+ }
+ """
+ bgzip $command -c $args -@${task.cpus} $input > ${output}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ prefix = task.ext.prefix ?: "${meta.id}" // output basename; falls back to meta.id
+ in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) // same compressed-input test as in script:
+ output = in_bgzip ? input.getBaseName() : "${prefix}.${input.getExtension()}.gz" // NOTE(review): compressed input keeps the input's base name here, unlike script: which uses prefix
+
+ """
+ echo "" | gzip > ${output}
+ touch ${output}.gzi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/tabix/bgzip/meta.yml b/modules/nf-core/tabix/bgzip/meta.yml
new file mode 100644
index 00000000..621d49ea
--- /dev/null
+++ b/modules/nf-core/tabix/bgzip/meta.yml
@@ -0,0 +1,52 @@
+name: tabix_bgzip
+description: Compresses/decompresses files
+keywords:
+ - compress
+ - decompress
+ - bgzip
+ - tabix
+tools:
+ - bgzip:
+ description: |
+ Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip.
+ homepage: https://www.htslib.org/doc/tabix.html
+ documentation: http://www.htslib.org/doc/bgzip.html
+ doi: 10.1093/bioinformatics/btp352
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - input:
+ type: file
+ description: file to compress or to decompress
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - output:
+ type: file
+ description: Output compressed/decompressed file
+ pattern: "*."
+ - gzi:
+ type: file
+ description: Optional gzip index file for compressed inputs
+ pattern: "*.gzi"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@joseespinosa"
+ - "@drpatelh"
+ - "@maxulysse"
+ - "@nvnieuwk"
+maintainers:
+ - "@joseespinosa"
+ - "@drpatelh"
+ - "@maxulysse"
+ - "@nvnieuwk"
diff --git a/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config
new file mode 100644
index 00000000..6b6ff55f
--- /dev/null
+++ b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config
@@ -0,0 +1,5 @@
+process { // test-only configuration used by the TABIX_BGZIP index and stub tests
+ withName: TABIX_BGZIP {
+ ext.args = ' -i' // ask for an index; the module then appends `-I ${output}.gzi` so the index is named after the output
+ }
+}
diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test b/modules/nf-core/tabix/bgzip/tests/main.nf.test
new file mode 100644
index 00000000..d784aa07
--- /dev/null
+++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test
@@ -0,0 +1,111 @@
+nextflow_process { // nf-test spec for TABIX_BGZIP: compress, decompress, stub and indexed-compress cases
+
+ name "Test Process TABIX_BGZIP"
+ script "modules/nf-core/tabix/bgzip/main.nf"
+ process "TABIX_BGZIP"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "tabix"
+ tag "tabix/bgzip"
+
+ test("sarscov2_vcf_bgzip_compress") { // plain .vcf input, so the module compresses to <id>.vcf.gz
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'bgzip_test' ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert snapshot( // second, named snapshot holding only the output file name
+ file(process.out.output[0][1]).name
+ ).match("bgzip_test")
+ }
+ )
+ }
+ }
+
+ test("homo_genome_bedgz_compress") { // despite the name, a .gz input takes the module's decompress branch (output: bedgz_test.bed)
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'bedgz_test' ],
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed.gz', checkIfExists: true) ]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert snapshot( // second, named snapshot holding only the output file name
+ file(process.out.output[0][1]).name
+ ).match("bedgz_test")
+ }
+ )
+ }
+ }
+
+ test("sarscov2_vcf_bgzip_compress_stub") {
+ options '-stub' // execute the process's stub: section instead of script:
+ config "./bgzip_compress.config" // sets ext.args = ' -i'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:"test_stub" ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert snapshot( // second, named snapshot holding only the output file name
+ file(process.out.output[0][1]).name
+ ).match("test_stub")
+ }
+ )
+ }
+ }
+
+ test("sarscov2_vcf_bgzip_compress_gzi") {
+ config "./bgzip_compress.config" // sets ext.args = ' -i' so a .gzi index is emitted
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:"gzi_compress_test" ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) ]
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert snapshot( // second, named snapshot holding only the index file name
+ file(process.out.gzi[0][1]).name
+ ).match("gzi_compress_test")
+ }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap
new file mode 100644
index 00000000..0748143f
--- /dev/null
+++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap
@@ -0,0 +1,218 @@
+{
+ "gzi_compress_test": {
+ "content": [
+ "gzi_compress_test.vcf.gz.gzi"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-02-19T14:52:29.328146"
+ },
+ "homo_genome_bedgz_compress": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "bedgz_test"
+ },
+ "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+ "versions.yml:md5,753c29916d45debdde52f4ac7c745f61"
+ ],
+ "gzi": [
+
+ ],
+ "output": [
+ [
+ {
+ "id": "bedgz_test"
+ },
+ "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,753c29916d45debdde52f4ac7c745f61"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-19T11:28:34.159992362"
+ },
+ "test_stub": {
+ "content": [
+ "test_stub.vcf.gz"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-02-19T14:52:20.811489"
+ },
+ "sarscov2_vcf_bgzip_compress": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "bgzip_test"
+ },
+ "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+ "versions.yml:md5,753c29916d45debdde52f4ac7c745f61"
+ ],
+ "gzi": [
+
+ ],
+ "output": [
+ [
+ {
+ "id": "bgzip_test"
+ },
+ "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,753c29916d45debdde52f4ac7c745f61"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-19T11:28:22.087769106"
+ },
+ "sarscov2_vcf_bgzip_compress_gzi": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "gzi_compress_test"
+ },
+ "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "gzi_compress_test"
+ },
+ "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,753c29916d45debdde52f4ac7c745f61"
+ ],
+ "gzi": [
+ [
+ {
+ "id": "gzi_compress_test"
+ },
+ "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0"
+ ]
+ ],
+ "output": [
+ [
+ {
+ "id": "gzi_compress_test"
+ },
+ "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,753c29916d45debdde52f4ac7c745f61"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-19T11:28:57.15091665"
+ },
+ "bgzip_test": {
+ "content": [
+ "bgzip_test.vcf.gz"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-02-19T14:52:03.768295"
+ },
+ "bedgz_test": {
+ "content": [
+ "bedgz_test.bed"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-02-19T14:52:12.453855"
+ },
+ "sarscov2_vcf_bgzip_compress_stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test_stub"
+ },
+ "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test_stub"
+ },
+ "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,753c29916d45debdde52f4ac7c745f61"
+ ],
+ "gzi": [
+ [
+ {
+ "id": "test_stub"
+ },
+ "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "output": [
+ [
+ {
+ "id": "test_stub"
+ },
+ "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,753c29916d45debdde52f4ac7c745f61"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-19T11:28:45.219404786"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/tabix/bgzip/tests/tags.yml b/modules/nf-core/tabix/bgzip/tests/tags.yml
new file mode 100644
index 00000000..de0eec86
--- /dev/null
+++ b/modules/nf-core/tabix/bgzip/tests/tags.yml
@@ -0,0 +1,2 @@
+tabix/bgzip:
+ - "modules/nf-core/tabix/bgzip/**"
diff --git a/modules/nf-core/tabix/bgzip/tests/vcf_none.config b/modules/nf-core/tabix/bgzip/tests/vcf_none.config
new file mode 100644
index 00000000..f3a3c467
--- /dev/null
+++ b/modules/nf-core/tabix/bgzip/tests/vcf_none.config
@@ -0,0 +1,5 @@
+process { // test-only configuration for TABIX_BGZIP with no extra options
+ withName: TABIX_BGZIP {
+ ext.args = '' // explicitly empty: no additional bgzip arguments
+ }
+}
diff --git a/modules/nf-core/tabix/bgziptabix/environment.yml b/modules/nf-core/tabix/bgziptabix/environment.yml
index 30128876..b9644de9 100644
--- a/modules/nf-core/tabix/bgziptabix/environment.yml
+++ b/modules/nf-core/tabix/bgziptabix/environment.yml
@@ -1,10 +1,8 @@
name: tabix_bgziptabix
-
channels:
- conda-forge
- bioconda
- defaults
-
dependencies:
- - bioconda::htslib=1.19.1
- bioconda::tabix=1.11
+ - bioconda::htslib=1.20
diff --git a/modules/nf-core/tabix/bgziptabix/main.nf b/modules/nf-core/tabix/bgziptabix/main.nf
index bcdcf2a6..05041f49 100644
--- a/modules/nf-core/tabix/bgziptabix/main.nf
+++ b/modules/nf-core/tabix/bgziptabix/main.nf
@@ -4,8 +4,8 @@ process TABIX_BGZIPTABIX {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/htslib:1.19.1--h81da01d_1' :
- 'biocontainers/htslib:1.19.1--h81da01d_1' }"
+ 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' :
+ 'biocontainers/htslib:1.20--h5efdd21_2' }"
input:
tuple val(meta), path(input)
@@ -24,7 +24,7 @@ process TABIX_BGZIPTABIX {
def prefix = task.ext.prefix ?: "${meta.id}"
"""
bgzip --threads ${task.cpus} -c $args $input > ${prefix}.${input.getExtension()}.gz
- tabix $args2 ${prefix}.${input.getExtension()}.gz
+ tabix --threads ${task.cpus} $args2 ${prefix}.${input.getExtension()}.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test
index 87ea2c84..1a84d74f 100644
--- a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test
+++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test
@@ -17,7 +17,7 @@ nextflow_process {
"""
input[0] = [
[ id:'tbi_test' ],
- [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ]
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ]
]
"""
}
@@ -43,7 +43,7 @@ nextflow_process {
"""
input[0] = [
[ id:'csi_test' ],
- [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ]
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ]
]
"""
}
@@ -72,7 +72,7 @@ nextflow_process {
"""
input[0] = [
[ id:'test' ],
- [ file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) ]
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) ]
]
"""
}
diff --git a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap
index fcecb2e4..c166ea72 100644
--- a/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap
+++ b/modules/nf-core/tabix/bgziptabix/tests/main.nf.test.snap
@@ -8,14 +8,14 @@
"id": "tbi_test"
},
"tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74",
- "tbi_test.bed.gz.tbi:md5,24908545311cf2b7c803c41d716872c4"
+ "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c"
]
],
"1": [
],
"2": [
- "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc"
+ "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa"
],
"gz_csi": [
@@ -26,15 +26,19 @@
"id": "tbi_test"
},
"tbi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74",
- "tbi_test.bed.gz.tbi:md5,24908545311cf2b7c803c41d716872c4"
+ "tbi_test.bed.gz.tbi:md5,ca06caf88b1e3c67d5fcba0a1460b52c"
]
],
"versions": [
- "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc"
+ "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa"
]
}
],
- "timestamp": "2024-02-19T14:50:51.513838"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-19T11:29:16.053817543"
},
"sarscov2_bed_csi": {
"content": [
@@ -48,11 +52,11 @@
"id": "csi_test"
},
"csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74",
- "csi_test.bed.gz.csi:md5,e06165ddd34640783728cb07f2558b43"
+ "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5"
]
],
"2": [
- "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc"
+ "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa"
],
"gz_csi": [
[
@@ -60,35 +64,51 @@
"id": "csi_test"
},
"csi_test.bed.gz:md5,fe4053cf4de3aebbdfc3be2efb125a74",
- "csi_test.bed.gz.csi:md5,e06165ddd34640783728cb07f2558b43"
+ "csi_test.bed.gz.csi:md5,c9c0377de58fdc89672bb3005a0d69f5"
]
],
"gz_tbi": [
],
"versions": [
- "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc"
+ "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa"
]
}
],
- "timestamp": "2024-02-19T14:51:00.513777"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-19T11:29:27.667745444"
},
"csi_test": {
"content": [
"csi_test.bed.gz"
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
"timestamp": "2024-02-19T14:51:00.548801"
},
"csi_stub": {
"content": [
"test.bed.gz"
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
"timestamp": "2024-02-19T14:51:09.218454"
},
"tbi_test": {
"content": [
"tbi_test.bed.gz"
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
"timestamp": "2024-02-19T14:50:51.579654"
},
"sarscov2_bed_csi_stub": {
@@ -113,7 +133,7 @@
]
],
"2": [
- "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc"
+ "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa"
],
"gz_csi": [
[
@@ -134,10 +154,14 @@
]
],
"versions": [
- "versions.yml:md5,b4765e4d896ce4a4cdd6c896d12555fc"
+ "versions.yml:md5,736e7c3b16a3ac525253e5b5f5d8fdfa"
]
}
],
- "timestamp": "2024-02-19T14:51:09.164254"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-19T11:29:45.105209991"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/tabix/tabix/environment.yml b/modules/nf-core/tabix/tabix/environment.yml
index a07ad9d1..4d1f9dd4 100644
--- a/modules/nf-core/tabix/tabix/environment.yml
+++ b/modules/nf-core/tabix/tabix/environment.yml
@@ -6,5 +6,5 @@ channels:
- defaults
dependencies:
- - bioconda::htslib=1.19.1
+ - bioconda::htslib=1.20
- bioconda::tabix=1.11
diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf
index 1737141d..13acd670 100644
--- a/modules/nf-core/tabix/tabix/main.nf
+++ b/modules/nf-core/tabix/tabix/main.nf
@@ -4,8 +4,8 @@ process TABIX_TABIX {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/htslib:1.19.1--h81da01d_1' :
- 'biocontainers/htslib:1.19.1--h81da01d_1' }"
+ 'https://depot.galaxyproject.org/singularity/htslib:1.20--h5efdd21_2' :
+ 'biocontainers/htslib:1.20--h5efdd21_2' }"
input:
tuple val(meta), path(tab)
@@ -21,7 +21,10 @@ process TABIX_TABIX {
script:
def args = task.ext.args ?: ''
"""
- tabix $args $tab
+ tabix \\
+ --threads $task.cpus \\
+ $args \\
+ $tab
cat <<-END_VERSIONS > versions.yml
"${task.process}":
@@ -33,8 +36,8 @@ process TABIX_TABIX {
"""
touch ${tab}.tbi
touch ${tab}.csi
- cat <<-END_VERSIONS > versions.yml
+ cat <<-END_VERSIONS > versions.yml
"${task.process}":
tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//')
END_VERSIONS
diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test
index 3a150c70..102b0d7b 100644
--- a/modules/nf-core/tabix/tabix/tests/main.nf.test
+++ b/modules/nf-core/tabix/tabix/tests/main.nf.test
@@ -16,7 +16,7 @@ nextflow_process {
"""
input[0] = [
[ id:'tbi_bed' ],
- [ file(params.test_data['sarscov2']['genome']['test_bed_gz'], checkIfExists: true) ]
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) ]
]
"""
}
@@ -25,11 +25,10 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
- { assert snapshot(process.out).match() },
{ assert snapshot(
- file(process.out.tbi[0][1]).name
- ).match("tbi_bed")
- }
+ process.out,
+ file(process.out.tbi[0][1]).name
+ ).match() }
)
}
}
@@ -41,7 +40,7 @@ nextflow_process {
"""
input[0] = [
[ id:'tbi_gff' ],
- [ file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true) ]
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true) ]
]
"""
}
@@ -50,11 +49,9 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
- { assert snapshot(process.out).match() },
{ assert snapshot(
- file(process.out.tbi[0][1]).name
- ).match("tbi_gff")
- }
+ process.out,
+ file(process.out.tbi[0][1]).name).match() }
)
}
@@ -67,7 +64,7 @@ nextflow_process {
"""
input[0] = [
[ id:'tbi_vcf' ],
- [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ]
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ]
]
"""
}
@@ -76,11 +73,10 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
- { assert snapshot(process.out).match() },
{ assert snapshot(
- file(process.out.tbi[0][1]).name
- ).match("tbi_vcf")
- }
+ process.out,
+ file(process.out.tbi[0][1]).name
+ ).match() }
)
}
@@ -93,7 +89,7 @@ nextflow_process {
"""
input[0] = [
[ id:'vcf_csi' ],
- [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ]
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ]
]
"""
}
@@ -102,11 +98,10 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
- { assert snapshot(process.out).match() },
{ assert snapshot(
- file(process.out.csi[0][1]).name
- ).match("vcf_csi")
- }
+ process.out,
+ file(process.out.csi[0][1]).name
+ ).match() }
)
}
@@ -120,7 +115,7 @@ nextflow_process {
"""
input[0] = [
[ id:'vcf_csi_stub' ],
- [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ]
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) ]
]
"""
}
@@ -129,11 +124,10 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
- { assert snapshot(process.out).match() },
{ assert snapshot(
- file(process.out.csi[0][1]).name
- ).match("vcf_csi_stub")
- }
+ process.out,
+ file(process.out.csi[0][1]).name
+ ).match() }
)
}
diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap
index 034e38b6..c2b9ed0b 100644
--- a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap
+++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap
@@ -1,16 +1,4 @@
{
- "vcf_csi_stub": {
- "content": [
- "test.vcf.gz.csi"
- ],
- "timestamp": "2024-03-04T14:51:59.788002"
- },
- "tbi_gff": {
- "content": [
- "genome.gff3.gz.tbi"
- ],
- "timestamp": "2024-02-19T14:53:37.420216"
- },
"sarscov2_gff_tbi": {
"content": [
{
@@ -19,14 +7,14 @@
{
"id": "tbi_gff"
},
- "genome.gff3.gz.tbi:md5,53fc683fd217aae47ef10d23c52a9178"
+ "genome.gff3.gz.tbi:md5,f79a67d95a98076e04fbe0455d825926"
]
],
"1": [
],
"2": [
- "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3"
+ "versions.yml:md5,07064637fb8a217174052be8e40234e2"
],
"csi": [
@@ -36,15 +24,20 @@
{
"id": "tbi_gff"
},
- "genome.gff3.gz.tbi:md5,53fc683fd217aae47ef10d23c52a9178"
+ "genome.gff3.gz.tbi:md5,f79a67d95a98076e04fbe0455d825926"
]
],
"versions": [
- "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3"
+ "versions.yml:md5,07064637fb8a217174052be8e40234e2"
]
- }
+ },
+ "genome.gff3.gz.tbi"
],
- "timestamp": "2024-02-19T14:53:37.388157"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-19T12:06:25.653807564"
},
"sarscov2_bedgz_tbi": {
"content": [
@@ -54,14 +47,14 @@
{
"id": "tbi_bed"
},
- "test.bed.gz.tbi:md5,0f17d85e7f0a042b2aa367b70df224f8"
+ "test.bed.gz.tbi:md5,9a761d51cc81835fd1199201fdbcdd5d"
]
],
"1": [
],
"2": [
- "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3"
+ "versions.yml:md5,07064637fb8a217174052be8e40234e2"
],
"csi": [
@@ -71,27 +64,20 @@
{
"id": "tbi_bed"
},
- "test.bed.gz.tbi:md5,0f17d85e7f0a042b2aa367b70df224f8"
+ "test.bed.gz.tbi:md5,9a761d51cc81835fd1199201fdbcdd5d"
]
],
"versions": [
- "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3"
+ "versions.yml:md5,07064637fb8a217174052be8e40234e2"
]
- }
- ],
- "timestamp": "2024-02-19T14:53:28.879408"
- },
- "tbi_vcf": {
- "content": [
- "test.vcf.gz.tbi"
- ],
- "timestamp": "2024-02-19T14:53:46.402522"
- },
- "vcf_csi": {
- "content": [
- "test.vcf.gz.csi"
+ },
+ "test.bed.gz.tbi"
],
- "timestamp": "2024-02-19T14:53:54.921189"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-19T12:06:09.754082161"
},
"sarscov2_vcf_tbi": {
"content": [
@@ -101,14 +87,14 @@
{
"id": "tbi_vcf"
},
- "test.vcf.gz.tbi:md5,897f3f378a811b90e6dee56ce08d2bcf"
+ "test.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e"
]
],
"1": [
],
"2": [
- "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3"
+ "versions.yml:md5,07064637fb8a217174052be8e40234e2"
],
"csi": [
@@ -118,15 +104,20 @@
{
"id": "tbi_vcf"
},
- "test.vcf.gz.tbi:md5,897f3f378a811b90e6dee56ce08d2bcf"
+ "test.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e"
]
],
"versions": [
- "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3"
+ "versions.yml:md5,07064637fb8a217174052be8e40234e2"
]
- }
+ },
+ "test.vcf.gz.tbi"
],
- "timestamp": "2024-02-19T14:53:46.370358"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-19T12:06:40.042648294"
},
"sarscov2_vcf_csi_stub": {
"content": [
@@ -148,7 +139,7 @@
]
],
"2": [
- "versions.yml:md5,3d45df6d80883bad358631069a2940fd"
+ "versions.yml:md5,07064637fb8a217174052be8e40234e2"
],
"csi": [
[
@@ -167,11 +158,16 @@
]
],
"versions": [
- "versions.yml:md5,3d45df6d80883bad358631069a2940fd"
+ "versions.yml:md5,07064637fb8a217174052be8e40234e2"
]
- }
+ },
+ "test.vcf.gz.csi"
],
- "timestamp": "2024-03-04T14:51:59.766184"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-19T12:07:08.700367261"
},
"sarscov2_vcf_csi": {
"content": [
@@ -184,34 +180,33 @@
{
"id": "vcf_csi"
},
- "test.vcf.gz.csi:md5,0731ad6f40104d2bbb1a2cc478ef8f03"
+ "test.vcf.gz.csi:md5,04b41c1efd9ab3c6b1e008a286e27d2b"
]
],
"2": [
- "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3"
+ "versions.yml:md5,07064637fb8a217174052be8e40234e2"
],
"csi": [
[
{
"id": "vcf_csi"
},
- "test.vcf.gz.csi:md5,0731ad6f40104d2bbb1a2cc478ef8f03"
+ "test.vcf.gz.csi:md5,04b41c1efd9ab3c6b1e008a286e27d2b"
]
],
"tbi": [
],
"versions": [
- "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3"
+ "versions.yml:md5,07064637fb8a217174052be8e40234e2"
]
- }
- ],
- "timestamp": "2024-02-19T14:53:54.886876"
- },
- "tbi_bed": {
- "content": [
- "test.bed.gz.tbi"
+ },
+ "test.vcf.gz.csi"
],
- "timestamp": "2024-02-19T14:53:28.947628"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-19T12:06:55.362067748"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml
index 0c9cbb10..4f498244 100644
--- a/modules/nf-core/untar/environment.yml
+++ b/modules/nf-core/untar/environment.yml
@@ -1,11 +1,9 @@
name: untar
-
channels:
- conda-forge
- bioconda
- defaults
-
dependencies:
- conda-forge::grep=3.11
- - conda-forge::sed=4.7
+ - conda-forge::sed=4.8
- conda-forge::tar=1.34
diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf
index 8a75bb95..9bd8f554 100644
--- a/modules/nf-core/untar/main.nf
+++ b/modules/nf-core/untar/main.nf
@@ -4,8 +4,8 @@ process UNTAR {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
- 'nf-core/ubuntu:20.04' }"
+ 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' :
+ 'nf-core/ubuntu:22.04' }"
input:
tuple val(meta), path(archive)
@@ -52,8 +52,29 @@ process UNTAR {
stub:
prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, ""))
"""
- mkdir $prefix
- touch ${prefix}/file.txt
+ mkdir ${prefix}
+ ## Dry-run untarring the archive to list its files, then create them all under prefix
+ if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then
+ for i in `tar -tf ${archive}`;
+ do
+ if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]];
+ then
+ touch \${i}
+ else
+ mkdir -p \${i}
+ fi
+ done
+ else
+ for i in `tar -tf ${archive}`;
+ do
+ if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]];
+ then
+ touch ${prefix}/\${i}
+ else
+ mkdir -p ${prefix}/\${i}
+ fi
+ done
+ fi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test
index 2a7c97bf..c957517a 100644
--- a/modules/nf-core/untar/tests/main.nf.test
+++ b/modules/nf-core/untar/tests/main.nf.test
@@ -6,6 +6,7 @@ nextflow_process {
tag "modules"
tag "modules_nfcore"
tag "untar"
+
test("test_untar") {
when {
@@ -19,10 +20,9 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
- { assert snapshot(process.out.untar).match("test_untar") },
+ { assert snapshot(process.out).match() },
)
}
-
}
test("test_untar_onlyfiles") {
@@ -38,10 +38,48 @@ nextflow_process {
then {
assertAll (
{ assert process.success },
- { assert snapshot(process.out.untar).match("test_untar_onlyfiles") },
+ { assert snapshot(process.out).match() },
)
}
+ }
+
+ test("test_untar - stub") {
+
+ options "-stub"
+ when {
+ process {
+ """
+ input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ )
+ }
}
+ test("test_untar_onlyfiles - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ )
+ }
+ }
}
diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap
index 64550292..ceb91b79 100644
--- a/modules/nf-core/untar/tests/main.nf.test.snap
+++ b/modules/nf-core/untar/tests/main.nf.test.snap
@@ -1,42 +1,158 @@
{
"test_untar_onlyfiles": {
"content": [
- [
- [
+ {
+ "0": [
[
-
- ],
+ [
+
+ ],
+ [
+ "hello.txt:md5,e59ff97941044f85df5297e1c302d260"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,6063247258c56fd271d076bb04dd7536"
+ ],
+ "untar": [
+ [
+ [
+
+ ],
+ [
+ "hello.txt:md5,e59ff97941044f85df5297e1c302d260"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,6063247258c56fd271d076bb04dd7536"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-07-10T12:04:28.231047"
+ },
+ "test_untar_onlyfiles - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ [
+
+ ],
+ [
+ "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,6063247258c56fd271d076bb04dd7536"
+ ],
+ "untar": [
[
- "hello.txt:md5,e59ff97941044f85df5297e1c302d260"
+ [
+
+ ],
+ [
+ "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
]
+ ],
+ "versions": [
+ "versions.yml:md5,6063247258c56fd271d076bb04dd7536"
]
- ]
+ }
],
"meta": {
"nf-test": "0.8.4",
- "nextflow": "23.10.1"
+ "nextflow": "24.04.3"
},
- "timestamp": "2024-02-28T11:49:41.320643"
+ "timestamp": "2024-07-10T12:04:45.773103"
+ },
+ "test_untar - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ [
+
+ ],
+ [
+ "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,6063247258c56fd271d076bb04dd7536"
+ ],
+ "untar": [
+ [
+ [
+
+ ],
+ [
+ "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,6063247258c56fd271d076bb04dd7536"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-07-10T12:04:36.777441"
},
"test_untar": {
"content": [
- [
- [
+ {
+ "0": [
[
-
- ],
+ [
+
+ ],
+ [
+ "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9",
+ "opts.k2d:md5,a033d00cf6759407010b21700938f543",
+ "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,6063247258c56fd271d076bb04dd7536"
+ ],
+ "untar": [
[
- "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9",
- "opts.k2d:md5,a033d00cf6759407010b21700938f543",
- "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c"
+ [
+
+ ],
+ [
+ "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9",
+ "opts.k2d:md5,a033d00cf6759407010b21700938f543",
+ "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c"
+ ]
]
+ ],
+ "versions": [
+ "versions.yml:md5,6063247258c56fd271d076bb04dd7536"
]
- ]
+ }
],
"meta": {
"nf-test": "0.8.4",
- "nextflow": "23.10.1"
+ "nextflow": "24.04.3"
},
- "timestamp": "2024-02-28T11:49:33.795172"
+ "timestamp": "2024-07-10T12:04:19.377674"
}
}
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 1806b025..44e6630e 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -12,24 +12,33 @@ params {
// Input options
input = null
bed = null
- dipcall_par = null
- extra_snfs = null
- extra_gvcfs = null
+ cadd_resources = null
+ cadd_prescored = null
+ par_regions = null
tandem_repeats = null
trgt_repeats = null
+ variant_catalog = null
+ reduced_penetrance = null
+ score_config_snv = null
snp_db = null
+ variant_consequences_snv = null
vep_cache = null
+ vep_plugin_files = null
hificnv_xy = null
hificnv_xx = null
hificnv_exclude = null
somalier_sites = null
+ deepvariant_model_type = params.preset == 'ONT_R10' ? 'ONT_R104' : 'PACBIO'
// Skip parts of the workflow
+ skip_aligned_read_qc = false
skip_raw_read_qc = false
- skip_assembly_wf = false
+ skip_assembly_wf = params.preset == 'ONT_R10' ? true : false
skip_mapping_wf = false
- skip_methylation_wf = false
- skip_repeat_wf = false
+ skip_methylation_wf = params.preset == 'pacbio' ? true : false
+ skip_rank_variants = false
+ skip_repeat_calling = params.preset == 'ONT_R10' ? true : false
+ skip_repeat_annotation = params.preset == 'ONT_R10' ? true : false
skip_phasing_wf = false
skip_short_variant_calling = false
skip_snv_annotation = false
@@ -45,7 +54,7 @@ params {
// Preprocessing/parallelisation
parallel_snv = 13
- split_fastq = 0
+ parallel_alignments = 1
// References
fasta = null
@@ -279,23 +288,28 @@ manifest {
description = """Long-read variant calling pipeline"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
- version = '0.2.0'
+ version = '0.3.0'
doi = ''
}
// Load modules.config for DSL2 module specific options
includeConfig 'conf/base.config'
+includeConfig 'conf/modules/annotate_cadd.config'
+includeConfig 'conf/modules/annotate_consequence_pli.config'
+includeConfig 'conf/modules/annotate_repeat_expansions.config'
includeConfig 'conf/modules/general.config'
includeConfig 'conf/modules/bam_infer_sex.config'
-includeConfig 'conf/modules/bam_to_fastq.config'
includeConfig 'conf/modules/call_paralogs.config'
+includeConfig 'conf/modules/call_repeat_expansions.config'
+includeConfig 'conf/modules/convert_input_files.config'
includeConfig 'conf/modules/assembly_variant_calling.config'
includeConfig 'conf/modules/genome_assembly.config'
includeConfig 'conf/modules/methylation.config'
includeConfig 'conf/modules/phasing.config'
includeConfig 'conf/modules/prepare_genome.config'
-includeConfig 'conf/modules/qc.config'
-includeConfig 'conf/modules/repeat_calling.config'
+includeConfig 'conf/modules/qc_aligned_reads.config'
+includeConfig 'conf/modules/scatter_genome.config'
+includeConfig 'conf/modules/rank_variants.config'
includeConfig 'conf/modules/short_variant_calling.config'
includeConfig 'conf/modules/snv_annotation.config'
includeConfig 'conf/modules/structural_variant_calling.config'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index f0601054..8e83ba04 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -8,55 +8,82 @@
"workflow_skip_options": {
"title": "Workflow skip options",
"type": "object",
- "description": "Options to skip various steps within the workflow",
+ "description": "Allows skipping certain parts of the pipeline",
"default": "",
"properties": {
+ "skip_aligned_read_qc": {
+ "type": "boolean",
+ "fa_icon": "fas fa-fast-forward",
+ "description": "Skip QC of aligned reads",
+ "default": false
+ },
"skip_raw_read_qc": {
"type": "boolean",
"fa_icon": "fas fa-fast-forward",
- "description": "Skip raw read QC"
+ "description": "Skip QC of unaligned (raw) reads",
+ "default": false
},
"skip_short_variant_calling": {
"type": "boolean",
"description": "Skip short variant calling",
- "fa_icon": "fas fa-fast-forward"
+ "fa_icon": "fas fa-fast-forward",
+ "default": false
},
"skip_assembly_wf": {
"type": "boolean",
- "description": "Skip assembly and downstream processes",
- "fa_icon": "fas fa-fast-forward"
+ "description": "Skip genome assembly and assembly variant calling",
+ "fa_icon": "fas fa-fast-forward",
+ "default": false
},
"skip_mapping_wf": {
"type": "boolean",
- "description": "Skip read mapping and downstream processes",
- "fa_icon": "fas fa-fast-forward"
+ "description": "Skip read mapping (alignment)",
+ "fa_icon": "fas fa-fast-forward",
+ "default": false
},
"skip_methylation_wf": {
"type": "boolean",
- "description": "Skip methylation workflow",
- "fa_icon": "fas fa-fast-forward"
+ "description": "Skip generation of methylation pileups",
+ "fa_icon": "fas fa-fast-forward",
+ "default": false
},
- "skip_repeat_wf": {
+ "skip_repeat_calling": {
"type": "boolean",
- "description": "Skip repeat analysis workflow",
- "fa_icon": "fas fa-fast-forward"
+ "description": "Skip tandem repeat calling",
+ "fa_icon": "fas fa-fast-forward",
+ "default": false
+ },
+ "skip_repeat_annotation": {
+ "type": "boolean",
+ "description": "Skip tandem repeat annotation",
+ "fa_icon": "fas fa-fast-forward",
+ "default": false
},
"skip_phasing_wf": {
"type": "boolean",
"fa_icon": "fas fa-fast-forward",
- "description": "Skip phasing workflow"
+ "description": "Skip phasing of variants and haplotagging of reads",
+ "default": false
},
"skip_snv_annotation": {
"type": "boolean",
- "description": "Skip SNV annotation"
+ "description": "Skip short variant annotation",
+ "default": false
},
"skip_cnv_calling": {
"type": "boolean",
- "description": "Skip CNV workflow"
+ "description": "Skip CNV calling",
+ "default": false
},
"skip_call_paralogs": {
"type": "boolean",
- "description": "Skip call paralogs (Paraphase)"
+ "description": "Skip the calling of specific paralogous genes",
+ "default": false
+ },
+ "skip_rank_variants": {
+ "type": "boolean",
+ "description": "Skip ranking of short variants",
+ "default": false
}
},
"fa_icon": "fas fa-american-sign-language-interpreting"
@@ -325,92 +352,105 @@
"workflow_options": {
"title": "Workflow options",
"type": "object",
- "description": "",
+ "description": "Workflow options specific to genomic-medicine-sweden/nallo",
"default": "",
"properties": {
"preset": {
"type": "string",
"default": "revio",
- "description": "Choose a preset depending on data type",
+ "description": "Enable or disable certain parts of the pipeline by default, depending on data type (`revio`, `pacbio`, `ONT_R10`)",
"enum": ["revio", "pacbio", "ONT_R10"]
},
"variant_caller": {
"type": "string",
"default": "deepvariant",
- "description": "Choose variant caller",
+ "description": "Which short variant calling software to use (`deepvariant`)",
"enum": ["deepvariant"]
},
"phaser": {
"type": "string",
"default": "whatshap",
- "description": "Choose phasing software",
+ "description": "Which phasing software to use (`whatshap`, `hiphase_snv`, `hiphase_sv`)",
"enum": ["whatshap", "hiphase_snv", "hiphase_sv"]
},
"hifiasm_mode": {
"type": "string",
- "description": "Run hifiasm in hifi-only or hifi-trio mode",
+ "description": "Run hifiasm in hifi-only or hifi-trio mode (`hifi-only`, `trio-binning`)",
"enum": ["hifi-only", "trio-binning"],
"default": "hifi-only"
},
- "split_fastq": {
+ "parallel_alignments": {
"type": "integer",
- "default": 0,
- "minimum": 0,
- "description": "Split alignment into n processes"
+ "minimum": 1,
+ "default": 1,
+ "description": "If parallel_alignments is bigger than 1, input files will be split and aligned in parallel to reduce processing time."
},
"parallel_snv": {
"type": "integer",
"default": 13,
- "description": "Split SNV calling into n chunks",
+ "description": "If parallel_snv is bigger than 1, short variant calling will be done in parallel to reduce processing time.",
"minimum": 1
},
"vep_cache_version": {
"type": "integer",
"default": 110,
"description": "VEP cache version"
+ },
+ "vep_plugin_files": {
+ "type": "string",
+ "mimetype": "text/csv",
+ "description": "A csv file with paths to vep plugin files. pLI and LoFtool are required.",
+ "schema": "assets/vep_plugin_files_schema.json"
+ },
+ "deepvariant_model_type": {
+ "type": "string",
+ "default": "PACBIO",
+ "description": "Sets the model type used for DeepVariant. This is set automatically using `--preset` by default.",
+ "hidden": true,
+ "enum": ["PACBIO", "ONT_R104"]
}
},
"required": ["preset"]
},
- "extra_file_inputs": {
- "title": "Extra file inputs",
+ "file_inputs": {
+ "title": "File inputs",
"type": "object",
- "description": "Different processes may need extra input files",
+ "description": "The different files that are required. Some are only required by certain workflows, see the usage documentation.",
"default": "",
"fa_icon": "fas fa-copy",
"properties": {
- "dipcall_par": {
+ "cadd_prescored": {
"type": "string",
- "description": "Provide a bed file of chrX PAR regions for dipcall",
- "format": "file-path",
- "exists": true
+ "exists": true,
+ "format": "directory-path",
+ "fa_icon": "fas fa-file",
+ "description": "Path to a directory containing prescored indels for CADD.",
+ "help_text": "This folder contains the compressed files and indexes that would otherwise be in data/prescored folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation."
},
- "extra_gvcfs": {
+ "cadd_resources": {
"type": "string",
- "description": "Extra input files for GLNexus",
- "pattern": "^\\S+\\.csv$",
- "format": "file-path",
- "schema": "assets/schema_gvcfs.json",
- "exists": true
+ "exists": true,
+ "format": "directory-path",
+ "fa_icon": "fas fa-file",
+ "description": "Path to a directory containing CADD annotations.",
+ "help_text": "This folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation."
},
- "extra_snfs": {
+ "par_regions": {
"type": "string",
- "description": "Extra input files for Sniffles",
- "pattern": "^\\S+\\.csv$",
+ "description": "Provide a bed file of chrX and chrY PAR regions for dipcall and DeepVariant",
"format": "file-path",
- "schema": "assets/schema_snfs.json",
"exists": true
},
"tandem_repeats": {
"type": "string",
"format": "file-path",
- "description": "Tandem repeat BED-file for sniffles",
+ "description": "A tandem repeat BED file for sniffles",
"pattern": "^\\S+\\.bed$",
"exists": true
},
"trgt_repeats": {
"type": "string",
- "description": "BED-file for repeats to be genotyped",
+ "description": "A BED file with repeats to be genotyped with TRGT",
"format": "file-path",
"exists": true
},
@@ -420,12 +460,24 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "/assets/schema_snpdb.json",
- "description": "Extra echtvar-databases to annotate SNVs with",
+ "description": "A csv file with echtvar databases to annotate SNVs with",
+ "exists": true
+ },
+ "variant_catalog": {
+ "type": "string",
+ "description": "A variant catalog json-file for stranger",
+ "format": "file-path",
"exists": true
},
+ "variant_consequences_snv": {
+ "type": "string",
+ "description": "File containing list of SO terms listed in the order of severity from most severe to least severe for annotating genomic SNVs.",
+ "help_text": "For more information check https://ensembl.org/info/genome/variation/prediction/predicted_data.html",
+ "fa_icon": "fas fa-file-csv"
+ },
"vep_cache": {
"type": "string",
- "description": "Path to directory of vep_cache",
+ "description": "A path to the VEP cache location",
"format": "path",
"exists": true
},
@@ -433,29 +485,45 @@
"type": "string",
"pattern": "^\\S+\\.bed$",
"format": "file-path",
- "description": "BED file with regions of interest",
+ "description": "A BED file with regions of interest, used to limit short variant calling.",
"exists": true
},
"hificnv_xy": {
"type": "string",
"format": "file-path",
+ "description": "A BED file containing expected copy number regions for XY samples.",
"exists": true
},
"hificnv_xx": {
"type": "string",
"format": "file-path",
+ "description": "A BED file containing expected copy number regions for XX samples.",
"exists": true
},
"hificnv_exclude": {
"type": "string",
"format": "file-path",
- "description": "HiFiCNV BED file specifying regions to exclude",
+ "description": "A BED file specifying regions to exclude with HiFiCNV, such as centromeres.",
"exists": true
},
+ "reduced_penetrance": {
+ "type": "string",
+ "exists": true,
+ "format": "path",
+ "fa_icon": "fas fa-file-csv",
+ "description": "A file with gene ids that have reduced penetrance. For use with genmod."
+ },
+ "score_config_snv": {
+ "type": "string",
+ "exists": true,
+ "format": "path",
+ "fa_icon": "fas fa-file",
+ "description": "A SNV rank model config file for genmod."
+ },
"somalier_sites": {
"type": "string",
"pattern": "^\\S+\\.vcf(\\.gz)?$",
- "description": "A VCF of known polymorphic sites",
+ "description": "A VCF of known polymorphic sites for somalier",
"format": "file-path",
"exists": true
},
@@ -464,7 +532,7 @@
"fa_icon": "far fa-check-circle",
"description": "Validation of parameters fails when an unrecognised parameter is found.",
"hidden": true,
- "help_text": "By default, when an unrecognised parameter is found, it returns a warinig."
+ "help_text": "By default, when an unrecognised parameter is found, it returns a warning."
},
"validationLenientMode": {
"type": "boolean",
@@ -506,7 +574,7 @@
"$ref": "#/definitions/workflow_options"
},
{
- "$ref": "#/definitions/extra_file_inputs"
+ "$ref": "#/definitions/file_inputs"
}
]
}
diff --git a/nf-test.config b/nf-test.config
new file mode 100644
index 00000000..67cadb45
--- /dev/null
+++ b/nf-test.config
@@ -0,0 +1,13 @@
+config {
+ plugins {
+ load "nft-bam@0.3.0"
+ }
+ stage {
+ copy './nextflow_schema.json'
+ }
+ testsDir "."
+ workDir ".nf-test"
+ configFile "tests/nextflow.config"
+ profile ""
+
+}
diff --git a/subworkflows/local/annotate_cadd/main.nf b/subworkflows/local/annotate_cadd/main.nf
new file mode 100644
index 00000000..9a1dc047
--- /dev/null
+++ b/subworkflows/local/annotate_cadd/main.nf
@@ -0,0 +1,76 @@
+//
+// A subworkflow to annotate snvs
+//
+
+include { BCFTOOLS_ANNOTATE as ANNOTATE_INDELS } from '../../../modules/nf-core/bcftools/annotate/main'
+include { BCFTOOLS_ANNOTATE as RENAME_CHRNAMES } from '../../../modules/nf-core/bcftools/annotate/main'
+include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view/main'
+include { CADD } from '../../../modules/nf-core/cadd/main'
+include { GAWK as REFERENCE_TO_CADD_CHRNAMES } from '../../../modules/nf-core/gawk/main'
+include { GAWK as CADD_TO_REFERENCE_CHRNAMES } from '../../../modules/nf-core/gawk/main'
+include { TABIX_TABIX as TABIX_ANNOTATE } from '../../../modules/nf-core/tabix/tabix/main'
+include { TABIX_TABIX as TABIX_CADD } from '../../../modules/nf-core/tabix/tabix/main'
+
+workflow ANNOTATE_CADD {
+
+ take:
+ ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_vcf // channel: [mandatory] [ val(meta), path(vcfs) ]
+ ch_index // channel: [mandatory] [ val(meta), path(tbis) ]
+ ch_header // channel: [mandatory] [ path(txt) ]
+ ch_cadd_resources // channel: [mandatory] [ path(dir) ]
+ ch_cadd_prescored // channel: [mandatory] [ path(dir) ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ REFERENCE_TO_CADD_CHRNAMES ( ch_fai , [] )
+ ch_versions = ch_versions.mix(REFERENCE_TO_CADD_CHRNAMES.out.versions)
+
+ CADD_TO_REFERENCE_CHRNAMES ( ch_fai , [] )
+ ch_versions = ch_versions.mix(CADD_TO_REFERENCE_CHRNAMES.out.versions)
+
+ ch_vcf
+ .join(ch_index)
+ .map { meta, vcf, tbi -> [ meta, vcf, tbi, [], [] ] }
+ .set { rename_chrnames_in }
+
+ RENAME_CHRNAMES (
+ rename_chrnames_in,
+ [],
+ REFERENCE_TO_CADD_CHRNAMES.out.output.map { meta, txt -> txt }
+ )
+ ch_versions = ch_versions.mix(RENAME_CHRNAMES.out.versions)
+
+ BCFTOOLS_VIEW (
+ RENAME_CHRNAMES.out.vcf.map { meta, vcf -> [ meta, vcf, [] ] },
+ [],
+ [],
+ []
+ )
+ ch_versions = ch_versions.mix(BCFTOOLS_VIEW.out.versions)
+
+ CADD ( BCFTOOLS_VIEW.out.vcf, ch_cadd_resources, ch_cadd_prescored )
+ ch_versions = ch_versions.mix(CADD.out.versions)
+
+ TABIX_CADD ( CADD.out.tsv )
+ ch_versions = ch_versions.mix(TABIX_CADD.out.versions)
+
+ RENAME_CHRNAMES.out.vcf
+ .join(CADD.out.tsv)
+ .join(TABIX_CADD.out.tbi)
+ .map { meta, vcf, annotations, annotations_index -> [ meta, vcf, [], annotations, annotations_index ] }
+ .set { ch_annotate_indels_in }
+
+ ANNOTATE_INDELS (
+ ch_annotate_indels_in,
+ ch_header,
+ CADD_TO_REFERENCE_CHRNAMES.out.output.map { meta, txt -> txt }
+ )
+ ch_versions = ch_versions.mix(ANNOTATE_INDELS.out.versions)
+
+ emit:
+ vcf = ANNOTATE_INDELS.out.vcf // channel: [ val(meta), path(vcf) ]
+ tbi = ANNOTATE_INDELS.out.tbi // channel: [ val(meta), path(tbi) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/annotate_consequence_pli.nf b/subworkflows/local/annotate_consequence_pli.nf
new file mode 100644
index 00000000..203a1405
--- /dev/null
+++ b/subworkflows/local/annotate_consequence_pli.nf
@@ -0,0 +1,30 @@
+//
+// A subworkflow to add the most severe consequence and pLI to a VEP-annotated VCF
+//
+
+include { ADD_MOST_SEVERE_CSQ } from '../../modules/local/add_most_severe_consequence'
+include { ADD_MOST_SEVERE_PLI } from '../../modules/local/add_most_severe_pli'
+include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/main'
+
+workflow ANNOTATE_CSQ_PLI {
+ take:
+ ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
+ ch_variant_consequences // channel: [mandatory] [ path(consequences) ]
+
+ main:
+ // Step 1: ADD_MOST_SEVERE_CSQ gets the input VCF together with the consequences file
+ ADD_MOST_SEVERE_CSQ ( ch_vcf, ch_variant_consequences )
+
+ // Step 2: ADD_MOST_SEVERE_PLI runs on the VCF emitted by step 1
+ ADD_MOST_SEVERE_PLI ( ADD_MOST_SEVERE_CSQ.out.vcf )
+
+ // Step 3: TABIX_BGZIPTABIX runs on the VCF emitted by step 2; its gz_tbi output is [ meta, vcf, tbi ]
+ TABIX_BGZIPTABIX ( ADD_MOST_SEVERE_PLI.out.vcf )
+
+ ch_versions = Channel.empty().mix( ADD_MOST_SEVERE_CSQ.out.versions, ADD_MOST_SEVERE_PLI.out.versions, TABIX_BGZIPTABIX.out.versions )
+
+ emit:
+ vcf_ann = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> [ meta, vcf ] } // channel: [ val(meta), path(vcf) ]
+ tbi_ann = TABIX_BGZIPTABIX.out.gz_tbi.map { meta, vcf, tbi -> [ meta, tbi ] } // channel: [ val(meta), path(tbi) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/annotate_repeat_expansions/main.nf b/subworkflows/local/annotate_repeat_expansions/main.nf
new file mode 100644
index 00000000..62b138c6
--- /dev/null
+++ b/subworkflows/local/annotate_repeat_expansions/main.nf
@@ -0,0 +1,31 @@
+//
+// Annotate repeat expansions
+//
+
+include { BCFTOOLS_VIEW as COMPRESS_STRANGER } from '../../../modules/nf-core/bcftools/view/main'
+include { STRANGER } from '../../../modules/nf-core/stranger/main'
+
+workflow ANNOTATE_REPEAT_EXPANSIONS {
+ take:
+ ch_variant_catalog // channel: [mandatory] [ path(variant_catalog.json) ]
+ ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
+
+ main:
+ // Annotate the calls using the variant catalog
+ STRANGER ( ch_vcf, ch_variant_catalog )
+
+ // Compress and index the annotated VCF; an empty list fills the third tuple slot and the three extra inputs
+ COMPRESS_STRANGER ( STRANGER.out.vcf.map { meta, vcf -> [ meta, vcf, [] ] }, [], [], [] )
+
+ // Pair every VCF with its index by meta; unmatched or duplicated keys abort the run
+ COMPRESS_STRANGER.out.vcf
+ .join( COMPRESS_STRANGER.out.tbi, failOnMismatch:true, failOnDuplicate:true )
+ .set { ch_vcf_idx }
+
+ // Version files of both processes
+ ch_versions = Channel.empty().mix( STRANGER.out.versions, COMPRESS_STRANGER.out.versions )
+
+ emit:
+ vcf_idx = ch_vcf_idx // channel: [ val(meta), path(vcf), path(tbi) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/bam_infer_sex.nf b/subworkflows/local/bam_infer_sex.nf
index 3f49e5e5..16486ea8 100644
--- a/subworkflows/local/bam_infer_sex.nf
+++ b/subworkflows/local/bam_infer_sex.nf
@@ -1,5 +1,6 @@
-include { SOMALIER_EXTRACT } from '../../modules/nf-core/somalier/extract/main'
-include { SOMALIER_RELATE } from '../../modules/nf-core/somalier/relate/main'
+include { SOMALIER_EXTRACT } from '../../modules/nf-core/somalier/extract/main'
+include { SOMALIER_RELATE as RELATE_INFER } from '../../modules/nf-core/somalier/relate/main'
+include { SOMALIER_RELATE as RELATE_RELATE } from '../../modules/nf-core/somalier/relate/main'
workflow BAM_INFER_SEX {
@@ -11,29 +12,27 @@ workflow BAM_INFER_SEX {
ch_ped // channel: [ val(meta), path(ped) ]
main:
-
ch_versions = Channel.empty()
// Extract sites
SOMALIER_EXTRACT (
ch_bam_bai,
- ch_fasta.map { it[1] },
- ch_fai.map { it[1] },
- ch_somalier_sites.map { it[1] }
+ ch_fasta,
+ ch_fai,
+ ch_somalier_sites
)
ch_versions = ch_versions.mix(SOMALIER_EXTRACT.out.versions)
SOMALIER_EXTRACT.out.extract
- .map { meta, extract -> [ [ id: 'multisample' ], extract ] }
- .groupTuple()
- .join( ch_ped.map { ped -> [ [ id:'multisample'], ped ] } )
- .set { ch_somalier_relate_in }
+ .combine( ch_ped.map { meta, ped -> ped } )
+ .filter { meta, extract, ped -> meta.sex == 0 }
+ .set { ch_relate_infer_in }
- // Infer sex
- SOMALIER_RELATE ( ch_somalier_relate_in, [] )
- ch_versions = ch_versions.mix(SOMALIER_RELATE.out.versions)
+ // 1. Run somalier relate on one sample at a time to infer sex
+ RELATE_INFER ( ch_relate_infer_in, [] )
+ ch_versions = ch_versions.mix(RELATE_INFER.out.versions)
- SOMALIER_RELATE.out.samples_tsv
+ RELATE_INFER.out.samples_tsv
.map { meta, tsv -> tsv }
.splitCsv(header: true, sep: '\t')
.set { somalier_tsv }
@@ -47,30 +46,43 @@ workflow BAM_INFER_SEX {
}
.set { ch_somalier_sex }
- // Use sex from somalier for samples with unknown sex (sex == 0) in samplesheet
+ // Branch on samples with known/unknown sex
ch_bam_bai
+ .branch { meta, bam, bai ->
+ unknown_sex: meta.sex == 0
+ known_sex: meta.sex != 0
+ }
+ .set { ch_samples }
+
+ // Update sex with sex from somalier for samples with unknown sex
+ ch_samples.unknown_sex
.map { meta, bam, bai -> [ meta.id, meta, bam, bai ] }
.join( ch_somalier_sex )
.map { id, meta, bam, bai, somalier ->
- new_meta = [
- id : meta.id,
- family_id : meta.family_id,
- paternal_id : meta.paternal_id,
- maternal_id : meta.maternal_id,
- sex : meta.sex == 0 ? somalier.sex.toInteger() : meta.sex,
- phenotype : meta.phenotype,
- single_end : meta.single_end
- ]
- [ new_meta, bam, bai ]
+ updated_sex = (meta.sex == 0 ? somalier.sex.toInteger() : meta.sex)
+ [ meta + [sex: updated_sex], bam, bai ]
}
.set { ch_updated_sex }
+ // Add samples with known sex
+ ch_updated_sex = ch_updated_sex.mix(ch_samples.known_sex)
+
+ // 2. Run relate on all samples at once to check relatedness
+ SOMALIER_EXTRACT.out.extract
+ .map { meta, extract -> [ [ id: meta.project ], extract ] }
+ .groupTuple()
+ .join( ch_ped )
+ .set { ch_relate_relate_in }
+
+ RELATE_RELATE ( ch_relate_relate_in, [] )
+ ch_versions = ch_versions.mix(RELATE_RELATE.out.versions)
+
emit:
bam = ch_updated_sex.map { meta, bam, bai -> [ meta, bam ] } // channel: [ val(meta), path(bam) ]
bai = ch_updated_sex.map { meta, bam, bai -> [ meta, bai ] } // channel: [ val(meta), path(bai) ]
bam_bai = ch_updated_sex // channel: [ val(meta), path(bam), path(bai) ]
- somalier_samples = SOMALIER_RELATE.out.samples_tsv // channel: [ val(meta), path(samples_tsv) ]
- somalier_pairs = SOMALIER_RELATE.out.pairs_tsv // channel: [ val(meta), path(pairs_tsv) ]
+ somalier_samples = RELATE_RELATE.out.samples_tsv // channel: [ val(meta), path(samples_tsv) ]
+ somalier_pairs = RELATE_RELATE.out.pairs_tsv // channel: [ val(meta), path(pairs_tsv) ]
versions = ch_versions // channel: [ versions.yml ]
}
diff --git a/subworkflows/local/bam_to_fastq.nf b/subworkflows/local/bam_to_fastq.nf
deleted file mode 100644
index 0a7bf330..00000000
--- a/subworkflows/local/bam_to_fastq.nf
+++ /dev/null
@@ -1,32 +0,0 @@
-include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main'
-
-workflow BAM_TO_FASTQ {
-
- take:
- ch_sample // channel: [ val(meta), reads ]
-
- main:
- ch_versions = Channel.empty()
-
- // Filter out BAM from fastq
- ch_sample
- .map { meta, fastq -> [ meta + [ 'single_end': true ], fastq ] }
- .branch { meta, reads ->
- fastq: reads.extension == 'gz'
- bam: reads.extension == 'bam'
- }
- .set { ch_filetypes }
-
- ch_filetypes.fastq.set { ch_sample }
-
- SAMTOOLS_FASTQ ( ch_filetypes.bam, false )
- ch_versions = ch_versions.mix(SAMTOOLS_FASTQ.out.versions)
-
- // Mix converted BAM back in
- ch_sample = ch_sample.mix(SAMTOOLS_FASTQ.out.other)
-
- emit:
- fastq = ch_sample // channel: [ val(meta), fastq ]
- versions = ch_versions // channel: [ versions.yml ]
-}
-
diff --git a/subworkflows/local/call_repeat_expansions/main.nf b/subworkflows/local/call_repeat_expansions/main.nf
new file mode 100644
index 00000000..8b71453e
--- /dev/null
+++ b/subworkflows/local/call_repeat_expansions/main.nf
@@ -0,0 +1,55 @@
+include { TRGT } from '../../../modules/local/trgt'
+include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_TRGT } from '../../../modules/nf-core/samtools/index/main'
+include { SAMTOOLS_SORT as SAMTOOLS_SORT_TRGT } from '../../../modules/nf-core/samtools/sort/main'
+include { BCFTOOLS_SORT as BCFTOOLS_SORT_TRGT } from '../../../modules/nf-core/bcftools/sort/main'
+include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_MERGE } from '../../../modules/nf-core/bcftools/index/main'
+include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge/main'
+
+workflow CALL_REPEAT_EXPANSIONS {
+
+ take:
+ ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
+ ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_trgt_bed // channel: [mandatory] [ val(meta), path(bed) ]
+
+ main:
+ // Per-sample repeat calling with TRGT, followed by a per-project merge of the sorted calls
+ ch_versions = Channel.empty()
+
+ ch_bam_bai
+ .map { meta, bam, bai -> [meta, bam, bai, meta.sex] }
+ .set { ch_trgt_input }
+
+ // Run TRGT (only the second element of each ch_trgt_bed item, the BED path, is passed on)
+ TRGT ( ch_trgt_input, ch_fasta, ch_fai, ch_trgt_bed.map { it[1] } )
+
+ // Sort and index bam
+ SAMTOOLS_SORT_TRGT ( TRGT.out.bam, [[],[]] )
+ SAMTOOLS_INDEX_TRGT(SAMTOOLS_SORT_TRGT.out.bam)
+
+ // Sort and index bcf
+ BCFTOOLS_SORT_TRGT(TRGT.out.vcf)
+
+ BCFTOOLS_SORT_TRGT.out.vcf
+ .join( BCFTOOLS_SORT_TRGT.out.tbi )
+ .map { meta, bcf, index -> [ [ id : meta.project ], bcf, index ] } // re-key by project so groupTuple gathers one item per project
+ .groupTuple()
+ .set{ ch_bcftools_merge_in }
+
+ BCFTOOLS_MERGE ( ch_bcftools_merge_in, ch_fasta, ch_fai, [] )
+
+ BCFTOOLS_INDEX_MERGE ( BCFTOOLS_MERGE.out.merged_variants )
+
+ ch_versions = ch_versions.mix(TRGT.out.versions)
+ ch_versions = ch_versions.mix(SAMTOOLS_SORT_TRGT.out.versions)
+ ch_versions = ch_versions.mix(SAMTOOLS_INDEX_TRGT.out.versions)
+ ch_versions = ch_versions.mix(BCFTOOLS_SORT_TRGT.out.versions)
+ ch_versions = ch_versions.mix(BCFTOOLS_MERGE.out.versions)
+ ch_versions = ch_versions.mix(BCFTOOLS_INDEX_MERGE.out.versions)
+
+ emit:
+ vcf = BCFTOOLS_SORT_TRGT.out.vcf // channel: [ val(meta), path(vcf) ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
+
diff --git a/subworkflows/local/convert_input_files.nf b/subworkflows/local/convert_input_files.nf
new file mode 100644
index 00000000..5c44e521
--- /dev/null
+++ b/subworkflows/local/convert_input_files.nf
@@ -0,0 +1,37 @@
+include { SAMTOOLS_IMPORT } from '../../modules/nf-core/samtools/import/main'
+include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main'
+
+workflow CONVERT_INPUT_FILES {
+
+ take:
+ ch_sample // channel: [ val(meta), reads ]
+
+ main:
+ // Route each input by file extension: 'gz' goes to the fastq branch, 'bam' to the bam branch.
+ // Inputs that satisfy neither condition are not emitted by the branch operator.
+ ch_sample
+ .branch { meta, reads ->
+ fastq: reads.extension == 'gz'
+ bam: reads.extension == 'bam'
+ }
+ .set { ch_filetypes }
+
+ // BAM inputs go through SAMTOOLS_FASTQ
+ SAMTOOLS_FASTQ ( ch_filetypes.bam, false )
+
+ // FASTQ inputs go through SAMTOOLS_IMPORT
+ SAMTOOLS_IMPORT ( ch_filetypes.fastq )
+
+ // Each output channel carries the inputs already in that format plus the converted ones
+ ch_bam = ch_filetypes.bam.mix( SAMTOOLS_IMPORT.out.bam )
+ ch_fastq = ch_filetypes.fastq.mix( SAMTOOLS_FASTQ.out.other )
+
+ // Version files of both conversions
+ ch_versions = Channel.empty().mix( SAMTOOLS_FASTQ.out.versions, SAMTOOLS_IMPORT.out.versions )
+
+ emit:
+ bam = ch_bam // channel: [ val(meta), bam ]
+ fastq = ch_fastq // channel: [ val(meta), fastq ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
+
diff --git a/subworkflows/local/genome_assembly.nf b/subworkflows/local/genome_assembly.nf
index 7764d15f..650af99f 100644
--- a/subworkflows/local/genome_assembly.nf
+++ b/subworkflows/local/genome_assembly.nf
@@ -27,7 +27,7 @@ workflow ASSEMBLY {
ch_versions = ch_versions.mix(HIFIASM.out.versions)
} else if(params.hifiasm_mode == 'trio-binning') {
- // TODO: Multiple trios with different parents may not work?
+ // Multiple trios with different parents may not work?
ch_reads.groupTuple()
.map{ meta, reads -> meta } // Takes meta, then
// combine to create all possible combinations of [ meta, meta ]
diff --git a/subworkflows/local/methylation.nf b/subworkflows/local/methylation.nf
index 8e7eea7d..6541a9f4 100644
--- a/subworkflows/local/methylation.nf
+++ b/subworkflows/local/methylation.nf
@@ -1,7 +1,7 @@
-include { MODKIT_PILEUP } from '../../modules/nf-core/modkit/pileup/main'
-include { MODKIT_PILEUP as MODKIT_PILEUP_HAPLOTYPES } from '../../modules/nf-core/modkit/pileup/main'
-include { TABIX_BGZIPTABIX as BGZIP_MODKIT_PILEUP } from '../../modules/nf-core/tabix/bgziptabix/main'
-include { TABIX_BGZIPTABIX as BGZIP_MODKIT_PILEUP_HAPLOTYPES } from '../../modules/nf-core/tabix/bgziptabix/main'
+include { MODKIT_PILEUP as MODKIT_PILEUP_UNPHASED } from '../../modules/nf-core/modkit/pileup/main'
+include { MODKIT_PILEUP as MODKIT_PILEUP_PHASED } from '../../modules/nf-core/modkit/pileup/main'
+include { TABIX_BGZIPTABIX as BGZIP_MODKIT_PILEUP_UNPHASED } from '../../modules/nf-core/tabix/bgziptabix/main'
+include { TABIX_BGZIPTABIX as BGZIP_MODKIT_PILEUP_PHASED } from '../../modules/nf-core/tabix/bgziptabix/main'
workflow METHYLATION {
@@ -15,22 +15,22 @@ workflow METHYLATION {
ch_versions = Channel.empty()
// Run modkit pileup once without dividing by HP-tag and once with
- MODKIT_PILEUP(ch_haplotagged_bam_bai, ch_fasta, ch_bed)
- ch_versions = ch_versions.mix(MODKIT_PILEUP.out.versions)
+ MODKIT_PILEUP_UNPHASED (ch_haplotagged_bam_bai, ch_fasta, ch_bed)
+ ch_versions = ch_versions.mix(MODKIT_PILEUP_UNPHASED.out.versions)
- MODKIT_PILEUP_HAPLOTYPES(ch_haplotagged_bam_bai, ch_fasta, ch_bed)
- ch_versions = ch_versions.mix(MODKIT_PILEUP_HAPLOTYPES.out.versions)
+ MODKIT_PILEUP_PHASED (ch_haplotagged_bam_bai, ch_fasta, ch_bed)
+ ch_versions = ch_versions.mix(MODKIT_PILEUP_PHASED.out.versions)
// Bgzip and index output "BED"
- BGZIP_MODKIT_PILEUP ( MODKIT_PILEUP.out.bed )
- ch_versions = ch_versions.mix(BGZIP_MODKIT_PILEUP.out.versions)
+ BGZIP_MODKIT_PILEUP_UNPHASED ( MODKIT_PILEUP_UNPHASED.out.bed )
+ ch_versions = ch_versions.mix(BGZIP_MODKIT_PILEUP_UNPHASED.out.versions)
- MODKIT_PILEUP_HAPLOTYPES.out.bed
+ MODKIT_PILEUP_PHASED.out.bed
.transpose()
- .set { ch_bgzip_modkit_haplotypes_in }
+ .set { ch_bgzip_modkit_pileup_phased_in }
- BGZIP_MODKIT_PILEUP_HAPLOTYPES ( ch_bgzip_modkit_haplotypes_in )
- ch_versions = ch_versions.mix(BGZIP_MODKIT_PILEUP_HAPLOTYPES.out.versions)
+ BGZIP_MODKIT_PILEUP_PHASED ( ch_bgzip_modkit_pileup_phased_in )
+ ch_versions = ch_versions.mix(BGZIP_MODKIT_PILEUP_PHASED.out.versions)
emit:
versions = ch_versions // channel: [ versions.yml ]
diff --git a/subworkflows/local/phasing.nf b/subworkflows/local/phasing.nf
index 19455fcb..fb87a27b 100644
--- a/subworkflows/local/phasing.nf
+++ b/subworkflows/local/phasing.nf
@@ -19,8 +19,9 @@ workflow PHASING {
fai // channel: [ val(meta), fai ]
main:
- ch_versions = Channel.empty()
+ ch_versions = Channel.empty()
ch_bam_bai_haplotagged = Channel.empty()
+ ch_vcf_index = Channel.empty()
TABIX_TABIX(ch_vcf)
ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
@@ -30,9 +31,6 @@ workflow PHASING {
WHATSHAP_PHASE( ch_vcf.join(ch_bam_bai), fasta, fai )
ch_versions = ch_versions.mix(WHATSHAP_PHASE.out.versions)
- WHATSHAP_STATS( WHATSHAP_PHASE.out.vcf_tbi )
- ch_versions = ch_versions.mix(WHATSHAP_STATS.out.versions)
-
WHATSHAP_PHASE.out.vcf_tbi
.join(ch_bam_bai)
.set { ch_whatshap_haplotag_in }
@@ -48,10 +46,11 @@ workflow PHASING {
.join(SAMTOOLS_INDEX_WHATSHAP.out.bai)
.set { ch_bam_bai_haplotagged }
- } else if (params.phaser.equals("hiphase_snv")) {
+ ch_vcf_index = ch_vcf_index.mix( WHATSHAP_PHASE.out.vcf_tbi )
+ } else if (params.phaser.equals("hiphase_snv")) {
ch_vcf
- .join(TABIX_TABIX.out.tbi)
+ .join(TABIX_TABIX.out.csi)
.join(ch_bam_bai)
.set { ch_hiphase_snv_in }
@@ -62,6 +61,8 @@ workflow PHASING {
.join(HIPHASE_SNV.out.bais)
.set { ch_bam_bai_haplotagged }
+ ch_vcf_index = ch_vcf_index.mix( HIPHASE_SNV.out.vcfs.join(HIPHASE_SNV.out.vcfs_tbi) )
+
} else if (params.phaser.equals("hiphase_sv")) {
// Sniffles specific...
BCFTOOLS_REHEADER(
@@ -92,7 +93,7 @@ workflow PHASING {
.groupTuple()
.set { ch_hiphase_vcf }
- TABIX_TABIX.out.tbi
+ TABIX_TABIX.out.csi
.concat(ch_sv_tbi)
.groupTuple()
.set { ch_hiphase_tbi }
@@ -108,12 +109,18 @@ workflow PHASING {
HIPHASE_SV.out.bams
.join(HIPHASE_SV.out.bais)
.set { ch_bam_bai_haplotagged }
+
+ ch_vcf_index = ch_vcf_index.mix( HIPHASE_SV.out.vcfs.join(HIPHASE_SV.out.vcfs_tbi) )
}
+ WHATSHAP_STATS( ch_vcf_index )
+ ch_versions = ch_versions.mix(WHATSHAP_STATS.out.versions)
+
CRAMINO_PHASED( ch_bam_bai_haplotagged )
ch_versions = ch_versions.mix(CRAMINO_PHASED.out.versions)
emit:
- haplotagged_bam_bai = ch_bam_bai_haplotagged // channel: [ val(meta), bam, bai ]
- versions = ch_versions // channel: [ versions.yml ]
+ haplotagged_bam_bai = ch_bam_bai_haplotagged // channel: [ val(meta), bam, bai ]
+ stats = WHATSHAP_STATS.out.stats // channel: [ val(meta), txt ]
+ versions = ch_versions // channel: [ versions.yml ]
}
diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf
index 07117db5..1babff3b 100644
--- a/subworkflows/local/prepare_genome.nf
+++ b/subworkflows/local/prepare_genome.nf
@@ -1,13 +1,13 @@
-include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main'
-include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/index/main'
include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip/main'
+include { MINIMAP2_INDEX } from '../../modules/nf-core/minimap2/index/main'
+include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main'
include { UNTAR as UNTAR_VEP_CACHE } from '../../modules/nf-core/untar/main'
workflow PREPARE_GENOME {
take:
- fasta_in // channel: [ val(meta), fasta ]
- ch_vep_cache // channel: [mandatory for annotation] [ path(cache) ]
+ fasta_in // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_vep_cache // channel: [optional] [ path(cache) ]
main:
ch_versions = Channel.empty()
@@ -29,24 +29,23 @@ workflow PREPARE_GENOME {
}
SAMTOOLS_FAIDX ( ch_fasta, [[],[]] )
+ ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions)
+
MINIMAP2_INDEX ( ch_fasta )
+ ch_versions = ch_versions.mix(MINIMAP2_INDEX.out.versions)
UNTAR_VEP_CACHE (ch_vep_cache)
+ ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions)
UNTAR_VEP_CACHE.out.untar
.map { meta, files -> [files] }
.collect()
.set { untarred_vep }
- // Gather versions
- ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions.first())
- ch_versions = ch_versions.mix(MINIMAP2_INDEX.out.versions.first())
- ch_versions = ch_versions.mix(UNTAR_VEP_CACHE.out.versions)
-
emit:
- fasta = ch_fasta // channel: [ val(meta), fasta ]
- fai = SAMTOOLS_FAIDX.out.fai.collect() // channel: [ val(meta), fai ]
+ mmi = MINIMAP2_INDEX.out.index.collect() // channel: [ val(meta), path(mmi) ]
+ fai = SAMTOOLS_FAIDX.out.fai.collect() // channel: [ val(meta), path(fai) ]
+ fasta = ch_fasta // channel: [ val(meta), path(fasta) ]
vep_resources = untarred_vep // channel: [ path(cache) ]
- mmi = MINIMAP2_INDEX.out.index.collect() // channel: [ val(meta), mmi ]
versions = ch_versions // channel: [ versions.yml ]
}
diff --git a/subworkflows/local/qc_aligned_reads.nf b/subworkflows/local/qc_aligned_reads.nf
index c7eba920..2176f807 100644
--- a/subworkflows/local/qc_aligned_reads.nf
+++ b/subworkflows/local/qc_aligned_reads.nf
@@ -11,8 +11,6 @@ workflow QC_ALIGNED_READS {
main:
ch_versions = Channel.empty()
- // Prepare inputs
-
CRAMINO (ch_bam_bai)
ch_versions = ch_versions.mix(CRAMINO.out.versions)
@@ -24,6 +22,9 @@ workflow QC_ALIGNED_READS {
ch_versions = ch_versions.mix(MOSDEPTH.out.versions)
emit:
- versions = ch_versions // channel: [ versions.yml ]
+ mosdepth_summary = MOSDEPTH.out.summary_txt // channel: [ val(meta), path(txt) ]
+ mosdepth_global_dist = MOSDEPTH.out.global_txt // channel: [ val(meta), path(txt) ]
+ mosdepth_region_dist = MOSDEPTH.out.regions_txt // channel: [ val(meta), path(txt) ]
+ versions = ch_versions // channel: [ versions.yml ]
}
diff --git a/subworkflows/local/rank_variants/main.nf b/subworkflows/local/rank_variants/main.nf
new file mode 100644
index 00000000..c6820e14
--- /dev/null
+++ b/subworkflows/local/rank_variants/main.nf
@@ -0,0 +1,52 @@
+//
+// A subworkflow to score and rank variants.
+//
+
+include { GENMOD_ANNOTATE } from '../../../modules/nf-core/genmod/annotate/main'
+include { GENMOD_MODELS } from '../../../modules/nf-core/genmod/models/main'
+include { GENMOD_SCORE } from '../../../modules/nf-core/genmod/score/main'
+include { GENMOD_COMPOUND } from '../../../modules/nf-core/genmod/compound/main'
+include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort/main'
+include { TABIX_BGZIP } from '../../../modules/nf-core/tabix/bgzip/main'
+include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main'
+
+workflow RANK_VARIANTS {
+
+ take:
+ ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
+ ch_pedfile // channel: [mandatory] [ path(ped) ]
+ ch_reduced_penetrance // channel: [mandatory] [ path(penetrance) ]
+ ch_score_config // channel: [mandatory] [ path(ini) ]
+
+ main:
+ // genmod chain: annotate -> models -> score -> compound, each step fed by the previous step's VCF
+ GENMOD_ANNOTATE ( ch_vcf )
+ GENMOD_MODELS ( GENMOD_ANNOTATE.out.vcf, ch_pedfile, ch_reduced_penetrance )
+ GENMOD_SCORE ( GENMOD_MODELS.out.vcf, ch_pedfile, ch_score_config )
+ GENMOD_COMPOUND ( GENMOD_SCORE.out.vcf )
+
+ // Both processes below are given the same GENMOD_COMPOUND VCF
+ // (how each is restricted to SVs or SNVs is not visible in this file)
+ BCFTOOLS_SORT ( GENMOD_COMPOUND.out.vcf ) // SV file needs to be sorted before indexing
+ TABIX_BGZIP ( GENMOD_COMPOUND.out.vcf ) // run only for SNVs
+
+ // Merge both outputs into one channel and index it
+ TABIX_BGZIP.out.output
+ .mix( BCFTOOLS_SORT.out.vcf )
+ .set { ch_ranked_vcf }
+
+ TABIX_TABIX ( ch_ranked_vcf )
+
+ // Version files of every process used above
+ ch_versions = Channel.empty().mix(
+ GENMOD_ANNOTATE.out.versions, GENMOD_MODELS.out.versions,
+ GENMOD_SCORE.out.versions, GENMOD_COMPOUND.out.versions,
+ BCFTOOLS_SORT.out.versions, TABIX_BGZIP.out.versions,
+ TABIX_TABIX.out.versions
+ )
+
+ emit:
+ vcf = ch_ranked_vcf // channel: [ val(meta), path(vcf) ]
+ tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), path(tbi) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/repeat_analysis.nf b/subworkflows/local/repeat_analysis.nf
deleted file mode 100644
index b6b0b79e..00000000
--- a/subworkflows/local/repeat_analysis.nf
+++ /dev/null
@@ -1,57 +0,0 @@
-include { TRGT } from '../../modules/local/trgt'
-include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_TRGT } from '../../modules/nf-core/samtools/index/main'
-include { SAMTOOLS_SORT as SAMTOOLS_SORT_TRGT } from '../../modules/nf-core/samtools/sort/main'
-include { BCFTOOLS_SORT as BCFTOOLS_SORT_TRGT } from '../../modules/nf-core/bcftools/sort/main'
-include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_MERGE } from '../../modules/nf-core/bcftools/index/main'
-include { BCFTOOLS_MERGE } from '../../modules/nf-core/bcftools/merge/main'
-
-workflow REPEAT_ANALYSIS {
-
- take:
- ch_bam_bai
- ch_fasta
- ch_fai
- ch_trgt_bed
-
- main:
- ch_repeat_calls_vcf = Channel.empty()
- ch_versions = Channel.empty()
-
- ch_bam_bai
- .map{ meta, bam, bai -> [meta, bam, bai, meta.sex] }
- .set{ ch_trgt_input }
-
- // Run TGRT
- TRGT ( ch_trgt_input, ch_fasta, ch_trgt_bed )
-
- // Sort and index bam
- SAMTOOLS_SORT_TRGT ( TRGT.out.bam, [[],[]] )
- SAMTOOLS_INDEX_TRGT(SAMTOOLS_SORT_TRGT.out.bam)
-
- // Sort and index bcf
- BCFTOOLS_SORT_TRGT(TRGT.out.vcf)
-
- BCFTOOLS_SORT_TRGT.out.vcf
- .join( BCFTOOLS_SORT_TRGT.out.tbi )
- .toList()
- .filter { it.size() > 1 }
- .flatMap()
- .map { meta, bcf, csi -> [ [ id : 'multisample' ], bcf, csi ] }
- .groupTuple()
- .set{ ch_bcftools_merge_in }
-
- BCFTOOLS_MERGE ( ch_bcftools_merge_in, ch_fasta, ch_fai, [] )
-
- BCFTOOLS_INDEX_MERGE ( BCFTOOLS_MERGE.out.merged_variants )
-
- ch_versions = ch_versions.mix(TRGT.out.versions)
- ch_versions = ch_versions.mix(SAMTOOLS_SORT_TRGT.out.versions)
- ch_versions = ch_versions.mix(SAMTOOLS_INDEX_TRGT.out.versions)
- ch_versions = ch_versions.mix(BCFTOOLS_SORT_TRGT.out.versions)
- ch_versions = ch_versions.mix(BCFTOOLS_MERGE.out.versions)
- ch_versions = ch_versions.mix(BCFTOOLS_INDEX_MERGE.out.versions)
-
- emit:
- versions = ch_versions // channel: [ versions.yml ]
-}
-
diff --git a/subworkflows/local/scatter_genome/main.nf b/subworkflows/local/scatter_genome/main.nf
new file mode 100644
index 00000000..5e611c29
--- /dev/null
+++ b/subworkflows/local/scatter_genome/main.nf
@@ -0,0 +1,65 @@
+include { BEDTOOLS_MERGE } from '../../../modules/nf-core/bedtools/merge/main'
+include { BEDTOOLS_SORT } from '../../../modules/nf-core/bedtools/sort/main'
+include { BUILD_INTERVALS } from '../../../modules/local/build_intervals/main'
+include { SPLIT_BED_CHUNKS } from '../../../modules/local/split_bed_chunks/main'
+
+workflow SCATTER_GENOME {
+ // Emits a BED file (built from the fai or taken from the input) and, optionally, that BED split into chunks.
+ take:
+ ch_fai // channel: [optional] [ val(meta), path(fai) ]
+ ch_input_bed // channel: [optional] [ val(meta), path(bed) ]
+ make_bed_from_fai // bool
+ make_bed_intervals // bool
+ split_n // integer: split bed into n regions
+
+ main:
+ ch_versions = Channel.empty()
+ ch_bed = Channel.empty()
+ ch_bed_intervals = Channel.empty()
+
+ //
+ // If make_bed_from_fai is set, build the BED from the reference index; otherwise pass ch_input_bed through unchanged
+ //
+ if( make_bed_from_fai ) {
+ // NOTE(review): the first tuple element is wrapped as [ id: name ], while the take comment documents it as val(meta) - confirm which shape callers send
+ BUILD_INTERVALS ( ch_fai.map { name, fai -> [ [ id: name ], fai ] } )
+ ch_versions = ch_versions.mix(BUILD_INTERVALS.out.versions)
+
+ BUILD_INTERVALS.out.bed
+ .set{ ch_bed }
+ } else {
+ ch_input_bed
+ .set{ ch_bed }
+ }
+
+ //
+ // Merge overlapping and then split BED regions for SNV calling
+ //
+ if( make_bed_intervals ) {
+ // split_n is only validated here, i.e. when interval splitting was requested
+ if( split_n < 1 ) { error "Can't split bed file into less than one file" }
+
+ // Sort and merge overlapping regions
+ BEDTOOLS_SORT ( ch_bed, [] )
+ ch_versions = ch_versions.mix(BEDTOOLS_SORT.out.versions)
+
+ BEDTOOLS_MERGE ( BEDTOOLS_SORT.out.sorted )
+ ch_versions = ch_versions.mix(BEDTOOLS_MERGE.out.versions)
+
+ SPLIT_BED_CHUNKS( BEDTOOLS_MERGE.out.bed, split_n )
+ ch_versions = ch_versions.mix(SPLIT_BED_CHUNKS.out.versions)
+
+ // Collect all split BED files into one list, pair the list with its size and transpose: one [ bed, total ] item per file (total is meant for groupKey)
+ SPLIT_BED_CHUNKS.out.split_beds
+ .collect()
+ .map{ it -> [ it, it.size() ] }
+ .transpose()
+ .set { ch_bed_intervals }
+ }
+
+ emit:
+ bed = ch_bed // channel: [ val(meta), path(bed) ]
+ bed_intervals = ch_bed_intervals // channel: [ path(bed), val(num_intervals) ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
+
diff --git a/subworkflows/local/scatter_genome/tests/main.nf.test b/subworkflows/local/scatter_genome/tests/main.nf.test
new file mode 100644
index 00000000..48b3265c
--- /dev/null
+++ b/subworkflows/local/scatter_genome/tests/main.nf.test
@@ -0,0 +1,188 @@
+nextflow_workflow {
+ // Workflow inputs used below: input[0]=ch_fai, input[1]=ch_input_bed, input[2]=make_bed_from_fai, input[3]=make_bed_intervals, input[4]=split_n
+ name "Test Workflow SCATTER_GENOME"
+ script "../main.nf"
+ workflow "SCATTER_GENOME"
+ config "./nextflow.config"
+
+ setup {
+ run("SAMTOOLS_FAIDX") {
+ script "../../../../modules/nf-core/samtools/faidx/main.nf"
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[1] = [[],[]]
+ """
+ }
+ }
+ }
+
+ test("fai, [], true, false, []") {
+
+ when {
+ workflow {
+ """
+ input[0] = SAMTOOLS_FAIDX.out.fai
+ input[1] = [[],[]]
+ input[2] = true
+ input[3] = false
+ input[4] = []
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out,
+ path(workflow.out.bed.get(0).get(1)).readLines(),
+ workflow.out.bed_intervals.findAll { it[0] instanceof String }.collect { path(it[0]).readLines() },
+ ).match() }
+ )
+ }
+
+ }
+
+ test("fai, [], true, true, 3") {
+
+ when {
+ workflow {
+ """
+ input[0] = SAMTOOLS_FAIDX.out.fai
+ input[1] = [[],[]]
+ input[2] = true
+ input[3] = true
+ input[4] = 3
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out,
+ path(workflow.out.bed.get(0).get(1)).readLines(),
+ workflow.out.bed_intervals.findAll { it[0] instanceof String }.collect { path(it[0]).readLines() },
+ ).match() }
+ )
+ }
+
+ }
+
+ test("fai, [], bed, true, 3") {
+ // fai and BED are both supplied with make_bed_from_fai = true, so the BED is expected to come from the fai; the name is kept because it is the snapshot key
+ when {
+ workflow {
+ """
+ input[0] = SAMTOOLS_FAIDX.out.fai
+ input[1] = Channel.of('chr22\t0\t1000')
+ .collectFile(name: 'chr22.bed', newLine: true)
+ .map { file -> [ [ id:'chr22' ], file ] }
+ input[2] = true
+ input[3] = true
+ input[4] = 3
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out,
+ path(workflow.out.bed.get(0).get(1)).readLines(),
+ workflow.out.bed_intervals.findAll { it[0] instanceof String }.collect { path(it[0]).readLines() },
+ ).match() }
+ )
+ }
+
+ }
+
+ test("[], bed, false, false, 3") {
+
+ when {
+ workflow {
+ """
+ input[0] = [[],[]]
+ input[1] = Channel.of('chr22\t0\t1000')
+ .collectFile(name: 'chr22.bed', newLine: true)
+ .map { file -> [ [ id:'chr22' ], file ] }
+ input[2] = false
+ input[3] = false
+ input[4] = 3
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out,
+ path(workflow.out.bed.get(0).get(1)).readLines(),
+ workflow.out.bed_intervals.findAll { it[0] instanceof String }.collect { path(it[0]).readLines() },
+ ).match() }
+ )
+ }
+
+ }
+
+ test("[], bed, false, true, 3") {
+
+ when {
+ workflow {
+ """
+ input[0] = [[],[]]
+ input[1] = Channel.of('''chr22\t0\t1000\nchr22\t500\t1500\nchr22\t2000\t3000''')
+ .collectFile(name: 'chr22.bed', newLine: true)
+ .map { file -> [ [ id:'chr22' ], file ] }
+ input[2] = false
+ input[3] = true
+ input[4] = 3
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out,
+ path(workflow.out.bed.get(0).get(1)).readLines(),
+ workflow.out.bed_intervals.findAll { it[0] instanceof String }.collect { path(it[0]).readLines() },
+ ).match() }
+ )
+ }
+
+ }
+
+ test("[], bed, false, true, 0 - should fail") {
+
+ when {
+ workflow {
+ """
+ input[0] = [[],[]]
+ input[1] = Channel.of('''chr22\t0\t1000\nchr22\t500\t1500\nchr22\t2000\t3000''')
+ .collectFile(name: 'chr22.bed', newLine: true)
+ .map { file -> [ [ id:'chr22' ], file ] }
+ input[2] = false
+ input[3] = true
+ input[4] = 0
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert workflow.failed },
+ )
+ }
+
+ }
+
+}
diff --git a/subworkflows/local/scatter_genome/tests/main.nf.test.snap b/subworkflows/local/scatter_genome/tests/main.nf.test.snap
new file mode 100644
index 00000000..163401f0
--- /dev/null
+++ b/subworkflows/local/scatter_genome/tests/main.nf.test.snap
@@ -0,0 +1,280 @@
+{
+ "fai, [], bed, true, 3": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+ ]
+ ],
+ "1": [
+ [
+ "1.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7",
+ 1
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,8174fa0cc1f95ed4b353a948a2cbd4e9",
+ "versions.yml:md5,beb3374eccd225096e2f7c8ecd3a28be",
+ "versions.yml:md5,d69b1c1fc4e665c357d541ea3b0a0240",
+ "versions.yml:md5,e70d387c0dd30530a3e7fe2846ddffe1"
+ ],
+ "bed": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+ ]
+ ],
+ "bed_intervals": [
+ [
+ "1.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7",
+ 1
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,8174fa0cc1f95ed4b353a948a2cbd4e9",
+ "versions.yml:md5,beb3374eccd225096e2f7c8ecd3a28be",
+ "versions.yml:md5,d69b1c1fc4e665c357d541ea3b0a0240",
+ "versions.yml:md5,e70d387c0dd30530a3e7fe2846ddffe1"
+ ]
+ },
+ [
+ "chr22\t0\t40001"
+ ],
+ [
+ [
+ "chr22\t0\t40001"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-11T13:03:46.414904212"
+ },
+ "[], bed, false, false, 3": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "chr22"
+ },
+ "chr22.bed:md5,3b0b598acca89a84aa414e4c95abec1f"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "bed": [
+ [
+ {
+ "id": "chr22"
+ },
+ "chr22.bed:md5,3b0b598acca89a84aa414e4c95abec1f"
+ ]
+ ],
+ "bed_intervals": [
+
+ ],
+ "versions": [
+
+ ]
+ },
+ [
+ "chr22\t0\t1000"
+ ],
+ [
+
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-11T12:53:03.541050122"
+ },
+ "[], bed, false, true, 3": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "chr22"
+ },
+ "chr22.bed:md5,88ebd3fd77139b3d33af226231eff0df"
+ ]
+ ],
+ "1": [
+ [
+ "1.bed:md5,37978db9b095a29ebbd64f65dd8f13b3",
+ 2
+ ],
+ [
+ "2.bed:md5,0da4774e61b9c9005122e46f24522604",
+ 2
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,8174fa0cc1f95ed4b353a948a2cbd4e9",
+ "versions.yml:md5,beb3374eccd225096e2f7c8ecd3a28be",
+ "versions.yml:md5,d69b1c1fc4e665c357d541ea3b0a0240"
+ ],
+ "bed": [
+ [
+ {
+ "id": "chr22"
+ },
+ "chr22.bed:md5,88ebd3fd77139b3d33af226231eff0df"
+ ]
+ ],
+ "bed_intervals": [
+ [
+ "1.bed:md5,37978db9b095a29ebbd64f65dd8f13b3",
+ 2
+ ],
+ [
+ "2.bed:md5,0da4774e61b9c9005122e46f24522604",
+ 2
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,8174fa0cc1f95ed4b353a948a2cbd4e9",
+ "versions.yml:md5,beb3374eccd225096e2f7c8ecd3a28be",
+ "versions.yml:md5,d69b1c1fc4e665c357d541ea3b0a0240"
+ ]
+ },
+ [
+ "chr22\t0\t1000",
+ "chr22\t500\t1500",
+ "chr22\t2000\t3000"
+ ],
+ [
+ [
+ "chr22\t0\t1500"
+ ],
+ [
+ "chr22\t2000\t3000"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-11T13:06:34.382235567"
+ },
+ "fai, [], true, true, 3": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+ ]
+ ],
+ "1": [
+ [
+ "1.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7",
+ 1
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,8174fa0cc1f95ed4b353a948a2cbd4e9",
+ "versions.yml:md5,beb3374eccd225096e2f7c8ecd3a28be",
+ "versions.yml:md5,d69b1c1fc4e665c357d541ea3b0a0240",
+ "versions.yml:md5,e70d387c0dd30530a3e7fe2846ddffe1"
+ ],
+ "bed": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+ ]
+ ],
+ "bed_intervals": [
+ [
+ "1.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7",
+ 1
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,8174fa0cc1f95ed4b353a948a2cbd4e9",
+ "versions.yml:md5,beb3374eccd225096e2f7c8ecd3a28be",
+ "versions.yml:md5,d69b1c1fc4e665c357d541ea3b0a0240",
+ "versions.yml:md5,e70d387c0dd30530a3e7fe2846ddffe1"
+ ]
+ },
+ [
+ "chr22\t0\t40001"
+ ],
+ [
+ [
+ "chr22\t0\t40001"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-11T13:03:35.848426979"
+ },
+ "fai, [], true, false, []": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+ "versions.yml:md5,e70d387c0dd30530a3e7fe2846ddffe1"
+ ],
+ "bed": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7"
+ ]
+ ],
+ "bed_intervals": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,e70d387c0dd30530a3e7fe2846ddffe1"
+ ]
+ },
+ [
+ "chr22\t0\t40001"
+ ],
+ [
+
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-11T13:03:25.327078297"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/local/scatter_genome/tests/nextflow.config b/subworkflows/local/scatter_genome/tests/nextflow.config
new file mode 100644
index 00000000..2efae8e4
--- /dev/null
+++ b/subworkflows/local/scatter_genome/tests/nextflow.config
@@ -0,0 +1,8 @@
+process { // test-only process configuration for the scatter_genome subworkflow tests
+ withName: 'BEDTOOLS_MERGE' { // selector: applies to the process named BEDTOOLS_MERGE
+ ext.prefix = { "${meta.id}_merged" } // closure yielding "<meta.id>_merged"; presumably read by the module as its output prefix - confirm in the module
+ }
+ withName: 'BEDTOOLS_SORT' { // selector: applies to the process named BEDTOOLS_SORT
+ ext.prefix = { "${meta.id}_sorted" } // closure yielding "<meta.id>_sorted"; distinct suffixes keep the two prefixes apart
+ }
+}
diff --git a/subworkflows/local/short_variant_calling.nf b/subworkflows/local/short_variant_calling.nf
deleted file mode 100644
index 9b050f0c..00000000
--- a/subworkflows/local/short_variant_calling.nf
+++ /dev/null
@@ -1,108 +0,0 @@
-include { DEEPVARIANT } from '../../modules/nf-core/deepvariant'
-include { GLNEXUS } from '../../modules/nf-core/glnexus'
-include { BCFTOOLS_VIEW_REGIONS } from '../../modules/local/bcftools/view_regions'
-include { TABIX_TABIX as TABIX_EXTRA_GVCFS } from '../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_DV } from '../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_DV_VCF } from '../../modules/nf-core/tabix/tabix/main'
-include { TABIX_TABIX as TABIX_GLNEXUS } from '../../modules/nf-core/tabix/tabix/main'
-include { BCFTOOLS_CONCAT as BCFTOOLS_CONCAT_DV } from '../../modules/nf-core/bcftools/concat/main'
-include { BCFTOOLS_CONCAT as BCFTOOLS_CONCAT_DV_VCF } from '../../modules/nf-core/bcftools/concat/main'
-include { BCFTOOLS_SORT as BCFTOOLS_SORT_DV } from '../../modules/nf-core/bcftools/sort/main'
-include { BCFTOOLS_SORT as BCFTOOLS_SORT_DV_VCF } from '../../modules/nf-core/bcftools/sort/main'
-
-workflow SHORT_VARIANT_CALLING {
-
- take:
- ch_bam_bai
- ch_extra_gvcfs
- ch_fasta
- ch_fai
- ch_bed
-
- main:
- ch_snp_calls_vcf = Channel.empty()
- ch_snp_calls_gvcf = Channel.empty()
- ch_combined_bcf = Channel.empty()
- ch_versions = Channel.empty()
-
- // Does splitting BAMs and copying to node make sense to reduce IO?
-
- // Only one of these is run depending on params.variant_caller (when clause condition is defined in the conf/modules.config)
- DEEPVARIANT ( ch_bam_bai, ch_fasta, ch_fai, [[],[]] )
-
- // Collect VCFs
- ch_snp_calls_vcf = ch_snp_calls_vcf.mix(DEEPVARIANT.out.vcf)
-
- // Collect GVCFs
- ch_snp_calls_gvcf = ch_snp_calls_gvcf.mix(DEEPVARIANT.out.gvcf)
-
- // Extra gVCFs
- TABIX_EXTRA_GVCFS(ch_extra_gvcfs)
-
- ch_extra_gvcfs
- .join(TABIX_EXTRA_GVCFS.out.tbi)
- .groupTuple()
- .set{ ch_bcftools_view_regions_in }
-
- // This cuts all regions in BED file from extra gVCFS, better than nothing
- BCFTOOLS_VIEW_REGIONS( ch_bcftools_view_regions_in, ch_bed )
-
- // DV gVCFs
- TABIX_DV(ch_snp_calls_gvcf)
-
- ch_snp_calls_gvcf
- .groupTuple() // size not working here if there are less than specifed regions..
- .join(TABIX_DV.out.tbi.groupTuple())
- .set{ bcftools_concat_dv_in }
-
-
- // Concat into one gVCF per sample & sort
- BCFTOOLS_CONCAT_DV ( bcftools_concat_dv_in )
- ch_versions = ch_versions.mix(BCFTOOLS_CONCAT_DV.out.versions)
-
- BCFTOOLS_SORT_DV ( BCFTOOLS_CONCAT_DV.out.vcf )
- ch_versions = ch_versions.mix(BCFTOOLS_SORT_DV.out.versions)
-
- // DV VCFs
- TABIX_DV_VCF(ch_snp_calls_vcf)
- ch_versions = ch_versions.mix(TABIX_DV_VCF.out.versions)
-
- ch_snp_calls_vcf
- .groupTuple() // size not working here if there are less than specifed regions..
- .join(TABIX_DV_VCF.out.tbi.groupTuple())
- .set{ bcftools_concat_dv_vcf_in }
-
-
- // Concat into one VCF per sample & sort
- BCFTOOLS_CONCAT_DV_VCF ( bcftools_concat_dv_vcf_in )
- ch_versions = ch_versions.mix(BCFTOOLS_CONCAT_DV_VCF.out.versions)
-
- BCFTOOLS_SORT_DV_VCF ( BCFTOOLS_CONCAT_DV_VCF.out.vcf )
- ch_versions = ch_versions.mix(BCFTOOLS_SORT_DV_VCF.out.versions)
-
- // Put DV and extra gvCFs together -> send to glnexus
- BCFTOOLS_SORT_DV.out.vcf
- .concat(BCFTOOLS_VIEW_REGIONS.out.vcf)
- .map { meta, gvcf -> [ ['id':'multisample'], gvcf ]}
- .groupTuple()
- .set{ ch_glnexus_in }
-
- // Multisample
- GLNEXUS( ch_glnexus_in, ch_bed )
- TABIX_GLNEXUS(GLNEXUS.out.bcf)
-
- // Get versions
- ch_versions = ch_versions.mix(DEEPVARIANT.out.versions)
- ch_versions = ch_versions.mix(GLNEXUS.out.versions)
- ch_versions = ch_versions.mix(BCFTOOLS_VIEW_REGIONS.out.versions)
- ch_versions = ch_versions.mix(TABIX_EXTRA_GVCFS.out.versions)
- ch_versions = ch_versions.mix(TABIX_DV.out.versions)
- ch_versions = ch_versions.mix(BCFTOOLS_CONCAT_DV.out.versions)
- ch_versions = ch_versions.mix(BCFTOOLS_SORT_DV.out.versions)
- ch_versions = ch_versions.mix(TABIX_GLNEXUS.out.versions)
-
- emit:
- snp_calls_vcf = BCFTOOLS_SORT_DV_VCF.out.vcf
- combined_bcf = GLNEXUS.out.bcf
- versions = ch_versions
-}
diff --git a/subworkflows/local/short_variant_calling/main.nf b/subworkflows/local/short_variant_calling/main.nf
new file mode 100644
index 00000000..5395cd2e
--- /dev/null
+++ b/subworkflows/local/short_variant_calling/main.nf
@@ -0,0 +1,100 @@
+//
+// Workflow to call and merge SNVs
+//
+include { BCFTOOLS_CONCAT } from '../../../modules/nf-core/bcftools/concat/main'
+include { BCFTOOLS_FILLTAGS } from '../../../modules/local/bcftools/filltags/main'
+include { BCFTOOLS_NORM as BCFTOOLS_NORM_MULTISAMPLE } from '../../../modules/nf-core/bcftools/norm/main'
+include { BCFTOOLS_NORM as BCFTOOLS_NORM_SINGLESAMPLE } from '../../../modules/nf-core/bcftools/norm/main'
+include { DEEPVARIANT } from '../../../modules/nf-core/deepvariant/main'
+include { GLNEXUS } from '../../../modules/nf-core/glnexus/main'
+
+workflow SHORT_VARIANT_CALLING {
+ // Calls SNVs per call region with DeepVariant, then emits normalized per-sample calls and a normalized GLnexus multisample call set.
+ take:
+ ch_bam_bai_bed // channel: [mandatory] [ val(meta), path(bam), path(bai), path(call_region_bed) ]
+ ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_bed // channel: [optional] [ val(meta), path(input_bed) ]
+ ch_par_bed // channel: [mandatory] [ val(meta), path(par_bed) ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ ch_bam_bai_bed
+ // Add call region to meta so we can group by it later
+ .map { meta, bam, bai, bed ->
+ [ meta + [ 'region': bed ], bam, bai, bed ]
+ }
+ .set { ch_deepvariant_in }
+
+ DEEPVARIANT ( ch_deepvariant_in, ch_fasta, ch_fai, [[],[]], ch_par_bed )
+ ch_versions = ch_versions.mix(DEEPVARIANT.out.versions)
+
+ // First remove region so we can group per sample (groupKey size = meta.num_intervals)
+ // Then, after grouping, remove num_intervals to match the meta of other workflows
+ DEEPVARIANT.out.vcf
+ .map { meta, vcf ->
+ def new_meta = meta - meta.subMap('region') // 'def' keeps the variable local to this closure
+ [ groupKey(new_meta, new_meta.num_intervals ), vcf ]
+ }
+ .groupTuple()
+ .join( DEEPVARIANT.out.vcf_tbi
+ .map{ meta, tbi ->
+ def new_meta = meta - meta.subMap('region') // must yield the same key as the VCF branch for the join
+ [ groupKey(new_meta, new_meta.num_intervals ), tbi ]
+ }
+ .groupTuple()
+ )
+ .map { meta, vcf, tbi ->
+ [ meta - meta.subMap('num_intervals'), vcf, tbi ]
+ }
+ .set{ ch_concat_singlesample_in }
+
+ // This creates a singlesample VCF containing ALL regions
+ BCFTOOLS_CONCAT ( ch_concat_singlesample_in )
+ ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions)
+
+ // Which is then normalized, and ready to be used
+ // in processes that require SNVs, but not annotated SNVs
+ BCFTOOLS_NORM_SINGLESAMPLE ( BCFTOOLS_CONCAT.out.vcf.map { meta, vcf -> [ meta, vcf, [] ] }, ch_fasta )
+ ch_versions = ch_versions.mix(BCFTOOLS_NORM_SINGLESAMPLE.out.versions)
+
+ // This creates a multisample VCF, with regions from ONE bed file
+ DEEPVARIANT.out.gvcf
+ .map { meta, gvcf ->
+ [ meta.region.name, meta.project, meta.phenotype == 2, gvcf ]
+ }
+ .groupTuple() // Group all files together per region
+ // If any of the samples in the VCF have an affected phenotype (2)
+ // add this to the meta of the multisample VCF to know if we should run RANK_VARIANTS or not
+ .map { region_name, projects, affected, gvcfs ->
+ def new_meta = [
+ 'id': region_name,
+ 'project': projects.first(), // Works only because only one project per run is allowed
+ 'contains_affected': affected.any(),
+ ]
+ [ new_meta, gvcfs ]
+ }
+ .set{ glnexus_in }
+
+ GLNEXUS( glnexus_in, ch_bed )
+ ch_versions = ch_versions.mix(GLNEXUS.out.versions)
+
+ // Add allele count tag to multisample bcf
+ BCFTOOLS_FILLTAGS ( GLNEXUS.out.bcf )
+ ch_versions = ch_versions.mix(BCFTOOLS_FILLTAGS.out.versions)
+
+ BCFTOOLS_FILLTAGS.out.vcf
+ .map { meta, vcf -> [ meta, vcf, [] ] }
+ .set { bcftools_norm_in }
+
+ // Decompose and normalize variants
+ BCFTOOLS_NORM_MULTISAMPLE ( bcftools_norm_in, ch_fasta )
+ ch_versions = ch_versions.mix(BCFTOOLS_NORM_MULTISAMPLE.out.versions)
+
+ emit:
+ snp_calls_vcf = BCFTOOLS_NORM_SINGLESAMPLE.out.vcf // channel: [ val(meta), path(bcf) ]
+ combined_bcf = BCFTOOLS_NORM_MULTISAMPLE.out.vcf // channel: [ val(meta), path(bcf) ]
+ combined_csi = BCFTOOLS_NORM_MULTISAMPLE.out.csi // channel: [ val(meta), path(csi) ]
+ versions = ch_versions // channel: [ path(versions.yml) ]
+}
diff --git a/subworkflows/local/short_variant_calling/tests/main.nf.test b/subworkflows/local/short_variant_calling/tests/main.nf.test
new file mode 100644
index 00000000..1d07358f
--- /dev/null
+++ b/subworkflows/local/short_variant_calling/tests/main.nf.test
@@ -0,0 +1,564 @@
+nextflow_workflow {
+
+ name "Test Workflow SHORT_VARIANT_CALLING"
+ script "../main.nf"
+ config "./nextflow.config"
+ workflow "SHORT_VARIANT_CALLING"
+
+ test("1 sample - no bed, fasta, fai, [], []") {
+ // One sample with no call-region BED (4th tuple element is []); input[3] (ch_bed) and input[4] (ch_par_bed) are both empty.
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false, num_intervals:1 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ []
+ ])
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [[],[]]
+ input[4] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match() }
+ )
+ }
+
+ }
+
+ test("1 sample - 1 bed, fasta, fai, [], []") {
+ // One sample with genome.bed as its call region; input[3] (ch_bed) and input[4] (ch_par_bed) are both empty.
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false, num_intervals:1 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ])
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [[],[]]
+ input[4] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match()
+ }
+ )
+ }
+
+ }
+
+ test("1 sample - 1 bed, fasta, fai, bed, []") {
+ // One sample with genome.bed as its call region; genome.bed is also passed as input[3] (ch_bed), input[4] (ch_par_bed) is empty.
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false, num_intervals:1 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ])
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [ id:'bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ]
+ input[4] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match()
+ }
+ )
+ }
+
+ }
+
+ test("1 sample - 2 bed, fasta, fai, bed, []") {
+ // One sample split over two call regions (genome.bed, genome.multi_intervals.bed), hence num_intervals:2 in both metas.
+ when {
+ workflow {
+ """
+ input[0] = Channel.of(
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ]
+ )
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [ id:'bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ]
+ input[4] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match()
+ }
+ )
+ }
+
+ }
+
+ test("2 samples - 2 bed, fasta, fai, bed, []") {
+ // Two samples (test, test2), each split over the same two call regions; num_intervals:2 in every meta, input[4] (ch_par_bed) is empty.
+ when {
+ workflow {
+ """
+ input[0] = Channel.of(
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test2', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test2', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ]
+ )
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [ id:'bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ]
+ input[4] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match()
+ }
+ )
+ }
+
+ }
+
+ test("2 samples - 2 bed, fasta, fai, bed, par_bed") {
+ // Two samples over two call regions, plus input[4] (ch_par_bed): genome.blacklist_intervals.bed is passed as the PAR BED.
+ when {
+ workflow {
+ """
+ input[0] = Channel.of(
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test2', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test2', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ]
+ )
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [ id:'bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ]
+ input[4] = [
+ [ id:'par_bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match()
+ }
+ )
+ }
+ }
+
+ test("1 sample - no bed, fasta, fai, [], [] - stub") {
+ // Stub run (-stub): one sample with no call-region BED; input[3] (ch_bed) and input[4] (ch_par_bed) are both empty.
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false, num_intervals:1 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ []
+ ])
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [[],[]]
+ input[4] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match()
+ }
+ )
+ }
+
+ }
+
+ test("1 sample - 1 bed, fasta, fai, [], [] - stub") {
+ // Stub run (-stub): one sample with genome.bed as its call region; input[3] (ch_bed) and input[4] (ch_par_bed) are both empty.
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false, num_intervals:1 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ])
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [[],[]]
+ input[4] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match()
+ }
+ )
+ }
+
+ }
+
+ test("1 sample - 1 bed, fasta, fai, bed, [] - stub") {
+ // Stub run (-stub): one sample with genome.bed as call region and as input[3] (ch_bed); input[4] (ch_par_bed) is empty.
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false, num_intervals:1 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ])
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [ id:'bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ]
+ input[4] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match()
+ }
+ )
+ }
+
+ }
+
+ test("1 sample - 2 bed, fasta, fai, bed, [] - stub") {
+ // Stub run (-stub): one sample split over two call regions (genome.bed, genome.multi_intervals.bed), num_intervals:2 in both metas.
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of(
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ]
+ )
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [ id:'bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ]
+ input[4] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match()
+ }
+ )
+ }
+
+ }
+
+ test("2 samples - 2 bed, fasta, fai, bed, [] - stub") {
+ // Stub run (-stub): two samples (test, test2), each split over the same two call regions; input[4] (ch_par_bed) is empty.
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of(
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test2', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test2', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ]
+ )
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [ id:'bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ]
+ input[4] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match()
+ }
+ )
+ }
+
+ }
+
+ test("2 samples - 2 bed, fasta, fai, bed, par_bed - stub") {
+ // Stub run (-stub): two samples over two call regions, with genome.blacklist_intervals.bed passed as input[4] (ch_par_bed).
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of(
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test2', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test2', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ]
+ )
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [ id:'bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ]
+ input[4] = [
+ [ id:'par_bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match()
+ }
+ )
+ }
+
+ }
+
+}
diff --git a/subworkflows/local/short_variant_calling/tests/main.nf.test.snap b/subworkflows/local/short_variant_calling/tests/main.nf.test.snap
new file mode 100644
index 00000000..d35e9387
--- /dev/null
+++ b/subworkflows/local/short_variant_calling/tests/main.nf.test.snap
@@ -0,0 +1,1294 @@
+{
+ "1 sample - 1 bed, fasta, fai, bed, []": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
+ ]
+ ],
+ "combined_csi": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
+ ]
+ ],
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:18:48.194341252"
+ },
+ "2 samples - 2 bed, fasta, fai, bed, [] - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "test2",
+ "single_end": false
+ },
+ "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "combined_csi": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "test2",
+ "single_end": false
+ },
+ "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:38:57.50673848"
+ },
+ "2 samples - 2 bed, fasta, fai, bed, par_bed - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "test2",
+ "single_end": false
+ },
+ "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "combined_csi": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "test2",
+ "single_end": false
+ },
+ "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:39:09.239296314"
+ },
+ "1 sample - 1 bed, fasta, fai, bed, [] - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "combined_csi": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:20:26.146017974"
+ },
+ "2 samples - 2 bed, fasta, fai, bed, []": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246"
+ ],
+ [
+ {
+ "id": "test2",
+ "single_end": false
+ },
+ "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ]
+ ],
+ "combined_csi": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ]
+ ],
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246"
+ ],
+ [
+ {
+ "id": "test2",
+ "single_end": false
+ },
+ "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:37:29.720749753"
+ },
+ "1 sample - 1 bed, fasta, fai, [], []": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
+ ]
+ ],
+ "combined_csi": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
+ ]
+ ],
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:18:20.527237885"
+ },
+ "1 sample - 2 bed, fasta, fai, bed, [] - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "combined_csi": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:38:45.950944715"
+ },
+ "2 samples - 2 bed, fasta, fai, bed, par_bed": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246"
+ ],
+ [
+ {
+ "id": "test2",
+ "single_end": false
+ },
+ "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ]
+ ],
+ "combined_csi": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ]
+ ],
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246"
+ ],
+ [
+ {
+ "id": "test2",
+ "single_end": false
+ },
+ "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:40:41.412678218"
+ },
+ "1 sample - no bed, fasta, fai, [], []": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": [
+
+ ],
+ "contains_affected": false
+ },
+ "[]_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": [
+
+ ],
+ "contains_affected": false
+ },
+ "[]_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
+ [
+ {
+ "id": [
+
+ ],
+ "contains_affected": false
+ },
+ "[]_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
+ ]
+ ],
+ "combined_csi": [
+ [
+ {
+ "id": [
+
+ ],
+ "contains_affected": false
+ },
+ "[]_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
+ ]
+ ],
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:17:52.991579386"
+ },
+ "1 sample - 2 bed, fasta, fai, bed, []": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
+ ]
+ ],
+ "combined_csi": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
+ ]
+ ],
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:36:53.97901474"
+ },
+ "1 sample - no bed, fasta, fai, [], [] - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": [
+
+ ],
+ "contains_affected": false
+ },
+ "[]_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": [
+
+ ],
+ "contains_affected": false
+ },
+ "[]_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
+ [
+ {
+ "id": [
+
+ ],
+ "contains_affected": false
+ },
+ "[]_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "combined_csi": [
+ [
+ {
+ "id": [
+
+ ],
+ "contains_affected": false
+ },
+ "[]_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:20:07.024378325"
+ },
+ "1 sample - 1 bed, fasta, fai, [], [] - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "combined_csi": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:20:16.739088461"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/local/short_variant_calling/tests/nextflow.config b/subworkflows/local/short_variant_calling/tests/nextflow.config
new file mode 100644
index 00000000..f9b1d1a5
--- /dev/null
+++ b/subworkflows/local/short_variant_calling/tests/nextflow.config
@@ -0,0 +1,52 @@
+process { // Test-only ext.prefix/ext.args overrides for the processes used by the short_variant_calling tests
+ withName: 'DEEPVARIANT' {
+ ext.prefix = { intervals ? "${meta.id}_${intervals}_deepvariant" : "${meta.id}_deepvariant" } // add the intervals name to the prefix when one is set
+ ext.args = { [
+ '--model_type WGS',
+ "--sample_name=${meta.id}",
+ '--vcf_stats_report=False' // double-dash form, consistent with the other long options
+ ].join(' ') }
+ }
+
+ withName: 'GLNEXUS' {
+ ext.args = '--config DeepVariant_unfiltered'
+ }
+
+ withName: 'BCFTOOLS_CONCAT' {
+ ext.prefix = { "${meta.id}_concat" }
+ ext.args = [
+ '--no-version',
+ '--allow-overlaps'
+ ].join(' ')
+ }
+
+ withName: 'BCFTOOLS_NORM_MULTISAMPLE' {
+ ext.prefix = { "${meta.id}_norm_multisample" }
+ ext.args = [
+ '--no-version',
+ '-m -',
+ '--output-type z', // bgzipped VCF output (snapshot expects *.vcf.gz)
+ '--write-index=csi', // index written alongside the output (snapshot expects *.vcf.gz.csi)
+ '-w 10000'
+ ].join(' ')
+ }
+
+ withName: 'BCFTOOLS_NORM_SINGLESAMPLE' {
+ ext.prefix = { "${meta.id}_norm_singlesample" }
+ ext.args = [
+ '--no-version',
+ '-m -',
+ '-w 10000',
+ '--output-type u' // uncompressed BCF output (snapshot expects *.bcf)
+ ].join(' ')
+ }
+
+ withName: 'BCFTOOLS_FILLTAGS' {
+ ext.prefix = { "${meta.id}_ac" }
+ ext.args = [
+ '--no-version',
+ '--output-type u'
+ ].join(' ')
+ }
+
+}
diff --git a/subworkflows/local/snv_annotation.nf b/subworkflows/local/snv_annotation.nf
deleted file mode 100644
index f65c214f..00000000
--- a/subworkflows/local/snv_annotation.nf
+++ /dev/null
@@ -1,81 +0,0 @@
-// TODO: BCFTOOLS processes should have unique names so that they are not used multiple times in other workflows?
-include { ECHTVAR_ANNO } from '../../modules/local/echtvar/anno/main'
-include { ECHTVAR_ENCODE } from '../../modules/local/echtvar/encode/main'
-include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm/main'
-include { BCFTOOLS_NORM as BCFTOOLS_NORM_SINGLESAMPLE } from '../../modules/nf-core/bcftools/norm/main'
-include { BCFTOOLS_INDEX } from '../../modules/nf-core/bcftools/index/main'
-include { BCFTOOLS_INDEX as BCFTOOLS_INDEX_SINGLESAMPLE } from '../../modules/nf-core/bcftools/index/main'
-include { BCFTOOLS_FILLTAGS } from '../../modules/local/bcftools/filltags/main'
-include { BCFTOOLS_FILLTAGS as BCFTOOLS_FILLTAGS_ANNO } from '../../modules/local/bcftools/filltags/main'
-include { ENSEMBLVEP_VEP } from '../../modules/nf-core/ensemblvep/vep/main'
-include { TABIX_TABIX as TABIX_VEP } from '../../modules/nf-core/tabix/tabix/main'
-
-workflow SNV_ANNOTATION {
-
- take:
- ch_bcf
- ch_single_sample_vcf
- ch_databases
- ch_fasta
- ch_vep_cache
- val_vep_cache_version // string: [mandatory] default: 110
-
- main:
- ch_versions = Channel.empty()
-
- // Add allele count tag to mutlisample vcf
- BCFTOOLS_FILLTAGS(ch_bcf)
- // Index and normalize multisample vcf
- BCFTOOLS_INDEX(BCFTOOLS_FILLTAGS.out.vcf)
- BCFTOOLS_NORM(BCFTOOLS_FILLTAGS.out.vcf.join(BCFTOOLS_INDEX.out.csi), ch_fasta)
-
- // Index and normalize single sample vcfs
- BCFTOOLS_INDEX_SINGLESAMPLE(ch_single_sample_vcf)
-
- BCFTOOLS_NORM_SINGLESAMPLE(
- ch_single_sample_vcf.join(BCFTOOLS_INDEX_SINGLESAMPLE.out.csi),
- ch_fasta
- )
-
- // Make a cohort database using mutisample vcf
- ECHTVAR_ENCODE(BCFTOOLS_NORM.out.vcf)
-
- // combine input databases with cohort database
- db = ch_databases.concat(ECHTVAR_ENCODE.out.db.map{it[1]}).collect()
-
- // Annotate with chosen databases (GNOMAD,CADD + SAMPLES_DB)
-
- ECHTVAR_ANNO(BCFTOOLS_NORM_SINGLESAMPLE.out.vcf, db)
- BCFTOOLS_FILLTAGS_ANNO(ECHTVAR_ANNO.out.bcf)
-
- vep_in = BCFTOOLS_FILLTAGS_ANNO.out.vcf.map{ meta, vcf -> return [meta, vcf, []]}
-
- // Annotate with VEP as well
-
- ENSEMBLVEP_VEP(
- vep_in,
- "GRCh38",
- "homo_sapiens",
- val_vep_cache_version,
- ch_vep_cache,
- ch_fasta,
- []
- )
-
- TABIX_VEP ( ENSEMBLVEP_VEP.out.vcf )
-
- // Get versions
- ch_versions = ch_versions.mix(BCFTOOLS_FILLTAGS.out.versions)
- ch_versions = ch_versions.mix(BCFTOOLS_INDEX.out.versions)
- ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions)
- ch_versions = ch_versions.mix(BCFTOOLS_INDEX_SINGLESAMPLE.out.versions)
- ch_versions = ch_versions.mix(BCFTOOLS_NORM_SINGLESAMPLE.out.versions)
- ch_versions = ch_versions.mix(ECHTVAR_ENCODE.out.versions)
- ch_versions = ch_versions.mix(ECHTVAR_ANNO.out.versions)
- ch_versions = ch_versions.mix(BCFTOOLS_FILLTAGS_ANNO.out.versions)
- ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions)
- ch_versions = ch_versions.mix(TABIX_VEP.out.versions)
-
- emit:
- versions = ch_versions
-}
diff --git a/subworkflows/local/snv_annotation/main.nf b/subworkflows/local/snv_annotation/main.nf
new file mode 100644
index 00000000..1bcc22e7
--- /dev/null
+++ b/subworkflows/local/snv_annotation/main.nf
@@ -0,0 +1,74 @@
+include { ANNOTATE_CADD } from '../annotate_cadd/main'
+include { ECHTVAR_ANNO } from '../../../modules/local/echtvar/anno/main'
+include { BCFTOOLS_FILLTAGS as BCFTOOLS_FILLTAGS_ANNO } from '../../../modules/local/bcftools/filltags/main'
+include { ENSEMBLVEP_VEP } from '../../../modules/nf-core/ensemblvep/vep/main'
+include { TABIX_TABIX as TABIX_VEP } from '../../../modules/nf-core/tabix/tabix/main'
+
+workflow SNV_ANNOTATION {
+
+ take:
+ ch_vcf // channel: [mandatory] [ val(meta), path(vcf) ]
+ ch_databases // channel: [mandatory] [ val(meta), path(db) ]
+ ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
+ ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
+ ch_vep_cache // channel: [mandatory] [ path(cache) ]
+ val_vep_cache_version // string: [mandatory] default: 110
+ ch_vep_extra_files // channel: [mandatory] [ path(files) ]
+ val_annotate_cadd // bool: [mandatory]
+ ch_cadd_header // channel: [mandatory] [ path(txt) ]
+ ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
+ ch_cadd_prescored // channel: [mandatory] [ path(prescored) ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ // Annotate the input variants with the supplied databases
+ ECHTVAR_ANNO ( ch_vcf, ch_databases )
+ // Run the fill-tags step on the echtvar-annotated BCF
+ BCFTOOLS_FILLTAGS_ANNO ( ECHTVAR_ANNO.out.bcf )
+
+ ch_versions = ch_versions
+ .mix(ECHTVAR_ANNO.out.versions)
+ .mix(BCFTOOLS_FILLTAGS_ANNO.out.versions)
+
+ // Without CADD annotation the fill-tags output goes straight to VEP
+ ch_annotated = BCFTOOLS_FILLTAGS_ANNO.out.vcf
+
+ if (val_annotate_cadd) {
+ // With CADD annotation enabled, VEP consumes the CADD-annotated VCF instead
+ ANNOTATE_CADD (
+ ch_fai,
+ BCFTOOLS_FILLTAGS_ANNO.out.vcf,
+ BCFTOOLS_FILLTAGS_ANNO.out.tbi,
+ ch_cadd_header,
+ ch_cadd_resources,
+ ch_cadd_prescored
+ )
+ ch_versions = ch_versions.mix(ANNOTATE_CADD.out.versions)
+ ch_annotated = ANNOTATE_CADD.out.vcf
+ }
+
+ // Append an empty third element to every [ meta, vcf ] tuple before handing it to VEP
+ ch_vep_in = ch_annotated.map { meta, vcf -> [ meta, vcf, [] ] }
+
+ ENSEMBLVEP_VEP (
+ ch_vep_in,
+ "GRCh38",
+ "homo_sapiens",
+ val_vep_cache_version,
+ ch_vep_cache,
+ ch_fasta,
+ ch_vep_extra_files
+ )
+ // Index the VEP-annotated VCF
+ TABIX_VEP ( ENSEMBLVEP_VEP.out.vcf )
+
+ ch_versions = ch_versions
+ .mix(ENSEMBLVEP_VEP.out.versions)
+ .mix(TABIX_VEP.out.versions)
+
+ emit:
+ vcf = ENSEMBLVEP_VEP.out.vcf
+ tbi = TABIX_VEP.out.tbi
+ versions = ch_versions
+}
diff --git a/subworkflows/local/snv_annotation/tests/main.nf.test b/subworkflows/local/snv_annotation/tests/main.nf.test
new file mode 100644
index 00000000..6009d4c2
--- /dev/null
+++ b/subworkflows/local/snv_annotation/tests/main.nf.test
@@ -0,0 +1,160 @@
+nextflow_workflow {
+
+ name "Test Workflow SNV_ANNOTATION"
+ script "../"
+ workflow "SNV_ANNOTATION"
+ config "./nextflow.config"
+ tag "SNV_ANNOTATION"
+
+ setup { // Upstream modules/subworkflow whose outputs feed the tests below
+ run("GUNZIP") { // decompress the hg38 test reference FASTA
+ script "../../../../modules/nf-core/gunzip/main.nf"
+ process {
+ """
+ input[0] = [
+ [ id:'hg38' ],
+ file(params.pipelines_testdata_base_path + 'nallo/reference/hg38.test.fa.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+ run("SAMTOOLS_FAIDX") { // index the decompressed reference
+ script "../../../../modules/nf-core/samtools/faidx/main.nf"
+ process {
+ """
+ input[0] = GUNZIP.out.gunzip
+ input[1] = [[],[]]
+ """
+ }
+ }
+ run("MINIMAP2_ALIGN") { // align the HG002 test reads against the decompressed reference
+ script "../../../../modules/nf-core/minimap2/align/main.nf"
+ process {
+ """
+ input[0] = [
+ [ id: 'test', num_intervals:1 ],
+ file(params.pipelines_testdata_base_path + 'nallo/testdata/HG002_PacBio_Revio.fastq.gz', checkIfExists: true)
+ ]
+ input[1] = GUNZIP.out.gunzip
+ input[2] = true
+ input[3] = 'csi'
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+ run("SHORT_VARIANT_CALLING") { // provides the combined_bcf channel consumed by the tests
+ script "../../short_variant_calling/main.nf"
+ process {
+ """
+ input[0] = MINIMAP2_ALIGN.out.bam
+ .join(MINIMAP2_ALIGN.out.index)
+ .join(Channel.of([
+ [ id: 'test', num_intervals:1 ],
+ file(params.pipelines_testdata_base_path + 'nallo/reference/test_data.bed', checkIfExists: true)
+ ]))
+ input[1] = GUNZIP.out.gunzip
+ input[2] = SAMTOOLS_FAIDX.out.fai
+ input[3] = [
+ [],[]
+ ]
+ input[4] = [
+ [],[]
+ ]
+ """
+ }
+ }
+ run("UNTAR") { // unpack the VEP cache test archive
+ script "../../../../modules/nf-core/untar/main.nf"
+ process {
+ """
+ input[0] = [
+ [ id: 'vep_cache' ],
+ file(params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz', checkIfExists:true)
+ ]
+ """
+ }
+ }
+
+ }
+
+ test("bcf, db, vep_cache, '110'") { // Full (non-stub) run of SNV_ANNOTATION on the setup outputs
+
+ when { // input[7] = false is passed in the val_annotate_cadd position; input[8..10] fill the CADD inputs
+ workflow {
+ """
+ input[0] = SHORT_VARIANT_CALLING.out.combined_bcf
+ input[1] = [
+ file(params.pipelines_testdata_base_path + 'nallo/reference/cadd.v1.6.hg38.test_data.zip', checkIfExists: true)
+ ]
+ input[2] = GUNZIP.out.gunzip
+ input[3] = SAMTOOLS_FAIDX.out.fai
+ input[4] = UNTAR.out.untar.map { meta, cache -> cache }
+ input[5] = Channel.value('110')
+ input[6] = [
+ file(params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv', checkIfExists: true)
+ ]
+ input[7] = false
+ input[8] = Channel.value([])
+ input[9] = null
+ input[10] = null
+ """
+ }
+ }
+
+ then { // the workflow must succeed and match the stored snapshot
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out.versions,
+ file(workflow.out.tbi.get(0).get(1)).name, // only the index file name is snapshotted
+ path(workflow.out.vcf.get(0).get(1)).linesGzip[48..118], // Stable
+ path(workflow.out.vcf.get(0).get(1)).linesGzip[121..150] // Stable
+ ).match() }
+ )
+ }
+
+ }
+
+ test("bcf, db, vep_cache, '110', -stub") { // Stub run of SNV_ANNOTATION with the same inputs as the non-stub test
+
+ options "-stub" // pass -stub to Nextflow for this test
+
+ when {
+ params { // NOTE(review): empty params block, no overrides are set here
+ }
+ workflow {
+ """
+ input[0] = SHORT_VARIANT_CALLING.out.combined_bcf
+ input[1] = [
+ file(params.pipelines_testdata_base_path + 'nallo/reference/cadd.v1.6.hg38.test_data.zip', checkIfExists: true)
+ ]
+ input[2] = GUNZIP.out.gunzip
+ input[3] = SAMTOOLS_FAIDX.out.fai
+ input[4] = UNTAR.out.untar.map { meta, cache -> cache }
+ input[5] = Channel.value('110')
+ input[6] = [
+ file(params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv', checkIfExists: true)
+ ]
+ input[7] = false
+ input[8] = Channel.value([])
+ input[9] = null
+ input[10] = null
+ """
+ }
+ }
+
+ then { // the workflow must succeed and match the stored snapshot
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot( // whole output channels are snapshotted here
+ workflow.out.versions,
+ workflow.out.tbi,
+ workflow.out.vcf
+ ).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/subworkflows/local/snv_annotation/tests/main.nf.test.snap b/subworkflows/local/snv_annotation/tests/main.nf.test.snap
new file mode 100644
index 00000000..f491df0c
--- /dev/null
+++ b/subworkflows/local/snv_annotation/tests/main.nf.test.snap
@@ -0,0 +1,156 @@
+{
+ "bcf, db, vep_cache, '110', -stub": {
+ "content": [
+ [
+ "versions.yml:md5,503122d4650f6a8a39b4e810063d6c22",
+ "versions.yml:md5,992301857689684643c42695c032a7f2",
+ "versions.yml:md5,a07924ee4ebc2d4de5bb7ef897ddc30c",
+ "versions.yml:md5,c0e55e36a31ed71acf25702b7d059533"
+ ],
+ [
+ [
+ {
+ "id": "test_data.bed",
+ "contains_affected": false
+ },
+ "test_data.bed.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test_data.bed",
+ "contains_affected": false
+ },
+ "test_data.bed.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-12T10:11:15.912554747"
+ },
+ "bcf, db, vep_cache, '110'": {
+ "content": [
+ [
+ "versions.yml:md5,503122d4650f6a8a39b4e810063d6c22",
+ "versions.yml:md5,992301857689684643c42695c032a7f2",
+ "versions.yml:md5,a07924ee4ebc2d4de5bb7ef897ddc30c",
+ "versions.yml:md5,c0e55e36a31ed71acf25702b7d059533"
+ ],
+ "test_data.bed.vcf.gz.tbi",
+ [
+ "##INFO=",
+ "##VEP-command-line='vep --assembly GRCh38 --cache --cache_version 110 --compress_output bgzip --database 0 --dir_cache [PATH]/vep_cache --fasta hg38.test.fa --fork 2 --input_file test_data.bed_filltags_anno.vcf.gz --offline --output_file test_data.bed.vcf.gz --vcf'",
+ "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\ttest",
+ "chr16\t63972\tchr16_63972_T_C\tT\tC\t20\t.\tAF=1;AQ=20;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000262316|protein_coding||3/17||||||||||-1||HGNC|HGNC:20561,C|intron_variant&NMD_transcript_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000417043|nonsense_mediated_decay||2/3||||||||||-1||HGNC|HGNC:20561,C|intron_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000419764|protein_coding||3/3||||||||||-1|cds_end_NF|HGNC|HGNC:20561,C|intron_variant&NMD_transcript_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000428730|nonsense_mediated_decay||2/16||||||||||-1||HGNC|HGNC:20561,C|upstream_gene_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000448893|protein_coding|||||||||||4210|-1|cds_start_NF|HGNC|HGNC:20561,C|intron_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000450643|protein_coding||3/4||||||||||-1|cds_end_NF|HGNC|HGNC:20561,C|downstream_gene_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000472390|retained_intron|||||||||||580|-1||HGNC|HGNC:20561,C|upstream_gene_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000482904|retained_intron|||||||||||1766|-1||HGNC|HGNC:20561,C|upstream_gene_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000486045|retained_intron|||||||||||3779|-1||HGNC|HGNC:20561,C|non_coding_transcript_exon_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000487201|retained_intron|2/3||||1177|||||||-1||HGNC|HGNC:20561,C|upstream_gene_variant|MODIFIER|RHBDF1|ENSG00000007384|Transcript|ENST00000493647|retained_intron|||||||||||3292|-1||HGNC|HGNC:20561\tGT:DP:AD:GQ:PL:RNC\t1/1:13:0,13:11:20,11,0:..",
+ "chr16\t86889\tchr16_86889_T_C\tT\tC\t21\t.\tAF=1;AQ=21;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|downstream_gene_variant|MODIFIER|MPG|ENSG00000103152|Transcript|ENST00000219431|protein_coding|||||||||||1047|1||HGNC|HGNC:7211,C|downstream_gene_variant|MODIFIER|MPG|ENSG00000103152|Transcript|ENST00000356432|protein_coding|||||||||||1043|1||HGNC|HGNC:7211,C|downstream_gene_variant|MODIFIER|MPG|ENSG00000103152|Transcript|ENST00000397817|protein_coding|||||||||||1038|1||HGNC|HGNC:7211,C|intron_variant|MODIFIER|NPRL3|ENSG00000103148|Transcript|ENST00000399953|protein_coding||11/11||||||||||-1||HGNC|HGNC:14124,C|downstream_gene_variant|MODIFIER|MPG|ENSG00000103152|Transcript|ENST00000436333|protein_coding|||||||||||1205|1|cds_end_NF|HGNC|HGNC:7211,C|upstream_gene_variant|MODIFIER||ENSG00000269482|Transcript|ENST00000601483|lncRNA|||||||||||3742|1|||,C|intron_variant&non_coding_transcript_variant|MODIFIER|NPRL3|ENSG00000103148|Transcript|ENST00000610509|retained_intron||1/2||||||||||-1||HGNC|HGNC:14124,C|intron_variant|MODIFIER|NPRL3|ENSG00000103148|Transcript|ENST00000611875|protein_coding||13/13||||||||||-1||HGNC|HGNC:14124,C|intron_variant&NMD_transcript_variant|MODIFIER|NPRL3|ENSG00000103148|Transcript|ENST00000621703|nonsense_mediated_decay||10/10||||||||||-1||HGNC|HGNC:14124,C|intron_variant&NMD_transcript_variant|MODIFIER|NPRL3|ENSG00000103148|Transcript|ENST00000622194|nonsense_mediated_decay||11/11||||||||||-1||HGNC|HGNC:14124\tGT:DP:AD:GQ:PL:RNC\t1/1:28:0,28:18:21,20,0:..",
+ "chr16\t160055\tchr16_160055_C_G\tC\tG\t18\t.\tAF=1;AQ=18;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||3067|1||HGNC|HGNC:4836,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4427|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:30:0,29:17:18,22,0:..",
+ "chr16\t160070\tchr16_160070_C_CGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGACACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGACGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGAGATGCCCAGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGAT\tC\tCGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGACACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGACGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGAGATGCCCAGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGAT\t0\t.\tAF=0.5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGACACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGACGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGAGATGCCCAGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGAT|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||3051|1||HGNC|HGNC:4836,GGCTTGTGGGGCACAGGTTTGAGA
GGTGCCCGGGACGGCTTGTGGGACACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGACGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGAGATGCCCAGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGAT|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGACACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTGTGAGACGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGAGATGCCCAGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGTTTGAGAGGTGCCCGGGAT|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4411|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:30:8,7:2:9,0,18:..",
+ "chr16\t160089\tchr16_160089_TG_T\tTG\tT\t14\t.\tAF=0.5;AQ=14;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||3032|1||HGNC|HGNC:4836,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4392|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:30:25,5:15:14,0,29:..",
+ "chr16\t160180\tchr16_160180_C_T\tC\tT\t15\t.\tAF=1;AQ=15;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2942|1||HGNC|HGNC:4836,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4302|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:29:1,28:15:15,25,0:..",
+ "chr16\t160216\tchr16_160216_T_A\tT\tA\t10\t.\tAF=1;AQ=10;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2906|1||HGNC|HGNC:4836,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4266|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:30:0,27:6:10,6,0:..",
+ "chr16\t160217\tchr16_160217_C_T\tC\tT\t9\t.\tAF=1;AQ=9;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2905|1||HGNC|HGNC:4836,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4265|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:27:0,27:5:9,3,0:..",
+ "chr16\t160326\tchr16_160326_T_A\tT\tA\t15\t.\tAF=1;AQ=15;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2796|1||HGNC|HGNC:4836,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4156|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:30:0,29:14:15,18,0:..",
+ "chr16\t160327\tchr16_160327_C_T\tC\tT\t16\t.\tAF=1;AQ=16;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2795|1||HGNC|HGNC:4836,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4155|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:29:3,26:14:16,17,0:..",
+ "chr16\t160388\tchr16_160388_G_GGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGTTGTGAGA\tG\tGGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGTTGTGAGA\t15\t.\tAF=0.5;AQ=15;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGTTGTGAGA|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2733|1||HGNC|HGNC:4836,GGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCTGGGACGGCTTGTGGGGCACAGGTTGTGAGA|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGAGAGGTGCCCGGGATGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCTGGGATGGCTTGTGGGGCACAGGTTGTGA
GAGGTGCCTGGGACGGCTTGTGGGGCACAGGTTGTGAGA|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4093|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:31:4,14:11:15,0,12:..",
+ "chr16\t160728\tchr16_160728_C_T\tC\tT\t5\t.\tAF=1;AQ=5;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2394|1||HGNC|HGNC:4836,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||3754|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:30:8,22:4:5,3,0:..",
+ "chr16\t160746\tchr16_160746_T_C\tT\tC\t10\t.\tAF=0.5;AQ=10;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2376|1||HGNC|HGNC:4836,C|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||3736|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:30:23,7:10:10,0,18:..",
+ "chr16\t160751\tchr16_160751_AG_A;chr16_160752_G_GGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCAGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCAGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGA\tAG\tA\t8\t.\tAF=0.5;AQ=8;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2370|1||HGNC|HGNC:4836,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||3730|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/0:22:.,7:6:0,0,0:..",
+ "chr16\t160752\tchr16_160751_AG_A;chr16_160752_G_GGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCAGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCAGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGA\tG\tGGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCAGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCAGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGA\t8\t.\tAF=0.5;AQ=6;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCAGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCAGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGA|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2369|1||HGNC|HGNC:4836,GGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCAGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCAGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGA|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGAGGGTGCCCAGGATGGCTTGTGGGGCACAGGCTGCAAGAGGTGCCCAGGACGGCTTGTGGGGCACAGGTTGTGAGGGTGCCCGGGACGGCTTGTGGGGCACAGGCTGTGA|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||3729|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:22:.,14:6:0,0,0:..",
+ "chr16\t161210\tchr16_161210_GC_G\tGC\tG\t6\t.\tAF=0.5;AQ=6;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||1911|1||HGNC|HGNC:4836,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4767|1||HGNC|HGNC:4826,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||3271|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:27:21,5:7:6,0,27:..",
+ "chr16\t161474\tchr16_161474_TA_T\tTA\tT\t22\t.\tAF=0.5;AQ=22;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||1647|1||HGNC|HGNC:4836,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4503|1||HGNC|HGNC:4826,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||3007|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:25:16,9:23:22,0,39:..",
+ "chr16\t162329\tchr16_162329_G_A\tG\tA\t23\t.\tAF=0.5;AQ=23;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||793|1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||3649|1||HGNC|HGNC:4826,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||2153|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:23:15,8:24:23,0,41:..",
+ "chr16\t162650\tchr16_162650_C_T\tC\tT\t34\t.\tAF=0.5;AQ=34;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||472|1||HGNC|HGNC:4836,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||3328|1||HGNC|HGNC:4826,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1832|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:24:9,15:33:34,0,41:..",
+ "chr16\t163106\tchr16_163106_C_T\tC\tT\t22\t.\tAF=0.5;AQ=22;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||16|1||HGNC|HGNC:4836,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2872|1||HGNC|HGNC:4826,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1376|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:23:15,8:22:22,0,39:..",
+ "chr16\t163140\tchr16_163140_T_G\tT\tG\t17\t.\tAF=0.5;AQ=17;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|non_coding_transcript_exon_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|1/3||||19|||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2838|1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1342|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:23:15,8:17:17,0,39:..",
+ "chr16\t163143\tchr16_163143_G_A\tG\tA\t16\t.\tAF=0.5;AQ=16;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|non_coding_transcript_exon_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|1/3||||22|||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2835|1||HGNC|HGNC:4826,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1339|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:23:15,8:17:16,0,41:..",
+ "chr16\t163229\tchr16_163229_T_C\tT\tC\t0\t.\tAF=0.5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||1/2||||||||||1||HGNC|HGNC:4836,C|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2749|1||HGNC|HGNC:4826,C|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1253|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:23:15,8:3:0,0,27:..",
+ "chr16\t163796\tchr16_163796_T_A\tT\tA\t12\t.\tAF=1;AQ=12;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||1/2||||||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2182|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||4883|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||686|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:15:0,15:7:12,7,0:..",
+ "chr16\t163800\tchr16_163800_G_A\tG\tA\t8\t.\tAF=1;AQ=8;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||1/2||||||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2178|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||4879|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||682|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:16:2,14:3:8,0,0:..",
+ "chr16\t163804\tchr16_163804_G_A\tG\tA\t14\t.\tAF=1;AQ=14;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||1/2||||||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2174|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||4875|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||678|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:21:0,21:13:14,16,0:..",
+ "chr16\t164431\tchr16_164431_T_TG\tT\tTG\t25\t.\tAF=1;AQ=25;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||1/2||||||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1546|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||4247|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||50|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:21:0,21:22:25,25,0:..",
+ "chr16\t164541\tchr16_164541_G_GGTCC\tG\tGGTCC\t26\t.\tAF=0.5;AQ=26;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GTCC|non_coding_transcript_exon_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|2/3||||155-156|||||||1||HGNC|HGNC:4836,GTCC|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1436|1||HGNC|HGNC:4826,GTCC|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||4137|1||HGNC|HGNC:4825,GTCC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GTCC|non_coding_transcript_exon_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|1/3||||60-61|||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:13,7:26:26,0,39:..",
+ "chr16\t164690\tchr16_164690_C_CGCGGGGCGCGGTGCGGGCGGG\tC\tCGCGGGGCGCGGTGCGGGCGGG\t27\t.\tAF=0.5;AQ=27;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GCGGGGCGCGGTGCGGGCGGG|splice_donor_region_variant&intron_variant&non_coding_transcript_variant|LOW|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,GCGGGGCGCGGTGCGGGCGGG|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1287|1||HGNC|HGNC:4826,GCGGGGCGCGGTGCGGGCGGG|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3988|1||HGNC|HGNC:4825,GCGGGGCGCGGTGCGGGCGGG|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GCGGGGCGCGGTGCGGGCGGG|splice_donor_region_variant&intron_variant&non_coding_transcript_variant|LOW|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:6,14:25:27,0,28:..",
+ "chr16\t164723\tchr16_164723_C_T\tC\tT\t11\t.\tAF=0.5;AQ=11;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=T|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1255|1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3956|1||HGNC|HGNC:4825,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:15,4:11:11,0,21:..",
+ "chr16\t164731\tchr16_164731_C_CG\tC\tCG\t0\t.\tAF=0.5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1246|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3947|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:2,14:2:14,0,22:..",
+ "chr16\t164740\tchr16_164740_C_T\tC\tT\t2\t.\tAF=0.5;AQ=2;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=T|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1238|1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3939|1||HGNC|HGNC:4825,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:20:1,18:2:2,0,1:..",
+ "chr16\t164771\tchr16_164771_T_C\tT\tC\t2\t.\tAF=0.5;AQ=2;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,C|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1207|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3908|1||HGNC|HGNC:4825,C|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,C|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:13,6:4:10,0,25:..",
+ "chr16\t164791\tchr16_164791_G_A\tG\tA\t16\t.\tAF=0.5;AQ=16;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1187|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3888|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:6,15:15:16,0,20:..",
+ "chr16\t164800\tchr16_164800_G_GGCGGGGTCGCGGGGCGGGGCGAGGTC\tG\tGGCGGGGTCGCGGGGCGGGGCGAGGTC\t4\t.\tAF=0.5;AQ=4;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GCGGGGTCGCGGGGCGGGGCGAGGTC|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,GCGGGGTCGCGGGGCGGGGCGAGGTC|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1177|1||HGNC|HGNC:4826,GCGGGGTCGCGGGGCGGGGCGAGGTC|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3878|1||HGNC|HGNC:4825,GCGGGGTCGCGGGGCGGGGCGAGGTC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GCGGGGTCGCGGGGCGGGGCGAGGTC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:14,4:5:4,0,19:..",
+ "chr16\t164817\tchr16_164817_C_A\tC\tA\t22\t.\tAF=0.5;AQ=22;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1161|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3862|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:8,13:21:22,0,27:..",
+ "chr16\t164831\tchr16_164831_C_CG\tC\tCG\t4\t.\tAF=1;AQ=4;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1146|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3847|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:21:4,15:2:4,0,0:..",
+ "chr16\t164839\tchr16_164839_G_GGT\tG\tGGT\t15\t.\tAF=1;AQ=15;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=GT|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,GT|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1138|1||HGNC|HGNC:4826,GT|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3839|1||HGNC|HGNC:4825,GT|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GT|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:21:2,15:10:15,10,0:..",
+ "chr16\t164871\tchr16_164871_T_G\tT\tG\t13\t.\tAF=0.5;AQ=13;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1107|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3808|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:20:12,7:14:13,0,33:..",
+ "chr16\t164884\tchr16_164884_G_GT\tG\tGT\t15\t.\tAF=1;AQ=15;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1093|1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3794|1||HGNC|HGNC:4825,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:21:0,21:15:15,22,0:..",
+ "chr16\t164892\tchr16_164892_CG_C\tCG\tC\t2\t.\tAF=0.5;AQ=2;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1085|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3786|1||HGNC|HGNC:4825,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:18,3:5:2,0,21:..",
+ "chr16\t164907\tchr16_164907_C_A\tC\tA\t3\t.\tAF=0.5;AQ=3;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1071|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3772|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:20:12,8:5:3,0,20:..",
+ "chr16\t164931\tchr16_164931_C_G\tC\tG\t3\t.\tAF=0.5;AQ=3;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1047|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3748|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:9:1,8:5:3,0,19:..",
+ "chr16\t164932\tchr16_164932_G_GGGCGGGC\tG\tGGGCGGGC\t1\t.\tAF=0.5;AQ=1;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GGCGGGC|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,GGCGGGC|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1045|1||HGNC|HGNC:4826,GGCGGGC|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3746|1||HGNC|HGNC:4825,GGCGGGC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GGCGGGC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:9:2,7:4:1,0,15:..",
+ "chr16\t164942\tchr16_164942_G_GT\tG\tGT\t4\t.\tAF=0.5;AQ=4;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=T|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,T|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1035|1||HGNC|HGNC:4826,T|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3736|1||HGNC|HGNC:4825,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,T|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:9:0,8:6:4,0,14:..",
+ "chr16\t164953\tchr16_164953_G_GGCGGC\tG\tGGCGGC\t13\t.\tAF=0.5;AQ=13;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=GCGGC|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,GCGGC|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1024|1||HGNC|HGNC:4826,GCGGC|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3725|1||HGNC|HGNC:4825,GCGGC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,GCGGC|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:14,6:13:13,0,33:..",
+ "chr16\t164985\tchr16_164985_C_CG\tC\tCG\t18\t.\tAF=1;AQ=18;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant&non_coding_transcript_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4836,G|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||992|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3693|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t1/1:19:1,18:16:18,18,0:..",
+ "chr16\t165212\tchr16_165212_TC_T\tTC\tT\t3\t.\tAF=0.5;AQ=3;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||57|1||HGNC|HGNC:4836,-|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||765|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3466|1||HGNC|HGNC:4825,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:18,3:5:3,0,30:..",
+ "chr16\t165380\tchr16_165380_G_A\tG\tA\t29\t.\tAF=0.5;AQ=29;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||224|1||HGNC|HGNC:4836,A|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||598|1||HGNC|HGNC:4826,A|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3299|1||HGNC|HGNC:4825,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,A|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:22:14,8:29:29,0,43:..",
+ "chr16\t165856\tchr16_165856_G_C\tG\tC\t29\t.\tAF=0.5;AQ=29;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||700|1||HGNC|HGNC:4836,C|upstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||122|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||2823|1||HGNC|HGNC:4825,C|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,C|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:22:14,8:29:29,0,44:..",
+ "chr16\t166191\tchr16_166191_C_CG\tC\tCG\t22\t.\tAF=0.5;AQ=22;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||1035|1||HGNC|HGNC:4836,G|intron_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding||1/2||||||||||1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||2487|1||HGNC|HGNC:4825,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826,G|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||1/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:22:14,6:23:22,0,36:..",
+ "chr16\t166518\tchr16_166518_TG_T\tTG\tT\t11\t.\tAF=0.5;AQ=11;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||1363|1||HGNC|HGNC:4836,-|intron_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding||2/2||||||||||1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||2160|1||HGNC|HGNC:4825,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined||2/2||||||||||1||HGNC|HGNC:4826,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined||2/2||||||||||1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:16,4:12:11,0,32:..",
+ "chr16\t167928\tchr16_167928_GT_G\tGT\tG\t17\t.\tAF=0.5;AQ=17;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||4947|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2773|1||HGNC|HGNC:4836,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1165|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||4961|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||750|1||HGNC|HGNC:4825,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||1168|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||4965|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1168|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:14,6:17:17,0,31:..",
+ "chr16\t167947\tchr16_167947_T_TC\tT\tTC\t19\t.\tAF=0.5;AQ=19;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||4928|1||HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2791|1||HGNC|HGNC:4836,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1183|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||4942|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||731|1||HGNC|HGNC:4825,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||1186|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||4946|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||4996|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1186|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:21:11,10:18:19,0,22:..",
+ "chr16\t167948\tchr16_167948_T_C\tT\tC\t10\t.\tAF=0.5;AQ=10;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||4928|1||HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||2792|1||HGNC|HGNC:4836,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1184|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||4942|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||731|1||HGNC|HGNC:4825,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||1187|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||4946|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||4996|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1187|1||HGNC|HGNC:4826\tGT:DP:AD:GQ:PL:RNC\t0/1:20:10,10:10:10,0,17:..",
+ "chr16\t168533\tchr16_168533_T_C\tT\tC\t29\t.\tAF=0.5;AQ=29;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||4343|1||HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||3377|1||HGNC|HGNC:4836,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1769|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||4357|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||146|1||HGNC|HGNC:4825,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||1772|1||HGNC|HGNC:4826,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||4361|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||4411|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1772|1||HGNC|HGNC:4826,C|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||4969|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:11,9:29:29,0,43:..",
+ "chr16\t168736\tchr16_168736_A_G\tA\tG\t34\t.\tAF=0.5;AQ=34;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||4140|1||HGNC|HGNC:4824,G|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||3580|1||HGNC|HGNC:4836,G|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||1972|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||4154|1||HGNC|HGNC:4824,G|non_coding_transcript_exon_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|1/3||||58|||||||1||HGNC|HGNC:4825,G|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||1975|1||HGNC|HGNC:4826,G|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||4158|1||HGNC|HGNC:4824,G|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||4208|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,G|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||1975|1||HGNC|HGNC:4826,G|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||4766|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:11,9:34:34,0,48:..",
+ "chr16\t169117\tchr16_169117_TG_T\tTG\tT\t4\t.\tAF=0.5;AQ=4;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||3758|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||3962|1||HGNC|HGNC:4836,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2354|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||3772|1||HGNC|HGNC:4824,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene||2/2||||||||||1||HGNC|HGNC:4825,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||2357|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||3776|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||3826|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||2357|1||HGNC|HGNC:4826,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||4384|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:17,3:6:4,0,27:..",
+ "chr16\t169404\tchr16_169404_TC_T\tTC\tT\t5\t.\tAF=0.5;AQ=5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||3471|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||4249|1||HGNC|HGNC:4836,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2641|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||3485|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||71|1||HGNC|HGNC:4825,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||2644|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||3489|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||3539|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||2644|1||HGNC|HGNC:4826,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||4097|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:19:16,2:7:5,0,30:..",
+ "chr16\t169624\tchr16_169624_TC_T\tTC\tT\t10\t.\tAF=0.5;AQ=10;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||3251|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBZP1|ENSG00000206178|Transcript|ENST00000354915|unprocessed_pseudogene|||||||||||4469|1||HGNC|HGNC:4836,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||2861|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||3265|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||291|1||HGNC|HGNC:4825,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||2864|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||3269|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||3319|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||2864|1||HGNC|HGNC:4826,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||3877|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:16,4:11:10,0,32:..",
+ "chr16\t170831\tchr16_170831_T_TCACA\tT\tTCACA\t0\t.\tAF=0.5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=CACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||2044|1||HGNC|HGNC:4824,CACA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4067|1||HGNC|HGNC:4826,CACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||2058|1||HGNC|HGNC:4824,CACA|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||1497|1||HGNC|HGNC:4825,CACA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4070|1||HGNC|HGNC:4826,CACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||2062|1||HGNC|HGNC:4824,CACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||2112|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,CACA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4070|1||HGNC|HGNC:4826,CACA|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||2670|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:6,6:3:19,0,25:..",
+ "chr16\t170862\tchr16_170862_C_CACACA\tC\tCACACA\t5\t.\tAF=0.5;AQ=5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=ACACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||2013|1||HGNC|HGNC:4824,ACACA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4098|1||HGNC|HGNC:4826,ACACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||2027|1||HGNC|HGNC:4824,ACACA|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||1528|1||HGNC|HGNC:4825,ACACA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4101|1||HGNC|HGNC:4826,ACACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||2031|1||HGNC|HGNC:4824,ACACA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||2081|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,ACACA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4101|1||HGNC|HGNC:4826,ACACA|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||2639|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:16,2:7:5,0,28:..",
+ "chr16\t170943\tchr16_170943_AG_A\tAG\tA\t1\t.\tAF=0.5;AQ=1;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||1932|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4180|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||1946|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||1610|1||HGNC|HGNC:4825,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4183|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||1950|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||2000|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4183|1||HGNC|HGNC:4826,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||2558|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:18,2:4:1,0,26:..",
+ "chr16\t171206\tchr16_171206_T_TTTA\tT\tTTTA\t13\t.\tAF=0.5;AQ=13;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=TTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||1669|1||HGNC|HGNC:4824,TTA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4442|1||HGNC|HGNC:4826,TTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||1683|1||HGNC|HGNC:4824,TTA|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||1872|1||HGNC|HGNC:4825,TTA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4445|1||HGNC|HGNC:4826,TTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||1687|1||HGNC|HGNC:4824,TTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||1737|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,TTA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4445|1||HGNC|HGNC:4826,TTA|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||2295|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:11,9:13:13,0,22:..",
+ "chr16\t171219\tchr16_171219_T_TTA\tT\tTTA\t1\t.\tAF=0.5;AQ=1;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=TA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||1656|1||HGNC|HGNC:4824,TA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4455|1||HGNC|HGNC:4826,TA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||1670|1||HGNC|HGNC:4824,TA|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||1885|1||HGNC|HGNC:4825,TA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4458|1||HGNC|HGNC:4826,TA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||1674|1||HGNC|HGNC:4824,TA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||1724|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,TA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4458|1||HGNC|HGNC:4826,TA|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||2282|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:19:14,3:4:1,0,20:..",
+ "chr16\t171220\tchr16_171220_T_TATTA\tT\tTATTA\t9\t.\tAF=0.5;AQ=9;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=ATTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||1655|1||HGNC|HGNC:4824,ATTA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4456|1||HGNC|HGNC:4826,ATTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||1669|1||HGNC|HGNC:4824,ATTA|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||1886|1||HGNC|HGNC:4825,ATTA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4459|1||HGNC|HGNC:4826,ATTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||1673|1||HGNC|HGNC:4824,ATTA|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||1723|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,ATTA|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4459|1||HGNC|HGNC:4826,ATTA|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||2281|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:20:16,4:10:9,0,23:..",
+ "chr16\t171534\tchr16_171534_AT_A\tAT\tA\t20\t.\tAF=0.5;AQ=20;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||1341|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000356815|protein_coding|||||||||||4771|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||1355|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||2201|1||HGNC|HGNC:4825,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000472539|protein_coding_CDS_not_defined|||||||||||4774|1||HGNC|HGNC:4826,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||1359|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||1409|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBM|ENSG00000206177|Transcript|ENST00000496585|protein_coding_CDS_not_defined|||||||||||4774|1||HGNC|HGNC:4826,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||1967|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:19:14,5:20:20,0,40:..",
+ "chr16\t171881\tchr16_171881_TC_T\tTC\tT\t21\t.\tAF=0.5;AQ=21;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||994|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|||||||||||4798|1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|||||||||||4822|1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||1008|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||2548|1||HGNC|HGNC:4825,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|||||||||||4816|1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||1012|1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||1062|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||4866|1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||1620|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:19:12,7:21:21,0,35:.."
+ ],
+ [
+ "chr16\t172418\tchr16_172418_T_C\tT\tC\t20\t.\tAF=1;AQ=20;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||458|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|||||||||||4262|1||HGNC|HGNC:4823,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|||||||||||4286|1||HGNC|HGNC:4823,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||472|1||HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3084|1||HGNC|HGNC:4825,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|||||||||||4280|1||HGNC|HGNC:4823,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||476|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||526|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||4330|1||HGNC|HGNC:4823,C|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||4895|-1|||,C|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||1084|-1|||\tGT:DP:AD:GQ:PL:RNC\t1/1:18:0,18:16:20,17,0:..",
+ "chr16\t172636\tchr16_172636_C_T\tC\tT\t23\t.\tAF=1;AQ=23;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||240|1||HGNC|HGNC:4824,T|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|||||||||||4044|1||HGNC|HGNC:4823,T|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|||||||||||4068|1||HGNC|HGNC:4823,T|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||254|1||HGNC|HGNC:4824,T|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3302|1||HGNC|HGNC:4825,T|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|||||||||||4062|1||HGNC|HGNC:4823,T|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||258|1||HGNC|HGNC:4824,T|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||308|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,T|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||4112|1||HGNC|HGNC:4823,T|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||4677|-1|||,T|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||866|-1|||\tGT:DP:AD:GQ:PL:RNC\t1/1:18:1,17:14:23,14,0:..",
+ "chr16\t172870\tchr16_172870_G_C\tG\tC\t24\t.\tAF=1;AQ=24;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||6|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|||||||||||3810|1||HGNC|HGNC:4823,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|||||||||||3834|1||HGNC|HGNC:4823,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||20|1||HGNC|HGNC:4824,C|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3536|1||HGNC|HGNC:4825,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|||||||||||3828|1||HGNC|HGNC:4823,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||24|1||HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||74|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,C|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||3878|1||HGNC|HGNC:4823,C|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||4443|-1|||,C|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||632|-1|||\tGT:DP:AD:GQ:PL:RNC\t1/1:18:0,18:20:24,22,0:..",
+ "chr16\t173116\tchr16_173116_TC_T\tTC\tT\t6\t.\tAF=0.5;AQ=6;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|splice_region_variant&splice_polypyrimidine_tract_variant&intron_variant|LOW|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding||1/2||||||||||1||HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|||||||||||3563|1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|||||||||||3587|1||HGNC|HGNC:4823,-|splice_region_variant&splice_polypyrimidine_tract_variant&intron_variant|LOW|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding||1/2||||||||||1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||3783|1||HGNC|HGNC:4825,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|||||||||||3581|1||HGNC|HGNC:4823,-|non_coding_transcript_exon_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|1/2||||224|||||||1||HGNC|HGNC:4824,-|splice_region_variant&splice_polypyrimidine_tract_variant&intron_variant|LOW|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding||1/1||||||||||1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||3631|1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||4196|-1|||,-|downstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||385|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:19:16,3:8:6,0,30:..",
+ "chr16\t173707\tchr16_173707_A_G\tA\tG\t27\t.\tAF=1;AQ=27;AC=2;cadd_raw=0.19;cadd_phred=3.05;CSQ=G|3_prime_UTR_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|3/3||||573|||||||1||HGNC|HGNC:4824,G|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|||||||||||2973|1||HGNC|HGNC:4823,G|upstream_gene_variant|MODIFIER|Y_RNA|ENSG00000207243|Transcript|ENST00000384514|misc_RNA|||||||||||4847|1||RFAM|,G|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|||||||||||2997|1||HGNC|HGNC:4823,G|3_prime_UTR_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|3/3||||510|||||||1||HGNC|HGNC:4824,G|downstream_gene_variant|MODIFIER|HBAP1|ENSG00000225323|Transcript|ENST00000438841|unprocessed_pseudogene|||||||||||4373|1||HGNC|HGNC:4825,G|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|||||||||||2991|1||HGNC|HGNC:4823,G|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||32|1||HGNC|HGNC:4824,G|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||244|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,G|upstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||3041|1||HGNC|HGNC:4823,G|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||3606|-1|||,G|upstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||47|-1|||\tGT:DP:AD:GQ:PL:RNC\t1/1:19:0,19:25:27,27,0:..",
+ "chr16\t176848\tchr16_176848_GC_G\tGC\tG\t5\t.\tAF=0.5;AQ=5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBQ1|ENSG00000086506|Transcript|ENST00000199708|protein_coding|||||||||||3610|1||HGNC|HGNC:4833,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||3139|1||HGNC|HGNC:4824,-|intron_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding||1/2||||||||||1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|Y_RNA|ENSG00000207243|Transcript|ENST00000384514|misc_RNA|||||||||||1705|1||RFAM|,-|intron_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding||1/2||||||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||3139|1||HGNC|HGNC:4824,-|non_coding_transcript_exon_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|1/2||||152|||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||3174|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||3386|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|intron_variant&non_coding_transcript_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron||1/1||||||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||464|-1|||,-|upstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||3189|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:17:15,2:6:5,0,28:..",
+ "chr16\t176953\tchr16_176953_CA_C\tCA\tC\t5\t.\tAF=0.5;AQ=5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBQ1|ENSG00000086506|Transcript|ENST00000199708|protein_coding|||||||||||3505|1||HGNC|HGNC:4833,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||3244|1||HGNC|HGNC:4824,-|frameshift_variant|HIGH|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|2/3||||158|121|41|K/X|Aag/ag|||1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|Y_RNA|ENSG00000207243|Transcript|ENST00000384514|misc_RNA|||||||||||1600|1||RFAM|,-|frameshift_variant|HIGH|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|2/3||||85|25|9|K/X|Aag/ag|||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||3244|1||HGNC|HGNC:4824,-|non_coding_transcript_exon_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|1/2||||257|||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||3279|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||3491|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|non_coding_transcript_exon_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|2/2||||90|||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|||||||||||359|-1|||,-|upstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||3294|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:17:15,2:7:5,0,30:..",
+ "chr16\t177485\tchr16_177485_AC_A\tAC\tA\t5\t.\tAF=0.5;AQ=5;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=-|upstream_gene_variant|MODIFIER|HBQ1|ENSG00000086506|Transcript|ENST00000199708|protein_coding|||||||||||2973|1||HGNC|HGNC:4833,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000251595|protein_coding|||||||||||3776|1||HGNC|HGNC:4824,-|3_prime_UTR_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000320868|protein_coding|3/3||||541|||||||1||HGNC|HGNC:4823,-|upstream_gene_variant|MODIFIER|Y_RNA|ENSG00000207243|Transcript|ENST00000384514|misc_RNA|||||||||||1068|1||RFAM|,-|3_prime_UTR_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000397797|protein_coding|3/3||||468|||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000397806|protein_coding|||||||||||3776|1||HGNC|HGNC:4824,-|non_coding_transcript_exon_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000472694|retained_intron|2/2||||640|||||||1||HGNC|HGNC:4823,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000482565|retained_intron|||||||||||3811|1||HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBA2|ENSG00000188536|Transcript|ENST00000484216|protein_coding|||||||||||4023|1|cds_start_NF&cds_end_NF|HGNC|HGNC:4824,-|downstream_gene_variant|MODIFIER|HBA1|ENSG00000206172|Transcript|ENST00000487791|retained_intron|||||||||||212|1||HGNC|HGNC:4823,-|non_coding_transcript_exon_variant|MODIFIER||ENSG00000290010|Transcript|ENST00000702457|lncRNA|1/1||||5|||||||-1|||,-|upstream_gene_variant|MODIFIER||ENSG00000290038|Transcript|ENST00000702607|lncRNA|||||||||||3826|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:17:13,4:6:5,0,31:..",
+ "chr16\t274052\tchr16_274052_A_G\tA\tG\t28\t.\tAF=1;AQ=28;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|synonymous_variant&NMD_transcript_variant|LOW|RGS11|ENSG00000076344|Transcript|ENST00000168869|nonsense_mediated_decay|5/15||||353|250|84|L|Tta/Cta|||-1||HGNC|HGNC:9993,G|downstream_gene_variant|MODIFIER|FAM234A|ENSG00000167930|Transcript|ENST00000301679|protein_coding|||||||||||3972|1||HGNC|HGNC:14163,G|synonymous_variant|LOW|RGS11|ENSG00000076344|Transcript|ENST00000316163|protein_coding|6/17||||434|357|119|D|gaT/gaC|||-1||HGNC|HGNC:9993,G|synonymous_variant|LOW|RGS11|ENSG00000076344|Transcript|ENST00000359740|protein_coding|5/16||||387|387|129|D|gaT/gaC|||-1||HGNC|HGNC:9993,G|synonymous_variant|LOW|RGS11|ENSG00000076344|Transcript|ENST00000397770|protein_coding|6/17||||453|420|140|D|gaT/gaC|||-1||HGNC|HGNC:9993,G|downstream_gene_variant|MODIFIER|RGS11|ENSG00000076344|Transcript|ENST00000472466|retained_intron|||||||||||887|-1||HGNC|HGNC:9993,G|non_coding_transcript_exon_variant|MODIFIER|RGS11|ENSG00000076344|Transcript|ENST00000477143|retained_intron|1/9||||905|||||||-1||HGNC|HGNC:9993,G|non_coding_transcript_exon_variant|MODIFIER|RGS11|ENSG00000076344|Transcript|ENST00000481672|retained_intron|2/11||||110|||||||-1||HGNC|HGNC:9993,G|non_coding_transcript_exon_variant|MODIFIER|RGS11|ENSG00000076344|Transcript|ENST00000493449|retained_intron|5/9||||1199|||||||-1||HGNC|HGNC:9993,G|downstream_gene_variant|MODIFIER|FAM234A|ENSG00000167930|Transcript|ENST00000600536|nonsense_mediated_decay|||||||||||4109|1||HGNC|HGNC:14163,G|downstream_gene_variant|MODIFIER|FAM234A|ENSG00000167930|Transcript|ENST00000653392|nonsense_mediated_decay|||||||||||4216|1||HGNC|HGNC:14163,G|downstream_gene_variant|MODIFIER|FAM234A|ENSG00000167930|Transcript|ENST00000654053|nonsense_mediated_decay|||||||||||4154|1||HGNC|HGNC:14163,G|downstream_gene_variant|MODIFIER|FAM234A|ENSG00000167930|Transcript|ENST00000659283|nonsense_mediated_decay|||||||||||1869|1||HGNC|HGNC:14163,G|downstream_gene_variant|M
ODIFIER|FAM234A|ENSG00000167930|Transcript|ENST00000666018|nonsense_mediated_decay|||||||||||4097|1||HGNC|HGNC:14163\tGT:DP:AD:GQ:PL:RNC\t1/1:32:0,32:26:28,29,0:..",
+ "chr16\t286660\tchr16_286660_G_A\tG\tA\t25\t.\tAF=1;AQ=25;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|synonymous_variant|LOW|PDIA2|ENSG00000185615|Transcript|ENST00000219406|protein_coding|9/11||||1353|1347|449|T|acG/acA|||1||HGNC|HGNC:14180,A|downstream_gene_variant|MODIFIER|ARHGDIG|ENSG00000242173|Transcript|ENST00000219409|protein_coding|||||||||||3650|1||HGNC|HGNC:680,A|downstream_gene_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000262320|protein_coding|||||||||||780|-1||HGNC|HGNC:903,A|downstream_gene_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000354866|protein_coding|||||||||||781|-1||HGNC|HGNC:903,A|synonymous_variant|LOW|PDIA2|ENSG00000185615|Transcript|ENST00000404312|protein_coding|9/11||||1343|1338|446|T|acG/acA|||1||HGNC|HGNC:14180,A|downstream_gene_variant|MODIFIER|ARHGDIG|ENSG00000242173|Transcript|ENST00000414650|protein_coding|||||||||||3850|1|cds_end_NF|HGNC|HGNC:680,A|synonymous_variant|LOW|PDIA2|ENSG00000185615|Transcript|ENST00000435833|protein_coding|3/5||||234|234|78|T|acG/acA|||1|cds_start_NF|HGNC|HGNC:14180,A|downstream_gene_variant|MODIFIER|ARHGDIG|ENSG00000242173|Transcript|ENST00000447871|protein_coding|||||||||||3972|1|cds_end_NF|HGNC|HGNC:680,A|downstream_gene_variant|MODIFIER|PDIA2|ENSG00000185615|Transcript|ENST00000456379|protein_coding|||||||||||1520|1|cds_start_NF&cds_end_NF|HGNC|HGNC:14180,A|downstream_gene_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000457798|protein_coding|||||||||||1530|-1|cds_start_NF|HGNC|HGNC:903,A|downstream_gene_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000461023|retained_intron|||||||||||780|-1||HGNC|HGNC:903,A|downstream_gene_variant|MODIFIER|PDIA2|ENSG00000185615|Transcript|ENST00000462950|protein_coding_CDS_not_defined|||||||||||1084|1||HGNC|HGNC:14180,A|non_coding_transcript_exon_variant|MODIFIER|PDIA2|ENSG00000185615|Transcript|ENST00000467212|retained_intron|8/10||||1418|||||||1||HGNC|HGNC:14180,A|downstream_gene_variant|MODIFIER|ARHGDIG|ENSG00000242173|T
ranscript|ENST00000477621|retained_intron|||||||||||3820|1||HGNC|HGNC:680,A|non_coding_transcript_exon_variant|MODIFIER|PDIA2|ENSG00000185615|Transcript|ENST00000482665|retained_intron|5/7||||1630|||||||1||HGNC|HGNC:14180\tGT:DP:AD:GQ:PL:RNC\t1/1:22:0,22:19:25,19,0:..",
+ "chr16\t309953\tchr16_309953_A_G\tA\tG\t33\t.\tAF=0.5;AQ=33;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000262320|protein_coding||4/10||||||||||-1||HGNC|HGNC:903,G|intron_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000354866|protein_coding||4/9||||||||||-1||HGNC|HGNC:903,G|intron_variant&non_coding_transcript_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000461023|retained_intron||3/7||||||||||-1||HGNC|HGNC:903,G|intron_variant&non_coding_transcript_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000481769|protein_coding_CDS_not_defined||3/4||||||||||-1||HGNC|HGNC:903\tGT:DP:AD:GQ:PL:RNC\t0/1:11:6,5:33:33,0,44:..",
+ "chr16\t346264\tchr16_346264_A_G\tA\tG\t31\t.\tAF=0.5;AQ=31;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|synonymous_variant|LOW|AXIN1|ENSG00000103126|Transcript|ENST00000262320|protein_coding|2/11||||1198|762|254|D|gaT/gaC|||-1||HGNC|HGNC:903,G|synonymous_variant|LOW|AXIN1|ENSG00000103126|Transcript|ENST00000354866|protein_coding|2/10||||924|762|254|D|gaT/gaC|||-1||HGNC|HGNC:903,G|non_coding_transcript_exon_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000461023|retained_intron|1/8||||59|||||||-1||HGNC|HGNC:903,G|intron_variant&non_coding_transcript_variant|MODIFIER|AXIN1|ENSG00000103126|Transcript|ENST00000481769|protein_coding_CDS_not_defined||1/4||||||||||-1||HGNC|HGNC:903\tGT:DP:AD:GQ:PL:RNC\t0/1:26:15,11:31:31,0,40:..",
+ "chr16\t377013\tchr16_377013_G_C\tG\tC\t28\t.\tAF=0.5;AQ=28;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000250930|protein_coding||4/12||||||||||-1||HGNC|HGNC:17205,C|upstream_gene_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000424078|protein_coding|||||||||||2162|-1|cds_start_NF|HGNC|HGNC:17205,C|intron_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000427313|protein_coding||4/4||||||||||-1|cds_end_NF|HGNC|HGNC:17205,C|intron_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000431232|protein_coding||4/12||||||||||-1||HGNC|HGNC:17205,C|upstream_gene_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000448854|protein_coding|||||||||||1798|-1|cds_start_NF|HGNC|HGNC:17205,C|upstream_gene_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000467452|retained_intron|||||||||||1938|-1||HGNC|HGNC:17205,C|upstream_gene_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000475348|retained_intron|||||||||||538|-1||HGNC|HGNC:17205,C|downstream_gene_variant|MODIFIER|PGAP6|ENSG00000129925|Transcript|ENST00000476735|protein_coding_CDS_not_defined|||||||||||95|-1||HGNC|HGNC:17205\tGT:DP:AD:GQ:PL:RNC\t0/1:17:11,6:29:28,0,42:..",
+ "chr16\t520623\tchr16_520623_G_A\tG\tA\t34\t.\tAF=0.5;AQ=34;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=A|intron_variant|MODIFIER|RAB11FIP3|ENSG00000090565|Transcript|ENST00000262305|protein_coding||13/13||||||||||1||HGNC|HGNC:17224,A|intron_variant|MODIFIER|RAB11FIP3|ENSG00000090565|Transcript|ENST00000434585|protein_coding||14/14||||||||||1||HGNC|HGNC:17224,A|intron_variant|MODIFIER|RAB11FIP3|ENSG00000090565|Transcript|ENST00000450428|protein_coding||11/11||||||||||1||HGNC|HGNC:17224,A|downstream_gene_variant|MODIFIER|RAB11FIP3|ENSG00000090565|Transcript|ENST00000461009|retained_intron|||||||||||1515|1||HGNC|HGNC:17224,A|downstream_gene_variant|MODIFIER|RAB11FIP3|ENSG00000090565|Transcript|ENST00000464263|retained_intron|||||||||||861|1||HGNC|HGNC:17224,A|downstream_gene_variant|MODIFIER|RAB11FIP3|ENSG00000090565|Transcript|ENST00000487899|retained_intron|||||||||||1605|1||HGNC|HGNC:17224,A|downstream_gene_variant|MODIFIER|LINC00235|ENSG00000277142|Transcript|ENST00000622160|lncRNA|||||||||||4532|-1||HGNC|HGNC:14138\tGT:DP:AD:GQ:PL:RNC\t0/1:18:9,9:34:34,0,44:..",
+ "chr16\t551632\tchr16_551632_T_C\tT\tC\t21\t.\tAF=1;AQ=21;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|synonymous_variant|LOW|CAPN15|ENSG00000103326|Transcript|ENST00000219611|protein_coding|9/14||||2820|2313|771|G|ggT/ggC|||1||HGNC|HGNC:11182,C|downstream_gene_variant|MODIFIER|CAPN15|ENSG00000103326|Transcript|ENST00000562370|protein_coding|||||||||||4240|1|cds_end_NF|HGNC|HGNC:11182,C|upstream_gene_variant|MODIFIER|CAPN15|ENSG00000103326|Transcript|ENST00000565010|retained_intron|||||||||||665|1||HGNC|HGNC:11182,C|intron_variant&non_coding_transcript_variant|MODIFIER||ENSG00000261691|Transcript|ENST00000565879|lncRNA||1/1||||||||||-1|||,C|upstream_gene_variant|MODIFIER|CAPN15|ENSG00000103326|Transcript|ENST00000566977|protein_coding|||||||||||1123|1|cds_start_NF|HGNC|HGNC:11182,C|downstream_gene_variant|MODIFIER|CAPN15|ENSG00000103326|Transcript|ENST00000568988|protein_coding|||||||||||3423|1|cds_end_NF|HGNC|HGNC:11182,C|downstream_gene_variant|MODIFIER|CAPN15|ENSG00000103326|Transcript|ENST00000637507|protein_coding|||||||||||3601|1|cds_end_NF|HGNC|HGNC:11182\tGT:DP:AD:GQ:PL:RNC\t1/1:13:0,13:15:21,16,0:..",
+ "chr16\t582180\tchr16_582180_G_C\tG\tC\t25\t.\tAF=1;AQ=25;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000026218|protein_coding||9/9||||||||||1||HGNC|HGNC:14135,C|intron_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000321878|protein_coding||9/10||||||||||1||HGNC|HGNC:14135,C|intron_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000409527|protein_coding||10/11||||||||||1||HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000420990|nonsense_mediated_decay||6/7||||||||||1|cds_start_NF|HGNC|HGNC:14135,C|downstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000422307|protein_coding|||||||||||4946|1||HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000443147|nonsense_mediated_decay||10/11||||||||||1||HGNC|HGNC:14135,C|downstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000470411|protein_coding|||||||||||4963|1||HGNC|HGNC:14135,C|non_coding_transcript_exon_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000476438|retained_intron|1/2||||420|||||||1||HGNC|HGNC:14135,C|intron_variant&non_coding_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000480424|retained_intron||3/4||||||||||1||HGNC|HGNC:14135,C|downstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000537901|retained_intron|||||||||||3378|1||HGNC|HGNC:14135,C|intron_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000540241|protein_coding||2/3||||||||||1|cds_start_NF|HGNC|HGNC:14135,C|upstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000540548|protein_coding_CDS_not_defined|||||||||||310|1||HGNC|HGNC:14135,C|downstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000544860|protein_coding_CDS_not_defined|||||||||||3337|1||HGNC|HGNC:14135,C|upstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000634341|retained_i
ntron|||||||||||697|1||HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000635205|nonsense_mediated_decay||3/4||||||||||1|cds_start_NF|HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000635909|nonsense_mediated_decay||8/9||||||||||1|cds_start_NF|HGNC|HGNC:14135,C|downstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000635935|protein_coding_CDS_not_defined|||||||||||1403|1||HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000636005|nonsense_mediated_decay||8/9||||||||||1|cds_start_NF|HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000636657|nonsense_mediated_decay||10/11||||||||||1||HGNC|HGNC:14135,C|intron_variant&NMD_transcript_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000637468|nonsense_mediated_decay||6/7||||||||||1|cds_start_NF|HGNC|HGNC:14135,C|downstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000638143|nonsense_mediated_decay|||||||||||1258|1|cds_start_NF|HGNC|HGNC:14135,C|upstream_gene_variant|MODIFIER|PIGQ|ENSG00000007541|Transcript|ENST00000638152|protein_coding_CDS_not_defined|||||||||||1404|1||HGNC|HGNC:14135\tGT:DP:AD:GQ:PL:RNC\t1/1:6:0,6:9:25,8,0:..",
+ "chr16\t627629\tchr16_627629_T_C\tT\tC\t20\t.\tAF=1;AQ=20;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|3_prime_UTR_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000248139|protein_coding|6/6||||1079|||||||1||HGNC|HGNC:18285,C|upstream_gene_variant|MODIFIER|WFIKKN1|ENSG00000127578|Transcript|ENST00000319070|protein_coding|||||||||||3356|1||HGNC|HGNC:30912,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000509637|nonsense_mediated_decay|||||||||||1636|1|cds_start_NF|HGNC|HGNC:18285,C|3_prime_UTR_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000535977|protein_coding|7/7||||1075|||||||1||HGNC|HGNC:18285,C|3_prime_UTR_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000538492|protein_coding|7/7||||927|||||||1||HGNC|HGNC:18285,C|3_prime_UTR_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000539661|protein_coding|7/7||||967|||||||1||HGNC|HGNC:18285,C|non_coding_transcript_exon_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000561781|retained_intron|2/2||||796|||||||1||HGNC|HGNC:18285,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000563109|protein_coding|||||||||||46|1|cds_start_NF&cds_end_NF|HGNC|HGNC:18285,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000564703|protein_coding|||||||||||1512|1|cds_end_NF|HGNC|HGNC:18285,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000565511|nonsense_mediated_decay|||||||||||1546|1||HGNC|HGNC:18285,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000566290|protein_coding|||||||||||214|1|cds_end_NF|HGNC|HGNC:18285,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000568586|protein_coding|||||||||||1508|1|cds_end_NF|HGNC|HGNC:18285,C|downstream_gene_variant|MODIFIER|RAB40C|ENSG00000197562|Transcript|ENST00000569575|protein_coding|||||||||||1603|1|cds_end_NF|HGNC|HGNC:18285,C|upstream_gene_variant|MODIFIER|WFIKKN1|ENSG00000127578|Transcrip
t|ENST00000573440|retained_intron|||||||||||1610|1||HGNC|HGNC:30912\tGT:DP:AD:GQ:PL:RNC\t1/1:13:0,13:13:20,13,0:..",
+ "chr16\t638557\tchr16_638557_A_C\tA\tC\t24\t.\tAF=1;AQ=24;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000301686|protein_coding|||||||||||2252|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000307650|protein_coding|||||||||||3271|1||HGNC|HGNC:14142,C|downstream_gene_variant|MODIFIER|WFIKKN1|ENSG00000127578|Transcript|ENST00000319070|protein_coding|||||||||||4440|1||HGNC|HGNC:30912,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000338401|protein_coding|||||||||||2215|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000397664|protein_coding|||||||||||2206|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000397665|protein_coding|||||||||||2267|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000397666|protein_coding|||||||||||2255|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000448973|retained_intron|||||||||||2247|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000456420|nonsense_mediated_decay|||||||||||2318|-1|cds_start_NF|HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000474840|nonsense_mediated_decay|||||||||||3372|1||HGNC|HGNC:14142,C|upstream_gene_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000491999|nonsense_mediated_decay|||||||||||3289|1||HGNC|HGNC:14142,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000564039|nonsense_mediated_decay|||||||||||2279|-1|cds_start_NF|HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000565163|protein_coding|||||||||||2297|-1|cds_start_NF&cds_end_NF|HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000565799|
retained_intron|||||||||||2290|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000568077|nonsense_mediated_decay|||||||||||2226|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000568773|protein_coding|||||||||||2322|-1|cds_start_NF|HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000568830|protein_coding|||||||||||2199|-1|cds_end_NF|HGNC|HGNC:14141,C|downstream_gene_variant|MODIFIER|WFIKKN1|ENSG00000127578|Transcript|ENST00000573440|retained_intron|||||||||||4441|1||HGNC|HGNC:30912,C|intron_variant&non_coding_transcript_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000611328|protein_coding_CDS_not_defined||1/3||||||||||1||HGNC|HGNC:14142,C|upstream_gene_variant|MODIFIER|METTL26|ENSG00000130731|Transcript|ENST00000614890|protein_coding|||||||||||2191|-1||HGNC|HGNC:14141,C|upstream_gene_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000615744|protein_coding_CDS_not_defined|||||||||||3894|1||HGNC|HGNC:14142,C|non_coding_transcript_exon_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000619114|protein_coding_CDS_not_defined|2/2||||576|||||||1||HGNC|HGNC:14142,C|non_coding_transcript_exon_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000619377|protein_coding_CDS_not_defined|2/2||||664|||||||1||HGNC|HGNC:14142,C|upstream_gene_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000620462|retained_intron|||||||||||3926|1||HGNC|HGNC:14142,C|upstream_gene_variant|MODIFIER|MCRIP2|ENSG00000172366|Transcript|ENST00000629534|protein_coding|||||||||||3292|1||HGNC|HGNC:14142\tGT:DP:AD:GQ:PL:RNC\t1/1:14:0,14:16:24,17,0:..",
+ "chr16\t655844\tchr16_655844_T_C\tT\tC\t23\t.\tAF=1;AQ=23;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=C|synonymous_variant|LOW|WDR90|ENSG00000161996|Transcript|ENST00000293879|protein_coding|17/41||||1973|1921|641|L|Ttg/Ctg|||1||HGNC|HGNC:26960,C|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000420061|retained_intron|17/17||||1985|||||||1||HGNC|HGNC:26960,C|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000546516|retained_intron|3/20||||614|||||||1||HGNC|HGNC:26960,C|upstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000547407|retained_intron|||||||||||431|1||HGNC|HGNC:26960,C|upstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000548844|protein_coding_CDS_not_defined|||||||||||2716|1||HGNC|HGNC:26960,C|upstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000548859|retained_intron|||||||||||589|1||HGNC|HGNC:26960,C|synonymous_variant|LOW|WDR90|ENSG00000161996|Transcript|ENST00000549091|protein_coding|17/41||||2013|1921|641|L|Ttg/Ctg|||1||HGNC|HGNC:26960,C|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000549648|retained_intron|17/17||||1988|||||||1||HGNC|HGNC:26960,C|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000550593|retained_intron|||||||||||2374|1||HGNC|HGNC:26960,C|upstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000550739|protein_coding|||||||||||3401|1|cds_start_NF&cds_end_NF|HGNC|HGNC:26960,C|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000552648|protein_coding_CDS_not_defined|4/7||||455|||||||1||HGNC|HGNC:26960,C|upstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000552683|retained_intron|||||||||||2014|1||HGNC|HGNC:26960,C|upstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000552728|nonsense_mediated_decay|||||||||||577|1|cds_start_NF|HGNC|HGNC:26960,C|non_coding_transcript_exon_
variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000552943|retained_intron|15/26||||3591|||||||1||HGNC|HGNC:26960,C|non_coding_transcript_exon_variant|MODIFIER||ENSG00000262528|Transcript|ENST00000573609|lncRNA|1/2||||351|||||||-1|||\tGT:DP:AD:GQ:PL:RNC\t1/1:14:0,14:16:23,17,0:..",
+ "chr16\t667523\tchr16_667523_G_T\tG\tT\t27\t.\tAF=1;AQ=27;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=T|synonymous_variant|LOW|WDR90|ENSG00000161996|Transcript|ENST00000293879|protein_coding|41/41||||5233|5181|1727|P|ccG/ccT|||1||HGNC|HGNC:26960,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000315082|protein_coding|||||||||||609|1||HGNC|HGNC:21169,T|synonymous_variant|LOW|WDR90|ENSG00000161996|Transcript|ENST00000315764|protein_coding|6/6||||1147|834|278|P|ccG/ccT|||1||HGNC|HGNC:26960,T|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000546516|retained_intron|||||||||||4090|1||HGNC|HGNC:26960,T|missense_variant|MODERATE|WDR90|ENSG00000161996|Transcript|ENST00000546896|protein_coding|2/2||||89|89|30|R/L|cGt/cTt|||1|cds_start_NF|HGNC|HGNC:26960,T|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000546923|retained_intron|||||||||||4787|1||HGNC|HGNC:26960,T|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000547407|retained_intron|24/24||||3541|||||||1||HGNC|HGNC:26960,T|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000547543|protein_coding_CDS_not_defined|||||||||||778|1||HGNC|HGNC:26960,T|synonymous_variant|LOW|WDR90|ENSG00000161996|Transcript|ENST00000547944|protein_coding|7/7||||1365|978|326|P|ccG/ccT|||1||HGNC|HGNC:26960,T|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000548448|retained_intron|||||||||||1162|1||HGNC|HGNC:26960,T|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000548603|protein_coding_CDS_not_defined|||||||||||4086|1||HGNC|HGNC:26960,T|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000549024|retained_intron|12/12||||1951|||||||1||HGNC|HGNC:26960,T|synonymous_variant|LOW|WDR90|ENSG00000161996|Transcript|ENST00000549091|protein_coding|41/41||||5279|5187|1729|P|ccG/ccT|||1||HGNC|HGNC:26960,T|3_prime_UTR_variant|MODIFIER|WDR90|ENSG00000161996|Tr
anscript|ENST00000551100|protein_coding|3/3||||315|||||||1|cds_start_NF|HGNC|HGNC:26960,T|downstream_gene_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000552683|retained_intron|||||||||||4085|1||HGNC|HGNC:26960,T|3_prime_UTR_variant&NMD_transcript_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000552728|nonsense_mediated_decay|22/22||||2826|||||||1|cds_start_NF|HGNC|HGNC:26960,T|non_coding_transcript_exon_variant|MODIFIER|WDR90|ENSG00000161996|Transcript|ENST00000553080|retained_intron|6/6||||1635|||||||1||HGNC|HGNC:26960,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000561711|retained_intron|||||||||||4394|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000561929|protein_coding|||||||||||686|1|cds_start_NF&cds_end_NF|HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000561983|nonsense_mediated_decay|||||||||||582|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000562333|nonsense_mediated_decay|||||||||||664|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000562598|retained_intron|||||||||||659|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000562708|retained_intron|||||||||||589|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000563134|protein_coding|||||||||||583|1|cds_end_NF|HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000563637|nonsense_mediated_decay|||||||||||628|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000563776|retained_intron|||||||||||582|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000565004|retained_intron|||||||||||3559|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000566214|non
sense_mediated_decay|||||||||||628|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000566965|retained_intron|||||||||||1037|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000567017|retained_intron|||||||||||639|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000567589|retained_intron|||||||||||3005|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000568636|retained_intron|||||||||||1135|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000568950|retained_intron|||||||||||655|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000569358|retained_intron|||||||||||2932|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000569675|retained_intron|||||||||||612|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000569706|retained_intron|||||||||||609|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000569943|protein_coding_CDS_not_defined|||||||||||830|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000570092|retained_intron|||||||||||601|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000570280|nonsense_mediated_decay|||||||||||630|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000602564|retained_intron|||||||||||657|1||HGNC|HGNC:21169,T|upstream_gene_variant|MODIFIER|RHOT2|ENSG00000140983|Transcript|ENST00000697194|protein_coding|||||||||||577|1||HGNC|HGNC:21169\tGT:DP:AD:GQ:PL:RNC\t1/1:22:0,22:23:27,24,0:..",
+ "chr16\t690466\tchr16_690466_A_G\tA\tG\t26\t.\tAF=1;AQ=26;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|upstream_gene_variant|MODIFIER|WDR24|ENSG00000127580|Transcript|ENST00000248142|protein_coding|||||||||||22|-1||HGNC|HGNC:20852,G|upstream_gene_variant|MODIFIER|WDR24|ENSG00000127580|Transcript|ENST00000293883|protein_coding|||||||||||68|-1||HGNC|HGNC:20852,G|downstream_gene_variant|MODIFIER|FBXL16|ENSG00000127585|Transcript|ENST00000397621|protein_coding|||||||||||2034|-1||HGNC|HGNC:14150,G|downstream_gene_variant|MODIFIER|FBXL16|ENSG00000127585|Transcript|ENST00000562563|protein_coding|||||||||||2461|-1||HGNC|HGNC:14150,G|downstream_gene_variant|MODIFIER|FBXL16|ENSG00000127585|Transcript|ENST00000562585|protein_coding_CDS_not_defined|||||||||||4192|-1||HGNC|HGNC:14150,G|downstream_gene_variant|MODIFIER|FBXL16|ENSG00000127585|Transcript|ENST00000562648|retained_intron|||||||||||2032|-1||HGNC|HGNC:14150,G|intron_variant&non_coding_transcript_variant|MODIFIER||ENSG00000261659|Transcript|ENST00000566927|lncRNA||1/1||||||||||1|||,G|upstream_gene_variant|MODIFIER|WDR24|ENSG00000127580|Transcript|ENST00000567014|retained_intron|||||||||||3537|-1||HGNC|HGNC:20852,G|non_coding_transcript_exon_variant|MODIFIER||ENSG00000261659|Transcript|ENST00000575305|retained_intron|1/1||||1466|||||||1|||,G|upstream_gene_variant|MODIFIER|WDR24|ENSG00000127580|Transcript|ENST00000647644|protein_coding|||||||||||219|-1||HGNC|HGNC:20852\tGT:DP:AD:GQ:PL:RNC\t1/1:17:0,17:20:26,21,0:..",
+ "chr16\t723341\tchr16_723341_G_A\tG\tA\t23\t.\tAF=1;AQ=23;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000219535|protein_coding|||||||||||768|1||HGNC|HGNC:14152,A|intron_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000293889|protein_coding||11/13||||||||||-1||HGNC|HGNC:14153,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000341413|protein_coding|||||||||||3617|1||HGNC|HGNC:14177,A|intron_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000345165|protein_coding||11/13||||||||||-1||HGNC|HGNC:14153,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000389701|retained_intron|||||||||||4064|1||HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000389703|protein_coding|||||||||||3765|1||HGNC|HGNC:14177,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000423653|protein_coding_CDS_not_defined|||||||||||1586|-1||HGNC|HGNC:14153,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000439619|retained_intron|||||||||||1209|-1||HGNC|HGNC:14153,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000460023|retained_intron|||||||||||1566|-1||HGNC|HGNC:14153,A|intron_variant&non_coding_transcript_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000463539|retained_intron||9/11||||||||||-1||HGNC|HGNC:14153,A|intron_variant&non_coding_transcript_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000466708|retained_intron||10/12||||||||||-1||HGNC|HGNC:14153,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000471861|retained_intron|||||||||||1660|-1||HGNC|HGNC:14153,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000474647|retained_intron|||||||||||1782|-1||HGNC|HGNC:14153,A|non_coding_transcript_exon_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000478979|retained_intron|4/
4||||2296|||||||-1||HGNC|HGNC:14153,A|non_coding_transcript_exon_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000481804|retained_intron|4/5||||2627|||||||-1||HGNC|HGNC:14153,A|intron_variant&non_coding_transcript_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000482152|retained_intron||3/4||||||||||-1||HGNC|HGNC:14153,A|non_coding_transcript_exon_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000482878|retained_intron|4/4||||2699|||||||-1||HGNC|HGNC:14153,A|intron_variant&non_coding_transcript_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000485091|retained_intron||10/11||||||||||-1||HGNC|HGNC:14153,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000538176|retained_intron|||||||||||1201|-1||HGNC|HGNC:14153,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000544996|retained_intron|||||||||||1643|-1||HGNC|HGNC:14153,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000549114|protein_coding|||||||||||3595|1||HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000561546|protein_coding|||||||||||3905|1||HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000561750|retained_intron|||||||||||3934|1||HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000562141|protein_coding|||||||||||3778|1|cds_end_NF|HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000562187|protein_coding|||||||||||3765|1|cds_end_NF|HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000563792|protein_coding|||||||||||4398|1|cds_end_NF|HGNC|HGNC:14177,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000564000|protein_coding|||||||||||756|1||HGNC|HGNC:14152,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000564537|protein_coding|||||||||||3765|1||HGNC|HGNC:141
77,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000564545|protein_coding|||||||||||3921|1||HGNC|HGNC:14177,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000564640|retained_intron|||||||||||1029|1||HGNC|HGNC:14152,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000566437|retained_intron|||||||||||833|1||HGNC|HGNC:14152,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000566525|retained_intron|||||||||||752|1||HGNC|HGNC:14152,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000567414|protein_coding|||||||||||3952|1|cds_end_NF|HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000567696|retained_intron|||||||||||3977|1||HGNC|HGNC:14177,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000568141|protein_coding|||||||||||4005|1|cds_end_NF|HGNC|HGNC:14177,A|downstream_gene_variant|MODIFIER|METRN|ENSG00000103260|Transcript|ENST00000568223|protein_coding|||||||||||3686|1||HGNC|HGNC:14151,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000568916|protein_coding|||||||||||751|1|cds_start_NF|HGNC|HGNC:14152,A|upstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000569143|retained_intron|||||||||||3977|1||HGNC|HGNC:14177,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000569529|protein_coding|||||||||||751|1||HGNC|HGNC:14152,A|downstream_gene_variant|MODIFIER|ANTKMT|ENSG00000103254|Transcript|ENST00000570237|retained_intron|||||||||||751|1||HGNC|HGNC:14152,A|downstream_gene_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000650995|protein_coding|||||||||||2453|-1|cds_end_NF|HGNC|HGNC:14153,A|intron_variant&non_coding_transcript_variant|MODIFIER|CCDC78|ENSG00000162004|Transcript|ENST00000682391|retained_intron||7/9||||||||||-1||HGNC|HGNC:14153\tGT:DP:AD:GQ:PL:RNC\t1/1:17:0,17:19:23,21,0:..",
+ "chr16\t739761\tchr16_739761_A_G\tA\tG\t22\t.\tAF=1;AQ=22;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000251588|protein_coding||1/10||||||||||-1||HGNC|HGNC:14179,G|5_prime_UTR_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000540986|protein_coding|1/10||||1174|||||||-1||HGNC|HGNC:14179,G|upstream_gene_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000562421|protein_coding|||||||||||2006|-1|cds_end_NF|HGNC|HGNC:14179,G|intron_variant&non_coding_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000562752|retained_intron||1/4||||||||||-1||HGNC|HGNC:14179,G|upstream_gene_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000563534|protein_coding_CDS_not_defined|||||||||||2297|-1||HGNC|HGNC:14179,G|intron_variant&NMD_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000565065|nonsense_mediated_decay||2/5||||||||||-1||HGNC|HGNC:14179,G|intron_variant&non_coding_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000565341|retained_intron||1/3||||||||||-1||HGNC|HGNC:14179,G|intron_variant&NMD_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000565425|nonsense_mediated_decay||1/9||||||||||-1||HGNC|HGNC:14179,G|intron_variant&non_coding_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000565693|protein_coding_CDS_not_defined||1/2||||||||||-1||HGNC|HGNC:14179,G|intron_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000566614|protein_coding||1/4||||||||||-1|cds_end_NF|HGNC|HGNC:14179,G|intron_variant&non_coding_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000567172|protein_coding_CDS_not_defined||1/1||||||||||-1||HGNC|HGNC:14179,G|upstream_gene_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000567403|protein_coding|||||||||||2009|-1|cds_end_NF|HGNC|HGNC:14179,G|intron_variant&non_coding_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000567455|protein_codin
g_CDS_not_defined||1/2||||||||||-1||HGNC|HGNC:14179,G|upstream_gene_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000568545|protein_coding|||||||||||1872|-1||HGNC|HGNC:14179,G|downstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000569604|protein_coding_CDS_not_defined|||||||||||4236|1||HGNC|HGNC:14177,G|upstream_gene_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000569759|protein_coding_CDS_not_defined|||||||||||3391|-1||HGNC|HGNC:14179,G|intron_variant&NMD_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000570066|nonsense_mediated_decay||1/4||||||||||-1||HGNC|HGNC:14179,G|intron_variant&NMD_transcript_variant|MODIFIER|CIAO3|ENSG00000103245|Transcript|ENST00000570289|nonsense_mediated_decay||1/3||||||||||-1||HGNC|HGNC:14179,G|downstream_gene_variant|MODIFIER|HAGHL|ENSG00000103253|Transcript|ENST00000647875|protein_coding_CDS_not_defined|||||||||||4408|1||HGNC|HGNC:14177\tGT:DP:AD:GQ:PL:RNC\t1/1:14:0,14:14:22,14,0:..",
+ "chr16\t766294\tchr16_766294_T_G\tT\tG\t29\t.\tAF=0.5;AQ=29;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000382862|protein_coding||11/16||||||||||1||HGNC|HGNC:7371,G|upstream_gene_variant|MODIFIER|MIR662|ENSG00000207579|Transcript|ENST00000384847|miRNA|||||||||||3889|1||HGNC|HGNC:32918,G|downstream_gene_variant|MODIFIER|MSLNL|ENSG00000162006|Transcript|ENST00000543963|protein_coding|||||||||||3134|-1|cds_start_NF|HGNC|HGNC:14170,G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000545450|protein_coding||12/17||||||||||1||HGNC|HGNC:7371,G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000561896|protein_coding||2/6||||||||||1|cds_start_NF&cds_end_NF|HGNC|HGNC:7371,G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000563651|protein_coding||10/10||||||||||1|cds_end_NF|HGNC|HGNC:7371,G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000563941|protein_coding||12/17||||||||||1||HGNC|HGNC:7371,G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000566269|protein_coding||2/7||||||||||1|cds_start_NF|HGNC|HGNC:7371,G|intron_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000566549|protein_coding||11/16||||||||||1||HGNC|HGNC:7371,G|downstream_gene_variant|MODIFIER|MSLN|ENSG00000102854|Transcript|ENST00000569566|protein_coding|||||||||||1258|1|cds_end_NF|HGNC|HGNC:7371,G|downstream_gene_variant|MODIFIER||ENSG00000279136|Transcript|ENST00000623562|TEC|||||||||||2825|-1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:19:12,7:29:29,0,43:..",
+ "chr16\t789996\tchr16_789996_T_C\tT\tC\t36\t.\tAF=0.5;AQ=36;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000007264|protein_coding|||||||||||1615|-1||HGNC|HGNC:14173,C|intron_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000262315|protein_coding||4/21||||||||||1||HGNC|HGNC:18435,C|intron_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000317063|protein_coding||4/21||||||||||1||HGNC|HGNC:18435,C|splice_polypyrimidine_tract_variant&intron_variant|LOW|CHTF18|ENSG00000127586|Transcript|ENST00000426047|protein_coding||2/6||||||||||1|cds_start_NF&cds_end_NF|HGNC|HGNC:18435,C|intron_variant&NMD_transcript_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000440239|nonsense_mediated_decay||4/13||||||||||1||HGNC|HGNC:18435,C|intron_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000455171|protein_coding||3/20||||||||||1||HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000461268|protein_coding_CDS_not_defined|||||||||||2528|1||HGNC|HGNC:18435,C|intron_variant&non_coding_transcript_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000464728|retained_intron||3/17||||||||||1||HGNC|HGNC:18435,C|splice_polypyrimidine_tract_variant&intron_variant&non_coding_transcript_variant|LOW|CHTF18|ENSG00000127586|Transcript|ENST00000471202|retained_intron||4/18||||||||||1||HGNC|HGNC:18435,C|intron_variant&non_coding_transcript_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000479976|retained_intron||4/6||||||||||1||HGNC|HGNC:18435,C|intron_variant&non_coding_transcript_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000484349|retained_intron||1/3||||||||||1||HGNC|HGNC:18435,C|intron_variant&non_coding_transcript_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000491530|protein_coding_CDS_not_defined||2/5||||||||||1||HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000493715|protei
n_coding_CDS_not_defined|||||||||||4706|1||HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000561734|protein_coding|||||||||||2015|-1||HGNC|HGNC:14173,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000562070|protein_coding|||||||||||2527|-1|cds_start_NF|HGNC|HGNC:14173,C|downstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000563545|nonsense_mediated_decay|||||||||||360|1||HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000563560|protein_coding|||||||||||1667|-1|cds_end_NF|HGNC|HGNC:14173,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000565377|protein_coding|||||||||||1615|-1|cds_end_NF|HGNC|HGNC:14173,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000565503|nonsense_mediated_decay|||||||||||1656|-1||HGNC|HGNC:14173,C|upstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000565787|retained_intron|||||||||||1141|1||HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000565809|protein_coding|||||||||||1625|-1||HGNC|HGNC:14173,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000567114|protein_coding|||||||||||1599|-1||HGNC|HGNC:14173,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000567283|nonsense_mediated_decay|||||||||||1663|-1||HGNC|HGNC:14173,C|upstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000567620|retained_intron|||||||||||703|1||HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000569270|nonsense_mediated_decay|||||||||||255|1|cds_start_NF|HGNC|HGNC:18435,C|upstream_gene_variant|MODIFIER|RPUSD1|ENSG00000007376|Transcript|ENST00000569601|protein_coding|||||||||||1662|-1|cds_end_NF|HGNC|HGNC:14173,C|downstream_gene_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000570058|protein_coding_CDS_not_defined||||||||||
|656|1||HGNC|HGNC:18435,C|intron_variant|MODIFIER|CHTF18|ENSG00000127586|Transcript|ENST00000631357|protein_coding||5/23||||||||||1||HGNC|HGNC:18435\tGT:DP:AD:GQ:PL:RNC\t0/1:15:7,8:36:36,0,49:..",
+ "chr16\t813336\tchr16_813336_T_C\tT\tC\t32\t.\tAF=0.5;AQ=32;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER||ENSG00000287855|Transcript|ENST00000655150|lncRNA|||||||||||4153|1|||\tGT:DP:AD:GQ:PL:RNC\t0/1:14:5,9:32:32,0,39:..",
+ "chr16\t893361\tchr16_893361_T_C\tT\tC\t34\t.\tAF=0.5;AQ=34;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000262301|protein_coding||4/10||||||||||-1||HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000543238|protein_coding||1/7||||||||||-1||HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000545827|nonsense_mediated_decay||5/11||||||||||-1||HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000562226|nonsense_mediated_decay||6/7||||||||||-1||HGNC|HGNC:14154,C|upstream_gene_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000565198|retained_intron|||||||||||109|-1||HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000566627|protein_coding||3/5||||||||||-1|cds_end_NF|HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000568897|protein_coding||3/9||||||||||-1||HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000568964|nonsense_mediated_decay||4/5||||||||||-1|cds_start_NF|HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000570014|protein_coding||4/6||||||||||-1|cds_end_NF|HGNC|HGNC:14154\tGT:DP:AD:GQ:PL:RNC\t0/1:17:6,11:34:34,0,45:..",
+ "chr16\t947473\tchr16_947473_T_C\tT\tC\t29\t.\tAF=0.5;AQ=29;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000262301|protein_coding||2/10||||||||||-1||HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000543238|protein_coding||1/7||||||||||-1||HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000545827|nonsense_mediated_decay||2/11||||||||||-1||HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000562226|nonsense_mediated_decay||2/7||||||||||-1||HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000562380|protein_coding||2/3||||||||||-1|cds_start_NF|HGNC|HGNC:14154,C|5_prime_UTR_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000566627|protein_coding|1/6||||172|||||||-1|cds_end_NF|HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000567595|nonsense_mediated_decay||2/4||||||||||-1|cds_start_NF|HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000568897|protein_coding||2/9||||||||||-1||HGNC|HGNC:14154,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000568964|nonsense_mediated_decay||2/5||||||||||-1|cds_start_NF|HGNC|HGNC:14154,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000570014|protein_coding||2/6||||||||||-1|cds_end_NF|HGNC|HGNC:14154\tGT:DP:AD:GQ:PL:RNC\t0/1:23:8,15:30:29,0,47:..",
+ "chr16\t964119\tchr16_964119_A_G\tA\tG\t24\t.\tAF=1;AQ=24;AC=2;cadd_raw=-2;cadd_phred=-2;CSQ=G|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000262301|protein_coding||1/10||||||||||-1||HGNC|HGNC:14154,G|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000543238|protein_coding||1/7||||||||||-1||HGNC|HGNC:14154,G|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000545827|nonsense_mediated_decay||1/11||||||||||-1||HGNC|HGNC:14154,G|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000562226|nonsense_mediated_decay||1/7||||||||||-1||HGNC|HGNC:14154,G|missense_variant|MODERATE|LMF1|ENSG00000103227|Transcript|ENST00000562380|protein_coding|1/4||||4|4|2|Y/H|Tat/Cat|||-1|cds_start_NF|HGNC|HGNC:14154,G|intron_variant&non_coding_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000566609|protein_coding_CDS_not_defined||1/2||||||||||-1||HGNC|HGNC:14154,G|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000567595|nonsense_mediated_decay||1/4||||||||||-1|cds_start_NF|HGNC|HGNC:14154,G|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000568897|protein_coding||1/9||||||||||-1||HGNC|HGNC:14154,G|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000568964|nonsense_mediated_decay||1/5||||||||||-1|cds_start_NF|HGNC|HGNC:14154,G|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000570014|protein_coding||1/6||||||||||-1|cds_end_NF|HGNC|HGNC:14154,G|downstream_gene_variant|MODIFIER||ENSG00000276931|Transcript|ENST00000620075|lncRNA|||||||||||4256|-1|||\tGT:DP:AD:GQ:PL:RNC\t1/1:29:0,29:21:24,24,0:..",
+ "chr16\t977069\tchr16_977069_T_C\tT\tC\t34\t.\tAF=0.5;AQ=34;AC=1;cadd_raw=-2;cadd_phred=-2;CSQ=C|upstream_gene_variant|MODIFIER|SOX8|ENSG00000005513|Transcript|ENST00000293894|protein_coding|||||||||||4701|1||HGNC|HGNC:11203,C|intron_variant&NMD_transcript_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000545827|nonsense_mediated_decay||1/11||||||||||-1||HGNC|HGNC:14154,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000562570|lncRNA|2/2||||1914|||||||-1||HGNC|HGNC:53928,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000563837|lncRNA|4/4||||423|||||||-1||HGNC|HGNC:53928,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000563863|lncRNA|4/4||||423|||||||-1||HGNC|HGNC:53928,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000565069|lncRNA|3/3||||265|||||||-1||HGNC|HGNC:53928,C|upstream_gene_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000565139|lncRNA|||||||||||143|-1||HGNC|HGNC:53928,C|intron_variant&non_coding_transcript_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000565467|lncRNA||1/1||||||||||-1||HGNC|HGNC:53928,C|upstream_gene_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000567961|lncRNA|||||||||||157|-1||HGNC|HGNC:53928,C|intron_variant&non_coding_transcript_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000568394|lncRNA||1/2||||||||||-1||HGNC|HGNC:53928,C|intron_variant|MODIFIER|LMF1|ENSG00000103227|Transcript|ENST00000570014|protein_coding||1/6||||||||||-1|cds_end_NF|HGNC|HGNC:14154,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000655952|lncRNA|3/3||||743|||||||-1||HGNC|HGNC:53928,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000662104|lncRNA|3/3||||322|||||||-1||HGNC|HGNC:53928,C|non_coding_transcript_exon_variant|MODIFIER|CEROX1|ENSG00000260807|Transcript|ENST00000669274|lncRNA|3/3||||225|||||||-1||HGNC|HGNC:53928\tGT:DP:AD:GQ:PL:RNC\t0/1:17:6,11:34:34,0,44:.."
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-12T10:10:59.241809478"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/local/snv_annotation/tests/nextflow.config b/subworkflows/local/snv_annotation/tests/nextflow.config
new file mode 100644
index 00000000..2de8ad16
--- /dev/null
+++ b/subworkflows/local/snv_annotation/tests/nextflow.config
@@ -0,0 +1,71 @@
+process { // per-process overrides used by the snv_annotation subworkflow tests
+ withName: 'DEEPVARIANT' {
+ ext.prefix = { intervals ? "${meta.id}_${intervals}_deepvariant" : "${meta.id}_deepvariant" } // include the interval in the prefix only when one is set
+ ext.args = { [
+ '--model_type WGS',
+ "--sample_name=${meta.id}",
+ '--vcf_stats_report=False' // double dash, consistent with the other flags in this list
+ ].join(' ') }
+ }
+
+ withName: 'GLNEXUS' {
+ ext.args = '--config DeepVariant_unfiltered'
+ }
+
+ withName: 'BCFTOOLS_CONCAT' {
+ ext.prefix = { "${meta.id}_concat" }
+ ext.args = [
+ '--no-version',
+ '--allow-overlaps'
+ ].join(' ')
+ }
+
+ withName: 'BCFTOOLS_NORM_MULTISAMPLE' {
+ ext.prefix = { "${meta.id}_norm_multisample" }
+ ext.args = [
+ '--no-version',
+ '-m -',
+ '--output-type u',
+ '--write-index=csi',
+ '-w 10000'
+ ].join(' ')
+ }
+
+ withName: 'BCFTOOLS_NORM_SINGLESAMPLE' {
+ ext.prefix = { "${meta.id}_norm_singlesample" }
+ ext.args = [
+ '--no-version',
+ '-m -',
+ '-w 10000',
+ '--output-type u',
+ ].join(' ')
+ }
+
+ withName: '.*:SHORT_VARIANT_CALLING:BCFTOOLS_FILLTAGS' { // selector is a pattern: only matches FILLTAGS under SHORT_VARIANT_CALLING
+ ext.prefix = { "${meta.id}_ac" }
+ ext.args = [
+ '--no-version',
+ '--output-type u'
+ ].join(' ')
+ }
+
+ withName: 'BCFTOOLS_FILLTAGS_ANNO' {
+ ext.prefix = { "${meta.id}_filltags_anno" }
+ ext.args = [
+ '--no-version',
+ '--output-type z'
+ ].join(' ')
+ }
+
+ withName: 'MINIMAP2_ALIGN' {
+ ext.args = '-x map-hifi'
+ }
+
+ withName: 'ENSEMBLVEP_VEP' {
+ ext.args = { [
+ '--offline',
+ '--vcf',
+ '--compress_output bgzip'
+ ].join(' ') }
+ }
+}
diff --git a/subworkflows/local/structural_variant_calling.nf b/subworkflows/local/structural_variant_calling.nf
index ace51636..be738c63 100644
--- a/subworkflows/local/structural_variant_calling.nf
+++ b/subworkflows/local/structural_variant_calling.nf
@@ -5,7 +5,6 @@ workflow STRUCTURAL_VARIANT_CALLING {
take:
ch_bam_bai // channel: [ val(meta), [[ bam ], [bai]] ]
- ch_snfs
ch_fasta
ch_fai
ch_tandem_repeats
@@ -15,13 +14,10 @@ workflow STRUCTURAL_VARIANT_CALLING {
SNIFFLES (ch_bam_bai, ch_fasta, ch_tandem_repeats, true, true)
- // Combine sniffles output with supplied extra snfs
SNIFFLES.out.snf
- .map{ it [1] }
- .concat(ch_snfs.map{ it[1] })
- .collect()
- .sort{ it.name }
- .map { snfs -> [ [id:'multisample'], snfs, [] ] }
+ .map { meta, snf -> [ [ 'id': meta.project ], snf ] }
+ .groupTuple()
+ .map { meta, snfs -> [ meta, snfs, [] ] }
.set{ ch_multisample_input }
SNIFFLES_MULTISAMPLE( ch_multisample_input, ch_fasta, ch_tandem_repeats, true, false )
diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
index 0b49178f..91cb7ac5 100644
--- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
@@ -35,50 +35,50 @@ include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline'
// Define subworkflows and their associated "--skip"
//
def workflowSkips = [
- assembly : "skip_assembly_wf",
- qc : "skip_raw_read_qc",
- mapping : "skip_mapping_wf",
- snv_calling : "skip_short_variant_calling",
- snv_annotation: "skip_snv_annotation",
- call_paralogs : "skip_call_paralogs",
- cnv_calling : "skip_cnv_calling",
- phasing : "skip_phasing_wf",
- repeat_calling: "skip_repeat_wf",
- methylation : "skip_methylation_wf",
+ assembly : "skip_assembly_wf", // key: subworkflow name; value: name of its associated "--skip" parameter
+ raw_read_qc : "skip_raw_read_qc",
+ aligned_read_qc : "skip_aligned_read_qc",
+ mapping : "skip_mapping_wf",
+ snv_calling : "skip_short_variant_calling",
+ snv_annotation : "skip_snv_annotation",
+ call_paralogs : "skip_call_paralogs",
+ cnv_calling : "skip_cnv_calling",
+ phasing : "skip_phasing_wf",
+ rank_variants : "skip_rank_variants",
+ repeat_calling : "skip_repeat_calling",
+ repeat_annotation: "skip_repeat_annotation",
+ methylation : "skip_methylation_wf",
]
//
// E.g., the CNV-calling workflow depends on mapping and snv_calling and can't run without them.
//
def workflowDependencies = [
- assembly : ["mapping"],
- call_paralogs : ["mapping"],
- snv_calling : ["mapping"],
- snv_annotation : ["mapping", "snv_calling"],
- cnv_calling : ["mapping", "snv_calling"],
- phasing : ["mapping", "snv_calling"],
- repeat_calling : ["mapping", "snv_calling", "phasing"],
- methylation : ["mapping", "snv_calling", "phasing"],
+ aligned_read_qc : ["mapping"], // key: workflow; value: workflows it depends on and can't run without
+ assembly : ["mapping"],
+ call_paralogs : ["mapping"],
+ snv_calling : ["mapping"],
+ snv_annotation : ["mapping", "snv_calling"],
+ cnv_calling : ["mapping", "snv_calling"],
+ phasing : ["mapping", "snv_calling"],
+ rank_variants : ["mapping", "snv_calling", "snv_annotation"],
+ repeat_calling : ["mapping", "snv_calling", "phasing"],
+ repeat_annotation: ["mapping", "snv_calling", "phasing", "repeat_calling"], // the full chain is listed explicitly, not only the direct parent
+ methylation : ["mapping", "snv_calling", "phasing"],
]
//
-// E.g., the dipcall_par file is required by the assembly workflow and the assembly workflow can't run without dipcall_par
+// E.g., the par_regions file is required by the assembly workflow and the assembly workflow can't run without par_regions
//
def fileDependencies = [
- mapping : ["fasta", "somalier_sites"],
- assembly : ["fasta"], // The assembly workflow should be split into two - assembly and variant calling (requires ref)
- assembly : ["dipcall_par"],
- snv_annotation: ["snp_db", "vep_cache"],
- cnv_calling : ["hificnv_xy", "hificnv_xx", "hificnv_exclude"],
- repeat_calling: ["trgt_repeats"]
-]
-
-//
-// E.g., pacbio can't run with the methylation workflow
-//
-def presetIncompatibilities = [
- pacbio : ["methylation"],
- ONT_R10: ["assembly", "cnv_calling"],
+ mapping : ["fasta", "somalier_sites"], // key: workflow; value: file parameters that workflow requires
+ assembly : ["fasta", "par_regions"], // The assembly workflow should be split into two - assembly and variant calling (requires ref)
+ snv_calling : ["fasta", "par_regions"],
+ snv_annotation : ["snp_db", "vep_cache", "vep_plugin_files", "variant_consequences_snv"],
+ cnv_calling : ["hificnv_xy", "hificnv_xx", "hificnv_exclude"],
+ rank_variants : ["reduced_penetrance", "score_config_snv"],
+ repeat_calling : ["trgt_repeats"],
+ repeat_annotation: ["variant_catalog"],
]
def parameterStatus = [
@@ -86,29 +86,32 @@ def parameterStatus = [
skip_short_variant_calling: params.skip_short_variant_calling,
skip_phasing_wf : params.skip_phasing_wf,
skip_methylation_wf : params.skip_methylation_wf,
- skip_repeat_wf : params.skip_repeat_wf,
+ skip_rank_variants : params.skip_rank_variants,
+ skip_repeat_calling : params.skip_repeat_calling,
+ skip_repeat_annotation : params.skip_repeat_annotation,
skip_snv_annotation : params.skip_snv_annotation,
skip_call_paralogs : params.skip_call_paralogs,
skip_cnv_calling : params.skip_cnv_calling,
skip_mapping_wf : params.skip_mapping_wf,
- skip_qc : params.skip_qc,
+ skip_aligned_read_qc : params.skip_aligned_read_qc,
+ skip_raw_read_qc : params.skip_raw_read_qc,
skip_assembly_wf : params.skip_assembly_wf,
],
files: [
- dipcall_par : params.dipcall_par,
- snp_db : params.snp_db,
- somalier_sites : params.somalier_sites,
- vep_cache : params.vep_cache,
- hificnv_xy : params.hificnv_xy,
- hificnv_xx : params.hificnv_xx,
- hificnv_exclude: params.hificnv_exclude,
- fasta : params.fasta,
- trgt_repeats : params.trgt_repeats,
- ],
- preset: [
- pacbio : params.preset == "pacbio",
- revio : params.preset == "revio",
- ONT_R10: params.preset == "ONT_R10",
+ par_regions : params.par_regions, // one entry per file parameter named in fileDependencies
+ snp_db : params.snp_db,
+ somalier_sites : params.somalier_sites,
+ vep_cache : params.vep_cache,
+ hificnv_xy : params.hificnv_xy,
+ hificnv_xx : params.hificnv_xx,
+ hificnv_exclude : params.hificnv_exclude,
+ fasta : params.fasta,
+ trgt_repeats : params.trgt_repeats,
+ variant_catalog : params.variant_catalog,
+ score_config_snv : params.score_config_snv,
+ reduced_penetrance : params.reduced_penetrance,
+ vep_plugin_files : params.vep_plugin_files, // required by snv_annotation in fileDependencies; replaces a duplicated score_config_snv key
+ variant_consequences_snv: params.variant_consequences_snv,
]
]
@@ -165,7 +168,7 @@ workflow PIPELINE_INITIALISATION {
//
// Custom validation for pipeline parameters
//
- validateInputParameters(parameterStatus, workflowSkips, workflowDependencies, fileDependencies, presetIncompatibilities)
+ validateInputParameters(parameterStatus, workflowSkips, workflowDependencies, fileDependencies)
//
// Create channel from input file provided through params.input
@@ -181,7 +184,7 @@ workflow PIPELINE_INITIALISATION {
.groupTuple() // group by sample
.map { sample, metas, reads ->
// Add number of files per sample _after_ splitting to meta
- [ sample, metas[0] + [n_files: metas.size() + metas.size() * Math.max(0, params.split_fastq - 1), single_end:true ], reads ]
+ [ sample, metas[0] + [n_files: metas.size() + metas.size() * Math.max(0, params.parallel_alignments - 1), single_end:true ], reads ]
}
// Convert back to [ meta, reads ]
.flatMap {
@@ -190,6 +193,25 @@ workflow PIPELINE_INITIALISATION {
}
.set { ch_samplesheet }
+ // Check that there's samples with affected phenotype if we are ranking variants
+ ch_samplesheet
+ .filter { meta, reads -> meta.phenotype == 2 }
+ .ifEmpty {
+ if(!params.skip_rank_variants) {
+ error("No samples in samplesheet has affected phenotype (=2), --skip_rank_variants has to be active.")
+ }
+ }
+
+ // Check that there's no more than one project
+ // TODO: Try to do this in nf-schema
+ ch_samplesheet
+ .map { meta, reads -> meta.project }
+ .unique()
+ .collect()
+ .filter{ it.size() == 1 }
+ .ifEmpty {
+ error("Only one project may be specified per run")
+ }
emit:
samplesheet = ch_samplesheet
versions = ch_versions
@@ -245,9 +267,9 @@ workflow PIPELINE_COMPLETION {
// Check and validate pipeline parameters
//
-def validateInputParameters(statusMap, workflowMap, workflowDependencies, fileDependencies, presetDependencies) {
+def validateInputParameters(statusMap, workflowMap, workflowDependencies, fileDependencies) { // entry point: genome check first, then skip/file combination checks
genomeExistsError()
- validateParameterCombinations(statusMap, workflowMap, workflowDependencies, fileDependencies, presetDependencies)
+ validateParameterCombinations(statusMap, workflowMap, workflowDependencies, fileDependencies) // all four maps are forwarded unchanged
}
//
@@ -288,27 +310,162 @@ def genomeExistsError() {
// Generate methods description for MultiQC
//
def toolCitationText() {
- // TODO nf-core: Optionally add in-text citation tools to this list.
- // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "",
- // Uncomment function in methodsDescriptionText to render in MultiQC report
+ // Collects in-text citations for the tools of every active workflow; duplicates are removed before the list is joined into one sentence.
def citation_text = [
- "Tools used in the workflow included:",
- "FastQC (Andrews 2010),",
- "MultiQC (Ewels et al. 2016)",
- "."
- ].join(' ').trim()
+ "MultiQC (Ewels et al. 2016)",
+ "SAMtools (Danecek et al. 2021)",
+ ]
+ if (!params.skip_raw_read_qc) {
+ citation_text = citation_text + [
+ "FastQC (Andrews 2010)",
+ "fqcrs",
+ ]
+ }
+ if (!params.skip_mapping_wf) { // every block nested below is only considered when mapping is active
+ if (params.parallel_alignments > 1) {
+ citation_text = citation_text + [
+ "splitubam",
+ ]
+ }
+ citation_text = citation_text + [
+ "SAMtools (Danecek et al. 2021)",
+ "Minimap2 (Li 2018)",
+ "Somalier (Pedersen et al. 2020)",
+ "Sniffles2 (Smolka et al. 2024)",
+ ]
+ if (!params.skip_aligned_read_qc) {
+ citation_text = citation_text + [
+ "cramino (De Coster & Rademakers 2023)",
+ "mosdepth (Pedersen & Quinlan 2018)",
+ ]
+ }
+ if (!params.skip_call_paralogs) {
+ citation_text = citation_text + [
+ "paraphase",
+ ]
+ }
+ if (!params.skip_assembly_wf) {
+ if (params.hifiasm_mode == 'trio-binning') {
+ citation_text = citation_text + [
+ "yak",
+ ]
+ }
+ citation_text = citation_text + [
+ "Hifiasm (Cheng et al. 2021)",
+ "Gfastats (Formenti et al. 2022)",
+ "dipcall (Li et al. 2018)",
+ "SAMtools (Danecek et al. 2021)",
+ "Minimap2 (Li 2018)",
+ ]
+ }
+ if (!params.skip_short_variant_calling) {
+ citation_text = citation_text + [
+ "BEDTools (Quinlan & Hall 2010)",
+ "BCFtools (Danecek et al. 2021)",
+ "DeepVariant (Poplin et al. 2018)",
+ "GLnexus (Yun et al. 2021)",
+ ]
+ }
+ if (!params.skip_snv_annotation) {
+ citation_text = citation_text + [
+ "CADD (Rentzsch et al. 2019, Rentzsch et al. 2021)",
+ "BCFtools (Danecek et al. 2021)",
+ "VEP (McLaren et al. 2016)",
+ "Tabix (Li 2011)",
+ "Echtvar (Pedersen & de Ridder 2023)",
+ ]
+ if (!params.skip_rank_variants) {
+ citation_text = citation_text + [
+ "Genmod (Magnusson et al. 2018)",
+ "Tabix (Li 2011)",
+ ]
+ }
+ }
+ if (!params.skip_cnv_calling) {
+ citation_text = citation_text + [
+ "HiFiCNV",
+ ]
+ }
+ if (!params.skip_phasing_wf) { // methylation and repeat calling/annotation are nested inside the phasing block
+ citation_text = citation_text + [
+ "SAMtools (Danecek et al. 2021)",
+ "cramino (De Coster & Rademakers 2023)",
+ ]
+ if(params.phaser == 'whatshap') {
+ citation_text = citation_text + [
+ "WhatsHap (Martin et al. 2016)",
+ ]
+ }
+ if(params.phaser == 'hiphase_sv') {
+ citation_text = citation_text + [
+ "HiPhase (Holt et al. 2024)",
+ ]
+ }
+ if(params.phaser == 'hiphase_snv') {
+ citation_text = citation_text + [
+ "HiPhase (Holt et al. 2024)",
+ ]
+ }
+ if (!params.skip_methylation_wf) {
+ citation_text = citation_text + [
+ "modkit",
+ "Tabix (Li 2011)",
+ ]
+ }
+ if (!params.skip_repeat_calling) {
+ citation_text = citation_text + [
+ "TRGT (Dolzhenko et al. 2024)",
+ ]
+ if (!params.skip_repeat_annotation) {
+ citation_text = citation_text + [
+ "Stranger (Nilsson & Magnusson 2021)",
+ ]
+ }
+ }
+ }
+ }
- return citation_text
+ def return_text = "Tools used in the workflow included: " + citation_text.unique(false) { a, b -> a <=> b }.join(', ') + "." // unique(false) returns a de-duplicated copy and leaves citation_text untouched
+ return return_text
}
def toolBibliographyText() {
- // TODO nf-core: Optionally add bibliographic entries to this list.
- // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Author (2023) Pub name, Journal, DOI" : "",
- // Uncomment function in methodsDescriptionText to render in MultiQC report
- def reference_text = [
- "Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).",
- "Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354"
- ].join(' ').trim()
+ // Returns the full bibliography as one space-joined string; the list is static and does not depend on which workflows ran.
+ def reference_text = [
+ "Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/",
+ "Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031.",
+ "Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.",
+ "Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311.",
+ "Danecek P, Bonfield JK, Liddle J, et al. Twelve years of SAMtools and BCFtools. GigaScience. 2021;10(2):giab008. doi:10.1093/gigascience/giab008",
+ "Quinlan AR and Hall IM, 2010. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 26, 6, pp. 841–842.",
+ "Wouter De Coster, Rosa Rademakers, NanoPack2: population-scale evaluation of long-read sequencing data, Bioinformatics, Volume 39, Issue 5, May 2023, btad311, https://doi.org/10.1093/bioinformatics/btad311",
+ "Rentzsch P, Schubach M, Shendure J, Kircher M. CADD-Splice—improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Med. 2021;13(1):31. doi:10.1186/s13073-021-00835-9",
+ "Rentzsch P, Witten D, Cooper GM, Shendure J, Kircher M. CADD: predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research. 2019;47(D1):D886-D894. doi:10.1093/nar/gky1016",
+ "Poplin R, Chang PC, Alexander D, et al. A universal SNP and small-indel variant caller using deep neural networks. Nat Biotechnol. 2018;36(10):983-987. doi:10.1038/nbt.4235",
+ "Li H, Bloom JM, Farjoun Y, Fleharty M, Gauthier L, Neale B, MacArthur D (2018) A synthetic-diploid benchmark for accurate variant-calling evaluation. Nat Methods, 15:595-597. [PMID:30013044]",
+ "Brent S Pedersen, Jeroen de Ridder, Echtvar: compressed variant representation for rapid annotation and filtering of SNPs and indels, Nucleic Acids Research, Volume 51, Issue 1, 11 January 2023, Page e3, https://doi.org/10.1093/nar/gkac931",
+ "McLaren W, Gil L, Hunt SE, et al. The Ensembl Variant Effect Predictor. Genome Biol. 2016;17(1):122. doi:10.1186/s13059-016-0974-4",
+ "Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].",
+ "Magnusson M, Hughes T, Glabilloy, Bitdeli Chef. genmod: Version 3.7.3. Published online November 15, 2018. doi:10.5281/ZENODO.3841142",
+ "Giulio Formenti, Linelle Abueg, Angelo Brajuka, Nadolina Brajuka, Cristóbal Gallardo-Alba, Alice Giani, Olivier Fedrigo, Erich D Jarvis, Gfastats: conversion, evaluation and manipulation of genome sequences using assembly graphs, Bioinformatics, Volume 38, Issue 17, September 2022, Pages 4214–4216, https://doi.org/10.1093/bioinformatics/btac460",
+ "Yun T, Li H, Chang PC, Lin MF, Carroll A, McLean CY. Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Robinson P, ed. Bioinformatics. 2021;36(24):5582-5589. doi:10.1093/bioinformatics/btaa1081",
+ "Cheng, H., Concepcion, G.T., Feng, X. et al. Haplotype-resolved de novo assembly using phased assembly graphs with hifiasm. Nat Methods 18, 170–175 (2021). https://doi.org/10.1038/s41592-020-01056-5",
+ "James M Holt, Christopher T Saunders, William J Rowell, Zev Kronenberg, Aaron M Wenger, Michael Eberle, HiPhase: jointly phasing small, structural, and tandem repeat variants from HiFi sequencing, Bioinformatics, Volume 40, Issue 2, February 2024, btae042, https://doi.org/10.1093/bioinformatics/btae042",
+ "Heng Li, Minimap2: pairwise alignment for nucleotide sequences, Bioinformatics, Volume 34, Issue 18, September 2018, Pages 3094–3100, https://doi.org/10.1093/bioinformatics/bty191",
+ "Pedersen BS, Quinlan AR. Mosdepth: quick coverage calculation for genomes and exomes. Hancock J, ed. Bioinformatics. 2018;34(5):867-868. doi:10.1093/bioinformatics/btx699",
+ "Genome-wide profiling of highly similar paralogous genes using HiFi sequencing. Xiao Chen, Daniel Baker, Egor Dolzhenko, Joseph M Devaney, Jessica Noya, April S Berlyoung, Rhonda Brandon, Kathleen S Hruska, Lucas Lochovsky, Paul Kruszka, Scott Newman, Emily Farrow, Isabelle Thiffault, Tomi Pastinen, Dalia Kasperaviciute, Christian Gilissen, Lisenka Vissers, Alexander Hoischen, Seth Berger, Eric Vilain, Emmanuèle Délot, UCI Genomics Research to Elucidate the Genetics of Rare Diseases (UCI GREGoR) Consortium, Michael A Eberle. bioRxiv 2024.04.19.590294; doi: https://doi.org/10.1101/2024.04.19.590294",
+ "Smolka, M., Paulin, L.F., Grochowski, C.M. et al. Detection of mosaic and population-level structural variants with Sniffles2. Nat Biotechnol (2024). https://doi.org/10.1038/s41587-023-02024-y",
+ "Pedersen, B.S., Bhetariya, P.J., Brown, J. et al. Somalier: rapid relatedness estimation for cancer and germline studies using efficient genome sketches. Genome Med 12, 62 (2020). https://doi.org/10.1186/s13073-020-00761-2",
+ "Nilsson D, Magnusson M. moonso/stranger v0.7.1. Published online February 18, 2021. doi:10.5281/ZENODO.4548873",
+ "Li H. Tabix: fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics. 2011;27(5):718-719. doi:10.1093/bioinformatics/btq671",
+ "Dolzhenko, E., English, A., Dashnow, H. et al. Characterization and visualization of tandem repeats at genome scale. Nat Biotechnol (2024). https://doi.org/10.1038/s41587-023-02057-3",
+ "Marcel Martin, Murray Patterson, Shilpa Garg, Sarah O Fischer, Nadia Pisanti, Gunnar W Klau, Alexander Schöenhuth, Tobias Marschall. WhatsHap: fast and accurate read-based phasing. bioRxiv 085050; doi: https://doi.org/10.1101/085050",
+ "Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web.",
+ "Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506.",
+ "da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671.",
+ "Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241.",
+ "Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.",
+ ].join(' ').trim()
return reference_text
}
@@ -335,10 +492,8 @@ def methodsDescriptionText(mqc_methods_yaml) {
meta["tool_citations"] = ""
meta["tool_bibliography"] = ""
- // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
- // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
- // meta["tool_bibliography"] = toolBibliographyText()
-
+ meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
+ meta["tool_bibliography"] = toolBibliographyText()
def methods_text = mqc_methods_yaml.text
@@ -349,12 +504,12 @@ def methodsDescriptionText(mqc_methods_yaml) {
}
//
-// Validate preset and workflow skip combinations
+// Validate workflow skip combinations
//
-def validateParameterCombinations(statusMap, workflowMap, workflowDependencies, fileDependencies, presetIncompatibilities) {
+def validateParameterCombinations(statusMap, workflowMap, workflowDependencies, fileDependencies) {
// Array to store errors
def errors = []
- // For each of the "workflow", "files", "preset"
+ // For each of the "workflow", "files"
statusMap.each { paramsType, allParams ->
// Go through all params and their status
statusMap[paramsType].each { param, paramStatus ->
@@ -365,9 +520,6 @@ def validateParameterCombinations(statusMap, workflowMap, workflowDependencies,
case "workflow":
checkWorkflowDependencies(param, workflowDependencies, statusMap, workflowMap, errors)
break
- case "preset":
- checkPresetDependencies(param, presetIncompatibilities, statusMap, workflowMap, errors)
- break
default:
break
}
@@ -383,35 +535,6 @@ def validateParameterCombinations(statusMap, workflowMap, workflowDependencies,
}
}
-//
-// Lookup all workflows that needs to be active for a certain preset
-//
-def checkPresetDependencies(String preset, Map combinationsMap, Map statusMap, Map workflowMap, List errors) {
-
- // If preset is not active, then give no error
- presetIsActive = statusMap["preset"][preset]
- if(!presetIsActive) {
- return
- }
-
- // Get all required workflows for a preset
- def requiredWorkflows = combinationsMap[preset] as Set
- // If no direct dependencies are found, return an empty list
- if (!requiredWorkflows) {
- return []
- }
- // Collect the required --skips that are not active for the current preset
- def dependencyString = findRequiredSkips("preset", requiredWorkflows, statusMap, workflowMap)
- .collect { [ '--', it ].join('') }
- .join(" ")
- // If all reqired sets are set, give no error
- if (!dependencyString) {
- return
- }
- errors << "--preset $preset is active, the pipeline has to be run with: $dependencyString"
- return errors
-}
-
//
// Lookup all workflows that needs to be active for another workflow
//
@@ -480,10 +603,6 @@ def findRequiredSkips(paramType, Set requiredWorkflows, Map statusMap, M
if(workflowIsSkipped) {
requiredSkips << skip
}
- } else if(paramType == "preset") {
- if(!workflowIsSkipped) {
- requiredSkips << skip
- }
}
}
return requiredSkips
diff --git a/tests/main.nf.test b/tests/main.nf.test
new file mode 100644
index 00000000..7584d5ad
--- /dev/null
+++ b/tests/main.nf.test
@@ -0,0 +1,345 @@
+nextflow_pipeline {
+
+ name "Test pipeline GENOMICMEDICINESWEDEN_NALLO"
+ script "../main.nf"
+ profile "test"
+ tag "PIPELINE"
+
+ test("test profile") {
+ tag "samplesheet"
+
+ when {
+ params {
+ // Base directory for genomic-medicine-sweden/nallo test data
+ pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/'
+ // Test files
+ fasta = params.pipelines_testdata_base_path + 'nallo/reference/hg38.test.fa.gz'
+ input = 'https://github.com/genomic-medicine-sweden/test-datasets/raw/e2266a34c14d1e0a9ef798de3cd81a76c9216fc1/testdata/samplesheet.csv'
+ bed = params.pipelines_testdata_base_path + 'nallo/reference/test_data.bed'
+ hificnv_xy = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XY.bed'
+ hificnv_xx = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XX.bed'
+ hificnv_exclude = params.pipelines_testdata_base_path + 'nallo/reference/empty.bed'
+ par_regions = params.pipelines_testdata_base_path + 'nallo/reference/hs38.PAR.bed'
+ trgt_repeats = params.pipelines_testdata_base_path + 'nallo/reference/pathogenic_repeats.hg38.bed'
+ variant_catalog = params.pipelines_testdata_base_path + 'nallo/reference/variant_catalog_grch38.json'
+ vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz'
+ vep_plugin_files = params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv'
+ snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv'
+ somalier_sites = params.pipelines_testdata_base_path + 'nallo/reference/somalier_sites.vcf.gz'
+ reduced_penetrance = params.pipelines_testdata_base_path + 'nallo/reference/reduced_penetrance.tsv'
+ score_config_snv = params.pipelines_testdata_base_path + 'nallo/reference/rank_model_snv.ini'
+ variant_consequences_snv = params.pipelines_testdata_base_path + 'nallo/reference/variant_consequences_v2.txt'
+
+ // Parameters
+ parallel_snv = 2
+ preset = "revio"
+ outdir = "$outputDir"
+ }
+ }
+
+ then {
+ assertAll (
+ { assert workflow.success },
+ // Assert with snapshot multisample
+ { assert snapshot(
+ file("$outputDir/pedigree/test.ped"),
+ file("$outputDir/qc_aligned_reads/somalier/relate/test/test.pairs.tsv"),
+ file("$outputDir/qc_aligned_reads/somalier/relate/test/test.samples.tsv"),
+ file("$outputDir/qc_aligned_reads/somalier/relate/test/test.html"),
+ file("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"),
+ file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
+ file("$outputDir/multiqc/multiqc_data/multiqc_somalier.txt"),
+ // Assert with snapshot HG002_Revio
+ bam("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_phased.bam", stringency: 'silent').getReadsMD5(),
+ file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap1.p_ctg.assembly_summary"),
+ file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap2.p_ctg.assembly_summary"),
+ bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bam", stringency: 'silent').getReadsMD5(),
+ bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.bam", stringency: 'silent').getReadsMD5(),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.dip.bed"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bed"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.bed"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.copynum.bedgraph"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.depth.bw"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.maf.bw"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.mosdepth.global.dist.txt"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.mosdepth.region.dist.txt"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.mosdepth.summary.txt"),
+ file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap1.p_ctg.fasta.gz"),
+ file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap2.p_ctg.fasta.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.dip.vcf.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.paf.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.var.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.paf.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.var.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.pair.vcf.gz"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.vcf.gz"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.HG002_Revio.vcf.gz.tbi"),
+ file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio/HG002_Revio_modkit_pileup.bed.gz"),
+ file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio/HG002_Revio_modkit_pileup.bed.gz.tbi"),
+ bam("$outputDir/paraphase/HG002_Revio/HG002_Revio.paraphase.bam", stringency: 'silent').getReadsMD5(),
+ file("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio/HG002_Revio_cramino_aligned_phased.arrow"),
+ file("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio/HG002_Revio_cramino_aligned.arrow"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.regions.bed.gz"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio/HG002_Revio.regions.bed.gz.csi"),
+ file("$outputDir/qc_raw_reads/fqcrs/HG002_Revio/HG002_PacBio_Revio.fastq.gz.tsv.zst"),
+ file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_sorted.vcf.gz"),
+ file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_sorted.vcf.gz.tbi"),
+ bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_spanning_sorted.bam", stringency: 'silent').getReadsMD5(),
+ file("$outputDir/snvs/stats/single_sample/HG002_Revio.vcf.gz.bcftools_stats.txt").readLines()[0..2],
+ ).match() },
+ // Assert exists multisample
+ { assert new File("$outputDir/databases/echtvar/encode/test/test.zip").exists() },
+ { assert new File("$outputDir/multiqc/multiqc_data/multiqc_data.json").exists() },
+ { assert new File("$outputDir/multiqc/multiqc_data/multiqc.log").exists() },
+ { assert new File("$outputDir/multiqc/multiqc_report.html").exists() },
+ { assert new File("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt").exists() },
+ { assert new File("$outputDir/multiqc/multiqc_data/multiqc_sources.txt").exists() },
+ { assert new File("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt").exists() },
+ { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz").exists() },
+ { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz").exists() },
+ { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz.tbi").exists() },
+ // Assert exists HG002_Revio
+ { assert new File("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_phased.bam.bai").exists() },
+ { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bam.bai").exists() },
+ { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.sam.gz").exists() },
+ { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.bam.bai").exists() },
+ { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.sam.gz").exists() },
+ { assert new File("$outputDir/cnv_calling/hificnv/HG002_Revio/HG002_Revio.log").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_1.bed.gz").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_1.bed.gz.tbi").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_2.bed.gz").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_2.bed.gz.tbi").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_ungrouped.bed.gz").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio/HG002_Revio_modkit_pileup_phased_ungrouped.bed.gz.tbi").exists() },
+ { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio.paraphase.bam.bai").exists() },
+ { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio_paraphase_vcfs/HG002_Revio_hba.vcf.gz").exists() },
+ { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio_paraphase_vcfs/HG002_Revio_hba.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio.paraphase.json").exists() },
+ { assert new File("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio/HG002_Revio_cramino_aligned_phased.txt").exists() },
+ { assert new File("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio/HG002_Revio_cramino_aligned.txt").exists() },
+ { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio/HG002_Revio_phased.vcf.gz").exists() },
+ { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio/HG002_Revio_phased.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio/HG002_Revio_stats.stats.tsv").exists() },
+ { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio/HG002_Revio_stats.blocks.tsv").exists() },
+ { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio/HG002_Revio_fastqc.html").exists() },
+ { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio/HG002_Revio_fastqc.zip").exists() },
+ { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio/HG002_Revio_repeat_expansion_stranger.vcf.gz").exists() },
+ { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio/HG002_Revio_repeat_expansion_stranger.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_spanning_sorted.bam.bai").exists() },
+ { assert new File("$outputDir/snvs/single_sample/HG002_Revio/HG002_Revio_snv_annotated_ranked.vcf.gz").exists() },
+ { assert new File("$outputDir/snvs/single_sample/HG002_Revio/HG002_Revio_snv_annotated_ranked.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio/HG002_Revio_sniffles.snf").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio/HG002_Revio_sniffles.vcf.gz").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio/HG002_Revio_sniffles.vcf.gz.tbi").exists() }
+ )
+ }
+ }
+
+ test("test profile - multisample") {
+ tag "samplesheet_multisample_bam"
+
+ when {
+ params {
+ // Base directory for genomic-medicine-sweden/nallo test data
+ pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/'
+ // Test files
+ fasta = params.pipelines_testdata_base_path + 'nallo/reference/hg38.test.fa.gz'
+ input = 'https://github.com/genomic-medicine-sweden/test-datasets/raw/e2266a34c14d1e0a9ef798de3cd81a76c9216fc1/testdata/samplesheet_multisample_bam.csv'
+ bed = params.pipelines_testdata_base_path + 'nallo/reference/test_data.bed'
+ hificnv_xy = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XY.bed'
+ hificnv_xx = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XX.bed'
+ hificnv_exclude = params.pipelines_testdata_base_path + 'nallo/reference/empty.bed'
+ par_regions = params.pipelines_testdata_base_path + 'nallo/reference/hs38.PAR.bed'
+ trgt_repeats = params.pipelines_testdata_base_path + 'nallo/reference/pathogenic_repeats.hg38.bed'
+ variant_catalog = params.pipelines_testdata_base_path + 'nallo/reference/variant_catalog_grch38.json'
+ vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz'
+ vep_plugin_files = params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv'
+ snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv'
+ somalier_sites = params.pipelines_testdata_base_path + 'nallo/reference/somalier_sites.vcf.gz'
+ reduced_penetrance = params.pipelines_testdata_base_path + 'nallo/reference/reduced_penetrance.tsv'
+ score_config_snv = params.pipelines_testdata_base_path + 'nallo/reference/rank_model_snv.ini'
+ variant_consequences_snv = params.pipelines_testdata_base_path + 'nallo/reference/variant_consequences_v2.txt'
+
+ // Parameters
+ parallel_snv = 2
+ preset = "revio"
+ outdir = "$outputDir"
+ }
+ }
+
+ then {
+ assertAll (
+ { assert workflow.success },
+ // Assert with snapshot multisample
+ { assert snapshot(
+ file("$outputDir/pedigree/test.ped"),
+ file("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"),
+ file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"),
+ // Assert with snapshot HG002_Revio_A
+ bam("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam", stringency: 'silent').getReadsMD5(),
+ file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap1.p_ctg.assembly_summary"),
+ file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap2.p_ctg.assembly_summary"),
+ bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bam", stringency: 'silent').getReadsMD5(),
+ bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.bam", stringency: 'silent').getReadsMD5(),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.dip.bed"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bed"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.bed"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.copynum.bedgraph"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.depth.bw"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.maf.bw"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.mosdepth.global.dist.txt"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.mosdepth.region.dist.txt"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.mosdepth.summary.txt"),
+ file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap1.p_ctg.fasta.gz"),
+ file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap2.p_ctg.fasta.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.dip.vcf.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.paf.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.var.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.paf.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.var.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.pair.vcf.gz"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.vcf.gz"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.HG002_Revio_A.vcf.gz.tbi"),
+ file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio_A/HG002_Revio_A_modkit_pileup.bed.gz"),
+ file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio_A/HG002_Revio_A_modkit_pileup.bed.gz.tbi"),
+ bam("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A.paraphase.bam", stringency: 'silent').getReadsMD5(),
+ file("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio_A/HG002_Revio_A_cramino_aligned_phased.arrow"),
+ file("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio_A/HG002_Revio_A_cramino_aligned.arrow"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.regions.bed.gz"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_A/HG002_Revio_A.regions.bed.gz.csi"),
+ file("$outputDir/qc_raw_reads/fqcrs/HG002_Revio_A/HG002_PacBio_Revio.bam_other.fastq.gz.tsv.zst"),
+ file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_sorted.vcf.gz"),
+ file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_sorted.vcf.gz.tbi"),
+ bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_spanning_sorted.bam", stringency: 'silent').getReadsMD5(),
+ file("$outputDir/snvs/stats/single_sample/HG002_Revio_A.vcf.gz.bcftools_stats.txt").readLines()[0..2],
+ // Assert with snapshot HG002_Revio_B
+ bam("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam", stringency: 'silent').getReadsMD5(),
+ file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap1.p_ctg.assembly_summary"),
+ file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap2.p_ctg.assembly_summary"),
+ bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.bam", stringency: 'silent').getReadsMD5(),
+ bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.bam", stringency: 'silent').getReadsMD5(),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.dip.bed"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.bed"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.bed"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.copynum.bedgraph"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.depth.bw"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.maf.bw"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.mosdepth.global.dist.txt"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.mosdepth.region.dist.txt"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.mosdepth.summary.txt"),
+ file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap1.p_ctg.fasta.gz"),
+ file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap2.p_ctg.fasta.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.dip.vcf.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.paf.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.var.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.paf.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.var.gz"),
+ file("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.pair.vcf.gz"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.vcf.gz"),
+ file("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.HG002_Revio_B.vcf.gz.tbi"),
+ file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio_B/HG002_Revio_B_modkit_pileup.bed.gz"),
+ file("$outputDir/methylation/modkit/pileup/unphased/HG002_Revio_B/HG002_Revio_B_modkit_pileup.bed.gz.tbi"),
+ bam("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B.paraphase.bam", stringency: 'silent').getReadsMD5(),
+ file("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio_B/HG002_Revio_B_cramino_aligned_phased.arrow"),
+ file("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio_B/HG002_Revio_B_cramino_aligned.arrow"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.regions.bed.gz"),
+ file("$outputDir/qc_aligned_reads/mosdepth/HG002_Revio_B/HG002_Revio_B.regions.bed.gz.csi"),
+ file("$outputDir/qc_raw_reads/fqcrs/HG002_Revio_B/HG002_Revio_B.merged.fastq.gz.tsv.zst"),
+ file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_sorted.vcf.gz"),
+ file("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_sorted.vcf.gz.tbi"),
+ bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_spanning_sorted.bam", stringency: 'silent').getReadsMD5(),
+ file("$outputDir/snvs/stats/single_sample/HG002_Revio_B.vcf.gz.bcftools_stats.txt").readLines()[0..2],
+ ).match() },
+ // Assert exists multisample - the trgt multi_sample VCF is also asserted in the single-sample test above
+ { assert new File("$outputDir/databases/echtvar/encode/test/test.zip").exists() },
+ { assert new File("$outputDir/qc_aligned_reads/somalier/relate/test/test.pairs.tsv").exists() },
+ { assert new File("$outputDir/qc_aligned_reads/somalier/relate/test/test.samples.tsv").exists() },
+ { assert new File("$outputDir/qc_aligned_reads/somalier/relate/test/test.html").exists() },
+ { assert new File("$outputDir/multiqc/multiqc_data/multiqc_general_stats.txt").exists() },
+ { assert new File("$outputDir/multiqc/multiqc_data/multiqc_somalier.txt").exists() },
+ { assert new File("$outputDir/multiqc/multiqc_data/multiqc_data.json").exists() },
+ { assert new File("$outputDir/multiqc/multiqc_data/multiqc.log").exists() },
+ { assert new File("$outputDir/multiqc/multiqc_report.html").exists() },
+ { assert new File("$outputDir/multiqc/multiqc_data/multiqc_sources.txt").exists() },
+ { assert new File("$outputDir/multiqc/multiqc_data/multiqc_software_versions.txt").exists() },
+ { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz").exists() },
+ { assert new File("$outputDir/snvs/multi_sample/test/test_snv_annotated_ranked.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/multi_sample/test/test_sniffles.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz").exists() },
+ { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz.tbi").exists() },
+ // Assert exists HG002_Revio_A
+ { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam.bai").exists() },
+ { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bam.bai").exists() },
+ { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.sam.gz").exists() },
+ { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.bam.bai").exists() },
+ { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.sam.gz").exists() },
+ { assert new File("$outputDir/cnv_calling/hificnv/HG002_Revio_A/HG002_Revio_A.log").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_1.bed.gz").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_1.bed.gz.tbi").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_2.bed.gz").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_2.bed.gz.tbi").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_ungrouped.bed.gz").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_A/HG002_Revio_A_modkit_pileup_phased_ungrouped.bed.gz.tbi").exists() },
+ { assert new File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A.paraphase.bam.bai").exists() },
+ { assert new File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A_paraphase_vcfs/HG002_Revio_A_hba.vcf.gz").exists() },
+ { assert new File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A_paraphase_vcfs/HG002_Revio_A_hba.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A.paraphase.json").exists() },
+ { assert new File("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio_A/HG002_Revio_A_cramino_aligned_phased.txt").exists() },
+ { assert new File("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio_A/HG002_Revio_A_cramino_aligned.txt").exists() },
+ { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam.bai").exists() },
+ { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz").exists() },
+ { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_A/HG002_Revio_A_stats.stats.tsv").exists() },
+ { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_A/HG002_Revio_A_stats.blocks.tsv").exists() },
+ { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_A/HG002_Revio_A_fastqc.html").exists() },
+ { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_A/HG002_Revio_A_fastqc.zip").exists() },
+ { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_A/HG002_Revio_A_repeat_expansion_stranger.vcf.gz").exists() },
+ { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_A/HG002_Revio_A_repeat_expansion_stranger.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_spanning_sorted.bam.bai").exists() },
+ { assert new File("$outputDir/snvs/single_sample/HG002_Revio_A/HG002_Revio_A_snv_annotated_ranked.vcf.gz").exists() },
+ { assert new File("$outputDir/snvs/single_sample/HG002_Revio_A/HG002_Revio_A_snv_annotated_ranked.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_A/HG002_Revio_A_sniffles.snf").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_A/HG002_Revio_A_sniffles.vcf.gz").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_A/HG002_Revio_A_sniffles.vcf.gz.tbi").exists() },
+ // Assert exists HG002_Revio_B
+ { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam.bai").exists() },
+ { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.bam.bai").exists() },
+ { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.sam.gz").exists() },
+ { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.bam.bai").exists() },
+ { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.sam.gz").exists() },
+ { assert new File("$outputDir/cnv_calling/hificnv/HG002_Revio_B/HG002_Revio_B.log").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_1.bed.gz").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_1.bed.gz.tbi").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_2.bed.gz").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_2.bed.gz.tbi").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_ungrouped.bed.gz").exists() },
+ { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_Revio_B/HG002_Revio_B_modkit_pileup_phased_ungrouped.bed.gz.tbi").exists() },
+ { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B.paraphase.bam.bai").exists() },
+ { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B_paraphase_vcfs/HG002_Revio_B_hba.vcf.gz").exists() },
+ { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B_paraphase_vcfs/HG002_Revio_B_hba.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B.paraphase.json").exists() },
+ { assert new File("$outputDir/qc_aligned_reads/cramino/phased/HG002_Revio_B/HG002_Revio_B_cramino_aligned_phased.txt").exists() },
+ { assert new File("$outputDir/qc_aligned_reads/cramino/unphased/HG002_Revio_B/HG002_Revio_B_cramino_aligned.txt").exists() },
+ { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam.bai").exists() },
+ { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz").exists() },
+ { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_B/HG002_Revio_B_stats.stats.tsv").exists() },
+ { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_B/HG002_Revio_B_stats.blocks.tsv").exists() },
+ { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_B/HG002_Revio_B_fastqc.html").exists() },
+ { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_B/HG002_Revio_B_fastqc.zip").exists() },
+ { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_B/HG002_Revio_B_repeat_expansion_stranger.vcf.gz").exists() },
+ { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_B/HG002_Revio_B_repeat_expansion_stranger.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_spanning_sorted.bam.bai").exists() },
+ { assert new File("$outputDir/snvs/single_sample/HG002_Revio_B/HG002_Revio_B_snv_annotated_ranked.vcf.gz").exists() },
+ { assert new File("$outputDir/snvs/single_sample/HG002_Revio_B/HG002_Revio_B_snv_annotated_ranked.vcf.gz.tbi").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_B/HG002_Revio_B_sniffles.snf").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_B/HG002_Revio_B_sniffles.vcf.gz").exists() },
+ { assert new File("$outputDir/sv_calling/sniffles/single_sample/HG002_Revio_B/HG002_Revio_B_sniffles.vcf.gz.tbi").exists() }
+ )
+ }
+ }
+}
diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap
new file mode 100644
index 00000000..a3412aba
--- /dev/null
+++ b/tests/main.nf.test.snap
@@ -0,0 +1,150 @@
+{
+ "test profile": {
+ "content": [
+ "test.ped:md5,bd5cec27ba7337a85cf98e787131e2b5",
+ "test.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595",
+ "test.samples.tsv:md5,1685dc6cb8c6b9806ca636662980d686",
+ "test.html:md5,d05e0eceb70ada3a0c25f99a16ad1889",
+ "multiqc_citations.txt:md5,a27affce20d456d20ed387097a4f0350",
+ "multiqc_fastqc.txt:md5,c60b523b5f11c07c9149043bf68f92ad",
+ "multiqc_somalier.txt:md5,20b4c5b2d5b94b77fb800548e07a874e",
+ "74b4822241bd8d1bc42f494f1f3e326c",
+ "HG002_Revio.asm.bp.hap1.p_ctg.assembly_summary:md5,ccaad2690abccadc4ec3b2c5d8fa4b05",
+ "HG002_Revio.asm.bp.hap2.p_ctg.assembly_summary:md5,c5bbeabb571453186a39cf6e487dbcc5",
+ "67fc08c5db63d417992aa4842a567c2d",
+ "28f964b0683d285fabc5407af0f28580",
+ "HG002_Revio.dip.bed:md5,5c0ad25a4bb82b8ce06f526664ffbd1c",
+ "HG002_Revio.hap1.bed:md5,28ac0570c41a83c231f2c853918d24c5",
+ "HG002_Revio.hap2.bed:md5,8b9a998402277ef043718f95a5410fe1",
+ "HG002_Revio.HG002_Revio.copynum.bedgraph:md5,517bc59c1b235490c79aa8319437b033",
+ "HG002_Revio.HG002_Revio.depth.bw:md5,20757c985f4713b8568dee05765db701",
+ "HG002_Revio.HG002_Revio.maf.bw:md5,4699d7a664277863f06eb48c3cba3c41",
+ "HG002_Revio.mosdepth.global.dist.txt:md5,6186315d4d65eda85553af82a98829d1",
+ "HG002_Revio.mosdepth.region.dist.txt:md5,c6c7ee8f056b8b2b92c97ec472b1db16",
+ "HG002_Revio.mosdepth.summary.txt:md5,35c51f1ad9d2856d1e6446205f19a8e3",
+ "HG002_Revio.asm.bp.hap1.p_ctg.fasta.gz:md5,6c040d554c3310e1555c928a68fca9f2",
+ "HG002_Revio.asm.bp.hap2.p_ctg.fasta.gz:md5,d250ea1f1a1186d1d324c843bc8a3e7a",
+ "HG002_Revio.dip.vcf.gz:md5,ce28d131a954db1325246fce498eb659",
+ "HG002_Revio.hap1.paf.gz:md5,493bbf9db4bc541a348de656f29e58f1",
+ "HG002_Revio.hap1.var.gz:md5,55e7be8fb8fe051725890f1cfaae3692",
+ "HG002_Revio.hap2.paf.gz:md5,63066bc6d6abcdff9b6916d549077a41",
+ "HG002_Revio.hap2.var.gz:md5,fb0b9d9708cd907831f9cd7019f1d03d",
+ "HG002_Revio.pair.vcf.gz:md5,977d60dfdf18e27c06574ec8297e49e4",
+ "HG002_Revio.HG002_Revio.vcf.gz:md5,771f7d9ecbd161dedfbdd053ceff0d52",
+ "HG002_Revio.HG002_Revio.vcf.gz.tbi:md5,65d6930f79e626a5542091b7a419dc2b",
+ "HG002_Revio_modkit_pileup.bed.gz:md5,8850d8414ecefc2341452f23ce5e6870",
+ "HG002_Revio_modkit_pileup.bed.gz.tbi:md5,af7e0b1699351d6df3d93d223d5c3da9",
+ "fcc3f220bb09d3264f97e7f620779be0",
+ "HG002_Revio_cramino_aligned_phased.arrow:md5,72df2934ff8aa7e1bf8cf8a4881a0d2a",
+ "HG002_Revio_cramino_aligned.arrow:md5,72df2934ff8aa7e1bf8cf8a4881a0d2a",
+ "HG002_Revio.regions.bed.gz:md5,e051f64c7a780d67ea6727a327dd4281",
+ "HG002_Revio.regions.bed.gz.csi:md5,026eef1c69fb4aa3a1687463fe2088ab",
+ "HG002_PacBio_Revio.fastq.gz.tsv.zst:md5,4b073293b3e771d19b4cfdb07909571b",
+ "HG002_Revio_sorted.vcf.gz:md5,fbb5699b8f74fc105fb154e8fac7bfea",
+ "HG002_Revio_sorted.vcf.gz.tbi:md5,0466518ee265ba63160ed27cee0dec88",
+ "65999ab8f2bc7841de8172468bf23ab6",
+ [
+ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
+ "# The command line was:\tbcftools stats HG002_Revio.vcf.gz",
+ "#"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-29T10:42:09.011660883"
+ },
+ "test profile - multisample": {
+ "content": [
+ "test.ped:md5,db74c6787a92a70ffaab766fa4d7a873",
+ "multiqc_citations.txt:md5,a27affce20d456d20ed387097a4f0350",
+ "multiqc_fastqc.txt:md5,b74145ef9fbf8addcc2997ca26b3aa45",
+ "74b4822241bd8d1bc42f494f1f3e326c",
+ "HG002_Revio_A.asm.bp.hap1.p_ctg.assembly_summary:md5,ccaad2690abccadc4ec3b2c5d8fa4b05",
+ "HG002_Revio_A.asm.bp.hap2.p_ctg.assembly_summary:md5,c5bbeabb571453186a39cf6e487dbcc5",
+ "67fc08c5db63d417992aa4842a567c2d",
+ "28f964b0683d285fabc5407af0f28580",
+ "HG002_Revio_A.dip.bed:md5,5c0ad25a4bb82b8ce06f526664ffbd1c",
+ "HG002_Revio_A.hap1.bed:md5,28ac0570c41a83c231f2c853918d24c5",
+ "HG002_Revio_A.hap2.bed:md5,8b9a998402277ef043718f95a5410fe1",
+ "HG002_Revio_A.HG002_Revio_A.copynum.bedgraph:md5,517bc59c1b235490c79aa8319437b033",
+ "HG002_Revio_A.HG002_Revio_A.depth.bw:md5,20757c985f4713b8568dee05765db701",
+ "HG002_Revio_A.HG002_Revio_A.maf.bw:md5,4699d7a664277863f06eb48c3cba3c41",
+ "HG002_Revio_A.mosdepth.global.dist.txt:md5,6186315d4d65eda85553af82a98829d1",
+ "HG002_Revio_A.mosdepth.region.dist.txt:md5,c6c7ee8f056b8b2b92c97ec472b1db16",
+ "HG002_Revio_A.mosdepth.summary.txt:md5,35c51f1ad9d2856d1e6446205f19a8e3",
+ "HG002_Revio_A.asm.bp.hap1.p_ctg.fasta.gz:md5,6c040d554c3310e1555c928a68fca9f2",
+ "HG002_Revio_A.asm.bp.hap2.p_ctg.fasta.gz:md5,d250ea1f1a1186d1d324c843bc8a3e7a",
+ "HG002_Revio_A.dip.vcf.gz:md5,d97b3e520a3bf6f85eb6b5dd66961e71",
+ "HG002_Revio_A.hap1.paf.gz:md5,493bbf9db4bc541a348de656f29e58f1",
+ "HG002_Revio_A.hap1.var.gz:md5,55e7be8fb8fe051725890f1cfaae3692",
+ "HG002_Revio_A.hap2.paf.gz:md5,63066bc6d6abcdff9b6916d549077a41",
+ "HG002_Revio_A.hap2.var.gz:md5,fb0b9d9708cd907831f9cd7019f1d03d",
+ "HG002_Revio_A.pair.vcf.gz:md5,8fd0eb40954ed6d829d0632d0cf54414",
+ "HG002_Revio_A.HG002_Revio_A.vcf.gz:md5,e575fbadcd826243561192310f234c82",
+ "HG002_Revio_A.HG002_Revio_A.vcf.gz.tbi:md5,e27fe63e51a2cfaec020ad0a1aec4a7f",
+ "HG002_Revio_A_modkit_pileup.bed.gz:md5,8850d8414ecefc2341452f23ce5e6870",
+ "HG002_Revio_A_modkit_pileup.bed.gz.tbi:md5,af7e0b1699351d6df3d93d223d5c3da9",
+ "fcc3f220bb09d3264f97e7f620779be0",
+ "HG002_Revio_A_cramino_aligned_phased.arrow:md5,72df2934ff8aa7e1bf8cf8a4881a0d2a",
+ "HG002_Revio_A_cramino_aligned.arrow:md5,72df2934ff8aa7e1bf8cf8a4881a0d2a",
+ "HG002_Revio_A.regions.bed.gz:md5,e051f64c7a780d67ea6727a327dd4281",
+ "HG002_Revio_A.regions.bed.gz.csi:md5,026eef1c69fb4aa3a1687463fe2088ab",
+ "HG002_PacBio_Revio.bam_other.fastq.gz.tsv.zst:md5,4b073293b3e771d19b4cfdb07909571b",
+ "HG002_Revio_A_sorted.vcf.gz:md5,680938d6ebeafe73d8df0b21c0310276",
+ "HG002_Revio_A_sorted.vcf.gz.tbi:md5,a6554ab817e7c232a1554ea85fa00151",
+ "65999ab8f2bc7841de8172468bf23ab6",
+ [
+ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
+ "# The command line was:\tbcftools stats HG002_Revio_A.vcf.gz",
+ "#"
+ ],
+ "fe7bb70701d1100b2874c10a512a2144",
+ "HG002_Revio_B.asm.bp.hap1.p_ctg.assembly_summary:md5,4941730ceacb4012e771208be7a6673a",
+ "HG002_Revio_B.asm.bp.hap2.p_ctg.assembly_summary:md5,be7dcb093d25922b72ef0f7bc1bf0706",
+ "dc5ae77e003989e86ccbfd14b4ae6c7",
+ "3608f178b942e804e5588e5bbd9e06ee",
+ "HG002_Revio_B.dip.bed:md5,865dbd07d7c6b7904176ee25cce91928",
+ "HG002_Revio_B.hap1.bed:md5,50609668f7b26a9d68d583fdaad9bedd",
+ "HG002_Revio_B.hap2.bed:md5,865338ef4e621ca1e8efcc0a8a6c64d0",
+ "HG002_Revio_B.HG002_Revio_B.copynum.bedgraph:md5,22b040fce6bdf6b2e323f1cc1658128d",
+ "HG002_Revio_B.HG002_Revio_B.depth.bw:md5,26bc36669ae0b066c2462e3c9307860f",
+ "HG002_Revio_B.HG002_Revio_B.maf.bw:md5,6c5691dbaf6e526f4b8fa0f7147bfc8b",
+ "HG002_Revio_B.mosdepth.global.dist.txt:md5,c3449580a34c495f537635c3bde4699d",
+ "HG002_Revio_B.mosdepth.region.dist.txt:md5,98a20c6295a956932c21534cbf5e889c",
+ "HG002_Revio_B.mosdepth.summary.txt:md5,d4fa8aeabc3b348255e7a2fa0135b1a4",
+ "HG002_Revio_B.asm.bp.hap1.p_ctg.fasta.gz:md5,bd4ae3d478b26ebaa6b64c6795e09e71",
+ "HG002_Revio_B.asm.bp.hap2.p_ctg.fasta.gz:md5,688d9e693b580a901632022b26fd9d5f",
+ "HG002_Revio_B.dip.vcf.gz:md5,544c42489591d02fa0dc143e6cf1c6b8",
+ "HG002_Revio_B.hap1.paf.gz:md5,467daced5b3897cf74c68251b8f99e3a",
+ "HG002_Revio_B.hap1.var.gz:md5,fb4efe049800a27b10d080ea5a563fb6",
+ "HG002_Revio_B.hap2.paf.gz:md5,fadac5cffae4e167ef1c41de929edf1b",
+ "HG002_Revio_B.hap2.var.gz:md5,6e4fcd586c9a30b46d23a8de1d0efe73",
+ "HG002_Revio_B.pair.vcf.gz:md5,987d192995729f82247dffdcea2dd60f",
+ "HG002_Revio_B.HG002_Revio_B.vcf.gz:md5,59773d5836f76b36dc1bf75e32186898",
+ "HG002_Revio_B.HG002_Revio_B.vcf.gz.tbi:md5,1408aeaca23a00eee979101eb45d887d",
+ "HG002_Revio_B_modkit_pileup.bed.gz:md5,e50e61d644ad3219b3088b1c0d701862",
+ "HG002_Revio_B_modkit_pileup.bed.gz.tbi:md5,ae29296657f0d2c7451d285ec49f8675",
+ "a8a1403ba192f971638c870a2be35700",
+ "HG002_Revio_B_cramino_aligned_phased.arrow:md5,bdd385c0b007ffc30c6cfdd98b15ed82",
+ "HG002_Revio_B_cramino_aligned.arrow:md5,bdd385c0b007ffc30c6cfdd98b15ed82",
+ "HG002_Revio_B.regions.bed.gz:md5,deaca22783bd058cdc8756efa25b5f53",
+ "HG002_Revio_B.regions.bed.gz.csi:md5,dd9a0d36d71da0d274d1c9ca6f8571ae",
+ "HG002_Revio_B.merged.fastq.gz.tsv.zst:md5,0641e175a07429b61710329a2eeef450",
+ "HG002_Revio_B_sorted.vcf.gz:md5,ce617741468f4bc7f504f8f488332098",
+ "HG002_Revio_B_sorted.vcf.gz.tbi:md5,e255a5ea92885967f0c126bddc8ea3b2",
+ "6b0cf3f492ce898398835d1102afd369",
+ [
+ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
+ "# The command line was:\tbcftools stats HG002_Revio_B.vcf.gz",
+ "#"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-29T10:45:44.41793623"
+ }
+}
\ No newline at end of file
diff --git a/tests/nextflow.config b/tests/nextflow.config
new file mode 100644
index 00000000..3c53747a
--- /dev/null
+++ b/tests/nextflow.config
@@ -0,0 +1,77 @@
+params {
+ // Base directory for nf-core/modules test data
+ modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/'
+
+ // Base directory for genomic-medicine-sweden/nallo test data
+ pipelines_testdata_base_path = 'https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/'
+ // Upper bounds for CPUs, memory and run time in test runs (presumably read by the pipeline's resource-limit logic; not visible here)
+ max_cpus = 4
+ max_memory = '15.GB'
+ max_time = '6.h'
+
+}
+
+// Impose same minimum Nextflow version as the pipeline for testing
+manifest {
+ nextflowVersion = '!>=23.04.0'
+}
+
+// Disable all Nextflow reporting options
+timeline { enabled = false }
+report { enabled = false }
+trace { enabled = false }
+dag { enabled = false }
+
+process {
+ // CPU requests per process label; none exceeds the max_cpus value of 4 set above
+ withLabel: 'process_high' {
+ cpus = 4
+ }
+
+ withLabel: 'process_medium' {
+ cpus = 2
+ }
+
+ withLabel: 'process_low' {
+ cpus = 1
+ }
+
+ withLabel: 'process_single' {
+ cpus = 1
+ }
+ // Tool arguments (ext.args) for individual processes, selected by regex on the fully qualified process name
+ withName: '.*:NALLO:ASSEMBLY:HIFIASM' {
+
+ ext.args = '-f0'
+ }
+
+ withName: '.*:CALL_PARALOGS:PARAPHASE' {
+
+ ext.args = '--gene hba'
+ }
+
+ withName: '.*:NALLO:PHASING:WHATSHAP_PHASE' {
+
+ ext.args = '--ignore-read-groups --indels --distrust-genotypes --include-homozygous'
+ }
+ // VEP: output prefix is derived from meta.id; the options are kept as a list and joined into one space-separated string
+ withName: '.*:NALLO:SNV_ANNOTATION:ENSEMBLVEP_VEP' {
+ ext.prefix = { "${meta.id}_vep" }
+ ext.args = { [
+ "--dir_plugins .",
+ "--plugin LoFtool,LoFtool_scores.txt",
+ "--plugin pLI,pLI_values.txt",
+ "--plugin SpliceAI,snv=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz",
+ '--distance 5000',
+ '--buffer_size 20000',
+ '--format vcf --max_sv_size 248387328',
+ '--appris --biotype --cache --canonical --ccds --compress_output bgzip',
+ '--domains --exclude_predicted --force_overwrite',
+ '--hgvs --humdiv --no_progress --numbers',
+ '--polyphen p --protein --offline --regulatory --sift p --symbol --tsl',
+ '--uniprot --vcf',
+ '--no_stats'
+ ].join(' ') }
+ }
+
+}
diff --git a/workflows/nallo.nf b/workflows/nallo.nf
index 9547c48f..ebf26251 100644
--- a/workflows/nallo.nf
+++ b/workflows/nallo.nf
@@ -6,20 +6,24 @@ include { fromSamplesheet } from 'plugin/nf-validation'
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome'
-include { BAM_TO_FASTQ } from '../subworkflows/local/bam_to_fastq'
-include { BAM_INFER_SEX } from '../subworkflows/local/bam_infer_sex'
-include { ASSEMBLY } from '../subworkflows/local/genome_assembly'
-include { ASSEMBLY_VARIANT_CALLING } from '../subworkflows/local/assembly_variant_calling'
-include { CALL_PARALOGS } from '../subworkflows/local/call_paralogs'
-include { QC_ALIGNED_READS } from '../subworkflows/local/qc_aligned_reads'
-include { STRUCTURAL_VARIANT_CALLING } from '../subworkflows/local/structural_variant_calling'
-include { SHORT_VARIANT_CALLING } from '../subworkflows/local/short_variant_calling'
-include { CNV } from '../subworkflows/local/cnv'
-include { REPEAT_ANALYSIS } from '../subworkflows/local/repeat_analysis'
-include { METHYLATION } from '../subworkflows/local/methylation'
-include { PHASING } from '../subworkflows/local/phasing'
-include { SNV_ANNOTATION } from '../subworkflows/local/snv_annotation'
+include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/annotate_consequence_pli'
+include { ANNOTATE_REPEAT_EXPANSIONS } from '../subworkflows/local/annotate_repeat_expansions'
+include { ASSEMBLY } from '../subworkflows/local/genome_assembly'
+include { ASSEMBLY_VARIANT_CALLING } from '../subworkflows/local/assembly_variant_calling'
+include { CONVERT_INPUT_FILES } from '../subworkflows/local/convert_input_files'
+include { BAM_INFER_SEX } from '../subworkflows/local/bam_infer_sex'
+include { CALL_PARALOGS } from '../subworkflows/local/call_paralogs'
+include { CALL_REPEAT_EXPANSIONS } from '../subworkflows/local/call_repeat_expansions'
+include { CNV } from '../subworkflows/local/cnv'
+include { METHYLATION } from '../subworkflows/local/methylation'
+include { PHASING } from '../subworkflows/local/phasing'
+include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome'
+include { QC_ALIGNED_READS } from '../subworkflows/local/qc_aligned_reads'
+include { RANK_VARIANTS as RANK_VARIANTS_SNV } from '../subworkflows/local/rank_variants'
+include { SCATTER_GENOME } from '../subworkflows/local/scatter_genome'
+include { SHORT_VARIANT_CALLING } from '../subworkflows/local/short_variant_calling'
+include { SNV_ANNOTATION } from '../subworkflows/local/snv_annotation'
+include { STRUCTURAL_VARIANT_CALLING } from '../subworkflows/local/structural_variant_calling'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -28,22 +32,25 @@ include { SNV_ANNOTATION } from '../subworkflows/local/snv_annotatio
*/
// local
-include { FQCRS } from '../modules/local/fqcrs'
-include { CONVERT_ONT_READ_NAMES } from '../modules/local/convert_ont_read_names'
-include { BUILD_INTERVALS } from '../modules/local/build_intervals/main'
-include { SPLIT_BED_CHUNKS } from '../modules/local/split_bed_chunks/main'
-include { SAMTOOLS_MERGE } from '../modules/nf-core/samtools/merge/main'
+include { CREATE_PEDIGREE_FILE as SAMPLESHEET_PED } from '../modules/local/create_pedigree_file'
+include { CREATE_PEDIGREE_FILE as SOMALIER_PED } from '../modules/local/create_pedigree_file'
+include { ECHTVAR_ENCODE } from '../modules/local/echtvar/encode/main'
+include { FQCRS } from '../modules/local/fqcrs'
+include { SAMTOOLS_MERGE } from '../modules/nf-core/samtools/merge/main'
// nf-core
-include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/'
-include { FASTQC } from '../modules/nf-core/fastqc/main'
-include { FASTP } from '../modules/nf-core/fastp/main'
-include { MINIMAP2_ALIGN } from '../modules/nf-core/minimap2/align/main'
-include { MULTIQC } from '../modules/nf-core/multiqc/main'
-include { paramsSummaryMap } from 'plugin/nf-validation'
-include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
-include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
-include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_nallo_pipeline'
+include { BCFTOOLS_CONCAT } from '../modules/nf-core/bcftools/concat/main'
+include { BCFTOOLS_PLUGINSPLIT } from '../modules/nf-core/bcftools/pluginsplit/main'
+include { BCFTOOLS_STATS } from '../modules/nf-core/bcftools/stats/main'
+include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
+include { FASTQC } from '../modules/nf-core/fastqc/main'
+include { MINIMAP2_ALIGN } from '../modules/nf-core/minimap2/align/main'
+include { MULTIQC } from '../modules/nf-core/multiqc/main'
+include { SPLITUBAM } from '../modules/nf-core/splitubam/main'
+include { paramsSummaryMap } from 'plugin/nf-validation'
+include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_nallo_pipeline'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -57,124 +64,146 @@ workflow NALLO {
ch_input
main:
+ ch_vep_cache = Channel.value([])
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()
- // Optional input files
- ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { it -> [it.simpleName, it] }.collect()
- : ''
- ch_extra_snfs = params.extra_snfs ? Channel.fromSamplesheet('extra_snfs')
- : Channel.empty()
- ch_extra_gvcfs = params.extra_gvcfs ? Channel.fromSamplesheet('extra_gvcfs')
- : Channel.empty()
- ch_tandem_repeats = params.tandem_repeats ? Channel.fromPath(params.tandem_repeats).map{ [ it.getSimpleName(), it]}.collect()
- : Channel.value([[],[]])
- ch_bed = params.bed ? Channel.fromPath(params.bed).map{ [ it.getSimpleName(), it]}.collect()
- : Channel.empty()
- ch_input_bed = params.bed ? Channel.fromPath(params.bed).map{ [ it.getSimpleName(), it]}.collect()
- : Channel.value([[],[]])
-
- // Conditional input files that has to be set depending on which workflow is run
- ch_par = params.dipcall_par ? Channel.fromPath(params.dipcall_par).collect()
- : ''
- ch_trgt_bed = params.trgt_repeats ? Channel.fromPath(params.trgt_repeats).collect()
- : ''
- ch_databases = params.snp_db ? Channel.fromSamplesheet('snp_db', immutable_meta: false).map{it[1]}.collect()
- : ''
- ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [[id:'vep_cache'], it] }.collect()
- : Channel.value([[],[]])
- ch_expected_xy_bed = params.hificnv_xy ? Channel.fromPath(params.hificnv_xy).collect()
- : ''
- ch_expected_xx_bed = params.hificnv_xx ? Channel.fromPath(params.hificnv_xx).collect()
- : ''
- ch_exclude_bed = params.hificnv_exclude ? Channel.fromPath(params.hificnv_exclude).collect()
- : ''
- ch_somalier_sites = params.somalier_sites ? Channel.fromPath(params.somalier_sites).map { [it.getSimpleName(), it ] }.collect()
- : ''
+ // Optional input files that have to be set depending on which workflow is run
+ ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect()
+ ch_cadd_resources = params.cadd_resources ? Channel.fromPath(params.cadd_resources).collect()
+ : ''
+ ch_cadd_prescored = params.cadd_prescored ? Channel.fromPath(params.cadd_prescored).collect()
+ : ''
+ ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { it -> [ it.simpleName, it ] }.collect()
+ : ''
+ ch_tandem_repeats = params.tandem_repeats ? Channel.fromPath(params.tandem_repeats).map{ [ it.simpleName, it ] }.collect()
+ : Channel.value([[],[]])
+ ch_input_bed = params.bed ? Channel.fromPath(params.bed).map{ [ [ id:it.simpleName ] , it ] }.collect()
+ : Channel.value([[],[]])
+ ch_par = params.par_regions ? Channel.fromPath(params.par_regions).map { [ [ id: it.simpleName ], it ] }.collect()
+ : ''
+ ch_trgt_bed = params.trgt_repeats ? Channel.fromPath(params.trgt_repeats).map { it -> [ it.simpleName, it ] }.collect()
+ : ''
+ ch_variant_catalog = params.variant_catalog ? Channel.fromPath(params.variant_catalog).map { it -> [ it.simpleName, it ] }.collect()
+ : ''
+ ch_databases = params.snp_db ? Channel.fromSamplesheet('snp_db', immutable_meta: false).map{ it[1] }.collect()
+ : ''
+ ch_variant_consequences_snv = params.variant_consequences_snv ? Channel.fromPath(params.variant_consequences_snv).collect()
+ : Channel.value([])
+ ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [ [ id:'vep_cache' ], it ] }.collect()
+ : Channel.value([[],[]])
+ ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect()
+ : ''
+ ch_expected_xy_bed = params.hificnv_xy ? Channel.fromPath(params.hificnv_xy).collect()
+ : ''
+ ch_expected_xx_bed = params.hificnv_xx ? Channel.fromPath(params.hificnv_xx).collect()
+ : ''
+ ch_exclude_bed = params.hificnv_exclude ? Channel.fromPath(params.hificnv_exclude).collect()
+ : ''
+ ch_reduced_penetrance = params.reduced_penetrance ? Channel.fromPath(params.reduced_penetrance).collect()
+ : Channel.value([])
+ ch_score_config_snv = params.score_config_snv ? Channel.fromPath(params.score_config_snv).collect()
+ : Channel.value([])
+ ch_somalier_sites = params.somalier_sites ? Channel.fromPath(params.somalier_sites).map { [ it.simpleName, it ] }.collect()
+ : ''
// Check parameter that doesn't conform to schema validation here
- if (params.split_fastq != 0 && (params.split_fastq < 2 || params.split_fastq > 999 )) { exit 1, '--split_fastq must be 0, or between 2 and 999'}
- if (params.parallel_snv == 0 ) { exit 1, '--parallel_snv must be > 0'}
-
- // Create PED from samplesheet
- ch_pedfile = ch_input.toList().map { file(CustomFunctions.makePed(it, params.outdir)) }
+ if (params.phaser.matches('hiphase_sv|hiphase_snv') && params.preset == 'ONT_R10') { error "The HiPhase license only permits analysis of data from PacBio. For details see: https://github.com/PacificBiosciences/HiPhase/blob/main/LICENSE.md" }
+
+ // Read and store paths in the vep_plugin_files file
+ if (params.vep_plugin_files) {
+ ch_vep_extra_files_unsplit.splitCsv ( header:true )
+ .map { row ->
+ path = file(row.vep_files[0])
+ if(path.isFile() || path.isDirectory()){
+ return [path]
+ } else {
+ error("\nVep database file ${path} does not exist.")
+ }
+ }
+ .collect()
+ .set {ch_vep_extra_files}
+ }
//
- // Main workflow
+ // Convert BAM files to FASTQ and vice versa
//
- BAM_TO_FASTQ ( ch_input )
- ch_versions = ch_versions.mix(BAM_TO_FASTQ.out.versions)
-
- BAM_TO_FASTQ.out.fastq
- .set { ch_sample }
+ CONVERT_INPUT_FILES ( ch_input )
+ ch_versions = ch_versions.mix(CONVERT_INPUT_FILES.out.versions)
+ //
+ // Run raw (unaligned) read QC with FastQC and fqcrs
+ //
if(!params.skip_raw_read_qc) {
- // Cat samples with multiple input files before QC - still not ideal
- ch_sample
+ // Combine samples with multiple input files before QC - not ideal
+ CONVERT_INPUT_FILES.out.fastq
.groupTuple()
.branch { meta, reads ->
single: reads.size() == 1
return [ meta, reads[0] ]
multiple: reads.size() > 1
}
- .set { ch_sample_reads }
+ .set { ch_fastq }
- CAT_FASTQ ( ch_sample_reads.multiple )
+ CAT_FASTQ ( ch_fastq.multiple )
ch_versions = ch_versions.mix(CAT_FASTQ.out.versions)
- ch_sample_reads.single
+ ch_fastq.single
.concat ( CAT_FASTQ.out.reads )
.set { raw_read_qc_in }
- FASTQC( raw_read_qc_in )
+ FASTQC ( raw_read_qc_in )
ch_versions = ch_versions.mix(FASTQC.out.versions)
ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([]))
- FQCRS( raw_read_qc_in )
+ FQCRS ( raw_read_qc_in )
ch_versions = ch_versions.mix(FQCRS.out.versions)
}
+ //
+ // Prepare references
+ //
if(!params.skip_mapping_wf | !params.skip_assembly_wf ) {
- // Index genome
- PREPARE_GENOME( ch_fasta, ch_vep_cache_unprocessed )
+
+ PREPARE_GENOME (
+ ch_fasta,
+ ch_vep_cache_unprocessed,
+ )
ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)
+ if(!params.skip_snv_annotation) {
+ if (params.vep_cache) {
+ if (params.vep_cache.endsWith("tar.gz")) {
+ ch_vep_cache = PREPARE_GENOME.out.vep_resources
+ } else {
+ ch_vep_cache = Channel.fromPath(params.vep_cache).collect()
+ }
+ }
+ }
+
// Gather indices
fasta = PREPARE_GENOME.out.fasta
fai = PREPARE_GENOME.out.fai
mmi = PREPARE_GENOME.out.mmi
}
- // Move this inside prepare genome?
-
- // If no BED-file is provided then build intervals from reference
- if(!params.bed) {
- fai
- .map{ name, fai -> [['id':name], fai] }
- .set{ ch_build_intervals_in }
-
- BUILD_INTERVALS( ch_build_intervals_in )
- ch_versions = ch_versions.mix(BUILD_INTERVALS.out.versions)
-
- BUILD_INTERVALS.out.bed
- .set{ ch_bed }
- }
-
+ //
+ // (Split input files and), map reads to reference and merge into a single BAM per sample
+ //
if(!params.skip_mapping_wf) {
- // Split fastq
- if (params.split_fastq > 0) {
+ // Split input files for alignment
+ if (params.parallel_alignments > 1) {
- FASTP( ch_sample, [], [], [] )
- ch_versions = ch_versions.mix(FASTP.out.versions)
+ SPLITUBAM ( CONVERT_INPUT_FILES.out.bam )
+ ch_versions = ch_versions.mix(SPLITUBAM.out.versions)
- reads_for_alignment = FASTP.out.reads.transpose()
+ reads_for_alignment = SPLITUBAM.out.bam.transpose()
} else {
- reads_for_alignment = ch_sample
+ reads_for_alignment = CONVERT_INPUT_FILES.out.bam
}
-
// Align (split) reads
MINIMAP2_ALIGN ( reads_for_alignment, mmi, true, 'bai', false, false )
ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions)
@@ -194,36 +223,89 @@ workflow NALLO {
}
.set { bam_to_merge }
- // Merge files if we have mutiple files per sample
- SAMTOOLS_MERGE( bam_to_merge.multiple.map { meta, bam, bai -> [ meta, bam ] }, [[],[]], [[],[]], 'bai' )
+ // Merge files if we have multiple files per sample
+ SAMTOOLS_MERGE ( bam_to_merge.multiple.map { meta, bam, bai -> [ meta, bam ] }, [[],[]], [[],[]], 'bai' )
ch_versions = ch_versions.mix(SAMTOOLS_MERGE.out.versions)
// Combine merged with unmerged bams
SAMTOOLS_MERGE.out.bam
.join(SAMTOOLS_MERGE.out.index)
- .concat( bam_to_merge.single )
+ .concat(bam_to_merge.single)
+ .map { meta, bam, bai -> [ meta - meta.subMap('n_files'), bam, bai ] }
.set { bam_infer_sex_in }
- // Infer sex if sex unknown
- BAM_INFER_SEX ( bam_infer_sex_in, fasta, fai, ch_somalier_sites, ch_pedfile )
+ //
+ // Create PED from samplesheet
+ //
+ ch_input
+ .map { meta, files -> [ meta.project, meta ] }
+ .groupTuple()
+ .set { ch_samplesheet_ped_in }
+
+ SAMPLESHEET_PED ( ch_samplesheet_ped_in )
+ ch_versions = ch_versions.mix(SAMPLESHEET_PED.out.versions)
+
+ SAMPLESHEET_PED.out.ped
+ .map { project, ped -> [ [ 'id': project ], ped ] }
+ .collect()
+ .set { ch_samplesheet_pedfile }
+
+ //
+ // Check sex and relatedness, and update with inferred sex if the sex for a sample is unknown
+ //
+ BAM_INFER_SEX ( bam_infer_sex_in, fasta, fai, ch_somalier_sites, ch_samplesheet_pedfile )
+ ch_versions = ch_versions.mix(BAM_INFER_SEX.out.versions)
+
ch_multiqc_files = ch_multiqc_files.mix(BAM_INFER_SEX.out.somalier_samples.map{it[1]}.collect().ifEmpty([]))
ch_multiqc_files = ch_multiqc_files.mix(BAM_INFER_SEX.out.somalier_pairs.map{it[1]}.collect().ifEmpty([]))
- ch_versions = ch_versions.mix(BAM_INFER_SEX.out.versions)
bam = BAM_INFER_SEX.out.bam
bai = BAM_INFER_SEX.out.bai
bam_bai = BAM_INFER_SEX.out.bam_bai
- // Only compatible with hg38 (and a few hg19 genes)
+ //
+ // Create PED with updated sex
+ //
+ bam
+ .map { meta, files -> [ meta.project, meta ] }
+ .groupTuple()
+ .set { ch_somalier_ped_in }
+
+ SOMALIER_PED ( ch_somalier_ped_in )
+ ch_versions = ch_versions.mix(SOMALIER_PED.out.versions)
+
+ SOMALIER_PED.out.ped
+ .map { project, ped -> [ [ 'id': project ], ped ] }
+ .collect()
+ .set { ch_updated_pedfile }
+
+ //
+ // Run aligned read QC with mosdepth and cramino
+ //
+ if (!params.skip_aligned_read_qc) {
+ QC_ALIGNED_READS( bam_bai, fasta, ch_input_bed )
+ ch_versions = ch_versions.mix(QC_ALIGNED_READS.out.versions)
+
+ ch_multiqc_files = ch_multiqc_files.mix( QC_ALIGNED_READS.out.mosdepth_summary.collect { it[1] } )
+ ch_multiqc_files = ch_multiqc_files.mix( QC_ALIGNED_READS.out.mosdepth_global_dist.collect { it[1] } )
+ ch_multiqc_files = ch_multiqc_files.mix( QC_ALIGNED_READS.out.mosdepth_region_dist.collect { it[1] }.ifEmpty([]) )
+ }
+
+ //
+ // Call paralogous genes with paraphase
+ //
if(!params.skip_call_paralogs) {
CALL_PARALOGS ( bam_bai, fasta )
+ ch_versions = ch_versions.mix(CALL_PARALOGS.out.versions)
}
- // Assembly workflow
+ //
+ // Hifiasm assembly and assembly variant calling
+ //
if(!params.skip_assembly_wf) {
//Hifiasm assembly
- ASSEMBLY( ch_sample )
+ ASSEMBLY( CONVERT_INPUT_FILES.out.fastq )
ch_versions = ch_versions.mix(ASSEMBLY.out.versions)
// Update assembly variant calling meta with sex from somalier
@@ -242,109 +324,182 @@ workflow NALLO {
ch_versions = ch_versions.mix(ASSEMBLY_VARIANT_CALLING.out.versions)
}
- // TODO: parallel_snv should only be allowed when snv calling is active
- // TODO: move inside PREPARE GENOME, but only run if(parallel_snv > 1)
- // Split BED/Genome into equal chunks
- // 13 is a good number since no bin is larger than chr1 & it will not overload SLURM
-
- SPLIT_BED_CHUNKS(ch_bed, params.parallel_snv)
- ch_versions = ch_versions.mix(SPLIT_BED_CHUNKS.out.versions)
-
- // Combine to create a bam_bai - chunk pair for each sample
- // Do this here, pre-process or inside SNV-calling?
- bam_bai
- .combine(SPLIT_BED_CHUNKS.out
- .split_beds
- .flatten())
- .set{ ch_snv_calling_in }
-
- QC_ALIGNED_READS( bam_bai, fasta, ch_input_bed )
- ch_versions = ch_versions.mix(QC_ALIGNED_READS.out.versions)
-
+ //
// Call SVs with Sniffles2
- STRUCTURAL_VARIANT_CALLING( bam_bai , ch_extra_snfs, fasta, fai, ch_tandem_repeats )
+ STRUCTURAL_VARIANT_CALLING( bam_bai, fasta, fai, ch_tandem_repeats )
ch_versions = ch_versions.mix(STRUCTURAL_VARIANT_CALLING.out.versions)
+ //
+ // Call (and annotate and rank) SNVs
+ //
if(!params.skip_short_variant_calling) {
- // Call SNVs with DeepVariant/DeepTrio
- SHORT_VARIANT_CALLING( ch_snv_calling_in , ch_extra_gvcfs, fasta, fai, ch_bed )
+
+ //
+ // Make BED intervals, to be used for parallel SNV calling
+ //
+ SCATTER_GENOME (
+ fai,
+ ch_input_bed,
+ !params.bed,
+ !params.skip_short_variant_calling,
+ params.parallel_snv
+ )
+ ch_versions = ch_versions.mix(SCATTER_GENOME.out.versions)
+
+ // Combine to create a bam_bai - interval pair for each sample
+ bam_bai
+ .combine( SCATTER_GENOME.out.bed_intervals )
+ .map { meta, bam, bai, bed, intervals ->
+ [ meta + [ num_intervals: intervals ], bam, bai, bed ]
+ }
+ .set{ ch_snv_calling_in }
+
+ //
+ // This subworkflow calls SNVs with DeepVariant and outputs:
+ // 1. A merged and normalised VCF, containing one sample with all regions, to be used in downstream subworkflows requiring SNVs.
+ // 2. A merged and normalised VCF, containing one region with all samples, to be used in annotation and ranking.
+ //
+ SHORT_VARIANT_CALLING( ch_snv_calling_in, fasta, fai, SCATTER_GENOME.out.bed, ch_par )
ch_versions = ch_versions.mix(SHORT_VARIANT_CALLING.out.versions)
+ //
+ // Annotate SNVs
+ //
if(!params.skip_snv_annotation) {
- def ch_vep_cache
-
- if (params.vep_cache) {
- if (params.vep_cache.endsWith("tar.gz")) {
- ch_vep_cache = PREPARE_GENOME.out.vep_resources
- } else {
- ch_vep_cache = Channel.fromPath(params.vep_cache).collect()
- }
- } else {
- ch_vep_cache = Channel.value([])
- }
-
+ //
+ // Annotates one multisample VCF per variant call region
+ //
SNV_ANNOTATION(
SHORT_VARIANT_CALLING.out.combined_bcf,
- SHORT_VARIANT_CALLING.out.snp_calls_vcf,
ch_databases,
fasta,
+ fai.map { name, fai -> [ [ id: name ], fai ] },
ch_vep_cache,
- params.vep_cache_version
+ params.vep_cache_version,
+ ch_vep_extra_files,
+ (params.cadd_resources && params.cadd_prescored),
+ ch_cadd_header,
+ ch_cadd_resources,
+ ch_cadd_prescored
)
ch_versions = ch_versions.mix(SNV_ANNOTATION.out.versions)
+
+ ANN_CSQ_PLI_SNV (
+ SNV_ANNOTATION.out.vcf,
+ ch_variant_consequences_snv
+ )
+ ch_versions = ch_versions.mix(ANN_CSQ_PLI_SNV.out.versions)
+
+ //
+ // Ranks one multisample VCF per variant call region
+ //
+ if(!params.skip_rank_variants) {
+ // Only run if we have affected individuals
+ RANK_VARIANTS_SNV (
+ ANN_CSQ_PLI_SNV.out.vcf_ann.filter { meta, vcf -> meta.contains_affected },
+ ch_updated_pedfile.map { meta, ped -> ped },
+ ch_reduced_penetrance,
+ ch_score_config_snv
+ )
+ ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions)
+
+ // If there are affected individuals and RANK_VARIANTS has been run,
+ // input that to VCF concatenation
+ RANK_VARIANTS_SNV.out.vcf
+ .join( RANK_VARIANTS_SNV.out.tbi )
+ .set { ch_vcf_tbi_per_region }
+ } else {
+ // otherwise grab the VCF that should have gone into RANK_VARIANTS
+ ANN_CSQ_PLI_SNV.out.vcf_ann
+ .join( ANN_CSQ_PLI_SNV.out.tbi_ann )
+ .set { ch_vcf_tbi_per_region }
+ }
+ } else {
+ // If neither snv_annotation nor rank_variants was run, take the output from
+ // SHORT_VARIANT_CALLING
+ SHORT_VARIANT_CALLING.out.combined_bcf
+ .join( SHORT_VARIANT_CALLING.out.combined_csi )
+ .set { ch_vcf_tbi_per_region }
}
- if(params.preset != 'ONT_R10') {
+ ch_vcf_tbi_per_region
+ .map { meta, vcf, tbi -> [ [ id: meta.project ], vcf, tbi ] }
+ .groupTuple()
+ .set { ch_bcftools_concat_in }
+
+ // Concat into a multisample VCF with all regions and publish
+ BCFTOOLS_CONCAT ( ch_bcftools_concat_in )
+ ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions)
+
+ // Make an echtvar database of all samples
+ ECHTVAR_ENCODE ( BCFTOOLS_CONCAT.out.vcf )
+ ch_versions = ch_versions.mix(ECHTVAR_ENCODE.out.versions)
+
+ // Split multisample VCF to also publish a VCF per sample
+ BCFTOOLS_PLUGINSPLIT ( BCFTOOLS_CONCAT.out.vcf.join(BCFTOOLS_CONCAT.out.tbi ), [], [], [], [] )
+ ch_versions = ch_versions.mix(BCFTOOLS_PLUGINSPLIT.out.versions)
+
+ BCFTOOLS_PLUGINSPLIT.out.vcf
+ .transpose()
+ .map { meta, vcf -> [ meta, vcf, [] ] }
+ .set { ch_bcftools_stats_snv_in }
+
+ BCFTOOLS_STATS ( ch_bcftools_stats_snv_in, [[],[]], [[],[]], [[],[]], [[],[]], [[],[]] )
+ ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions)
+ ch_multiqc_files = ch_multiqc_files.mix(BCFTOOLS_STATS.out.stats.collect{it[1]}.ifEmpty([]))
+ //
+ // Call CNVs with HiFiCNV
+ //
+ if(!params.skip_cnv_calling) {
bam_bai
.join(SHORT_VARIANT_CALLING.out.snp_calls_vcf)
- .groupTuple()
.set { cnv_workflow_in }
- if(!params.skip_cnv_calling) {
- CNV(cnv_workflow_in, fasta, ch_expected_xy_bed, ch_expected_xx_bed, ch_exclude_bed)
- ch_versions = ch_versions.mix(CNV.out.versions)
- }
+ CNV(cnv_workflow_in, fasta, ch_expected_xy_bed, ch_expected_xx_bed, ch_exclude_bed)
+ ch_versions = ch_versions.mix(CNV.out.versions)
}
-
-
+ //
+ // Phase SNVs and INDELs
+ //
if(!params.skip_phasing_wf) {
- // Phase variants with WhatsHap
+
PHASING( SHORT_VARIANT_CALLING.out.snp_calls_vcf, STRUCTURAL_VARIANT_CALLING.out.ch_sv_calls_vcf, bam_bai, fasta, fai)
ch_versions = ch_versions.mix(PHASING.out.versions)
- hap_bam_bai = PHASING.out.haplotagged_bam_bai
+ ch_multiqc_files = ch_multiqc_files.mix(PHASING.out.stats.collect{it[1]}.ifEmpty([]))
+ //
+ // Create methylation pileups with modkit
+ //
if(!params.skip_methylation_wf) {
- // Pileup methylation with modkit
- METHYLATION( hap_bam_bai, fasta, fai, ch_bed )
+
+ METHYLATION( PHASING.out.haplotagged_bam_bai, fasta, fai, ch_input_bed )
ch_versions = ch_versions.mix(METHYLATION.out.versions)
}
- if(!params.skip_repeat_wf) {
- // Repeat analysis with TRGT
+ //
+ // Call repeat expansions with TRGT
+ //
+ if(!params.skip_repeat_calling) {
- // Hack read names
- if (params.preset == "ONT_R10") {
- CONVERT_ONT_READ_NAMES(hap_bam_bai)
- ch_versions = ch_versions.mix(CONVERT_ONT_READ_NAMES.out.versions)
+ CALL_REPEAT_EXPANSIONS ( PHASING.out.haplotagged_bam_bai, fasta, fai, ch_trgt_bed )
+ ch_versions = ch_versions.mix(CALL_REPEAT_EXPANSIONS.out.versions)
- ch_repeat_analysis_in = CONVERT_ONT_READ_NAMES.out.bam_bai
- } else {
- ch_repeat_analysis_in = hap_bam_bai
+ //
+ // Annotate repeat expansions with stranger
+ //
+ if(!params.skip_repeat_annotation) {
+ ANNOTATE_REPEAT_EXPANSIONS ( ch_variant_catalog, CALL_REPEAT_EXPANSIONS.out.vcf )
+ ch_versions = ch_versions.mix(ANNOTATE_REPEAT_EXPANSIONS.out.versions)
}
-
- REPEAT_ANALYSIS( ch_repeat_analysis_in, fasta, fai, ch_trgt_bed )
- ch_versions = ch_versions.mix(REPEAT_ANALYSIS.out.versions)
}
}
}
}
- ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}ifEmpty([]))
-
//
// Collate and save software versions
//
|