```
## Presets
@@ -139,7 +135,7 @@ cadd,/path/to/cadd.v1.6.hg38.zip
- By default SNV-calling is split into 13 parallel processes, limit this by setting `--parallel_snv` to a different number.
-- By default the pipeline does not perform parallel alignment, but this can be set by setting `--split_fastq` to split the input and alignment into N files/processes.
+- By default the pipeline does not perform parallel alignment, but this can be enabled by setting `--parallel_alignmentss` to split the input and alignment into N files/processes.
All parameters are listed below:
@@ -155,7 +151,7 @@ Options to skip various steps within the workflow
| `skip_assembly_wf` | Skip assembly and downstream processes | `boolean` | `False` | | |
| `skip_mapping_wf` | Skip read mapping and downstream processes | `boolean` | `False` | | |
| `skip_methylation_wf` | Skip methylation workflow | `boolean` | `False` | | |
-| `skip_repeat_calling | Skip repeat calling workflow | `boolean` | `False` | | |
+| `skip_repeat_calling` | Skip repeat calling workflow | `boolean` | `False` | | |
| `skip_repeat_annotation` | Skip repeat annotation workflow | `boolean` | `False` | | |
| `skip_phasing_wf` | Skip phasing workflow | `boolean` | `False` | | |
| `skip_snv_annotation` | Skip SNV annotation | `boolean` | `False` | | |
@@ -231,14 +227,14 @@ Less common options for the pipeline, typically set in a config file.
## Workflow options
-| Parameter | Description | Type | Default | Required | Hidden |
-| ---------------- | ------------------------------------------ | --------- | ----------- | -------- | ------ |
-| `preset` | Choose a preset depending on data type | `string` | revio | True | |
-| `variant_caller` | Choose variant caller | `string` | deepvariant | | |
-| `phaser` | Choose phasing software | `string` | whatshap | | |
-| `hifiasm_mode` | Run hifiasm in hifi-only or hifi-trio mode | `string` | hifi-only | | |
-| `split_fastq` | Split alignment into n jobs | `integer` | 0 | | |
-| `parallel_snv` | Split SNV calling into n chunks | `integer` | 13 | | |
+| Parameter | Description | Type | Default | Required | Hidden |
+| ---------------------- | ------------------------------------------- | --------- | ----------- | -------- | ------ |
+| `preset` | Choose a preset depending on data type | `string` | revio | True | |
+| `variant_caller` | Choose variant caller | `string` | deepvariant | | |
+| `phaser` | Choose phasing software | `string` | whatshap | | |
+| `hifiasm_mode` | Run hifiasm in hifi-only or hifi-trio mode | `string` | hifi-only | | |
+| `parallel_alignmentss` | Split alignment into n processes per sample | `integer` | 1 | | |
+| `parallel_snv` | Split SNV calling into n chunks | `integer` | 13 | | |
## Extra file inputs
diff --git a/modules.json b/modules.json
index b7708876..feebf0f4 100644
--- a/modules.json
+++ b/modules.json
@@ -85,11 +85,6 @@
"git_sha": "e1938f8ecb13b9b82ff488d4b35269c84aa965e8",
"installed_by": ["modules"]
},
- "fastp": {
- "branch": "master",
- "git_sha": "95cf5fe0194c7bf5cb0e3027a2eb7e7c89385080",
- "installed_by": ["modules"]
- },
"fastqc": {
"branch": "master",
"git_sha": "46eca555142d6e597729fcb682adcc791796f514",
@@ -180,6 +175,11 @@
"git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773",
"installed_by": ["modules"]
},
+ "samtools/import": {
+ "branch": "master",
+ "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
+ "installed_by": ["modules"]
+ },
"samtools/index": {
"branch": "master",
"git_sha": "46eca555142d6e597729fcb682adcc791796f514",
@@ -217,6 +217,11 @@
"installed_by": ["modules"],
"patch": "modules/nf-core/somalier/relate/somalier-relate.diff"
},
+ "splitubam": {
+ "branch": "master",
+ "git_sha": "d12bb6cdcd22ef8f26803a6a55e918ac017176e8",
+ "installed_by": ["modules"]
+ },
"stranger": {
"branch": "master",
"git_sha": "4806239588f35d27a95b187b4000d80e15152022",
diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml
deleted file mode 100644
index 70389e66..00000000
--- a/modules/nf-core/fastp/environment.yml
+++ /dev/null
@@ -1,7 +0,0 @@
-name: fastp
-channels:
- - conda-forge
- - bioconda
- - defaults
-dependencies:
- - bioconda::fastp=0.23.4
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
deleted file mode 100644
index 4fc19b74..00000000
--- a/modules/nf-core/fastp/main.nf
+++ /dev/null
@@ -1,120 +0,0 @@
-process FASTP {
- tag "$meta.id"
- label 'process_medium'
-
- conda "${moduleDir}/environment.yml"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' :
- 'biocontainers/fastp:0.23.4--h5f740d0_0' }"
-
- input:
- tuple val(meta), path(reads)
- path adapter_fasta
- val save_trimmed_fail
- val save_merged
-
- output:
- tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads
- tuple val(meta), path('*.json') , emit: json
- tuple val(meta), path('*.html') , emit: html
- tuple val(meta), path('*.log') , emit: log
- path "versions.yml" , emit: versions
- tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail
- tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
- def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
- def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
- // Added soft-links to original fastqs for consistent naming in MultiQC
- // Use single ended for interleaved. Add --interleaved_in in config.
- if ( task.ext.args?.contains('--interleaved_in') ) {
- """
- [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
-
- fastp \\
- --stdout \\
- --in1 ${prefix}.fastq.gz \\
- --thread $task.cpus \\
- --json ${prefix}.fastp.json \\
- --html ${prefix}.fastp.html \\
- $adapter_list \\
- $fail_fastq \\
- $args \\
- 2> >(tee ${prefix}.fastp.log >&2) \\
- | gzip -c > ${prefix}.fastp.fastq.gz
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
- END_VERSIONS
- """
- } else if (meta.single_end) {
- """
- [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz
-
- fastp \\
- --in1 ${prefix}.fastq.gz \\
- --out1 ${prefix}.fastp.fastq.gz \\
- --thread $task.cpus \\
- --json ${prefix}.fastp.json \\
- --html ${prefix}.fastp.html \\
- $adapter_list \\
- $fail_fastq \\
- $args \\
- 2> >(tee ${prefix}.fastp.log >&2)
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
- END_VERSIONS
- """
- } else {
- def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : ''
- """
- [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz
- [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz
- fastp \\
- --in1 ${prefix}_1.fastq.gz \\
- --in2 ${prefix}_2.fastq.gz \\
- --out1 ${prefix}_1.fastp.fastq.gz \\
- --out2 ${prefix}_2.fastp.fastq.gz \\
- --json ${prefix}.fastp.json \\
- --html ${prefix}.fastp.html \\
- $adapter_list \\
- $fail_fastq \\
- $merge_fastq \\
- --thread $task.cpus \\
- --detect_adapter_for_pe \\
- $args \\
- 2> >(tee ${prefix}.fastp.log >&2)
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
- END_VERSIONS
- """
- }
-
- stub:
- def prefix = task.ext.prefix ?: "${meta.id}"
- def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end
- def touch_reads = is_single_output ? "${prefix}.fastp.fastq.gz" : "${prefix}_1.fastp.fastq.gz ${prefix}_2.fastp.fastq.gz"
- def touch_merged = (!is_single_output && save_merged) ? "touch ${prefix}.merged.fastq.gz" : ""
- """
- touch $touch_reads
- touch "${prefix}.fastp.json"
- touch "${prefix}.fastp.html"
- touch "${prefix}.fastp.log"
- $touch_merged
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g")
- END_VERSIONS
- """
-}
diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml
deleted file mode 100644
index c22a16ab..00000000
--- a/modules/nf-core/fastp/meta.yml
+++ /dev/null
@@ -1,75 +0,0 @@
-name: fastp
-description: Perform adapter/quality trimming on sequencing reads
-keywords:
- - trimming
- - quality control
- - fastq
-tools:
- - fastp:
- description: |
- A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance.
- documentation: https://github.com/OpenGene/fastp
- doi: 10.1093/bioinformatics/bty560
- licence: ["MIT"]
-input:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads.
- e.g. [ id:'test', single_end:false ]
- - reads:
- type: file
- description: |
- List of input FastQ files of size 1 and 2 for single-end and paired-end data,
- respectively. If you wish to run interleaved paired-end data, supply as single-end data
- but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module.
- - adapter_fasta:
- type: file
- description: File in FASTA format containing possible adapters to remove.
- pattern: "*.{fasta,fna,fas,fa}"
- - save_trimmed_fail:
- type: boolean
- description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz`
- - save_merged:
- type: boolean
- description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz`
-output:
- - meta:
- type: map
- description: |
- Groovy Map containing sample information
- e.g. [ id:'test', single_end:false ]
- - reads:
- type: file
- description: The trimmed/modified/unmerged fastq reads
- pattern: "*fastp.fastq.gz"
- - json:
- type: file
- description: Results in JSON format
- pattern: "*.json"
- - html:
- type: file
- description: Results in HTML format
- pattern: "*.html"
- - log:
- type: file
- description: fastq log file
- pattern: "*.log"
- - versions:
- type: file
- description: File containing software versions
- pattern: "versions.yml"
- - reads_fail:
- type: file
- description: Reads the failed the preprocessing
- pattern: "*fail.fastq.gz"
- - reads_merged:
- type: file
- description: Reads that were successfully merged
- pattern: "*.{merged.fastq.gz}"
-authors:
- - "@drpatelh"
- - "@kevinmenden"
-maintainers:
- - "@drpatelh"
- - "@kevinmenden"
diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test
deleted file mode 100644
index 6f1f4897..00000000
--- a/modules/nf-core/fastp/tests/main.nf.test
+++ /dev/null
@@ -1,725 +0,0 @@
-nextflow_process {
-
- name "Test Process FASTP"
- script "../main.nf"
- process "FASTP"
- tag "modules"
- tag "modules_nfcore"
- tag "fastp"
-
- test("test_fastp_single_end") {
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:true ],
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ "Q20 bases:12.922000 K (92.984097%)",
- "single end (151 cycles)" ]
- def log_text = [ "Q20 bases: 12922(92.9841%)",
- "reads passed filter: 99" ]
- def read_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
- "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
- "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
- { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { assert snapshot(process.out.json).match("test_fastp_single_end_json") },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { file(it[1]).getName() } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_single_end-_match")
- },
- { assert snapshot(process.out.versions).match("versions_single_end") }
- )
- }
- }
-
- test("test_fastp_single_end-stub") {
-
- options '-stub'
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:true ],
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
-
- assertAll(
- { assert process.success },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { file(it[1]).getName() } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_single_end-for_stub_match")
- },
- { assert snapshot(process.out.versions).match("versions_single_end_stub") }
- )
- }
- }
-
- test("test_fastp_paired_end") {
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:false ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
- file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
- "The input has little adapter percentage (~0.000000%), probably it's trimmed before."]
- def log_text = [ "No adapter detected for read1",
- "Q30 bases: 12281(88.3716%)"]
- def json_text = ['"passed_filter_reads": 198']
- def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
- "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
- "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
- { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
- }
- },
- { read2_lines.each { read2_line ->
- { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { json_text.each { json_part ->
- { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
- }
- },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_paired_end_match")
- },
- { assert snapshot(process.out.versions).match("versions_paired_end") }
- )
- }
- }
-
- test("test_fastp_paired_end-stub") {
-
- options '-stub'
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:false ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
- file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- assertAll(
- { assert process.success },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_paired_end-for_stub_match")
- },
- { assert snapshot(process.out.versions).match("versions_paired_end-stub") }
- )
- }
- }
-
- test("fastp test_fastp_interleaved") {
-
- config './nextflow.interleaved.config'
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:true ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
- "paired end (151 cycles + 151 cycles)"]
- def log_text = [ "Q20 bases: 12922(92.9841%)",
- "reads passed filter: 162"]
- def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
- "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
- "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
- { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { assert snapshot(process.out.json).match("fastp test_fastp_interleaved_json") },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { file(it[1]).getName() } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_interleaved-_match")
- },
- { assert snapshot(process.out.versions).match("versions_interleaved") }
- )
- }
- }
-
- test("fastp test_fastp_interleaved-stub") {
-
- options '-stub'
-
- config './nextflow.interleaved.config'
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:true ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- assertAll(
- { assert process.success },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { file(it[1]).getName() } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_interleaved-for_stub_match")
- },
- { assert snapshot(process.out.versions).match("versions_interleaved-stub") }
- )
- }
- }
-
- test("test_fastp_single_end_trim_fail") {
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = true
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:true ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ "Q20 bases: | 12.922000 K (92.984097%)",
- "single end (151 cycles)"]
- def log_text = [ "Q20 bases: 12922(92.9841%)",
- "reads passed filter: 99" ]
- def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
- "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
- "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
- { assert path(process.out.reads.get(0).get(1)).linesGzip.contains(read_line) }
- }
- },
- { failed_read_lines.each { failed_read_line ->
- { assert path(process.out.reads_fail.get(0).get(1)).linesGzip.contains(failed_read_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { assert snapshot(process.out.json).match("test_fastp_single_end_trim_fail_json") },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- { assert snapshot(process.out.versions).match("versions_single_end_trim_fail") }
- )
- }
- }
-
- test("test_fastp_paired_end_trim_fail") {
-
- config './nextflow.save_failed.config'
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = true
- save_merged = false
-
- input[0] = Channel.of([
- [ id:'test', single_end:false ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
- file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ "Q20 bases: | 25.719000 K (93.033098%)",
- "The input has little adapter percentage (~0.000000%), probably it's trimmed before."]
- def log_text = [ "No adapter detected for read1",
- "Q30 bases: 12281(88.3716%)"]
- def json_text = ['"passed_filter_reads": 162']
- def read1_lines = ["@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
- "TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
- "AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
- { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
- }
- },
- { read2_lines.each { read2_line ->
- { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
- }
- },
- { failed_read2_lines.each { failed_read2_line ->
- { assert path(process.out.reads_fail.get(0).get(1).get(2)).linesGzip.contains(failed_read2_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { json_text.each { json_part ->
- { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
- }
- },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- { assert snapshot(process.out.versions).match("versions_paired_end_trim_fail") }
- )
- }
- }
-
- test("test_fastp_paired_end_merged") {
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = true
- input[0] = Channel.of([
- [ id:'test', single_end:false ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
- file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ ""]
- def log_text = [ "Merged and filtered:",
- "total reads: 75",
- "total bases: 13683"]
- def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683']
- def read1_lines = [ "@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1",
- "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC",
- "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE
- { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
- }
- },
- { read2_lines.each { read2_line ->
- { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
- }
- },
- { read_merged_lines.each { read_merged_line ->
- { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { json_text.each { json_part ->
- { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
- }
- },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_paired_end_merged_match")
- },
- { assert snapshot(process.out.versions).match("versions_paired_end_merged") }
- )
- }
- }
-
- test("test_fastp_paired_end_merged-stub") {
-
- options '-stub'
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = []
- save_trimmed_fail = false
- save_merged = true
-
- input[0] = Channel.of([
- [ id:'test', single_end:false ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
- file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- assertAll(
- { assert process.success },
- {
- assert snapshot(
- (
- [process.out.reads[0][0].toString()] + // meta
- process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
- process.out.json.collect { file(it[1]).getName() } +
- process.out.html.collect { file(it[1]).getName() } +
- process.out.log.collect { file(it[1]).getName() } +
- process.out.reads_fail.collect { file(it[1]).getName() } +
- process.out.reads_merged.collect { file(it[1]).getName() }
- ).sort()
- ).match("test_fastp_paired_end_merged-for_stub_match")
- },
- { assert snapshot(process.out.versions).match("versions_paired_end_merged_stub") }
- )
- }
- }
-
- test("test_fastp_paired_end_merged_adapterlist") {
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ])
- save_trimmed_fail = false
- save_merged = true
-
- input[0] = Channel.of([
- [ id:'test', single_end:false ], // meta map
- [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
- file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
- ])
- input[1] = adapter_fasta
- input[2] = save_trimmed_fail
- input[3] = save_merged
- """
- }
- }
-
- then {
- def html_text = [ ""]
- def log_text = [ "Merged and filtered:",
- "total reads: 75",
- "total bases: 13683"]
- def json_text = ['"merged_and_filtered": {', '"total_reads": 75', '"total_bases": 13683',"--adapter_fasta"]
- def read1_lines = ["@ERR5069949.1066259 NS500628:121:HK3MMAFX2:1:11312:18369:8333/1",
- "CCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTATGAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTCCATGTGGGCTCTTATAATCTCTGTTACTTC",
- "AAAAAEAEEAEEEEEEEEEEEEEEEEAEEEEAEEEEEEEEAEEEEEEEEEEEEEEEEE/EAEEEEEE/6EEEEEEEEEEAEEAEEE/EE/AEEAEEEEEAEEEA/EEAAEAE
- { assert path(process.out.reads.get(0).get(1).get(0)).linesGzip.contains(read1_line) }
- }
- },
- { read2_lines.each { read2_line ->
- { assert path(process.out.reads.get(0).get(1).get(1)).linesGzip.contains(read2_line) }
- }
- },
- { read_merged_lines.each { read_merged_line ->
- { assert path(process.out.reads_merged.get(0).get(1)).linesGzip.contains(read_merged_line) }
- }
- },
- { html_text.each { html_part ->
- { assert path(process.out.html.get(0).get(1)).getText().contains(html_part) }
- }
- },
- { json_text.each { json_part ->
- { assert path(process.out.json.get(0).get(1)).getText().contains(json_part) }
- }
- },
- { log_text.each { log_part ->
- { assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
- }
- },
- { assert snapshot(process.out.versions).match("versions_paired_end_merged_adapterlist") }
- )
- }
- }
-}
diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap
deleted file mode 100644
index 3e876288..00000000
--- a/modules/nf-core/fastp/tests/main.nf.test.snap
+++ /dev/null
@@ -1,330 +0,0 @@
-{
- "fastp test_fastp_interleaved_json": {
- "content": [
- [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test.fastp.json:md5,b24e0624df5cc0b11cd5ba21b726fb22"
- ]
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-03-18T16:19:15.063001"
- },
- "test_fastp_paired_end_merged-for_stub_match": {
- "content": [
- [
- [
- "test_1.fastp.fastq.gz",
- "test_2.fastp.fastq.gz"
- ],
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "test.merged.fastq.gz",
- "{id=test, single_end=false}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-01-17T18:10:13.467574"
- },
- "versions_interleaved": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:56:24.615634793"
- },
- "test_fastp_single_end_json": {
- "content": [
- [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc"
- ]
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-03-18T16:18:43.526412"
- },
- "versions_paired_end": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:55:42.333545689"
- },
- "test_fastp_paired_end_match": {
- "content": [
- [
- [
- "test_1.fastp.fastq.gz",
- "test_2.fastp.fastq.gz"
- ],
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "{id=test, single_end=false}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T12:03:06.431833729"
- },
- "test_fastp_interleaved-_match": {
- "content": [
- [
- "test.fastp.fastq.gz",
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "{id=test, single_end=true}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-03-18T16:19:15.111894"
- },
- "test_fastp_paired_end_merged_match": {
- "content": [
- [
- [
- "test_1.fastp.fastq.gz",
- "test_2.fastp.fastq.gz"
- ],
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "test.merged.fastq.gz",
- "{id=test, single_end=false}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T12:08:44.496251446"
- },
- "versions_single_end_stub": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:55:27.354051299"
- },
- "versions_interleaved-stub": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:56:46.535528418"
- },
- "versions_single_end_trim_fail": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:59:03.724591407"
- },
- "test_fastp_paired_end-for_stub_match": {
- "content": [
- [
- [
- "test_1.fastp.fastq.gz",
- "test_2.fastp.fastq.gz"
- ],
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "{id=test, single_end=false}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-01-17T18:07:15.398827"
- },
- "versions_paired_end-stub": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:56:06.50017282"
- },
- "versions_single_end": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:55:07.67921647"
- },
- "versions_paired_end_merged_stub": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:59:47.350653154"
- },
- "test_fastp_interleaved-for_stub_match": {
- "content": [
- [
- "test.fastp.fastq.gz",
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "{id=test, single_end=true}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-01-17T18:08:06.127974"
- },
- "versions_paired_end_trim_fail": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:59:18.140484878"
- },
- "test_fastp_single_end-for_stub_match": {
- "content": [
- [
- "test.fastp.fastq.gz",
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "{id=test, single_end=true}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-01-17T18:06:00.244202"
- },
- "test_fastp_single_end-_match": {
- "content": [
- [
- "test.fastp.fastq.gz",
- "test.fastp.html",
- "test.fastp.json",
- "test.fastp.log",
- "{id=test, single_end=true}"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-03-18T16:18:43.580336"
- },
- "versions_paired_end_merged_adapterlist": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T12:05:37.845370554"
- },
- "versions_paired_end_merged": {
- "content": [
- [
- "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-02-01T11:59:32.860543858"
- },
- "test_fastp_single_end_trim_fail_json": {
- "content": [
- [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5"
- ]
- ]
- ],
- "meta": {
- "nf-test": "0.8.4",
- "nextflow": "23.10.1"
- },
- "timestamp": "2024-01-17T18:08:41.942317"
- }
-}
\ No newline at end of file
diff --git a/modules/nf-core/fastp/tests/nextflow.interleaved.config b/modules/nf-core/fastp/tests/nextflow.interleaved.config
deleted file mode 100644
index 4be8dbd2..00000000
--- a/modules/nf-core/fastp/tests/nextflow.interleaved.config
+++ /dev/null
@@ -1,5 +0,0 @@
-process {
- withName: FASTP {
- ext.args = "--interleaved_in -e 30"
- }
-}
diff --git a/modules/nf-core/fastp/tests/nextflow.save_failed.config b/modules/nf-core/fastp/tests/nextflow.save_failed.config
deleted file mode 100644
index 53b61b0c..00000000
--- a/modules/nf-core/fastp/tests/nextflow.save_failed.config
+++ /dev/null
@@ -1,5 +0,0 @@
-process {
- withName: FASTP {
- ext.args = "-e 30"
- }
-}
diff --git a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml
deleted file mode 100644
index c1afcce7..00000000
--- a/modules/nf-core/fastp/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-fastp:
- - modules/nf-core/fastp/**
diff --git a/modules/nf-core/hifiasm/hifiasm.diff b/modules/nf-core/hifiasm/hifiasm.diff
index 650cf697..03790816 100644
--- a/modules/nf-core/hifiasm/hifiasm.diff
+++ b/modules/nf-core/hifiasm/hifiasm.diff
@@ -8,7 +8,7 @@ Changes in module 'nf-core/hifiasm'
- tuple val(meta), path(reads)
- path paternal_kmer_dump
- path maternal_kmer_dump
-+ tuple val(meta), path(reads), path(paternal_kmer_dump), path(maternal_kmer_dump)
++ tuple val(meta), path(reads, stageAs: "?/*"), path(paternal_kmer_dump), path(maternal_kmer_dump)
path hic_read1
path hic_read2
diff --git a/modules/nf-core/hifiasm/main.nf b/modules/nf-core/hifiasm/main.nf
index 040d8369..a1bf30a0 100644
--- a/modules/nf-core/hifiasm/main.nf
+++ b/modules/nf-core/hifiasm/main.nf
@@ -8,7 +8,7 @@ process HIFIASM {
'biocontainers/hifiasm:0.19.8--h43eeafb_0' }"
input:
- tuple val(meta), path(reads), path(paternal_kmer_dump), path(maternal_kmer_dump)
+ tuple val(meta), path(reads, stageAs: "?/*"), path(paternal_kmer_dump), path(maternal_kmer_dump)
path hic_read1
path hic_read2
diff --git a/modules/nf-core/samtools/import/environment.yml b/modules/nf-core/samtools/import/environment.yml
new file mode 100644
index 00000000..38f1ebab
--- /dev/null
+++ b/modules/nf-core/samtools/import/environment.yml
@@ -0,0 +1,8 @@
+name: samtools_import
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::samtools=1.20
+ - bioconda::htslib=1.20
diff --git a/modules/nf-core/samtools/import/main.nf b/modules/nf-core/samtools/import/main.nf
new file mode 100644
index 00000000..954a8052
--- /dev/null
+++ b/modules/nf-core/samtools/import/main.nf
@@ -0,0 +1,60 @@
+process SAMTOOLS_IMPORT { // Runs `samtools import` on FASTQ input and writes ${prefix}.${suffix}
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/samtools:1.20--h50ea8bc_0':
+ 'biocontainers/samtools:1.20--h50ea8bc_0' }"
+
+ input:
+ tuple val(meta), path(reads) // reads may be one file or a List of files; meta.single_end selects the import flags below
+
+ output:
+ tuple val(meta), path("*.sam") , emit: sam, optional: true // all three formats are optional: the script writes a single output file
+ tuple val(meta), path("*.bam") , emit: bam, optional: true
+ tuple val(meta), path("*.cram"), emit: cram, optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '' // extra command-line flags from task.ext.args, empty when unset
+ def prefix = task.ext.prefix ?: "${meta.id}" // output basename, defaults to meta.id
+ def suffix = args.contains("--output-fmt sam") ? "sam" : // extension chosen by literal substring match on args
+ args.contains("--output-fmt bam") ? "bam" :
+ args.contains("--output-fmt cram") ? "cram" :
+ "bam" // fallback when none of the three literals occurs in args (any other spelling is not detected)
+ def input = reads instanceof List && meta.single_end ? reads.join(" -0") : // List + single_end; NOTE(review): join yields "f1 -0f2 -0f3" (first file unflagged, no space after -0) - confirm this is intended
+ reads instanceof List && !meta.single_end ? "-1 ${reads[0]} -2 ${reads[1]}": // List + not single_end: only the first two files are used, as -1/-2
+ meta.single_end ? "-0 $reads" : // one file, single_end: passed with -0
+ !meta.single_end ? "-s $reads": // one file, not single_end: passed with -s (interleaved paired-end)
+ reads // NOTE(review): unreachable in practice - the two preceding conditions are exhaustive
+ """
+ samtools \\
+ import \\
+ $input \\
+ $args \\
+ -@ $task.cpus \\
+ -o ${prefix}.${suffix}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ END_VERSIONS
+ """
+
+ stub: // the stub always creates ${prefix}.bam, regardless of any --output-fmt in args
+ def args = task.ext.args ?: '' // not referenced by the stub script below
+ def prefix = task.ext.prefix ?: "${meta.id}"
+
+ """
+ touch ${prefix}.bam
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/samtools/import/meta.yml b/modules/nf-core/samtools/import/meta.yml
new file mode 100644
index 00000000..9002e092
--- /dev/null
+++ b/modules/nf-core/samtools/import/meta.yml
@@ -0,0 +1,56 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+name: "samtools_import"
+description: converts FASTQ files to unmapped SAM/BAM/CRAM
+keywords:
+ - import
+ - fastq
+ - bam
+ - sam
+ - cram
+tools:
+ - samtools:
+ description: |
+ SAMtools is a set of utilities for interacting with and post-processing
+ short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+ These files are generated as output by short read aligners like BWA.
+ homepage: http://www.htslib.org/
+ documentation: http://www.htslib.org/doc/samtools.html
+ doi: 10.1093/bioinformatics/btp352
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test', single_end:false ]`
+ - reads:
+ type: file
+ description: fastq data to be converted to SAM/BAM/CRAM
+ pattern: "*.{fastq,fq,fastq.gz,fq.gz}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test', single_end:false ]`
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - sam:
+ type: file
+ description: SAM file
+ pattern: "*.sam"
+ - bam:
+ type: file
+ description: Unaligned BAM file
+ pattern: "*.bam"
+ - cram:
+ type: file
+ description: Unaligned CRAM file
+ pattern: "*.cram"
+authors:
+ - "@matthdsm"
+maintainers:
+ - "@matthdsm"
diff --git a/modules/nf-core/samtools/import/tests/main.nf.test b/modules/nf-core/samtools/import/tests/main.nf.test
new file mode 100644
index 00000000..d029ca70
--- /dev/null
+++ b/modules/nf-core/samtools/import/tests/main.nf.test
@@ -0,0 +1,83 @@
+nextflow_process { // nf-test suite for SAMTOOLS_IMPORT: single-end, paired-end and interleaved FASTQ input
+
+ name "Test Process SAMTOOLS_IMPORT"
+ script "../main.nf"
+ process "SAMTOOLS_IMPORT"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "samtools"
+ tag "samtools/import"
+
+ test("samtools_import_single ") { // the trailing space is part of the test name and of its snapshot key
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ {assert process.success},
+ {assert snapshot(process.out.bam.collect { it.collect { it instanceof Map ? it : file(it).name } }).match()} // snapshot keeps the meta map and the BAM file name only, no checksum
+ )
+ }
+ }
+
+ test("samtools_import_paired ") { // the trailing space is part of the test name and of its snapshot key
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ {assert process.success},
+ {assert snapshot(process.out.bam.collect { it.collect { it instanceof Map ? it : file(it).name } }).match()} // file names only, as in the single-end test
+ )
+ }
+ }
+
+ test("samtools_import_interleaved") { // no trailing space here; NOTE(review): main.nf.test.snap also holds a "samtools_import_interleaved " entry that no test in this file is named after
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true)
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ {assert process.success},
+ {assert snapshot(process.out.bam.collect { it.collect { it instanceof Map ? it : file(it).name } }).match()} // file names only, as in the single-end test
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/samtools/import/tests/main.nf.test.snap b/modules/nf-core/samtools/import/tests/main.nf.test.snap
new file mode 100644
index 00000000..eb730a06
--- /dev/null
+++ b/modules/nf-core/samtools/import/tests/main.nf.test.snap
@@ -0,0 +1,103 @@
+{
+ "samtools_import_single ": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-05-31T11:38:44.388259606"
+ },
+ "samtools_import_interleaved": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-05-31T11:38:56.393371331"
+ },
+ "samtools_import_paired ": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-05-31T11:38:50.437197406"
+ },
+ "samtools_import_interleaved ": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,fad91b070f51c77d7abe22cd31243710"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,a529fc2aa6485db14986c95c53638b11"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.bam:md5,fad91b070f51c77d7abe22cd31243710"
+ ]
+ ],
+ "cram": [
+
+ ],
+ "sam": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,a529fc2aa6485db14986c95c53638b11"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-05-30T12:12:43.491200967"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/samtools/import/tests/tags.yml b/modules/nf-core/samtools/import/tests/tags.yml
new file mode 100644
index 00000000..89c89128
--- /dev/null
+++ b/modules/nf-core/samtools/import/tests/tags.yml
@@ -0,0 +1,2 @@
+samtools/import:
+ - modules/nf-core/samtools/import/**
diff --git a/modules/nf-core/splitubam/environment.yml b/modules/nf-core/splitubam/environment.yml
new file mode 100644
index 00000000..ef128202
--- /dev/null
+++ b/modules/nf-core/splitubam/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "splitubam"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::splitubam=0.1.1"
diff --git a/modules/nf-core/splitubam/main.nf b/modules/nf-core/splitubam/main.nf
new file mode 100644
index 00000000..3d413d69
--- /dev/null
+++ b/modules/nf-core/splitubam/main.nf
@@ -0,0 +1,53 @@
+process SPLITUBAM { // Runs `splitubam` on one BAM and collects every *.bam it produces
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/splitubam:0.1.1--hc9368f3_0':
+ 'biocontainers/splitubam:0.1.1--hc9368f3_0' }"
+
+ input:
+ tuple val(meta), path(bam) // the BAM file to split
+
+ output:
+ tuple val(meta), path("*.bam"), emit: bam // the split files, matched by glob
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '' // extra splitubam flags from task.ext.args, e.g. '--split 2' in the module tests
+ """
+ splitubam \\
+ $args \\
+ --threads $task.cpus \\
+ $bam
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ splitubam: \$(splitubam --version | sed 's/splitubam //')
+ END_VERSIONS
+ """
+
+ stub: // creates one empty file per requested split instead of running splitubam on the input
+ def args = task.ext.args ?: ''
+ def match = (args =~ /--split\s+(\d+)/) // find "--split N" in args to learn how many files to create
+ def create_cmd = "" // accumulates one `touch` line per split file
+ if (match) {
+ def n_splits = match[0][1].toInteger() // first capture group of the first match, i.e. N
+ (1..n_splits).each { i -> // 1-based counter
+ def formattedIteration = String.format('%03d', i) // zero-padded to three digits: 001, 002, ...
+ create_cmd += "touch ${formattedIteration}.${bam}.bam\n" // name is NNN.<input file name>.bam, so an input ending in .bam yields ".bam.bam"
+ }
+ } else { error("No `--split N` detected in args") } // without N the stub cannot know how many files to create
+ """
+ $create_cmd
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ splitubam: \$(splitubam --version | sed 's/splitubam //')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/splitubam/meta.yml b/modules/nf-core/splitubam/meta.yml
new file mode 100644
index 00000000..3d11eaae
--- /dev/null
+++ b/modules/nf-core/splitubam/meta.yml
@@ -0,0 +1,46 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "splitubam"
+description: split one ubam into multiple, per line, fast
+keywords:
+ - long-read
+ - bam
+ - genomics
+tools:
+ - "splitubam":
+ description: "Split one ubam into multiple, per line, fast"
+ homepage: "https://github.com/fellen31/splitubam"
+ documentation: "https://github.com/fellen31/splitubam"
+ tool_dev_url: "https://github.com/fellen31/splitubam"
+ licence: ["MIT"]
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1', single_end:true ]`
+ - bam:
+ type: file
+ description: (u)BAM file
+ pattern: "*.{bam}"
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1', single_end:false ]`
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - bam:
+ type: file
+ description: Split (u)BAM file
+ pattern: "*.{bam}"
+
+authors:
+ - "@fellen31"
+maintainers:
+ - "@fellen31"
diff --git a/modules/nf-core/splitubam/tests/main.nf.test b/modules/nf-core/splitubam/tests/main.nf.test
new file mode 100644
index 00000000..270df28b
--- /dev/null
+++ b/modules/nf-core/splitubam/tests/main.nf.test
@@ -0,0 +1,67 @@
+
+nextflow_process { // nf-test suite for SPLITUBAM: one real run and one stub run on the same BAM
+
+ name "Test Process SPLITUBAM"
+ script "../main.nf"
+ process "SPLITUBAM"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "splitubam"
+
+ test("sarscov2 - bam") {
+
+ config "./nextflow.config" // sets ext.args = '--split 2' for SPLITUBAM
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ bam(process.out.bam.get(0).get(1).get(0)).getHeader(), // first split file: header
+ bam(process.out.bam.get(0).get(1).get(0)).getReadsMD5(), // first split file: checksum of its reads
+ bam(process.out.bam.get(0).get(1).get(1)).getHeader(), // second split file: header
+ bam(process.out.bam.get(0).get(1).get(1)).getReadsMD5(), // second split file: checksum of its reads
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam - stub") {
+
+ options "-stub" // run the process with Nextflow's -stub option
+ config "./nextflow.config" // the stub needs '--split 2' from this config to know how many files to create
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() } // the whole output map is snapshotted
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/splitubam/tests/main.nf.test.snap b/modules/nf-core/splitubam/tests/main.nf.test.snap
new file mode 100644
index 00000000..df573efa
--- /dev/null
+++ b/modules/nf-core/splitubam/tests/main.nf.test.snap
@@ -0,0 +1,73 @@
+{
+ "sarscov2 - bam - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "001.test.paired_end.bam.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "002.test.paired_end.bam.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,78006e47ec8ddb5d6f098dcef4a3e099"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "001.test.paired_end.bam.bam:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "002.test.paired_end.bam.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,78006e47ec8ddb5d6f098dcef4a3e099"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-23T13:07:19.115592832"
+ },
+ "sarscov2 - bam": {
+ "content": [
+ [
+ "@HD\tVN:1.6\tSO:unsorted",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@RG\tID:1\tLB:lib1\tPL:ILLUMINA\tSM:test\tPU:barcode1",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.17-r941\tCL:minimap2 -ax sr tests/data/fasta/sarscov2/GCA_011545545.1_ASM1154554v1_genomic.fna tests/data/fastq/dna/sarscov2_1.fastq.gz tests/data/fastq/dna/sarscov2_2.fastq.gz",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.11\tCL:samtools view -Sb sarscov2_aln.sam",
+ "@PG\tID:splitbam\tPN:splitbam\tVN:0.1.0\tCL:splitubam --split 2 --threads 2 test.paired_end.bam"
+ ],
+ "4933fd727ab0ca4e215dddee7de73a2c",
+ [
+ "@HD\tVN:1.6\tSO:unsorted",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@RG\tID:1\tLB:lib1\tPL:ILLUMINA\tSM:test\tPU:barcode1",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.17-r941\tCL:minimap2 -ax sr tests/data/fasta/sarscov2/GCA_011545545.1_ASM1154554v1_genomic.fna tests/data/fastq/dna/sarscov2_1.fastq.gz tests/data/fastq/dna/sarscov2_2.fastq.gz",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.11\tCL:samtools view -Sb sarscov2_aln.sam",
+ "@PG\tID:splitbam\tPN:splitbam\tVN:0.1.0\tCL:splitubam --split 2 --threads 2 test.paired_end.bam"
+ ],
+ "1a9e3bfa97c43dcbeba1ed01e51a6a54",
+ [
+ "versions.yml:md5,e5c9bb35328e8dcde2e934d9e6729fa6"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-23T13:07:07.013916943"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/splitubam/tests/nextflow.config b/modules/nf-core/splitubam/tests/nextflow.config
new file mode 100644
index 00000000..191f4bfb
--- /dev/null
+++ b/modules/nf-core/splitubam/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: 'SPLITUBAM' {
+ ext.args = '--split 2'
+ }
+}
diff --git a/modules/nf-core/splitubam/tests/tags.yml b/modules/nf-core/splitubam/tests/tags.yml
new file mode 100644
index 00000000..3a60af57
--- /dev/null
+++ b/modules/nf-core/splitubam/tests/tags.yml
@@ -0,0 +1,2 @@
+splitubam:
+ - "modules/nf-core/splitubam/**"
diff --git a/nextflow.config b/nextflow.config
index 592e5f28..1f3fd481 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -53,7 +53,7 @@ params {
// Preprocessing/parallelisation
parallel_snv = 13
- split_fastq = 0
+ parallel_alignments = 1
// References
fasta = null
@@ -298,9 +298,9 @@ includeConfig 'conf/modules/annotate_consequence_pli.config'
includeConfig 'conf/modules/annotate_repeat_expansions.config'
includeConfig 'conf/modules/general.config'
includeConfig 'conf/modules/bam_infer_sex.config'
-includeConfig 'conf/modules/bam_to_fastq.config'
includeConfig 'conf/modules/call_paralogs.config'
includeConfig 'conf/modules/call_repeat_expansions.config'
+includeConfig 'conf/modules/convert_input_files.config'
includeConfig 'conf/modules/assembly_variant_calling.config'
includeConfig 'conf/modules/genome_assembly.config'
includeConfig 'conf/modules/methylation.config'
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 4b9c4470..aea8e485 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -366,11 +366,11 @@
"enum": ["hifi-only", "trio-binning"],
"default": "hifi-only"
},
- "split_fastq": {
+ "parallel_alignments": {
"type": "integer",
- "default": 0,
- "minimum": 0,
- "description": "Split alignment into n processes"
+ "minimum": 1,
+ "default": 1,
+ "description": "Split alignment into n processes per sample"
},
"parallel_snv": {
"type": "integer",
diff --git a/subworkflows/local/bam_to_fastq.nf b/subworkflows/local/bam_to_fastq.nf
deleted file mode 100644
index 0a7bf330..00000000
--- a/subworkflows/local/bam_to_fastq.nf
+++ /dev/null
@@ -1,32 +0,0 @@
-include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main'
-
-workflow BAM_TO_FASTQ {
-
- take:
- ch_sample // channel: [ val(meta), reads ]
-
- main:
- ch_versions = Channel.empty()
-
- // Filter out BAM from fastq
- ch_sample
- .map { meta, fastq -> [ meta + [ 'single_end': true ], fastq ] }
- .branch { meta, reads ->
- fastq: reads.extension == 'gz'
- bam: reads.extension == 'bam'
- }
- .set { ch_filetypes }
-
- ch_filetypes.fastq.set { ch_sample }
-
- SAMTOOLS_FASTQ ( ch_filetypes.bam, false )
- ch_versions = ch_versions.mix(SAMTOOLS_FASTQ.out.versions)
-
- // Mix converted BAM back in
- ch_sample = ch_sample.mix(SAMTOOLS_FASTQ.out.other)
-
- emit:
- fastq = ch_sample // channel: [ val(meta), fastq ]
- versions = ch_versions // channel: [ versions.yml ]
-}
-
diff --git a/subworkflows/local/convert_input_files.nf b/subworkflows/local/convert_input_files.nf
new file mode 100644
index 00000000..5c44e521
--- /dev/null
+++ b/subworkflows/local/convert_input_files.nf
@@ -0,0 +1,37 @@
+include { SAMTOOLS_IMPORT } from '../../modules/nf-core/samtools/import/main'
+include { SAMTOOLS_FASTQ } from '../../modules/nf-core/samtools/fastq/main'
+
+workflow CONVERT_INPUT_FILES { // Emits every input both as BAM and as FASTQ by converting whichever format it did not arrive in
+
+ take:
+ ch_sample // channel: [ val(meta), reads ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ ch_sample
+ .branch { meta, reads -> // route each input by the extension of its reads file
+ fastq: reads.extension == 'gz' // any file ending in .gz is treated as FASTQ
+ bam: reads.extension == 'bam'
+ } // NOTE(review): files with any other extension match neither condition - confirm they are rejected upstream
+ .set { ch_filetypes }
+
+ ch_bam = ch_filetypes.bam // inputs that are already BAM
+ ch_fastq = ch_filetypes.fastq // inputs that are already FASTQ
+
+ SAMTOOLS_FASTQ ( ch_filetypes.bam, false ) // BAM -> FASTQ; second argument is presumably the module's interleave switch - confirm
+ ch_versions = ch_versions.mix(SAMTOOLS_FASTQ.out.versions)
+
+ SAMTOOLS_IMPORT ( ch_filetypes.fastq ) // FASTQ -> BAM
+ ch_versions = ch_versions.mix(SAMTOOLS_IMPORT.out.versions)
+
+ // Combine the original files with the converted ones so that both channels carry every input
+ ch_bam = ch_bam.mix(SAMTOOLS_IMPORT.out.bam)
+ ch_fastq = ch_fastq.mix(SAMTOOLS_FASTQ.out.other) // converted reads are taken from the module's "other" output
+
+ emit:
+ bam = ch_bam // channel: [ val(meta), bam ]
+ fastq = ch_fastq // channel: [ val(meta), fastq ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
+
diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
index c25f7f3f..8e1c2697 100644
--- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
@@ -182,7 +182,7 @@ workflow PIPELINE_INITIALISATION {
.groupTuple() // group by sample
.map { sample, metas, reads ->
// Add number of files per sample _after_ splitting to meta
- [ sample, metas[0] + [n_files: metas.size() + metas.size() * Math.max(0, params.split_fastq - 1), single_end:true ], reads ]
+ [ sample, metas[0] + [n_files: metas.size() + metas.size() * Math.max(0, params.parallel_alignments - 1), single_end:true ], reads ]
}
// Convert back to [ meta, reads ]
.flatMap {
diff --git a/tests/main.nf.test b/tests/main.nf.test
index 04bb42d9..ab94bb8c 100644
--- a/tests/main.nf.test
+++ b/tests/main.nf.test
@@ -123,8 +123,8 @@ nextflow_pipeline {
{ assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio/HG002_Revio_phased.vcf.gz.tbi").exists() },
{ assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio/HG002_Revio_stats.stats.tsv").exists() },
{ assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio/HG002_Revio_stats.blocks.tsv").exists() },
- { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio/HG002_PacBio_Revio.fastq.gz_fastqc.html").exists() },
- { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio/HG002_PacBio_Revio.fastq.gz_fastqc.zip").exists() },
+ { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio/HG002_Revio_fastqc.html").exists() },
+ { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio/HG002_Revio_fastqc.zip").exists() },
{ assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio/HG002_Revio_repeat_expansion_stranger.vcf.gz").exists() },
{ assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio/HG002_Revio_repeat_expansion_stranger.vcf.gz.tbi").exists() },
{ assert new File("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio/HG002_Revio_spanning_sorted.bam.bai").exists() },
@@ -293,8 +293,8 @@ nextflow_pipeline {
{ assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz.tbi").exists() },
{ assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_A/HG002_Revio_A_stats.stats.tsv").exists() },
{ assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_A/HG002_Revio_A_stats.blocks.tsv").exists() },
- { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_A/HG002_PacBio_Revio.bam_other.fastq.gz_fastqc.html").exists() },
- { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_A/HG002_PacBio_Revio.bam_other.fastq.gz_fastqc.zip").exists() },
+ { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_A/HG002_Revio_A_fastqc.html").exists() },
+ { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_A/HG002_Revio_A_fastqc.zip").exists() },
{ assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_A/HG002_Revio_A_repeat_expansion_stranger.vcf.gz").exists() },
{ assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_A/HG002_Revio_A_repeat_expansion_stranger.vcf.gz.tbi").exists() },
{ assert new File("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_spanning_sorted.bam.bai").exists() },
@@ -327,8 +327,8 @@ nextflow_pipeline {
{ assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz.tbi").exists() },
{ assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_B/HG002_Revio_B_stats.stats.tsv").exists() },
{ assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_B/HG002_Revio_B_stats.blocks.tsv").exists() },
- { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_B/HG002_Revio_B.merged.fastq.gz_fastqc.html").exists() },
- { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_B/HG002_Revio_B.merged.fastq.gz_fastqc.zip").exists() },
+ { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_B/HG002_Revio_B_fastqc.html").exists() },
+ { assert new File("$outputDir/qc_raw_reads/fastqc/HG002_Revio_B/HG002_Revio_B_fastqc.zip").exists() },
{ assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_B/HG002_Revio_B_repeat_expansion_stranger.vcf.gz").exists() },
{ assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_B/HG002_Revio_B_repeat_expansion_stranger.vcf.gz.tbi").exists() },
{ assert new File("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_B/HG002_Revio_B_spanning_sorted.bam.bai").exists() },
diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap
index 31a1d818..d54cb854 100644
--- a/tests/main.nf.test.snap
+++ b/tests/main.nf.test.snap
@@ -6,7 +6,7 @@
"test.samples.tsv:md5,1685dc6cb8c6b9806ca636662980d686",
"test.html:md5,d05e0eceb70ada3a0c25f99a16ad1889",
"multiqc_citations.txt:md5,a27affce20d456d20ed387097a4f0350",
- "multiqc_fastqc.txt:md5,055c2c156136798feeb1658adf905e95",
+ "multiqc_fastqc.txt:md5,c60b523b5f11c07c9149043bf68f92ad",
"multiqc_somalier.txt:md5,20b4c5b2d5b94b77fb800548e07a874e",
"74b4822241bd8d1bc42f494f1f3e326c",
"HG002_Revio.asm.bp.hap1.p_ctg.assembly_summary:md5,ccaad2690abccadc4ec3b2c5d8fa4b05",
@@ -53,13 +53,13 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-09T13:23:09.224424657"
+ "timestamp": "2024-08-12T09:45:54.652442138"
},
"test profile - multisample": {
"content": [
"test.ped:md5,a1e82af069bce823564e204c316d5500",
"multiqc_citations.txt:md5,a27affce20d456d20ed387097a4f0350",
- "multiqc_fastqc.txt:md5,234f2958710c30f62446a9406cbfcaae",
+ "multiqc_fastqc.txt:md5,b74145ef9fbf8addcc2997ca26b3aa45",
"74b4822241bd8d1bc42f494f1f3e326c",
"HG002_Revio_A.asm.bp.hap1.p_ctg.assembly_summary:md5,ccaad2690abccadc4ec3b2c5d8fa4b05",
"HG002_Revio_A.asm.bp.hap2.p_ctg.assembly_summary:md5,c5bbeabb571453186a39cf6e487dbcc5",
@@ -145,6 +145,6 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-09T13:26:52.566315569"
+ "timestamp": "2024-08-12T09:49:33.754269424"
}
}
\ No newline at end of file
diff --git a/workflows/nallo.nf b/workflows/nallo.nf
index c7a485f5..620af098 100644
--- a/workflows/nallo.nf
+++ b/workflows/nallo.nf
@@ -10,7 +10,7 @@ include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/anno
include { ANNOTATE_REPEAT_EXPANSIONS } from '../subworkflows/local/annotate_repeat_expansions'
include { ASSEMBLY } from '../subworkflows/local/genome_assembly'
include { ASSEMBLY_VARIANT_CALLING } from '../subworkflows/local/assembly_variant_calling'
-include { BAM_TO_FASTQ } from '../subworkflows/local/bam_to_fastq'
+include { CONVERT_INPUT_FILES } from '../subworkflows/local/convert_input_files'
include { BAM_INFER_SEX } from '../subworkflows/local/bam_infer_sex'
include { CALL_PARALOGS } from '../subworkflows/local/call_paralogs'
include { CALL_REPEAT_EXPANSIONS } from '../subworkflows/local/call_repeat_expansions'
@@ -43,9 +43,9 @@ include { BCFTOOLS_PLUGINSPLIT } from '../modules/nf-core/bcftool
include { BCFTOOLS_STATS } from '../modules/nf-core/bcftools/stats/main'
include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
include { FASTQC } from '../modules/nf-core/fastqc/main'
-include { FASTP } from '../modules/nf-core/fastp/main'
include { MINIMAP2_ALIGN } from '../modules/nf-core/minimap2/align/main'
include { MULTIQC } from '../modules/nf-core/multiqc/main'
+include { SPLITUBAM } from '../modules/nf-core/splitubam/main'
include { paramsSummaryMap } from 'plugin/nf-validation'
include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
@@ -75,8 +75,6 @@ workflow NALLO {
: ''
ch_fasta = params.fasta ? Channel.fromPath(params.fasta).map { it -> [ it.simpleName, it ] }.collect()
: ''
- ch_extra_snfs = params.extra_snfs ? Channel.fromSamplesheet('extra_snfs')
- : Channel.empty()
ch_tandem_repeats = params.tandem_repeats ? Channel.fromPath(params.tandem_repeats).map{ [ it.simpleName, it ] }.collect()
: Channel.value([[],[]])
ch_input_bed = params.bed ? Channel.fromPath(params.bed).map{ [ [ id:it.simpleName ] , it ] }.collect()
@@ -107,7 +105,6 @@ workflow NALLO {
: ''
// Check parameter that doesn't conform to schema validation here
- if (params.split_fastq != 0 && (params.split_fastq < 2 || params.split_fastq > 999 )) { error "--split_fastq must be 0, or between 2 and 999."}
if (params.phaser.matches('hiphase_sv|hiphase_snv') && params.preset == 'ONT_R10') { error "The HiPhase license only permits analysis of data from PacBio. For details see: https://github.com/PacificBiosciences/HiPhase/blob/main/LICENSE.md" }
// Create PED from samplesheet
@@ -121,16 +118,14 @@ workflow NALLO {
CREATE_PEDIGREE_FILE.out.ped
.map { project, ped -> [ [ 'id': project ], ped ] }
+ .collect()
.set { ch_pedfile }
//
- // Convert BAM files to FASTQ
+ // Convert BAM files to FASTQ and vice versa
//
- BAM_TO_FASTQ ( ch_input )
- ch_versions = ch_versions.mix(BAM_TO_FASTQ.out.versions)
-
- BAM_TO_FASTQ.out.fastq
- .set { ch_sample }
+ CONVERT_INPUT_FILES ( ch_input )
+ ch_versions = ch_versions.mix(CONVERT_INPUT_FILES.out.versions)
//
// Run raw (unaligned) read QC with FastQC and fqcrs
@@ -138,19 +133,19 @@ workflow NALLO {
if(!params.skip_raw_read_qc) {
// Combine samples with multiple input files before QC - not ideal
- ch_sample
+ CONVERT_INPUT_FILES.out.fastq
.groupTuple()
.branch { meta, reads ->
single: reads.size() == 1
return [ meta, reads[0] ]
multiple: reads.size() > 1
}
- .set { ch_sample_reads }
+ .set { ch_fastq }
- CAT_FASTQ ( ch_sample_reads.multiple )
+ CAT_FASTQ ( ch_fastq.multiple )
ch_versions = ch_versions.mix(CAT_FASTQ.out.versions)
- ch_sample_reads.single
+ ch_fastq.single
.concat ( CAT_FASTQ.out.reads )
.set { raw_read_qc_in }
@@ -194,18 +189,17 @@ workflow NALLO {
//
if(!params.skip_mapping_wf) {
- // Split fastq
- if (params.split_fastq > 0) {
+ // Split input files for alignment
+ if (params.parallel_alignments > 1) {
- FASTP ( ch_sample, [], [], [] )
- ch_versions = ch_versions.mix(FASTP.out.versions)
+ SPLITUBAM ( CONVERT_INPUT_FILES.out.bam )
+ ch_versions = ch_versions.mix(SPLITUBAM.out.versions)
- reads_for_alignment = FASTP.out.reads.transpose()
+ reads_for_alignment = SPLITUBAM.out.bam.transpose()
} else {
- reads_for_alignment = ch_sample
+ reads_for_alignment = CONVERT_INPUT_FILES.out.bam
}
-
// Align (split) reads
MINIMAP2_ALIGN ( reads_for_alignment, mmi, true, 'bai', false, false )
ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions)
@@ -275,7 +269,7 @@ workflow NALLO {
if(!params.skip_assembly_wf) {
//Hifiasm assembly
- ASSEMBLY( ch_sample )
+ ASSEMBLY( CONVERT_INPUT_FILES.out.fastq )
ch_versions = ch_versions.mix(ASSEMBLY.out.versions)
// Update assembly variant calling meta with sex from somalier
From ed66340b5f1b60eb37e8a714ad3156fa41999c68 Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Tue, 13 Aug 2024 20:27:20 +0200
Subject: [PATCH 42/59] Use project name in echtvar encode (#312)
---
CHANGELOG.md | 1 +
modules/local/echtvar/encode/main.nf | 4 ++--
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 86196de0..a396b6b8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -57,6 +57,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#307](https://github.com/genomic-medicine-sweden/nallo/pull/307) - Changed somalier relate to also run per sample on sampes with unknown sex, removing the need to wait on all samples to finish aligment before starting variant calling
- [#307](https://github.com/genomic-medicine-sweden/nallo/pull/307) - Changed the removal of n_files from meta from bam_infer_sex to nallo.nf
- [#308](https://github.com/genomic-medicine-sweden/nallo/pull/308) - Updated nf-core modules, fixed warnings in local modules, added Dockerfile to fqcrs
+- [#312](https://github.com/genomic-medicine-sweden/nallo/pull/312) - Changed echtvar encode database creation to use dynamic `${project}` from samplesheet
### `Removed`
diff --git a/modules/local/echtvar/encode/main.nf b/modules/local/echtvar/encode/main.nf
index aa06e394..a022b75c 100644
--- a/modules/local/echtvar/encode/main.nf
+++ b/modules/local/echtvar/encode/main.nf
@@ -23,12 +23,12 @@ process ECHTVAR_ENCODE {
[
{
"field": "AF",
- "alias": "COHORT_AF",
+ "alias": "${meta.id}_af",
"multiplier": 1000000
},
{
"field": "AC",
- "alias": "COHORT_AC",
+ "alias": "${meta.id}_ac",
"multiplier": 1000000
},
From c7dc538631c8b3597a0ceb060fcd4736cd6d8e73 Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Tue, 13 Aug 2024 20:27:36 +0200
Subject: [PATCH 43/59] Fix typo (#315)
---
subworkflows/local/utils_nfcore_nallo_pipeline/main.nf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
index 8e1c2697..c23c11a5 100644
--- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
@@ -54,7 +54,7 @@ def workflowSkips = [
// E.g., the CNV-calling workflow depends on mapping and snv_calling and can't run without them.
//
def workflowDependencies = [
- alinged_read_qc : ["mapping"],
+ aligned_read_qc : ["mapping"],
assembly : ["mapping"],
call_paralogs : ["mapping"],
snv_calling : ["mapping"],
From f319465f58925364cbafde2929a35d4acc29169a Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Wed, 14 Aug 2024 11:41:07 +0200
Subject: [PATCH 44/59] Remove samtools reset from fastq (#319)
---
CHANGELOG.md | 1 +
conf/modules/convert_input_files.config | 7 +--
modules.json | 2 +-
.../nf-core/samtools/fastq/environment.yml | 4 +-
modules/nf-core/samtools/fastq/main.nf | 10 ++---
.../samtools/fastq/samtools-fastq.diff | 45 -------------------
6 files changed, 7 insertions(+), 62 deletions(-)
delete mode 100644 modules/nf-core/samtools/fastq/samtools-fastq.diff
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a396b6b8..23359f6d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -68,6 +68,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#258](https://github.com/genomic-medicine-sweden/nallo/pull/258) - Removed VCF report from DeepVariant output
- [#264](https://github.com/genomic-medicine-sweden/nallo/pull/264) - Removed the option to provide extra SNF files to Sniffles with `--extra_snfs`
- [#305](https://github.com/genomic-medicine-sweden/nallo/pull/305) - Removed unused local module bcftools view regions
+- [#319](https://github.com/genomic-medicine-sweden/nallo/pull/319) - Removed samtools reset before samtools fastq when converting BAM to FASTQ
### `Fixed`
diff --git a/conf/modules/convert_input_files.config b/conf/modules/convert_input_files.config
index 80f4f239..4d0dccfb 100644
--- a/conf/modules/convert_input_files.config
+++ b/conf/modules/convert_input_files.config
@@ -26,12 +26,7 @@ process {
withName: '.*:CONVERT_INPUT_FILES:SAMTOOLS_FASTQ' {
ext.prefix = { "${input}" }
- // Strip tags with -x, and keep the rest
- ext.args = { [
- '-x HP,PS', // phasing
- '-x AS,CC,CG,CP,H1,H2,HI,H0,IH,MC,MD,MQ,NM,SA,TS'
- ].join(' ') }
- ext.args2 = '-T \\*'
+ ext.args = '-T \\*'
}
withName: '.*:CONVERT_INPUT_FILES:SAMTOOLS_IMPORT' {
diff --git a/modules.json b/modules.json
index feebf0f4..94687dcd 100644
--- a/modules.json
+++ b/modules.json
@@ -172,7 +172,7 @@
},
"samtools/fastq": {
"branch": "master",
- "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773",
+ "git_sha": "04fbbc7c43cebc0b95d5b126f6d9fe4effa33519",
"installed_by": ["modules"]
},
"samtools/import": {
diff --git a/modules/nf-core/samtools/fastq/environment.yml b/modules/nf-core/samtools/fastq/environment.yml
index 1d1bc98f..4455904e 100644
--- a/modules/nf-core/samtools/fastq/environment.yml
+++ b/modules/nf-core/samtools/fastq/environment.yml
@@ -1,10 +1,8 @@
name: samtools_fastq
-
channels:
- conda-forge
- bioconda
- defaults
-
dependencies:
- - bioconda::htslib=1.20
- bioconda::samtools=1.20
+ - bioconda::htslib=1.20
diff --git a/modules/nf-core/samtools/fastq/main.nf b/modules/nf-core/samtools/fastq/main.nf
index 66192d2a..6796c02b 100644
--- a/modules/nf-core/samtools/fastq/main.nf
+++ b/modules/nf-core/samtools/fastq/main.nf
@@ -23,21 +23,17 @@ process SAMTOOLS_FASTQ {
script:
def args = task.ext.args ?: ''
- def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fastq" :
meta.single_end ? "-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" :
"-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz"
"""
- samtools reset \\
- --threads ${task.cpus-1} \\
+ samtools \\
+ fastq \\
$args \\
- $input \\
- | \\
- samtools fastq \\
- $args2 \\
--threads ${task.cpus-1} \\
-0 ${prefix}_other.fastq.gz \\
+ $input \\
$output
cat <<-END_VERSIONS > versions.yml
diff --git a/modules/nf-core/samtools/fastq/samtools-fastq.diff b/modules/nf-core/samtools/fastq/samtools-fastq.diff
deleted file mode 100644
index 000ce76a..00000000
--- a/modules/nf-core/samtools/fastq/samtools-fastq.diff
+++ /dev/null
@@ -1,45 +0,0 @@
-Changes in module 'nf-core/samtools/fastq'
---- modules/nf-core/samtools/fastq/main.nf
-+++ modules/nf-core/samtools/fastq/main.nf
-@@ -23,17 +23,21 @@
-
- script:
- def args = task.ext.args ?: ''
-+ def args2 = task.ext.args2 ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
- def output = ( interleave && ! meta.single_end ) ? "> ${prefix}_interleaved.fastq" :
- meta.single_end ? "-1 ${prefix}_1.fastq.gz -s ${prefix}_singleton.fastq.gz" :
- "-1 ${prefix}_1.fastq.gz -2 ${prefix}_2.fastq.gz -s ${prefix}_singleton.fastq.gz"
- """
-- samtools \\
-- fastq \\
-+ samtools reset \\
-+ --threads ${task.cpus-1} \\
- $args \\
-+ $input \\
-+ | \\
-+ samtools fastq \\
-+ $args2 \\
- --threads ${task.cpus-1} \\
- -0 ${prefix}_other.fastq.gz \\
-- $input \\
- $output
-
- cat <<-END_VERSIONS > versions.yml
-
---- modules/nf-core/samtools/fastq/environment.yml
-+++ modules/nf-core/samtools/fastq/environment.yml
-@@ -1,8 +1,10 @@
- name: samtools_fastq
-+
- channels:
- - conda-forge
- - bioconda
- - defaults
-+
- dependencies:
-+ - bioconda::htslib=1.20
- - bioconda::samtools=1.20
-- - bioconda::htslib=1.20
-
-************************************************************
From ced1328569f1b18641e024333527ab7c137fedbb Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Wed, 14 Aug 2024 12:40:54 +0200
Subject: [PATCH 45/59] DeepVariant improved haploid calling (#313)
* Improve non-autosomal calling in DeepVariant
* Update nextflow_schema.json
Co-authored-by: Anders Jemt
* conditional haploid contigs
* Update conf/modules/short_variant_calling.config
Co-authored-by: Anders Jemt
---------
Co-authored-by: Anders Jemt
---
CHANGELOG.md | 2 +
conf/modules/short_variant_calling.config | 3 +-
conf/test.config | 2 +-
docs/usage.md | 4 +-
modules/local/dipcall/main.nf | 2 +-
nextflow.config | 2 +-
nextflow_schema.json | 4 +-
.../local/short_variant_calling/main.nf | 3 +-
.../short_variant_calling/tests/main.nf.test | 165 +++++-
.../tests/main.nf.test.snap | 504 ++++++++++++++----
.../local/snv_annotation/tests/main.nf.test | 3 +
.../local/utils_nfcore_nallo_pipeline/main.nf | 7 +-
tests/main.nf.test | 4 +-
workflows/nallo.nf | 4 +-
14 files changed, 564 insertions(+), 145 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 23359f6d..ba3065ad 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -58,6 +58,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#307](https://github.com/genomic-medicine-sweden/nallo/pull/307) - Changed the removal of n_files from meta from bam_infer_sex to nallo.nf
- [#308](https://github.com/genomic-medicine-sweden/nallo/pull/308) - Updated nf-core modules, fixed warnings in local modules, added Dockerfile to fqcrs
- [#312](https://github.com/genomic-medicine-sweden/nallo/pull/312) - Changed echtvar encode database creation to use dynamic `${project}` from samplesheet
+- [#313](https://github.com/genomic-medicine-sweden/nallo/pull/313) - Updated calling of variants in non-autosomal contigs for DeepVariant
### `Removed`
@@ -91,6 +92,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
| `--split_fastq` | `--parallel_alignments` |
| `--extra_gvcfs` | |
| `--extra_snfs` | |
+| `--dipcall_par` | `--par_regions` |
> [!NOTE]
> Parameter has been updated if both old and new parameter information is present.
diff --git a/conf/modules/short_variant_calling.config b/conf/modules/short_variant_calling.config
index 18a49b49..2559d947 100644
--- a/conf/modules/short_variant_calling.config
+++ b/conf/modules/short_variant_calling.config
@@ -29,7 +29,8 @@ process {
ext.args = { [
"--sample_name=${meta.id}",
"--model_type=${params.deepvariant_model_type}",
- '-vcf_stats_report=False'
+ '-vcf_stats_report=False',
+ meta.sex == 1 ? '--haploid_contigs="chrX,chrY"' : ''
].join(' ') }
}
diff --git a/conf/test.config b/conf/test.config
index 7c96ecd8..eadb4cd8 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -33,7 +33,7 @@ params {
hificnv_exclude = params.pipelines_testdata_base_path + 'nallo/reference/empty.bed'
// Dipcall
- dipcall_par = params.pipelines_testdata_base_path + 'nallo/reference/hs38.PAR.bed'
+ par_regions = params.pipelines_testdata_base_path + 'nallo/reference/hs38.PAR.bed'
// Repeats
trgt_repeats = params.pipelines_testdata_base_path + 'nallo/reference/pathogenic_repeats.hg38.bed'
diff --git a/docs/usage.md b/docs/usage.md
index 4b80d336..0b9110f6 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -102,7 +102,7 @@ The typical command example above requires no additional files except the refere
Nallo has the ability to skip certain parts of the pipeline, for example `--skip_repeat_wf`.
Some workflows require additional files:
-- If running without `--skip_assembly_wf`, download a BED file with PAR regions ([hg38](https://raw.githubusercontent.com/lh3/dipcall/master/data/hs38.PAR.bed)) to supply with `--dipcall_par`.
+- If running without `--skip_assembly_wf` or `--skip_short_variant_calling`, download a BED file with PAR regions ([hg38](https://storage.googleapis.com/deepvariant/case-study-testdata/GRCh38_PAR.bed)) to supply with `--par_regions`.
> [!NOTE]
> Make sure chrY PAR is hard masked in reference.
@@ -242,7 +242,7 @@ Different processes may need extra input files
| Parameter | Description | Type | Default | Required | Hidden |
| ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ |
-| `dipcall_par` | Provide a bed file of chrX PAR regions for dipcall | `string` | | | |
+| `par_regions` | Provide a bed file of chrX and chrY PAR regions for dipcall and DeepVariant | `string` | | | |
| `tandem_repeats` | Tandem repeat BED-file for sniffles | `string` | | | |
| `trgt_repeats` | BED-file for repeats to be genotyped | `string` | | | |
| `snp_db` | Extra echtvar-databases to annotate SNVs with | `string` | | | |
diff --git a/modules/local/dipcall/main.nf b/modules/local/dipcall/main.nf
index d2af5eda..e19c7641 100644
--- a/modules/local/dipcall/main.nf
+++ b/modules/local/dipcall/main.nf
@@ -16,7 +16,7 @@ process DIPCALL {
tuple val(meta2), path(reference)
tuple val(meta3), path(fai)
tuple val(meta4), path(mmi)
- path(par)
+ tuple val(meta5), path(par)
output:
tuple val(meta), path("*.dip.vcf.gz") , emit: variant_calls
diff --git a/nextflow.config b/nextflow.config
index 1f3fd481..8c28585d 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -14,7 +14,7 @@ params {
bed = null
cadd_resources = null
cadd_prescored = null
- dipcall_par = null
+ par_regions = null
tandem_repeats = null
trgt_repeats = null
variant_catalog = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index aea8e485..4859a2aa 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -416,9 +416,9 @@
"description": "Path to the directory containing cadd annotations.",
"help_text": "This folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation."
},
- "dipcall_par": {
+ "par_regions": {
"type": "string",
- "description": "Provide a bed file of chrX PAR regions for dipcall",
+ "description": "Provide a bed file of chrX and chrY PAR regions for dipcall and DeepVariant",
"format": "file-path",
"exists": true
},
diff --git a/subworkflows/local/short_variant_calling/main.nf b/subworkflows/local/short_variant_calling/main.nf
index fcbf9882..5395cd2e 100644
--- a/subworkflows/local/short_variant_calling/main.nf
+++ b/subworkflows/local/short_variant_calling/main.nf
@@ -15,6 +15,7 @@ workflow SHORT_VARIANT_CALLING {
ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_bed // channel: [optional] [ val(meta), path(input_bed) ]
+ ch_par_bed // channel: [mandatory] [ val(meta), path(par_bed) ]
main:
ch_versions = Channel.empty()
@@ -26,7 +27,7 @@ workflow SHORT_VARIANT_CALLING {
}
.set { ch_deepvariant_in }
- DEEPVARIANT ( ch_deepvariant_in, ch_fasta, ch_fai, [[],[]], [[],[]] )
+ DEEPVARIANT ( ch_deepvariant_in, ch_fasta, ch_fai, [[],[]], ch_par_bed )
ch_versions = ch_versions.mix(DEEPVARIANT.out.versions)
// First remove region so we can group per sample
diff --git a/subworkflows/local/short_variant_calling/tests/main.nf.test b/subworkflows/local/short_variant_calling/tests/main.nf.test
index b26175a9..1d07358f 100644
--- a/subworkflows/local/short_variant_calling/tests/main.nf.test
+++ b/subworkflows/local/short_variant_calling/tests/main.nf.test
@@ -5,7 +5,7 @@ nextflow_workflow {
config "./nextflow.config"
workflow "SHORT_VARIANT_CALLING"
- test("1 sample - no bed, fasta, fai, []") {
+ test("1 sample - no bed, fasta, fai, [], []") {
when {
workflow {
@@ -25,6 +25,7 @@ nextflow_workflow {
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
]
input[3] = [[],[]]
+ input[4] = [[],[]]
"""
}
}
@@ -38,7 +39,7 @@ nextflow_workflow {
}
- test("1 sample - 1 bed, fasta, fai, []") {
+ test("1 sample - 1 bed, fasta, fai, [], []") {
when {
workflow {
@@ -58,6 +59,7 @@ nextflow_workflow {
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
]
input[3] = [[],[]]
+ input[4] = [[],[]]
"""
}
}
@@ -72,7 +74,7 @@ nextflow_workflow {
}
- test("1 sample - 1 bed, fasta, fai, bed") {
+ test("1 sample - 1 bed, fasta, fai, bed, []") {
when {
workflow {
@@ -95,6 +97,7 @@ nextflow_workflow {
[ id:'bed'],
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
]
+ input[4] = [[],[]]
"""
}
}
@@ -109,7 +112,7 @@ nextflow_workflow {
}
- test("1 sample - 2 bed, fasta, fai, bed") {
+ test("1 sample - 2 bed, fasta, fai, bed, []") {
when {
workflow {
@@ -125,7 +128,7 @@ nextflow_workflow {
[ id:'test', single_end:false, num_intervals:2 ], // meta map
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
- file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true)
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
]
)
input[1] = [
@@ -140,6 +143,7 @@ nextflow_workflow {
[ id:'bed'],
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
]
+ input[4] = [[],[]]
"""
}
}
@@ -154,7 +158,7 @@ nextflow_workflow {
}
- test("2 samples - 2 bed, fasta, fai, bed") {
+ test("2 samples - 2 bed, fasta, fai, bed, []") {
when {
workflow {
@@ -170,7 +174,7 @@ nextflow_workflow {
[ id:'test', single_end:false, num_intervals:2 ], // meta map
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
- file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true)
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
],
[
[ id:'test2', single_end:false, num_intervals:2 ], // meta map
@@ -182,7 +186,7 @@ nextflow_workflow {
[ id:'test2', single_end:false, num_intervals:2 ], // meta map
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
- file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true)
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
]
)
input[1] = [
@@ -197,6 +201,7 @@ nextflow_workflow {
[ id:'bed'],
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
]
+ input[4] = [[],[]]
"""
}
}
@@ -211,7 +216,67 @@ nextflow_workflow {
}
- test("1 sample - no bed, fasta, fai, [] - stub") {
+ test("2 samples - 2 bed, fasta, fai, bed, par_bed") {
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of(
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test2', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test2', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ]
+ )
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [ id:'bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ]
+ input[4] = [
+ [ id:'par_bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match()
+ }
+ )
+ }
+ }
+
+ test("1 sample - no bed, fasta, fai, [], [] - stub") {
options "-stub"
@@ -233,6 +298,7 @@ nextflow_workflow {
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
]
input[3] = [[],[]]
+ input[4] = [[],[]]
"""
}
}
@@ -247,7 +313,7 @@ nextflow_workflow {
}
- test("1 sample - 1 bed, fasta, fai, [] - stub") {
+ test("1 sample - 1 bed, fasta, fai, [], [] - stub") {
options "-stub"
@@ -269,6 +335,7 @@ nextflow_workflow {
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
]
input[3] = [[],[]]
+ input[4] = [[],[]]
"""
}
}
@@ -283,7 +350,7 @@ nextflow_workflow {
}
- test("1 sample - 1 bed, fasta, fai, bed - stub") {
+ test("1 sample - 1 bed, fasta, fai, bed, [] - stub") {
options "-stub"
@@ -308,6 +375,7 @@ nextflow_workflow {
[ id:'bed'],
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
]
+ input[4] = [[],[]]
"""
}
}
@@ -322,7 +390,7 @@ nextflow_workflow {
}
- test("1 sample - 2 bed, fasta, fai, bed - stub") {
+ test("1 sample - 2 bed, fasta, fai, bed, [] - stub") {
options "-stub"
@@ -340,7 +408,7 @@ nextflow_workflow {
[ id:'test', single_end:false, num_intervals:2 ], // meta map
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
- file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true)
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
]
)
input[1] = [
@@ -355,6 +423,7 @@ nextflow_workflow {
[ id:'bed'],
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
]
+ input[4] = [[],[]]
"""
}
}
@@ -369,7 +438,7 @@ nextflow_workflow {
}
- test("2 samples - 2 bed, fasta, fai, bed - stub") {
+ test("2 samples - 2 bed, fasta, fai, bed, [] - stub") {
options "-stub"
@@ -387,7 +456,7 @@ nextflow_workflow {
[ id:'test', single_end:false, num_intervals:2 ], // meta map
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
- file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true)
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
],
[
[ id:'test2', single_end:false, num_intervals:2 ], // meta map
@@ -399,7 +468,7 @@ nextflow_workflow {
[ id:'test2', single_end:false, num_intervals:2 ], // meta map
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
- file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true)
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
]
)
input[1] = [
@@ -414,6 +483,70 @@ nextflow_workflow {
[ id:'bed'],
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
]
+ input[4] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match()
+ }
+ )
+ }
+
+ }
+
+ test("2 samples - 2 bed, fasta, fai, bed, par_bed - stub") {
+
+ options "-stub"
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of(
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test2', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ],
+ [
+ [ id:'test2', single_end:false, num_intervals:2 ], // meta map
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true),
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.multi_intervals.bed', checkIfExists: true)
+ ]
+ )
+ input[1] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = [
+ [ id:'genome'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true)
+ ]
+ input[3] = [
+ [ id:'bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true)
+ ]
+ input[4] = [
+ [ id:'par_bed'],
+ file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.blacklist_intervals.bed', checkIfExists: true)
+ ]
"""
}
}
diff --git a/subworkflows/local/short_variant_calling/tests/main.nf.test.snap b/subworkflows/local/short_variant_calling/tests/main.nf.test.snap
index 8212abbf..d35e9387 100644
--- a/subworkflows/local/short_variant_calling/tests/main.nf.test.snap
+++ b/subworkflows/local/short_variant_calling/tests/main.nf.test.snap
@@ -1,5 +1,5 @@
{
- "1 sample - 1 bed, fasta, fai, bed": {
+ "1 sample - 1 bed, fasta, fai, bed, []": {
"content": [
{
"0": [
@@ -78,9 +78,9 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-09T12:33:33.642550865"
+ "timestamp": "2024-08-13T12:18:48.194341252"
},
- "2 samples - 2 bed, fasta, fai, bed": {
+ "2 samples - 2 bed, fasta, fai, bed, [] - stub": {
"content": [
{
"0": [
@@ -89,14 +89,14 @@
"id": "test",
"single_end": false
},
- "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
],
[
{
"id": "test2",
"single_end": false
},
- "test2_norm_singlesample.bcf:md5,61a94a7b8bdf02d7d7de88e24c223f15"
+ "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
@@ -105,14 +105,14 @@
"id": "genome.bed",
"contains_affected": false
},
- "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
],
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.multi_intervals.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz:md5,e01b4f349dbb9a7da597262fac9c7675"
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"2": [
@@ -121,14 +121,14 @@
"id": "genome.bed",
"contains_affected": false
},
- "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
],
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.multi_intervals.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz.csi:md5,bed9fa291c220a1ba04eb2d448932ffc"
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"3": [
@@ -153,14 +153,14 @@
"id": "genome.bed",
"contains_affected": false
},
- "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
],
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.multi_intervals.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz:md5,e01b4f349dbb9a7da597262fac9c7675"
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"combined_csi": [
@@ -169,14 +169,14 @@
"id": "genome.bed",
"contains_affected": false
},
- "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
],
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.multi_intervals.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz.csi:md5,bed9fa291c220a1ba04eb2d448932ffc"
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"snp_calls_vcf": [
@@ -185,14 +185,14 @@
"id": "test",
"single_end": false
},
- "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
],
[
{
"id": "test2",
"single_end": false
},
- "test2_norm_singlesample.bcf:md5,61a94a7b8bdf02d7d7de88e24c223f15"
+ "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
@@ -217,9 +217,9 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-09T12:34:36.087668576"
+ "timestamp": "2024-08-13T12:38:57.50673848"
},
- "2 samples - 2 bed, fasta, fai, bed - stub": {
+ "2 samples - 2 bed, fasta, fai, bed, par_bed - stub": {
"content": [
{
"0": [
@@ -248,10 +248,10 @@
],
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.multi_intervals.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"2": [
@@ -264,10 +264,10 @@
],
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.multi_intervals.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"3": [
@@ -296,10 +296,10 @@
],
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.multi_intervals.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"combined_csi": [
@@ -312,10 +312,10 @@
],
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.multi_intervals.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"snp_calls_vcf": [
@@ -356,9 +356,9 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-09T12:35:27.910838148"
+ "timestamp": "2024-08-13T12:39:09.239296314"
},
- "1 sample - no bed, fasta, fai, []": {
+ "1 sample - 1 bed, fasta, fai, bed, [] - stub": {
"content": [
{
"0": [
@@ -367,29 +367,25 @@
"id": "test",
"single_end": false
},
- "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
[
{
- "id": [
-
- ],
+ "id": "genome.bed",
"contains_affected": false
},
- "[]_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"2": [
[
{
- "id": [
-
- ],
+ "id": "genome.bed",
"contains_affected": false
},
- "[]_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"3": [
@@ -403,23 +399,19 @@
"combined_bcf": [
[
{
- "id": [
-
- ],
+ "id": "genome.bed",
"contains_affected": false
},
- "[]_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"combined_csi": [
[
{
- "id": [
-
- ],
+ "id": "genome.bed",
"contains_affected": false
},
- "[]_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"snp_calls_vcf": [
@@ -428,15 +420,154 @@
"id": "test",
"single_end": false
},
- "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:20:26.146017974"
+ },
+ "2 samples - 2 bed, fasta, fai, bed, []": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246"
+ ],
+ [
+ {
+ "id": "test2",
+ "single_end": false
+ },
+ "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ]
+ ],
+ "combined_csi": [
+ [
+ {
+ "id": "genome.bed",
+ "contains_affected": false
+ },
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ]
+ ],
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246"
+ ],
+ [
+ {
+ "id": "test2",
+ "single_end": false
+ },
+ "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84"
]
],
"versions": [
"versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
"versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
"versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
"versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
"versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
"versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
]
}
@@ -445,9 +576,9 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-09T12:32:38.562683632"
+ "timestamp": "2024-08-13T12:37:29.720749753"
},
- "1 sample - 1 bed, fasta, fai, []": {
+ "1 sample - 1 bed, fasta, fai, [], []": {
"content": [
{
"0": [
@@ -526,9 +657,9 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-09T12:33:06.128266568"
+ "timestamp": "2024-08-13T12:18:20.527237885"
},
- "1 sample - 1 bed, fasta, fai, [] - stub": {
+ "1 sample - 2 bed, fasta, fai, bed, [] - stub": {
"content": [
{
"0": [
@@ -547,6 +678,13 @@
"contains_affected": false
},
"genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"2": [
@@ -556,14 +694,25 @@
"contains_affected": false
},
"genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"3": [
"versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
"versions.yml:md5,13101c9283d4a82e859574b0a981311c",
"versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
"versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
"versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
"versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
],
"combined_bcf": [
@@ -573,6 +722,13 @@
"contains_affected": false
},
"genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"combined_csi": [
@@ -582,6 +738,13 @@
"contains_affected": false
},
"genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"snp_calls_vcf": [
@@ -595,10 +758,14 @@
],
"versions": [
"versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
"versions.yml:md5,13101c9283d4a82e859574b0a981311c",
"versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
"versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
"versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
"versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
]
}
@@ -607,9 +774,9 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-09T12:34:56.051878451"
+ "timestamp": "2024-08-13T12:38:45.950944715"
},
- "1 sample - 1 bed, fasta, fai, bed - stub": {
+ "2 samples - 2 bed, fasta, fai, bed, par_bed": {
"content": [
{
"0": [
@@ -618,7 +785,14 @@
"id": "test",
"single_end": false
},
- "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246"
+ ],
+ [
+ {
+ "id": "test2",
+ "single_end": false
+ },
+ "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84"
]
],
"1": [
@@ -627,7 +801,14 @@
"id": "genome.bed",
"contains_affected": false
},
- "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
]
],
"2": [
@@ -636,15 +817,30 @@
"id": "genome.bed",
"contains_affected": false
},
- "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
]
],
"3": [
"versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
"versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
"versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
"versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
"versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
"versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
],
"combined_bcf": [
@@ -653,7 +849,14 @@
"id": "genome.bed",
"contains_affected": false
},
- "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ "genome.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3"
]
],
"combined_csi": [
@@ -662,7 +865,14 @@
"id": "genome.bed",
"contains_affected": false
},
- "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
+ ],
+ [
+ {
+ "id": "genome.multi_intervals.bed",
+ "contains_affected": false
+ },
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626"
]
],
"snp_calls_vcf": [
@@ -671,15 +881,30 @@
"id": "test",
"single_end": false
},
- "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246"
+ ],
+ [
+ {
+ "id": "test2",
+ "single_end": false
+ },
+ "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84"
]
],
"versions": [
"versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
"versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
"versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
"versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
"versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
"versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
]
}
@@ -688,9 +913,9 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-09T12:35:05.660557092"
+ "timestamp": "2024-08-13T12:40:41.412678218"
},
- "1 sample - no bed, fasta, fai, [] - stub": {
+ "1 sample - no bed, fasta, fai, [], []": {
"content": [
{
"0": [
@@ -699,7 +924,7 @@
"id": "test",
"single_end": false
},
- "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
]
],
"1": [
@@ -710,7 +935,7 @@
],
"contains_affected": false
},
- "[]_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ "[]_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
]
],
"2": [
@@ -721,7 +946,7 @@
],
"contains_affected": false
},
- "[]_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ "[]_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
]
],
"3": [
@@ -740,7 +965,7 @@
],
"contains_affected": false
},
- "[]_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ "[]_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
]
],
"combined_csi": [
@@ -751,7 +976,7 @@
],
"contains_affected": false
},
- "[]_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ "[]_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
]
],
"snp_calls_vcf": [
@@ -760,7 +985,7 @@
"id": "test",
"single_end": false
},
- "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
]
],
"versions": [
@@ -777,9 +1002,9 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-09T12:34:45.861028555"
+ "timestamp": "2024-08-13T12:17:52.991579386"
},
- "1 sample - 2 bed, fasta, fai, bed": {
+ "1 sample - 2 bed, fasta, fai, bed, []": {
"content": [
{
"0": [
@@ -788,7 +1013,7 @@
"id": "test",
"single_end": false
},
- "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
+ "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246"
]
],
"1": [
@@ -801,10 +1026,10 @@
],
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.multi_intervals.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz:md5,61e29e75aa0dbcbe453877d07cac63e7"
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
]
],
"2": [
@@ -817,10 +1042,10 @@
],
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.multi_intervals.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz.csi:md5,bed9fa291c220a1ba04eb2d448932ffc"
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
]
],
"3": [
@@ -845,10 +1070,10 @@
],
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.multi_intervals.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz:md5,61e29e75aa0dbcbe453877d07cac63e7"
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b"
]
],
"combined_csi": [
@@ -861,10 +1086,10 @@
],
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.multi_intervals.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz.csi:md5,bed9fa291c220a1ba04eb2d448932ffc"
+ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9"
]
],
"snp_calls_vcf": [
@@ -873,7 +1098,7 @@
"id": "test",
"single_end": false
},
- "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c"
+ "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246"
]
],
"versions": [
@@ -894,9 +1119,9 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-09T12:34:03.458045229"
+ "timestamp": "2024-08-13T12:36:53.97901474"
},
- "1 sample - 2 bed, fasta, fai, bed - stub": {
+ "1 sample - no bed, fasta, fai, [], [] - stub": {
"content": [
{
"0": [
@@ -911,77 +1136,134 @@
"1": [
[
{
- "id": "genome.bed",
+ "id": [
+
+ ],
"contains_affected": false
},
- "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
- ],
+ "[]_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "2": [
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": [
+
+ ],
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ "[]_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
- "2": [
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
[
{
- "id": "genome.bed",
+ "id": [
+
+ ],
"contains_affected": false
},
- "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
- ],
+ "[]_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "combined_csi": [
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": [
+
+ ],
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ "[]_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
- "3": [
- "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "snp_calls_vcf": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
"versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
"versions.yml:md5,13101c9283d4a82e859574b0a981311c",
- "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
"versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
"versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
"versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
- "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
- "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
"versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-08-13T12:20:07.024378325"
+ },
+ "1 sample - 1 bed, fasta, fai, [], [] - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
],
- "combined_bcf": [
+ "1": [
[
{
"id": "genome.bed",
"contains_affected": false
},
"genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
- ],
+ ]
+ ],
+ "2": [
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
- "combined_csi": [
+ "3": [
+ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
+ "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
+ "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
+ "versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
+ "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
+ "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
+ ],
+ "combined_bcf": [
[
{
"id": "genome.bed",
"contains_affected": false
},
- "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
- ],
+ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
+ ]
+ ],
+ "combined_csi": [
[
{
- "id": "genome.blacklist_intervals.bed",
+ "id": "genome.bed",
"contains_affected": false
},
- "genome.blacklist_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
+ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"snp_calls_vcf": [
@@ -995,14 +1277,10 @@
],
"versions": [
"versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
- "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34",
- "versions.yml:md5,13101c9283d4a82e859574b0a981311c",
"versions.yml:md5,13101c9283d4a82e859574b0a981311c",
"versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44",
"versions.yml:md5,7d9ebdfc24f293b07e70dd2d18f44022",
"versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
- "versions.yml:md5,afe349eb9156445b91cacdcfaabcf43d",
- "versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6",
"versions.yml:md5,dce103ef6b2f37c6844db8191418b9e6"
]
}
@@ -1011,6 +1289,6 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-09T12:35:16.395171025"
+ "timestamp": "2024-08-13T12:20:16.739088461"
}
}
\ No newline at end of file
diff --git a/subworkflows/local/snv_annotation/tests/main.nf.test b/subworkflows/local/snv_annotation/tests/main.nf.test
index 0cfde2ab..bb02bc24 100644
--- a/subworkflows/local/snv_annotation/tests/main.nf.test
+++ b/subworkflows/local/snv_annotation/tests/main.nf.test
@@ -58,6 +58,9 @@ nextflow_workflow {
input[3] = [
[],[]
]
+ input[4] = [
+ [],[]
+ ]
"""
}
}
diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
index c23c11a5..f2b94156 100644
--- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
@@ -68,11 +68,12 @@ def workflowDependencies = [
]
//
-// E.g., the dipcall_par file is required by the assembly workflow and the assembly workflow can't run without dipcall_par
+// E.g., the par_regions file is required by the assembly and snv_calling workflows, which can't run without it
//
def fileDependencies = [
mapping : ["fasta", "somalier_sites"],
- assembly : ["fasta", "dipcall_par"], // The assembly workflow should be split into two - assembly and variant calling (requires ref)
+ assembly : ["fasta", "par_regions"], // The assembly workflow should be split into two - assembly and variant calling (requires ref)
+ snv_calling : ["fasta", "par_regions"],
snv_annotation : ["snp_db", "vep_cache", "reduced_penetrance", "score_config_snv", "variant_consequences_snv"],
cnv_calling : ["hificnv_xy", "hificnv_xx", "hificnv_exclude"],
repeat_calling : ["trgt_repeats"],
@@ -96,7 +97,7 @@ def parameterStatus = [
skip_assembly_wf : params.skip_assembly_wf,
],
files: [
- dipcall_par : params.dipcall_par,
+ par_regions : params.par_regions,
snp_db : params.snp_db,
somalier_sites : params.somalier_sites,
vep_cache : params.vep_cache,
diff --git a/tests/main.nf.test b/tests/main.nf.test
index ab94bb8c..f9c54bb5 100644
--- a/tests/main.nf.test
+++ b/tests/main.nf.test
@@ -19,7 +19,7 @@ nextflow_pipeline {
hificnv_xy = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XY.bed'
hificnv_xx = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XX.bed'
hificnv_exclude = params.pipelines_testdata_base_path + 'nallo/reference/empty.bed'
- dipcall_par = params.pipelines_testdata_base_path + 'nallo/reference/hs38.PAR.bed'
+ par_regions = params.pipelines_testdata_base_path + 'nallo/reference/hs38.PAR.bed'
trgt_repeats = params.pipelines_testdata_base_path + 'nallo/reference/pathogenic_repeats.hg38.bed'
variant_catalog = params.pipelines_testdata_base_path + 'nallo/reference/variant_catalog_grch38.json'
vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz'
@@ -151,7 +151,7 @@ nextflow_pipeline {
hificnv_xy = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XY.bed'
hificnv_xx = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XX.bed'
hificnv_exclude = params.pipelines_testdata_base_path + 'nallo/reference/empty.bed'
- dipcall_par = params.pipelines_testdata_base_path + 'nallo/reference/hs38.PAR.bed'
+ par_regions = params.pipelines_testdata_base_path + 'nallo/reference/hs38.PAR.bed'
trgt_repeats = params.pipelines_testdata_base_path + 'nallo/reference/pathogenic_repeats.hg38.bed'
variant_catalog = params.pipelines_testdata_base_path + 'nallo/reference/variant_catalog_grch38.json'
vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz'
diff --git a/workflows/nallo.nf b/workflows/nallo.nf
index 620af098..ffcc0e8f 100644
--- a/workflows/nallo.nf
+++ b/workflows/nallo.nf
@@ -79,7 +79,7 @@ workflow NALLO {
: Channel.value([[],[]])
ch_input_bed = params.bed ? Channel.fromPath(params.bed).map{ [ [ id:it.simpleName ] , it ] }.collect()
: Channel.value([[],[]])
- ch_par = params.dipcall_par ? Channel.fromPath(params.dipcall_par).collect()
+ ch_par = params.par_regions ? Channel.fromPath(params.par_regions).map { [ [ id: it.simpleName ], it ] }.collect()
: ''
ch_trgt_bed = params.trgt_repeats ? Channel.fromPath(params.trgt_repeats).map { it -> [ it.simpleName, it ] }.collect()
: ''
@@ -323,7 +323,7 @@ workflow NALLO {
// 1. A merged and normalised VCF, containing one sample with all regions, to be used in downstream subworkflows requiring SNVs.
// 2. A merged and normalised VCF, containing one region with all samples, to be used in annotation and ranking.
//
- SHORT_VARIANT_CALLING( ch_snv_calling_in, fasta, fai, SCATTER_GENOME.out.bed )
+ SHORT_VARIANT_CALLING( ch_snv_calling_in, fasta, fai, SCATTER_GENOME.out.bed, ch_par )
ch_versions = ch_versions.mix(SHORT_VARIANT_CALLING.out.versions)
//
From 341b8b579bb85e72cbdbe2df8d57338181b2e678 Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Thu, 15 Aug 2024 15:06:34 +0200
Subject: [PATCH 46/59] Split vep_cache into vep_cache and vep_plugin_files
(#314)
* Split vep plugins into vep cache and vep plugins
* review comments
* Keep SpliceAI in tests
---
CHANGELOG.md | 3 +++
assets/vep_plugin_files_schema.json | 26 +++++++++++++++++++
conf/modules/snv_annotation.config | 7 +++--
conf/test.config | 24 +++++++++++++++--
docs/usage.md | 18 +++++++++++++
nextflow.config | 1 +
nextflow_schema.json | 6 +++++
subworkflows/local/snv_annotation/main.nf | 3 ++-
.../local/snv_annotation/tests/main.nf.test | 18 ++++++++-----
.../local/utils_nfcore_nallo_pipeline/main.nf | 2 +-
tests/main.nf.test | 2 ++
tests/nextflow.config | 19 ++++++++++++++
workflows/nallo.nf | 18 +++++++++++++
13 files changed, 133 insertions(+), 14 deletions(-)
create mode 100644 assets/vep_plugin_files_schema.json
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ba3065ad..1c73adf8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#266](https://github.com/genomic-medicine-sweden/nallo/pull/266) - Added CADD to dynamically calculate indel CADD-scores
- [#270](https://github.com/genomic-medicine-sweden/nallo/pull/270) - Added SNV phasing stats to MultiQC
- [#271](https://github.com/genomic-medicine-sweden/nallo/pull/271) - Added a `--skip_aligned_read_qc` parameter to skip the qc aligned reads subworkflow
+- [#314](https://github.com/genomic-medicine-sweden/nallo/pull/314) - Added a `--vep_plugin_files` parameter to separate VEP plugins from cache
### `Changed`
@@ -59,6 +60,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#308](https://github.com/genomic-medicine-sweden/nallo/pull/308) - Updated nf-core modules, fixed warnings in local modules, added Dockerfile to fqcrs
- [#312](https://github.com/genomic-medicine-sweden/nallo/pull/312) - Changed echtvar encode database creation to use dynamic `${project}` from samplesheet
- [#313](https://github.com/genomic-medicine-sweden/nallo/pull/313) - Updated calling of variants in non-autosomal contigs for DeepVariant
+- [#314](https://github.com/genomic-medicine-sweden/nallo/pull/314) - Changed VEP annotation added in #244 to not include SpliceAI
### `Removed`
@@ -93,6 +95,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
| `--extra_gvcfs` | |
| `--extra_snfs` | |
| `--dipcall_par` | `--par_regions` |
+| | `--vep_plugin_files` |
> [!NOTE]
> Parameter has been updated if both old and new parameter information is present.
diff --git a/assets/vep_plugin_files_schema.json b/assets/vep_plugin_files_schema.json
new file mode 100644
index 00000000..d904317b
--- /dev/null
+++ b/assets/vep_plugin_files_schema.json
@@ -0,0 +1,26 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema",
+ "$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/vep_plugin_files_schema.json",
+ "title": "Schema for VEP plugin files and their indices",
+ "description": "Schema for VEP plugin files and their indices",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "vep_files": {
+ "type": "string",
+ "anyOf": [
+ {
+ "format": "file-path"
+ },
+ {
+ "format": "directory-path"
+ }
+ ],
+ "exists": true,
+ "description": "Path to vep plugin files and their indices"
+ }
+ },
+ "required": ["vep_files"]
+ }
+}
diff --git a/conf/modules/snv_annotation.config b/conf/modules/snv_annotation.config
index 393e4cfd..36be4406 100644
--- a/conf/modules/snv_annotation.config
+++ b/conf/modules/snv_annotation.config
@@ -39,10 +39,9 @@ process {
withName: '.*:SNV_ANNOTATION:ENSEMBLVEP_VEP' {
ext.prefix = { "${meta.id}_vep" }
ext.args = { [
- "--dir_plugins ${cache}/Plugins",
- "--plugin LoFtool,${cache}/LoFtool_scores.txt",
- "--plugin pLI,${cache}/pLI_values.txt",
- "--plugin SpliceAI,snv=${cache}/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=${cache}/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz",
+ "--dir_plugins .",
+ "--plugin LoFtool,LoFtool_scores.txt",
+ "--plugin pLI,pLI_values.txt",
'--distance 5000',
'--buffer_size 20000',
'--format vcf --max_sv_size 248387328',
diff --git a/conf/test.config b/conf/test.config
index eadb4cd8..8cc9b923 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -40,8 +40,9 @@ params {
variant_catalog = params.pipelines_testdata_base_path + 'nallo/reference/variant_catalog_grch38.json'
// SNV Annotation
- vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz'
- snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv'
+ vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz'
+ vep_plugin_files = params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv'
+ snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv'
// Rank variants
reduced_penetrance = params.pipelines_testdata_base_path + 'nallo/reference/reduced_penetrance.tsv'
@@ -65,6 +66,25 @@ process {
ext.args = '--gene hba'
}
+ withName: '.*:SNV_ANNOTATION:ENSEMBLVEP_VEP' {
+ ext.prefix = { "${meta.id}_vep" }
+ ext.args = { [
+ "--dir_plugins .",
+ "--plugin LoFtool,LoFtool_scores.txt",
+ "--plugin pLI,pLI_values.txt",
+ "--plugin SpliceAI,snv=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=spliceai_21_scores_raw_indel_-v1.3-.vcf.gz",
+ '--distance 5000',
+ '--buffer_size 20000',
+ '--format vcf --max_sv_size 248387328',
+ '--appris --biotype --cache --canonical --ccds --compress_output bgzip',
+ '--domains --exclude_predicted --force_overwrite',
+ '--hgvs --humdiv --no_progress --numbers',
+ '--polyphen p --protein --offline --regulatory --sift p --symbol --tsl',
+ '--uniprot --vcf',
+ '--no_stats'
+ ].join(' ') }
+ }
+
withName: '.*:NALLO:PHASING:WHATSHAP_PHASE' {
ext.args = '--ignore-read-groups --indels --distrust-genotypes --include-homozygous'
}
diff --git a/docs/usage.md b/docs/usage.md
index 0b9110f6..9aeec0df 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -112,6 +112,23 @@ Some workflows require additional files:
- If running without `--skip_repeat_annotation`, download a json variant catalog, (e.g. [variant_catalog_grch38.json](https://github.com/Clinical-Genomics/stranger/raw/main/stranger/resources/variant_catalog_grch38.json)) matching your reference genome to supply with `--variant_catalog`.
- If running without `--skip_snv_annotation`, download [VEP cache](https://ftp.ensembl.org/pub/release-110/variation/vep/homo_sapiens_vep_110_GRCh38.tar.gz) to supply with `--vep_cache` and prepare a samplesheet with annotation databases ([`echtvar encode`](https://github.com/brentp/echtvar)) to supply with `--snp_db`:
+- If running without `--skip_snv_annotation`, you will also need to download VEP plugin files and list them in a csv file to supply with `--vep_plugin_files` (see [example](https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugin_files.csv)). Files for the pLI and LoFtool plugins are required:
+
+```
+vep_files
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/SpliceAI.pm
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/LoFtool.pm
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/dbNSFP.pm
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/plugin_config.txt
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_indel_-v1.3-.vcf.gz.tbi
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_indel_-v1.3-.vcf.gz
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz.tbi
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/pLI_values.txt
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/pLI.pm
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/MaxEntScan.pm
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/LoFtool_scores.txt
+```
```
sample,file
@@ -247,6 +264,7 @@ Different processes may need extra input files
| `trgt_repeats` | BED-file for repeats to be genotyped | `string` | | | |
| `snp_db` | Extra echtvar-databases to annotate SNVs with | `string` | | | |
| `vep_cache` | Path to directory of vep_cache | `string` | | | |
+| `vep_plugin_files` | A csv file with paths to vep plugin files. pLI and LoFtool are required. | `string` | | | |
| `bed` | BED file with regions of interest | `string` | | | |
| `hificnv_xy` | | `string` | | | |
| `hificnv_xx` | | `string` | | | |
diff --git a/nextflow.config b/nextflow.config
index 8c28585d..1f7d92d6 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -23,6 +23,7 @@ params {
snp_db = null
variant_consequences_snv = null
vep_cache = null
+ vep_plugin_files = null
hificnv_xy = null
hificnv_xx = null
hificnv_exclude = null
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 4859a2aa..3626f253 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -383,6 +383,12 @@
"default": 110,
"description": "VEP cache version"
},
+ "vep_plugin_files": {
+ "type": "string",
+ "mimetype": "text/csv",
+ "description": "A csv file with paths to vep plugin files. pLI and LoFtool are required.",
+ "schema": "assets/vep_plugin_files_schema.json"
+ },
"deepvariant_model_type": {
"type": "string",
"default": "PACBIO",
diff --git a/subworkflows/local/snv_annotation/main.nf b/subworkflows/local/snv_annotation/main.nf
index 16994f41..1bcc22e7 100644
--- a/subworkflows/local/snv_annotation/main.nf
+++ b/subworkflows/local/snv_annotation/main.nf
@@ -13,6 +13,7 @@ workflow SNV_ANNOTATION {
ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_vep_cache // channel: [mandatory] [ path(cache) ]
val_vep_cache_version // string: [mandatory] default: 110
+ ch_vep_extra_files // channel: [mandatory] [ path(files) ]
val_annotate_cadd // bool: [mandatory]
ch_cadd_header // channel: [mandatory] [ path(txt) ]
ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
@@ -59,7 +60,7 @@ workflow SNV_ANNOTATION {
val_vep_cache_version,
ch_vep_cache,
ch_fasta,
- []
+ ch_vep_extra_files
)
ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions)
diff --git a/subworkflows/local/snv_annotation/tests/main.nf.test b/subworkflows/local/snv_annotation/tests/main.nf.test
index bb02bc24..6009d4c2 100644
--- a/subworkflows/local/snv_annotation/tests/main.nf.test
+++ b/subworkflows/local/snv_annotation/tests/main.nf.test
@@ -91,10 +91,13 @@ nextflow_workflow {
input[3] = SAMTOOLS_FAIDX.out.fai
input[4] = UNTAR.out.untar.map { meta, cache -> cache }
input[5] = Channel.value('110')
- input[6] = false
- input[7] = Channel.value([])
- input[8] = null
+ input[6] = [
+ file(params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv', checkIfExists: true)
+ ]
+ input[7] = false
+ input[8] = Channel.value([])
input[9] = null
+ input[10] = null
"""
}
}
@@ -130,10 +133,13 @@ nextflow_workflow {
input[3] = SAMTOOLS_FAIDX.out.fai
input[4] = UNTAR.out.untar.map { meta, cache -> cache }
input[5] = Channel.value('110')
- input[6] = false
- input[7] = Channel.value([])
- input[8] = null
+ input[6] = [
+ file(params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv', checkIfExists: true)
+ ]
+ input[7] = false
+ input[8] = Channel.value([])
input[9] = null
+ input[10] = null
"""
}
}
diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
index f2b94156..966c2b21 100644
--- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
@@ -74,7 +74,7 @@ def fileDependencies = [
mapping : ["fasta", "somalier_sites"],
assembly : ["fasta", "par_regions"], // The assembly workflow should be split into two - assembly and variant calling (requires ref)
snv_calling : ["fasta", "par_regions"],
- snv_annotation : ["snp_db", "vep_cache", "reduced_penetrance", "score_config_snv", "variant_consequences_snv"],
+ snv_annotation : ["snp_db", "vep_cache", "vep_plugin_files", "reduced_penetrance", "score_config_snv", "variant_consequences_snv"],
cnv_calling : ["hificnv_xy", "hificnv_xx", "hificnv_exclude"],
repeat_calling : ["trgt_repeats"],
repeat_annotation: ["variant_catalog"],
diff --git a/tests/main.nf.test b/tests/main.nf.test
index f9c54bb5..7584d5ad 100644
--- a/tests/main.nf.test
+++ b/tests/main.nf.test
@@ -23,6 +23,7 @@ nextflow_pipeline {
trgt_repeats = params.pipelines_testdata_base_path + 'nallo/reference/pathogenic_repeats.hg38.bed'
variant_catalog = params.pipelines_testdata_base_path + 'nallo/reference/variant_catalog_grch38.json'
vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz'
+ vep_plugin_files = params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv'
snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv'
somalier_sites = params.pipelines_testdata_base_path + 'nallo/reference/somalier_sites.vcf.gz'
reduced_penetrance = params.pipelines_testdata_base_path + 'nallo/reference/reduced_penetrance.tsv'
@@ -155,6 +156,7 @@ nextflow_pipeline {
trgt_repeats = params.pipelines_testdata_base_path + 'nallo/reference/pathogenic_repeats.hg38.bed'
variant_catalog = params.pipelines_testdata_base_path + 'nallo/reference/variant_catalog_grch38.json'
vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz'
+ vep_plugin_files = params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv'
snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv'
somalier_sites = params.pipelines_testdata_base_path + 'nallo/reference/somalier_sites.vcf.gz'
reduced_penetrance = params.pipelines_testdata_base_path + 'nallo/reference/reduced_penetrance.tsv'
diff --git a/tests/nextflow.config b/tests/nextflow.config
index 05f743ea..3c53747a 100644
--- a/tests/nextflow.config
+++ b/tests/nextflow.config
@@ -55,4 +55,23 @@ process {
ext.args = '--ignore-read-groups --indels --distrust-genotypes --include-homozygous'
}
+ withName: '.*:NALLO:SNV_ANNOTATION:ENSEMBLVEP_VEP' {
+ ext.prefix = { "${meta.id}_vep" }
+ ext.args = { [
+ "--dir_plugins .",
+ "--plugin LoFtool,LoFtool_scores.txt",
+ "--plugin pLI,pLI_values.txt",
+ "--plugin SpliceAI,snv=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=spliceai_21_scores_raw_indel_-v1.3-.vcf.gz",
+ '--distance 5000',
+ '--buffer_size 20000',
+ '--format vcf --max_sv_size 248387328',
+ '--appris --biotype --cache --canonical --ccds --compress_output bgzip',
+ '--domains --exclude_predicted --force_overwrite',
+ '--hgvs --humdiv --no_progress --numbers',
+ '--polyphen p --protein --offline --regulatory --sift p --symbol --tsl',
+ '--uniprot --vcf',
+ '--no_stats'
+ ].join(' ') }
+ }
+
}
diff --git a/workflows/nallo.nf b/workflows/nallo.nf
index ffcc0e8f..0597fb8b 100644
--- a/workflows/nallo.nf
+++ b/workflows/nallo.nf
@@ -91,6 +91,8 @@ workflow NALLO {
: Channel.value([])
ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [ [ id:'vep_cache' ], it ] }.collect()
: Channel.value([[],[]])
+ ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect()
+ : ''
ch_expected_xy_bed = params.hificnv_xy ? Channel.fromPath(params.hificnv_xy).collect()
: ''
ch_expected_xx_bed = params.hificnv_xx ? Channel.fromPath(params.hificnv_xx).collect()
@@ -121,6 +123,21 @@ workflow NALLO {
.collect()
.set { ch_pedfile }
+ // Read and store paths in the vep_plugin_files file
+ if (params.vep_plugin_files) {
+ ch_vep_extra_files_unsplit.splitCsv ( header:true )
+ .map { row ->
+ path = file(row.vep_files[0])
+ if(path.isFile() || path.isDirectory()){
+ return [path]
+ } else {
+ error("\nVep database file ${path} does not exist.")
+ }
+ }
+ .collect()
+ .set {ch_vep_extra_files}
+ }
+
//
// Convert BAM files to FASTQ and vice versa
//
@@ -341,6 +358,7 @@ workflow NALLO {
fai.map { name, fai -> [ [ id: name ], fai ] },
ch_vep_cache,
params.vep_cache_version,
+ ch_vep_extra_files,
(params.cadd_resources && params.cadd_prescored),
ch_cadd_header,
ch_cadd_resources,
From 4548dc2a41a6b184d3a8025ad95ed03e97a5491f Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Thu, 15 Aug 2024 15:49:01 +0200
Subject: [PATCH 47/59] Update citations (#320)
* citations
* dynamic method description
* citations
* Add back MultiQC
* Add back fastqc
---
CHANGELOG.md | 1 +
CITATIONS.md | 94 ++++++++-
.../local/utils_nfcore_nallo_pipeline/main.nf | 186 ++++++++++++++----
3 files changed, 241 insertions(+), 40 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c73adf8..b92140f4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#270](https://github.com/genomic-medicine-sweden/nallo/pull/270) - Added SNV phasing stats to MultiQC
- [#271](https://github.com/genomic-medicine-sweden/nallo/pull/271) - Added a `--skip_aligned_read_qc` parameter to skip the qc aligned reads subworkflow
- [#314](https://github.com/genomic-medicine-sweden/nallo/pull/314) - Added a `--vep_plugin_files` parameter to separate VEP plugins from cache
+- [#320](https://github.com/genomic-medicine-sweden/nallo/pull/320) - Added complete citations to CITATIONS.md and MultiQC report
### `Changed`
diff --git a/CITATIONS.md b/CITATIONS.md
index 10efcd38..811889b7 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -10,10 +10,78 @@
## Pipeline tools
+- [BCFtools](https://academic.oup.com/gigascience/article/10/2/giab008/6137722) & [SAMtools](https://academic.oup.com/bioinformatics/article/25/16/2078/204688)
+
+ > Danecek P, Bonfield JK, Liddle J, et al. Twelve years of SAMtools and BCFtools. GigaScience. 2021;10(2):giab008. doi:10.1093/gigascience/giab008
+
+- [BEDTools](https://academic.oup.com/bioinformatics/article/26/6/841/244688)
+
+ > Quinlan AR and Hall IM, 2010. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 26, 6, pp. 841–842.
+
+- [cramino](https://academic.oup.com/bioinformatics/article/39/5/btad311/7160911)
+
+ > Wouter De Coster, Rosa Rademakers, NanoPack2: population-scale evaluation of long-read sequencing data, Bioinformatics, Volume 39, Issue 5, May 2023, btad311, https://doi.org/10.1093/bioinformatics/btad311
+
+- [CADD-Splice](https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-021-00835-9) & [CADD](https://academic.oup.com/nar/article/47/D1/D886/5146191)
+
+ > Rentzsch P, Schubach M, Shendure J, Kircher M. CADD-Splice—improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Med. 2021;13(1):31. doi:10.1186/s13073-021-00835-9
+
+ > Rentzsch P, Witten D, Cooper GM, Shendure J, Kircher M. CADD: predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research. 2019;47(D1):D886-D894. doi:10.1093/nar/gky1016
+
+- [DeepVariant](https://www.nature.com/articles/nbt.4235)
+
+ > Poplin R, Chang PC, Alexander D, et al. A universal SNP and small-indel variant caller using deep neural networks. Nat Biotechnol. 2018;36(10):983-987. doi:10.1038/nbt.4235
+
+- [dipcall](https://www.nature.com/articles/s41592-018-0054-7)
+
+ > Li H, Bloom JM, Farjoun Y, Fleharty M, Gauthier L, Neale B, MacArthur D (2018) A synthetic-diploid benchmark for accurate variant-calling evaluation. Nat Methods, 15:595-597. [PMID:30013044]
+
+- [echtvar](https://academic.oup.com/nar/article/51/1/e3/6775383)
+
+ > Brent S Pedersen, Jeroen de Ridder, Echtvar: compressed variant representation for rapid annotation and filtering of SNPs and indels, Nucleic Acids Research, Volume 51, Issue 1, 11 January 2023, Page e3, https://doi.org/10.1093/nar/gkac931
+
+- [Ensembl VEP](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0974-4)
+
+ > McLaren W, Gil L, Hunt SE, et al. The Ensembl Variant Effect Predictor. Genome Biol. 2016;17(1):122. doi:10.1186/s13059-016-0974-4
+
- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
> Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].
+- [fqcrs](https://github.com/fellen31/fqcrs)
+
+- [Genmod](https://github.com/Clinical-Genomics/genmod)
+
+ > Magnusson M, Hughes T, Glabilloy, Bitdeli Chef. genmod: Version 3.7.3. Published online November 15, 2018. doi:10.5281/ZENODO.3841142
+
+- [Gfastats](https://academic.oup.com/bioinformatics/article/38/17/4214/6633308)
+
+ > Giulio Formenti, Linelle Abueg, Angelo Brajuka, Nadolina Brajuka, Cristóbal Gallardo-Alba, Alice Giani, Olivier Fedrigo, Erich D Jarvis, Gfastats: conversion, evaluation and manipulation of genome sequences using assembly graphs, Bioinformatics, Volume 38, Issue 17, September 2022, Pages 4214–4216, https://doi.org/10.1093/bioinformatics/btac460
+
+- [GLnexus](https://academic.oup.com/bioinformatics/article/36/24/5582/6064144)
+
+ > Yun T, Li H, Chang PC, Lin MF, Carroll A, McLean CY. Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Robinson P, ed. Bioinformatics. 2021;36(24):5582-5589. doi:10.1093/bioinformatics/btaa1081
+
+- [hifiasm](https://www.nature.com/articles/s41592-020-01056-5)
+
+ > Cheng, H., Concepcion, G.T., Feng, X. et al. Haplotype-resolved de novo assembly using phased assembly graphs with hifiasm. Nat Methods 18, 170–175 (2021). https://doi.org/10.1038/s41592-020-01056-5
+
+- [HiPhase](https://academic.oup.com/bioinformatics/article/40/2/btae042/7588891)
+
+ > James M Holt, Christopher T Saunders, William J Rowell, Zev Kronenberg, Aaron M Wenger, Michael Eberle, HiPhase: jointly phasing small, structural, and tandem repeat variants from HiFi sequencing, Bioinformatics, Volume 40, Issue 2, February 2024, btae042, https://doi.org/10.1093/bioinformatics/btae042
+
+- [HiFiCNV](https://github.com/PacificBiosciences/HiFiCNV)
+
+- [minimap2](https://academic.oup.com/bioinformatics/article/34/18/3094/4994778)
+
+ > Heng Li, Minimap2: pairwise alignment for nucleotide sequences, Bioinformatics, Volume 34, Issue 18, September 2018, Pages 3094–3100, https://doi.org/10.1093/bioinformatics/bty191
+
+- [modkit](https://github.com/nanoporetech/modkit)
+
+- [Mosdepth](https://academic.oup.com/bioinformatics/article/34/5/867/4583630)
+
+ > Pedersen BS, Quinlan AR. Mosdepth: quick coverage calculation for genomes and exomes. Hancock J, ed. Bioinformatics. 2018;34(5):867-868. doi:10.1093/bioinformatics/btx699
+
- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
@@ -22,9 +90,33 @@
> Genome-wide profiling of highly similar paralogous genes using HiFi sequencing. Xiao Chen, Daniel Baker, Egor Dolzhenko, Joseph M Devaney, Jessica Noya, April S Berlyoung, Rhonda Brandon, Kathleen S Hruska, Lucas Lochovsky, Paul Kruszka, Scott Newman, Emily Farrow, Isabelle Thiffault, Tomi Pastinen, Dalia Kasperaviciute, Christian Gilissen, Lisenka Vissers, Alexander Hoischen, Seth Berger, Eric Vilain, Emmanuèle Délot, UCI Genomics Research to Elucidate the Genetics of Rare Diseases (UCI GREGoR) Consortium, Michael A Eberle. bioRxiv 2024.04.19.590294; doi: https://doi.org/10.1101/2024.04.19.590294
+- [Sniffles2](https://www.nature.com/articles/s41587-023-02024-y)
+
+ > Smolka, M., Paulin, L.F., Grochowski, C.M. et al. Detection of mosaic and population-level structural variants with Sniffles2. Nat Biotechnol (2024). https://doi.org/10.1038/s41587-023-02024-y
+
+- [Somalier](https://genomemedicine.biomedcentral.com/articles/10.1186/s13073-020-00761-2)
+
+ > Pedersen, B.S., Bhetariya, P.J., Brown, J. et al. Somalier: rapid relatedness estimation for cancer and germline studies using efficient genome sketches. Genome Med 12, 62 (2020). https://doi.org/10.1186/s13073-020-00761-2
+
+- [splitubam](https://github.com/fellen31/splitubam)
+
- [stranger](https://github.com/Clinical-Genomics/stranger)
- > Nilsson D, Magnusson M. moonso/stranger v0.7.1. https://zenodo.org/doi/10.5281/zenodo.3841097
+ > Nilsson D, Magnusson M. moonso/stranger v0.7.1. Published online February 18, 2021. doi:10.5281/ZENODO.4548873
+
+- [Tabix](https://academic.oup.com/bioinformatics/article/27/5/718/262743)
+
+ > Li H. Tabix: fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics. 2011;27(5):718-719. doi:10.1093/bioinformatics/btq671
+
+- [TRGT](https://www.nature.com/articles/s41587-023-02057-3)
+
+ > Dolzhenko, E., English, A., Dashnow, H. et al. Characterization and visualization of tandem repeats at genome scale. Nat Biotechnol (2024). https://doi.org/10.1038/s41587-023-02057-3
+
+- [WhatsHap](https://www.biorxiv.org/content/10.1101/085050v2)
+
+ > Marcel Martin, Murray Patterson, Shilpa Garg, Sarah O Fischer, Nadia Pisanti, Gunnar W Klau, Alexander Schöenhuth, Tobias Marschall. WhatsHap: fast and accurate read-based phasing. bioRxiv 085050; doi: https://doi.org/10.1101/085050
+
+- [yak](https://github.com/lh3/yak)
## Software packaging/containerisation tools
diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
index 966c2b21..c315bf96 100644
--- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
@@ -310,55 +310,163 @@ def genomeExistsError() {
//
def toolCitationText() {
- def repeat_annotation_text = []
- def preprocessing_text = []
- def other_citation_text = []
-
- if (!params.skip_repeat_annotation) {
- repeat_annotation_text = [
- "stranger (Nilsson & Magnusson, 2021),"
+ def citation_text = [
+ "MultiQC (Ewels et al. 2016)",
+ "SAMtools (Danecek et al. 2021)",
+ ]
+ if (!params.skip_raw_read_qc) {
+ citation_text = citation_text + [
+ "FastQC (Andrews 2010)",
+ "fqcrs",
]
}
- preprocessing_text = [
- "FastQC (Andrews 2010),",
- ]
- other_citation_text = [
- "MultiQC (Ewels et al. 2016),",
- "."
- ]
- def concat_text = repeat_annotation_text +
- preprocessing_text +
- other_citation_text
+ if (!params.skip_mapping_wf) {
+ if (params.parallel_alignments > 1) {
+ citation_text = citation_text + [
+ "splitubam",
+ ]
+ }
+ citation_text = citation_text + [
+ "SAMtools (Danecek et al. 2021)",
+ "Minimap2 (Li 2018)",
+ "Somalier (Pedersen et al. 2020)",
+ "Sniffles2 (Smolka et al. 2024)",
+ ]
+ if (!params.skip_aligned_read_qc) {
+ citation_text = citation_text + [
+ "cramino (De Coster & Rademakers 2023)",
+ "mosdepth (Pedersen & Quinlan 2018)",
+ ]
+ }
+ if (!params.skip_call_paralogs) {
+ citation_text = citation_text + [
+ "paraphase",
+ ]
+ }
+ if (!params.skip_assembly_wf) {
+ if (params.hifiasm_mode == 'trio-binning') {
+ citation_text = citation_text + [
+ "yak",
+ ]
+ }
+ citation_text = citation_text + [
+ "Hifiasm (Cheng et al. 2021)",
+ "Gfastats (Formenti et al. 2022)",
+ "dipcall (Li et al. 2018)",
+ "SAMtools (Danecek et al. 2021)",
+ "Minimap2 (Li 2018)",
+ ]
+ }
+ if (!params.skip_short_variant_calling) {
+ citation_text = citation_text + [
+ "BEDTools (Quinlan & Hall 2010)",
+ "BCFtools (Danecek et al. 2021)",
+ "DeepVariant (Poplin et al. 2018)",
+ "GLnexus (Yun et al. 2021)",
+ ]
+ }
+ if (!params.skip_snv_annotation) {
+ citation_text = citation_text + [
+ "CADD (Rentzsch et al. 2019, Rentzsch et al. 2021)",
+ "BCFtools (Danecek et al. 2021)",
+ "VEP (McLaren et al. 2016)",
+ "Tabix (Li 2011)",
+ "Echtvar (Pedersen & de Ridder 2023)",
+ ]
+ if (!params.skip_rank_variants) {
+ citation_text = citation_text + [
+ "Genmod (Magnusson et al. 2018)",
+ "Tabix (Li 2011)",
+ ]
+ }
+ }
+ if (!params.skip_cnv_calling) {
+ citation_text = citation_text + [
+ "HiFiCNV",
+ ]
+ }
+ if (!params.skip_phasing_wf) {
+ citation_text = citation_text + [
+ "SAMtools (Danecek et al. 2021)",
+ "cramino (De Coster & Rademakers 2023)",
+ ]
+ if(params.phaser == 'whatshap') {
+ citation_text = citation_text + [
+ "WhatsHap (Martin et al. 2016)",
+ ]
+ }
+ if(params.phaser == 'hiphase_sv') {
+ citation_text = citation_text + [
+ "HiPhase (Holt et al. 2024)",
+ ]
+ }
+ if(params.phaser == 'hiphase_snv') {
+ citation_text = citation_text + [
+ "HiPhase (Holt et al. 2024)",
+ ]
+ }
+ if (!params.skip_methylation_wf) {
+ citation_text = citation_text + [
+ "modkit",
+ "Tabix (Li 2011)",
+ ]
+ }
+ if (!params.skip_repeat_calling) {
+ citation_text = citation_text + [
+ "TRGT (Dolzhenko et al. 2024)",
+ ]
+ if (!params.skip_repeat_annotation) {
+ citation_text = citation_text + [
+ "Stranger (Nilsson & Magnusson 2021)",
+ ]
+ }
+ }
+ }
+ }
- def citation_text = [ "Tools used in the workflow included:" ] + concat_text.unique(false) { a, b -> a <=> b } - ""
- return citation_text.join(' ').trim()
+ def return_text = "Tools used in the workflow included: " + citation_text.unique(false) { a, b -> a <=> b }.join(', ') - "" + "."
+ return return_text
}
def toolBibliographyText() {
- def repeat_annotation_text = []
- def preprocessing_text = []
- def other_citation_text = []
-
- if (!params.skip_repeat_annotation) {
- repeat_annotation_text = [
- "Nilsson, D., & Magnusson, M. (2021). Moonso/stranger v0.9.1 (v0.9.1) [Computer software]. Zenodo. https://zenodo.org/doi/10.5281/zenodo.3841097"
- ]
- }
- preprocessing_text = [
+ def reference_text = [ // `def` keeps this local to the function instead of leaking into the script binding
"Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/",
- ]
-
- other_citation_text = [
- "Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354"
+ "Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031.",
+ "Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.",
+ "Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311.",
+ "Danecek P, Bonfield JK, Liddle J, et al. Twelve years of SAMtools and BCFtools. GigaScience. 2021;10(2):giab008. doi:10.1093/gigascience/giab008",
+ "Quinlan AR and Hall IM, 2010. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 26, 6, pp. 841–842.",
+ "Wouter De Coster, Rosa Rademakers, NanoPack2: population-scale evaluation of long-read sequencing data, Bioinformatics, Volume 39, Issue 5, May 2023, btad311, https://doi.org/10.1093/bioinformatics/btad311",
+ "Rentzsch P, Schubach M, Shendure J, Kircher M. CADD-Splice—improving genome-wide variant effect prediction using deep learning-derived splice scores. Genome Med. 2021;13(1):31. doi:10.1186/s13073-021-00835-9",
+ "Rentzsch P, Witten D, Cooper GM, Shendure J, Kircher M. CADD: predicting the deleteriousness of variants throughout the human genome. Nucleic Acids Research. 2019;47(D1):D886-D894. doi:10.1093/nar/gky1016",
+ "Poplin R, Chang PC, Alexander D, et al. A universal SNP and small-indel variant caller using deep neural networks. Nat Biotechnol. 2018;36(10):983-987. doi:10.1038/nbt.4235",
+ "Li H, Bloom JM, Farjoun Y, Fleharty M, Gauthier L, Neale B, MacArthur D (2018) A synthetic-diploid benchmark for accurate variant-calling evaluation. Nat Methods, 15:595-597. [PMID:30013044]",
+ "Brent S Pedersen, Jeroen de Ridder, Echtvar: compressed variant representation for rapid annotation and filtering of SNPs and indels, Nucleic Acids Research, Volume 51, Issue 1, 11 January 2023, Page e3, https://doi.org/10.1093/nar/gkac931",
+ "McLaren W, Gil L, Hunt SE, et al. The Ensembl Variant Effect Predictor. Genome Biol. 2016;17(1):122. doi:10.1186/s13059-016-0974-4",
+ "Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online].",
+ "Magnusson M, Hughes T, Glabilloy, Bitdeli Chef. genmod: Version 3.7.3. Published online November 15, 2018. doi:10.5281/ZENODO.3841142",
+ "Giulio Formenti, Linelle Abueg, Angelo Brajuka, Nadolina Brajuka, Cristóbal Gallardo-Alba, Alice Giani, Olivier Fedrigo, Erich D Jarvis, Gfastats: conversion, evaluation and manipulation of genome sequences using assembly graphs, Bioinformatics, Volume 38, Issue 17, September 2022, Pages 4214–4216, https://doi.org/10.1093/bioinformatics/btac460",
+ "Yun T, Li H, Chang PC, Lin MF, Carroll A, McLean CY. Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Robinson P, ed. Bioinformatics. 2021;36(24):5582-5589. doi:10.1093/bioinformatics/btaa1081",
+ "Cheng, H., Concepcion, G.T., Feng, X. et al. Haplotype-resolved de novo assembly using phased assembly graphs with hifiasm. Nat Methods 18, 170–175 (2021). https://doi.org/10.1038/s41592-020-01056-5",
+ "James M Holt, Christopher T Saunders, William J Rowell, Zev Kronenberg, Aaron M Wenger, Michael Eberle, HiPhase: jointly phasing small, structural, and tandem repeat variants from HiFi sequencing, Bioinformatics, Volume 40, Issue 2, February 2024, btae042, https://doi.org/10.1093/bioinformatics/btae042",
+ "Heng Li, Minimap2: pairwise alignment for nucleotide sequences, Bioinformatics, Volume 34, Issue 18, September 2018, Pages 3094–3100, https://doi.org/10.1093/bioinformatics/bty191",
+ "Pedersen BS, Quinlan AR. Mosdepth: quick coverage calculation for genomes and exomes. Hancock J, ed. Bioinformatics. 2018;34(5):867-868. doi:10.1093/bioinformatics/btx699",
+ "Genome-wide profiling of highly similar paralogous genes using HiFi sequencing. Xiao Chen, Daniel Baker, Egor Dolzhenko, Joseph M Devaney, Jessica Noya, April S Berlyoung, Rhonda Brandon, Kathleen S Hruska, Lucas Lochovsky, Paul Kruszka, Scott Newman, Emily Farrow, Isabelle Thiffault, Tomi Pastinen, Dalia Kasperaviciute, Christian Gilissen, Lisenka Vissers, Alexander Hoischen, Seth Berger, Eric Vilain, Emmanuèle Délot, UCI Genomics Research to Elucidate the Genetics of Rare Diseases (UCI GREGoR) Consortium, Michael A Eberle. bioRxiv 2024.04.19.590294; doi: https://doi.org/10.1101/2024.04.19.590294",
+ "Smolka, M., Paulin, L.F., Grochowski, C.M. et al. Detection of mosaic and population-level structural variants with Sniffles2. Nat Biotechnol (2024). https://doi.org/10.1038/s41587-023-02024-y",
+ "Pedersen, B.S., Bhetariya, P.J., Brown, J. et al. Somalier: rapid relatedness estimation for cancer and germline studies using efficient genome sketches. Genome Med 12, 62 (2020). https://doi.org/10.1186/s13073-020-00761-2",
+ "Nilsson D, Magnusson M. moonso/stranger v0.7.1. Published online February 18, 2021. doi:10.5281/ZENODO.4548873",
+ "Li H. Tabix: fast retrieval of sequence features from generic TAB-delimited files. Bioinformatics. 2011;27(5):718-719. doi:10.1093/bioinformatics/btq671",
+ "Dolzhenko, E., English, A., Dashnow, H. et al. Characterization and visualization of tandem repeats at genome scale. Nat Biotechnol (2024). https://doi.org/10.1038/s41587-023-02057-3",
+ "Marcel Martin, Murray Patterson, Shilpa Garg, Sarah O Fischer, Nadia Pisanti, Gunnar W Klau, Alexander Schöenhuth, Tobias Marschall. bioRxiv 085050; doi: https://doi.org/10.1101/085050",
+ "Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web.",
+ "Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506.",
+ "da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671.",
+ "Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241.",
+ "Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.",
].join(' ').trim()
- def concat_text = repeat_annotation_text +
- preprocessing_text +
- other_citation_text
-
- def reference_text = concat_text.unique(false) { a, b -> a <=> b } - ""
- return reference_text.join(' ').trim()
+ return reference_text
}
def methodsDescriptionText(mqc_methods_yaml) {
From e64470f146753fc723c2336476112dd72e9ef571 Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Thu, 15 Aug 2024 15:51:47 +0200
Subject: [PATCH 48/59] Use meta.id in BUILD_INTERVALS input (#321)
---
CHANGELOG.md | 3 ++-
subworkflows/local/scatter_genome/main.nf | 3 +--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b92140f4..f7cf2390 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -61,7 +61,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#308](https://github.com/genomic-medicine-sweden/nallo/pull/308) - Updated nf-core modules, fixed warnings in local modules, added Dockerfile to fqcrs
- [#312](https://github.com/genomic-medicine-sweden/nallo/pull/312) - Changed echtvar encode database creation to use dynamic `${project}` from samplesheet
- [#313](https://github.com/genomic-medicine-sweden/nallo/pull/313) - Updated calling of variants in non-autosomal contigs for DeepVariant
-- [#313](https://github.com/genomic-medicine-sweden/nallo/pull/313) - Changed VEP annotation added in #244 to not include SpliceAI
+- [#314](https://github.com/genomic-medicine-sweden/nallo/pull/314) - Changed VEP annotation added in #244 to not include SpliceAI
+- [#321](https://github.com/genomic-medicine-sweden/nallo/pull/321) - Changed the input to BUILD_INTERVALS to have `meta.id` when building intervals from reference
### `Removed`
diff --git a/subworkflows/local/scatter_genome/main.nf b/subworkflows/local/scatter_genome/main.nf
index 89e81499..5e611c29 100644
--- a/subworkflows/local/scatter_genome/main.nf
+++ b/subworkflows/local/scatter_genome/main.nf
@@ -22,8 +22,7 @@ workflow SCATTER_GENOME {
//
if( make_bed_from_fai ) {
-
- BUILD_INTERVALS ( ch_fai )
+ BUILD_INTERVALS ( ch_fai.map { name, fai -> [ [ id: name ], fai ] } )
ch_versions = ch_versions.mix(BUILD_INTERVALS.out.versions)
BUILD_INTERVALS.out.bed
From cc6d26a4859f2a4e2996caea6bcd0b3566434e8f Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Thu, 15 Aug 2024 16:19:24 +0200
Subject: [PATCH 49/59] Fix file requirements (#317)
* Split vep plugins into vep cache and vep plugins
* Fix file requirements
---
CHANGELOG.md | 1 +
docs/usage.md | 5 ++++-
subworkflows/local/utils_nfcore_nallo_pipeline/main.nf | 3 ++-
3 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f7cf2390..6039f0b7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -62,6 +62,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#312](https://github.com/genomic-medicine-sweden/nallo/pull/312) - Changed echtvar encode database creation to use dynamic `${project}` from samplesheet
- [#313](https://github.com/genomic-medicine-sweden/nallo/pull/313) - Updated calling of variants in non-autosomal contigs for DeepVariant
- [#314](https://github.com/genomic-medicine-sweden/nallo/pull/314) - Changed VEP annotation added in #244 to not include SpliceAI
+- [#317](https://github.com/genomic-medicine-sweden/nallo/pull/317) - Changed so that `--reduced_penetrance` and `--score_config_snv` is required by rank variants and not SNV annotation
- [#321](https://github.com/genomic-medicine-sweden/nallo/pull/321) - Changed the input to BUILD_INTERVALS to have `meta.id` when building intervals from reference
### `Removed`
diff --git a/docs/usage.md b/docs/usage.md
index 9aeec0df..1e443525 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -112,8 +112,11 @@ Some workflows require additional files:
- If running without `--skip_repeat_annotation`, download a json variant catalog, (e.g. [variant_catalog_grch38.json](https://github.com/Clinical-Genomics/stranger/raw/main/stranger/resources/variant_catalog_grch38.json)) matching your reference genome to supply with `--variant_catalog`.
- If running without `--skip_snv_annotation`, download [VEP cache](https://ftp.ensembl.org/pub/release-110/variation/vep/homo_sapiens_vep_110_GRCh38.tar.gz) to supply with `--vep_cache` and prepare a samplesheet with annotation databases ([`echtvar encode`](https://github.com/brentp/echtvar)) to supply with `--snp_db`:
+
- If running without `--skip_snv_annotation`, you will also need to download VEP plugin files to supply with `--vep_plugin_files` see [example](https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugin_files.csv). PLI and LoFtool.
+- If running without `--skip_snv_annotation`, `--variant_consequences_snv` is also required (File containing list of SO terms listed in the order of severity from most severe to least severe for annotating genomic and mitochondrial SNVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html)).
+
```
vep_files
https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/SpliceAI.pm
@@ -136,7 +139,7 @@ gnomad,/path/to/gnomad.v3.1.2.echtvar.popmax.v2.zip
cadd,/path/to/cadd.v1.6.hg38.zip
```
-- If your samplesheet contains at least one affected sample (phenotype = 2), `--reduced_penetrance` (Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv)), `--score_config_snv` (Used by GENMOD for ranking the variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rank_model_snv.ini)) and `--variant_consequences_snv` (File containing list of SO terms listed in the order of severity from most severe to lease severe for annotating genomic and mitochondrial SNVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html)) is also required.
+- If running without `--skip_rank_variants` and your samplesheet contains at least one affected sample (phenotype = 2), `--reduced_penetrance` (Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv)) and `--score_config_snv` (Used by GENMOD for ranking the variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rank_model_snv.ini)) are also required.
- Optionally, if running without `--skip_snv_annotation`, supply a path to a folder containing cadd annotations with `--cadd_resources` and prescored indels with `--cadd_prescored`. Equivalent of the data/annotations/ and data/prescored/ folders described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation), and it is used to calculate CADD scores for small indels.
diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
index c315bf96..91cb7ac5 100644
--- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
+++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
@@ -74,8 +74,9 @@ def fileDependencies = [
mapping : ["fasta", "somalier_sites"],
assembly : ["fasta", "par_regions"], // The assembly workflow should be split into two - assembly and variant calling (requires ref)
snv_calling : ["fasta", "par_regions"],
- snv_annotation : ["snp_db", "vep_cache", "vep_plugin_files", "reduced_penetrance", "score_config_snv", "variant_consequences_snv"],
+ snv_annotation : ["snp_db", "vep_cache", "vep_plugin_files", "variant_consequences_snv"],
cnv_calling : ["hificnv_xy", "hificnv_xx", "hificnv_exclude"],
+ rank_variants : ["reduced_penetrance", "score_config_snv"],
repeat_calling : ["trgt_repeats"],
repeat_annotation: ["variant_catalog"],
]
From 1f155531735cf5119b56a61111ff72534e4f4aa8 Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Thu, 15 Aug 2024 16:19:38 +0200
Subject: [PATCH 50/59] Fix parallel alignments in CI tests (#323)
---
.github/workflows/ci.yml | 2 +-
CHANGELOG.md | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4d412271..fe701aed 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -28,7 +28,7 @@ jobs:
matrix:
parameters:
- ""
- - "--preset ONT_R10 --input https://github.com/genomic-medicine-sweden/test-datasets/raw/e2266a34c14d1e0a9ef798de3cd81a76c9216fc1/testdata/samplesheet_multisample_bam_ont.csv --parallel_alignment 2 --parallel_snv 1"
+ - "--preset ONT_R10 --input https://github.com/genomic-medicine-sweden/test-datasets/raw/e2266a34c14d1e0a9ef798de3cd81a76c9216fc1/testdata/samplesheet_multisample_bam_ont.csv --parallel_alignments 2 --parallel_snv 1"
NXF_VER:
- "23.04.0"
- "latest-everything"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6039f0b7..8c128c54 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -64,6 +64,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#314](https://github.com/genomic-medicine-sweden/nallo/pull/314) - Changed VEP annotation added in #244 to not include SpliceAI
- [#317](https://github.com/genomic-medicine-sweden/nallo/pull/317) - Changed so that `--reduced_penetrance` and `--score_config_snv` is required by rank variants and not SNV annotation
- [#321](https://github.com/genomic-medicine-sweden/nallo/pull/321) - Changed the input to BUILD_INTERVALS to have `meta.id` when building intervals from reference
+- [#323](https://github.com/genomic-medicine-sweden/nallo/pull/323) - Changed `parallel_alignment` to `parallel_alignments` in CI tests as well
### `Removed`
From e76879dca0e3fd9ed887448850d8291c7476c1f9 Mon Sep 17 00:00:00 2001
From: fellen31
Date: Tue, 13 Aug 2024 19:47:01 +0200
Subject: [PATCH 51/59] Fix file requirements
---
.nf-core.yml | 2 +
.prettierignore | 1 +
CHANGELOG.md | 1 +
docs/README.md | 4 +-
docs/parameters.md | 177 +++++++++++++++++++++++
docs/usage.md | 337 ++++++++++++++++++++-----------------------
nextflow_schema.json | 107 ++++++++------
7 files changed, 400 insertions(+), 229 deletions(-)
create mode 100644 docs/parameters.md
diff --git a/.nf-core.yml b/.nf-core.yml
index 168083fe..b5e2ed39 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -2,6 +2,7 @@ lint:
files_exist:
- CODE_OF_CONDUCT.md
- assets/nf-core-nallo_logo_light.png
+ - docs/README.md
- docs/images/nf-core-nallo_logo_light.png
- docs/images/nf-core-nallo_logo_dark.png
- .github/ISSUE_TEMPLATE/config.yml
@@ -11,6 +12,7 @@ lint:
files_unchanged:
- CODE_OF_CONDUCT.md
- assets/nf-core-nallo_logo_light.png
+ - docs/README.md
- docs/images/nf-core-nallo_logo_light.png
- docs/images/nf-core-nallo_logo_dark.png
- .github/ISSUE_TEMPLATE/bug_report.yml
diff --git a/.prettierignore b/.prettierignore
index 437d763d..ecbdd5b2 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -4,6 +4,7 @@ slackreport.json
.nextflow*
work/
data/
+docs/parameters.md
results/
.DS_Store
testing/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8c128c54..19436299 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -63,6 +63,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#313](https://github.com/genomic-medicine-sweden/nallo/pull/313) - Updated calling of variants in non-autosomal contigs for DeepVariant
- [#314](https://github.com/genomic-medicine-sweden/nallo/pull/314) - Changed VEP annotation added in #244 to not include SpliceAI
- [#317](https://github.com/genomic-medicine-sweden/nallo/pull/317) - Changed so that `--reduced_penetrance` and `--score_config_snv` is required by rank variants and not SNV annotation
+- [#318](https://github.com/genomic-medicine-sweden/nallo/pull/318) - Updated docs and schema to clarify pipeline usage
- [#321](https://github.com/genomic-medicine-sweden/nallo/pull/321) - Changed the input to BUILD_INTERVALS to have `meta.id` when building intervals from reference
- [#323](https://github.com/genomic-medicine-sweden/nallo/pull/323) - Changed `parallel_alignment` to `parallel_alignments` in CI tests as well
diff --git a/docs/README.md b/docs/README.md
index ca01fef3..09d8b2fd 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -3,6 +3,8 @@
The genomic-medicine-sweden/nallo documentation is split into the following pages:
- [Usage](usage.md)
- - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags.
+ - An overview of how the pipeline works, how to run it and a description of command-line flags and pipeline parameters needed to run the pipeline.
+- [Parameters](parameters.md)
+ - A description of all available pipeline parameters, including those not described in [Usage](usage.md).
- [Output](output.md)
- An overview of the different results produced by the pipeline and how to interpret them.
diff --git a/docs/parameters.md b/docs/parameters.md
new file mode 100644
index 00000000..848834c1
--- /dev/null
+++ b/docs/parameters.md
@@ -0,0 +1,177 @@
+
+
+# genomic-medicine-sweden/nallo pipeline parameters
+
+Long-read variant calling pipeline
+
+## Workflow skip options
+
+Allows skipping certain parts of the pipeline
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `skip_aligned_read_qc` | Skip QC of aligned reads | `boolean` | False | | |
+| `skip_raw_read_qc` | Skip QC of unaligned (raw) reads | `boolean` | False | | |
+| `skip_short_variant_calling` | Skip short variant calling | `boolean` | False | | |
+| `skip_assembly_wf` | Skip genome assembly and assembly variant calling | `boolean` | False | | |
+| `skip_mapping_wf` | Skip read mapping (alignment) | `boolean` | False | | |
+| `skip_methylation_wf` | Skip generation of methylation pileups | `boolean` | False | | |
+| `skip_repeat_calling` | Skip tandem repeat calling | `boolean` | False | | |
+| `skip_repeat_annotation` | Skip tandem repeat annotation | `boolean` | False | | |
+| `skip_phasing_wf` | Skip phasing of variants and haplotagging of reads | `boolean` | False | | |
+| `skip_snv_annotation` | Skip short variant annotation | `boolean` | False | | |
+| `skip_cnv_calling` | Skip CNV calling | `boolean` | False | | |
+| `skip_call_paralogs` | Skip the calling of specific paralogous genes | `boolean` | False | | |
+| `skip_rank_variants` | Skip ranking of short variants | `boolean` | False | | |
+
+## Input/output options
+
+Define where the pipeline should find input data and save output data.
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `input` | Path to comma-separated file containing information about the samples in the experiment. HelpYou will
+need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its
+location. It has to be a comma-separated file with 3 columns, and a header row. | `string` | | True | |
+| `outdir` | The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. | `string` |
+| True | |
+| `email` | Email address for completion summary. HelpSet this parameter to your e-mail address to get a summary
+e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to
+specify this on the command line for every run. | `string` | | | |
+| `multiqc_title` | MultiQC report title. Printed as page header, used for filename if not otherwise specified. | `string` | | | |
+
+## Reference genome options
+
+Reference genome related files and options required for the workflow.
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `fasta` | Reference genome | `string` | | | |
+| `genome` | Name of iGenomes reference. HelpIf using a reference genome configured in the pipeline using
+iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files
+e.g. `--genome GRCh38`.
See the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details. |
+`string` | | | |
+| `igenomes_ignore` | Do not load the iGenomes reference config. HelpDo not load `igenomes.config` when running
+the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in
+`igenomes.config`. | `boolean` | True | | True |
+
+## Institutional config options
+
+Parameters used to describe centralised config profiles. These should not be edited.
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True |
+| `custom_config_base` | Base directory for Institutional configs. HelpIf you're running offline, Nextflow will
+not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you
+should download the files from the repo and tell Nextflow where to find them with this parameter. | `string` |
+https://raw.githubusercontent.com/nf-core/configs/master | | True |
+| `config_profile_name` | Institutional config name. | `string` | | | True |
+| `config_profile_description` | Institutional config description. | `string` | | | True |
+| `config_profile_contact` | Institutional config contact information. | `string` | | | True |
+| `config_profile_url` | Institutional config URL link. | `string` | | | True |
+
+## Max job request options
+
+Set the top limit for requested resources for any single job.
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `max_cpus` | Maximum number of CPUs that can be requested for any single job. HelpUse to set an upper-limit for
+the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1` | `integer` | 16 | | True |
+| `max_memory` | Maximum amount of memory that can be requested for any single job. HelpUse to set an upper-limit
+for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'` | `string` |
+128.GB | | True |
+| `max_time` | Maximum amount of time that can be requested for any single job. HelpUse to set an upper-limit for
+the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'` | `string` | 240.h |
+| True |
+
+## Generic options
+
+Less common options for the pipeline, typically set in a config file.
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `help` | Display help text. | `boolean` | | | True |
+| `version` | Display version and exit. | `boolean` | | | True |
+| `publish_dir_mode` | Method used to save pipeline results to output directory. HelpThe Nextflow `publishDir`
+option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move
+these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details. | `string` | copy | |
+True |
+| `email_on_fail` | Email address for completion summary, only when pipeline fails. HelpAn email address to send
+a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully. | `string` | | | True |
+| `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True |
+| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | `string` | 25.MB | | True |
+| `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True |
+| `hook_url` | Incoming hook URL for messaging service HelpIncoming hook URL for messaging service. Currently, MS
+Teams and Slack are supported. | `string` | | | True |
+| `multiqc_config` | Custom config file to supply to MultiQC. | `string` | | | True |
+| `multiqc_logo` | Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file | `string` | | | True |
+| `multiqc_methods_description` | Custom MultiQC yaml file containing HTML including a methods description. | `string` | | | |
+| `validate_params` | Boolean whether to validate parameters against the schema at runtime | `boolean` | True | | True |
+| `validationShowHiddenParams` | Show all params when using `--help` HelpBy default, parameters set as _hidden_
+in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all
+parameters. | `boolean` | | | True |
+| `validationSkipDuplicateCheck` | nf-validation related parameter | `boolean` | | | True |
+| `validationS3PathCheck` | Boolean whether to validate AWS S3 paths | `boolean` | | | True |
+| `monochromeLogs` | Boolean whether to color nf-validation logs | `boolean` | | | True |
+
+## Workflow options
+
+Workflow options specific to genomic-medicine-sweden/nallo
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `preset` | Enable or disable certain parts of the pipeline by default, depending on data type (`revio`, `pacbio`, `ONT_R10`) | `string` | revio |
+True | |
+| `variant_caller` | Which short variant software to use (`deepvariant`) | `string` | deepvariant | | |
+| `phaser` | Which phasing software to use (`whatshap`, `hiphase_snv`, `hiphase_sv`) | `string` | whatshap | | |
+| `hifiasm_mode` | Run hifiasm in hifi-only or hifi-trio mode (`hifi-only`, `trio-binning`) | `string` | hifi-only | | |
+| `parallel_alignments` | If parallel_alignments is bigger than 1, input files will be split and aligned in parallel to reduce processing time. |
+`integer` | 1 | | |
+| `parallel_snv` | If parallel_snv is bigger than 1, short variant calling will be done in parallel to reduce processing time. | `integer` | 13 | |
+|
+| `vep_cache_version` | VEP cache version | `integer` | 110 | | |
+| `vep_plugin_files` | A csv file with paths to vep plugin files. pLI and LoFtool are required. | `string` | | | |
+| `deepvariant_model_type` | Sets the model type used for DeepVariant. This is set automatically using `--preset` by default. | `string` | PACBIO |
+| True |
+
+## File inputs
+
+The different files that are required. Some are only required by certain workflows, see the usage documentation.
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `cadd_prescored` | Path to a directory containing prescored indels for CADD. HelpThis folder contains the
+compressed files and indexes that would otherwise be in data/prescored folder as described in
+https://github.com/kircherlab/CADD-scripts/#manual-installation. | `string` | | | |
+| `cadd_resources` | Path to a directory containing CADD annotations. HelpThis folder contains the uncompressed
+files that would otherwise be in data/annotation folder as described in
+https://github.com/kircherlab/CADD-scripts/#manual-installation. | `string` | | | |
+| `dipcall_par` | A BED file with PAR regions for dipcall and DeepVariant. | `string` | | | |
+| `tandem_repeats` | A tandem repeat BED file for sniffles | `string` | | | |
+| `trgt_repeats` | A BED file with repeats to be genotyped with TRGT | `string` | | | |
+| `snp_db` | A csv file with echtvar databases to annotate SNVs with | `string` | | | |
+| `variant_catalog` | A variant catalog json-file for stranger | `string` | | | |
+| `variant_consequences_snv` | File containing list of SO terms listed in the order of severity from most severe to least severe for annotating
+genomic SNVs. HelpFor more information check
+https://ensembl.org/info/genome/variation/prediction/predicted_data.html | `string` | | | |
+| `vep_cache` | A path to the VEP cache location | `string` | | | |
+| `bed` | A BED file with regions of interest, used to limit short variant calling. | `string` | | | |
+| `hificnv_xy` | A BED file containing expected copy number regions for XY samples. | `string` | | | |
+| `hificnv_xx` | A BED file containing expected copy number regions for XX samples. | `string` | | | |
+| `hificnv_exclude` | A BED file specifying regions to exclude with HiFiCNV, such as centromeres. | `string` | | | |
+| `reduced_penetrance` | A file with gene ids that have reduced penetrance. For use with genmod. | `string` | | | |
+| `score_config_snv` | A SNV rank model config file for genmod. | `string` | | | |
+| `somalier_sites` | A VCF of known polymorphic sites for somalier | `string` | | | |
+| `validationFailUnrecognisedParams` | Validation of parameters fails when an unrecognised parameter is found.
+HelpBy default, when an unrecognised parameter is found, it returns a warning. | `boolean` | |
+| True |
+| `validationLenientMode` | Validation of parameters in lenient mode. HelpAllows string values that are parseable
+as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode). |
+`boolean` | | | True |
+| `pipelines_testdata_base_path` | Base URL or local path to location of pipeline test dataset files | `string` |
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/ | | True |
+
+
+
diff --git a/docs/usage.md b/docs/usage.md
index 1e443525..c4ed524c 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -10,11 +10,11 @@ genomic-medicine-sweden/nallo is a bioinformatics analysis pipeline to analyse l
2. Install one of the following technologies for full pipeline reproducibility: Docker, Singularity, Podman, Shifter or Charliecloud.
> Almost all nf-core pipelines give you the option to use conda as well. However, some tools used in the nallo pipeline do not have a conda package so we do not support conda at the moment.
-## Run genomic-medicine-sweden/nallo with test data
+## Getting started
-Before running the pipeline with your data, we recommend running it with the test dataset available in the `assets/test_data` folder provided with the pipeline. You do not need to download any of the data as part of it came directly with the pipeline and the other part will be fetched automatically for you when you use the test profile.
+Before running the pipeline with your data, we recommend running it with the test profile. You do not need to download any of the data as it will be fetched automatically for you when you use the test profile.
-Run the following command, where YOURPROFILE is the package manager you installed on your machine. For example, `-profile test,docker` or `-profile test,singularity`:
+Run the following command, where YOURPROFILE is the package manager you installed on your machine. For example, `-profile test,docker` or `-profile test,singularity`
```
nextflow run genomic-medicine-sweden/nallo \
@@ -25,7 +25,7 @@ nextflow run genomic-medicine-sweden/nallo \
> Check [nf-core/configs](https://github.com/nf-core/configs/tree/master/conf) to see if a custom config file to run nf-core pipelines already exists for your institute. If so, you can simply use `-profile test,` in your command. This enables the appropriate package manager and sets the appropriate execution settings for your machine.
> NB: The order of profiles is important! They are loaded in sequence, so later profiles can overwrite earlier profiles.
-Running the command creates the following files in your working directory:
+Running the command creates the following files in your working directory
```
work # Directory containing the Nextflow working files
@@ -41,26 +41,23 @@ work # Directory containing the Nextflow working files
The above command downloads the pipeline from GitHub, caches it, and tests it on the test dataset. When you run the command again, it will fetch the pipeline from cache even if a more recent version of the pipeline is available. To make sure that you're running the latest version of the pipeline, update the cached version of the pipeline by including `-latest` in the command.
-## Run genomic-medicine-sweden/nallo with your data
+## Running genomic-medicine-sweden/nallo with your data
-Running the pipeline involves three steps:
+Running the pipeline on real data involves three steps:
-1. Prepare a samplesheet
-2. Gather all required references
-3. Supply samplesheet and references, and run the command
+1. Prepare a samplesheet with your data
+2. Gather required files and references
+3. Supply samplesheet, references and files, and run the pipeline
-## Samplesheet input
+## Samplesheet
-You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location.
+First, you will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location.
```bash
--input '[path to samplesheet file]'
```
-It has to be a comma-separated file with 7 columns, and a header row as shown in the examples below.
-`file` can either be a gzipped-fastq file or an aligned or unalinged BAM file (BAM files will be converted to FASTQ and aligned again).
-`project` needs to be the same for all samples in a run.
-If you don't have related samples, `family_id` could be set to sample name, and `paternal_id` and `maternal_id` should be set to 0.
+It has to be a comma-separated file with 7 columns, and a header row as shown in the example below:
```console
project,sample,file,family_id,paternal_id,maternal_id,sex,phenotype
@@ -68,228 +65,206 @@ testrun,HG002,/path/to/HG002.fastq.gz,FAM,HG003,0,1,2
testrun,HG003,/path/to/HG003.bam,FAM,0,0,2,1
```
-| Fields | Description |
-| ------------- | ------------------------------------------------------------------------------------------------------------------------- |
-| `project` | Project name must be provided and cannot contain spaces, needs to be the same for all samples." |
-| `sample` | Custom sample name, cannot contain spaces. |
-| `file` | Absolute path to gzipped FASTQ or BAM file. File has to have the extension ".fastq.gz", .fq.gz" or ".bam". |
-| `family_id` | "Family ID must be provided and cannot contain spaces. If no family ID is available you can use the same ID as the sample |
-| `paternal_id` | Paternal ID must be provided and cannot contain spaces. If no paternal ID is available, use 0. |
-| `maternal_id` | Maternal ID must be provided and cannot contain spaces. If no maternal ID is available, use 0. |
-| `sex` | Sex (0=unknown; 1=male; 2=female). |
-| `phenotype` | Affected status of patient (0 = missing; 1=unaffected; 2=affected). |
+| Fields | Description |
+| ------------- | --------------------------------------------------------------------------------------------------------------------------------- |
+| `project` | Project name must be provided and cannot contain spaces, needs to be the same for all samples. |
+| `sample` | Custom sample name, cannot contain spaces. |
+| `file` | Absolute path to gzipped FASTQ or BAM file. File has to have the extension ".fastq.gz", ".fq.gz" or ".bam". |
+| `family_id` | Family ID must be provided and cannot contain spaces. If no family ID is available use the same ID as sample. |
+| `paternal_id` | Paternal ID must be provided and cannot contain spaces. If no paternal ID is available, use 0. |
+| `maternal_id` | Maternal ID must be provided and cannot contain spaces. If no maternal ID is available, use 0. |
+| `sex` | Sex must be provided as 0, 1 or 2 (0=unknown; 1=male; 2=female). If sex is unknown it will be assigned automatically if possible. |
+| `phenotype` | Affected status of patient (0 = missing; 1=unaffected; 2=affected). |
An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.
-The typical command for running the pipeline is as follows:
+## Preset
-```bash
-nextflow run genomic-medicine-sweden/nallo -profile docker \
- --input samplesheet.csv \
- --preset \
- --outdir \
- --fasta
+This pipeline comes with three different presets that should be set with the `--preset` parameter
+
+- `revio` (default)
+- `pacbio`
+- `ONT_R10`
+
+`--skip_assembly_wf` and `--skip_repeat_wf` will be set to true for `ONT_R10` and `--skip_methylation_wf` will be set to true for `pacbio`, meaning these subworkflows are not run.
+
+## Subworkflows
+
+As indicated above, this pipeline is divided into multiple subworkflows, each with its own input requirements and outputs. By default, all subworkflows are active, and thus all mandatory input files are required.
+
+The only parameters mandatory for all subworkflows are `--input` and `--outdir`; all other parameters are determined by the active subworkflows. If you run `nextflow run genomic-medicine-sweden/nallo -profile docker --outdir results --input samplesheet.csv`, the pipeline will tell you which files are required:
+
+```
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ --skip_assembly_wf is NOT active, the following files are required: --dipcall_par
+ --skip_snv_annotation is NOT active, the following files are required: --snp_db
+ --skip_mapping_wf is NOT active, the following files are required: --somalier_sites
+ --skip_snv_annotation is NOT active, the following files are required: --vep_cache
+ ...
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
```
-## Presets
+The pipeline will try to guide you through which files are required, but a thorough description is provided below.
-This pipeline comes with three different presets that can be set with the `--preset` parameter, "revio", "pacbio" and "ONT_R10" (defaults to "revio").
-By default, `--skip_assembly_wf` and `--skip_repeat_wf` is set to true for `ONT_R10` and `--skip_methylation_wf` is set to true for "pacbio", which means these workflows are not run.
+Additionally, if you want to skip a subworkflow, you will need to explicitly skip all subworkflows that rely on it. For example, `nextflow run genomic-medicine-sweden/nallo -profile docker --outdir results --input samplesheet.csv --skip_mapping_wf` will tell you:
+
+```
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ --skip_mapping_wf is active, the pipeline has to be run with: --skip_aligned_read_qc --skip_assembly_wf --skip_call_paralogs --skip_short_variant_calling --skip_snv_annotation --skip_cnv_calling --skip_phasing_wf --skip_rank_variants --skip_repeat_calling --skip_repeat_annotation --skip_methylation_wf
+ ...
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+```
+
+This is because almost all other subworkflows rely on the mapping subworkflow.
+
+If you want to run the pipeline without any other input files than `--input samplesheet.csv`, all of the above skips will need to be active, and the pipeline will run only unaligned read QC.
## Reference files and parameters
-The typical command example above requires no additional files except the reference genome.
-Nallo has the ability to skip certain parts of the pipeline, for example `--skip_repeat_wf`.
-Some workflows require additional files:
+As described above, the files required depend on the active subworkflows. All parameters are listed [here](parameters.md), but the most useful parameters needed to run the pipeline are described in more detail below.
-- If running without `--skip_assembly_wf` or `--skip_short_variant_calling`, download a BED file with PAR regions ([hg38](https://storage.googleapis.com/deepvariant/case-study-testdata/GRCh38_PAR.bed)) to supply with `--par_regions`.
+### Raw read QC (`--skip_raw_read_qc`)
-> [!NOTE]
-> Make sure chrY PAR is hard masked in reference.
+This subworkflow requires no additional files.
-- If running without `--skip_repeat_calling`, download a BED file with tandem repeats ([TRGT](https://github.com/PacificBiosciences/trgt/tree/main/repeats)) matching your reference genome to supply with `--trgt_repeats`.
+### Mapping (`--skip_mapping_wf`)
-- If running without `--skip_repeat_annotation`, download a json variant catalog, (e.g. [variant_catalog_grch38.json](https://github.com/Clinical-Genomics/stranger/raw/main/stranger/resources/variant_catalog_grch38.json)) matching your reference genome to supply with `--variant_catalog`.
+The majority of subworkflows depend on the mapping (alignment) subworkflow which requires `--fasta` and `--somalier_sites`.
-- If running without `--skip_snv_annotation`, download [VEP cache](https://ftp.ensembl.org/pub/release-110/variation/vep/homo_sapiens_vep_110_GRCh38.tar.gz) to supply with `--vep_cache` and prepare a samplesheet with annotation databases ([`echtvar encode`](https://github.com/brentp/echtvar)) to supply with `--snp_db`:
+| Parameter | Description |
+| ---------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `fasta` | Reference genome, either gzipped or uncompressed FASTA (e.g. [GRCh38_no_alt_analysis_set.fna.gz](ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz)) |
+| `somalier_sites` | A VCF of known polymorphic sites (e.g. [sites.hg38.vcf.gz](https://github.com/brentp/somalier/files/3412456/sites.hg38.vcf.gz)), from which sex will be inferred if possible. |
-- If running without `--skip_snv_annotation`, you will also need to download VEP plugin files to supply with `--vep_plugin_files` see [example](https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugin_files.csv). PLI and LoFtool.
+### Aligned read QC (`--skip_aligned_read_qc`)
-- If running without `--skip_snv_annotation`, `--variant_consequences_snv` is also required (File containing list of SO terms listed in the order of severity from most severe to lease severe for annotating genomic and mitochondrial SNVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html)).
+This subworkflow depends on the mapping subworkflow, but requires no additional files.
-```
-vep_files
-https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/SpliceAI.pm
-https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/LoFtool.pm
-https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz
-https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/dbNSFP.pm
-https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/plugin_config.txt
-https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_indel_-v1.3-.vcf.gz.tbi
-https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_indel_-v1.3-.vcf.gz
-https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz.tbi
-https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/pLI_values.txt
-https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/pLI.pm
-https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/MaxEntScan.pm
-https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/LoFtool_scores.txt
-```
+### Assembly (`--skip_assembly_wf`)
-```
-sample,file
-gnomad,/path/to/gnomad.v3.1.2.echtvar.popmax.v2.zip
-cadd,/path/to/cadd.v1.6.hg38.zip
-```
+This subworkflow contains both genome assembly and assembly variant calling. The assembly variant calling needs the sex of samples, and for samples with unknown sex this is inferred from aligned reads; therefore it depends on the mapping subworkflow.
-- If running wihtout `--skip_rank_variants` and your samplesheet contains at least one affected sample (phenotype = 2), `--reduced_penetrance` (Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv)), `--score_config_snv` (Used by GENMOD for ranking the variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rank_model_snv.ini)).
+It requires a BED file with PAR regions.
-- Optionally, if running without `--skip_snv_annotation`, supply a path to a folder containing cadd annotations with `--cadd_resources` and prescored indels with `--cadd_prescored`. Equivalent of the data/annotations/ and data/prescored/ folders described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation), and it is used to calculate CADD scores for small indels.
+| Parameter | Description |
+| ------------- | --------------------------------------------------------------------------------------------------------------------------------- |
+| `par_regions` | A BED file with PAR regions (e.g. [GRCh38_PAR.bed](https://storage.googleapis.com/deepvariant/case-study-testdata/GRCh38_PAR.bed)) |
-- If running without `--skip_cnv_calling`, expected CN regions for your reference genome can be downloaded from [HiFiCNV GitHub](https://github.com/PacificBiosciences/HiFiCNV/tree/main/data) to supply with `--hificnv_xy`, `--hificnv_xx` (expected_cn) and `--hificnv_exclude` (excluded_regions).
+> [!NOTE]
+> Make sure chrY PAR is hard masked in the reference genome you are using.
-- If running without `--skip_call_paralogs`, the reference genome needs to be hg38
+### Call paralogs (`--skip_call_paralogs`)
-- If running without `--skip_mapping_wf`, a VCF of known polymorphic sites (e.g. [sites.hg38.vcg.gz](https://github.com/brentp/somalier/files/3412456/sites.hg38.vcf.gz)) needs to be supplied with `--somalier_sites`, from which sex will be inferred if possible.
+This subworkflow depends on the mapping subworkflow, but requires no additional files.
-#### Highlighted parameters:
+> [!NOTE]
+> Only GRCh38 is supported.
-- You can choose to limit SNV calling to regions in BED file (`--bed`).
+### Short variant calling (`--skip_short_variant_calling`)
-- By default SNV-calling is split into 13 parallel processes, limit this by setting `--parallel_snv` to a different number.
+This subworkflow depends on the mapping subworkflow, and requires the same PAR regions file as the assembly workflow.
-- By default the pipeline does not perform parallel alignment, but this can be set by setting `--parallel_alignmentss` to split the input and alignment into N files/processes.
+| Parameter | Description |
+| ------------- | --------------------------------------------------------------------------------------------------------------------------------- |
+| `par_regions` | A BED file with PAR regions (e.g. [GRCh38_PAR.bed](https://storage.googleapis.com/deepvariant/case-study-testdata/GRCh38_PAR.bed)) |
-All parameters are listed below:
+### CNV calling (`--skip_cnv_calling`)
-## Workflow skip options
+This subworkflow depends on the mapping and short variant calling subworkflows, and requires the following additional files:
-Options to skip various steps within the workflow
+| Parameter | Description |
+| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `hificnv_xy` | expected XY copy number regions for your reference genome (e.g. [expected_cn.hg38.XY.bed](https://github.com/PacificBiosciences/HiFiCNV/raw/main/data/expected_cn/expected_cn.hg38.XY.bed)) |
+| `hificnv_xx` | expected XX copy number regions for your reference genome (e.g. [expected_cn.hg38.XX.bed](https://github.com/PacificBiosciences/HiFiCNV/raw/main/data/expected_cn/expected_cn.hg38.XX.bed)) |
+| `hificnv_exclude` | BED file specifying regions to exclude (e.g. [cnv.excluded_regions.hg38.bed.gz](https://github.com/PacificBiosciences/HiFiCNV/raw/main/data/excluded_regions/cnv.excluded_regions.hg38.bed.gz)) |
-| Parameter | Description | Type | Default | Required | Hidden |
-| ---------------------------- | ------------------------------------------ | --------- | ------- | -------- | ------ |
-| `skip_aligned_read_qc` | Skip aligned read QC | `boolean` | `False` | | |
-| `skip_raw_read_qc` | Skip raw read QC | `boolean` | `False` | | |
-| `skip_short_variant_calling` | Skip short variant calling | `boolean` | `False` | | |
-| `skip_assembly_wf` | Skip assembly and downstream processes | `boolean` | `False` | | |
-| `skip_mapping_wf` | Skip read mapping and downstream processes | `boolean` | `False` | | |
-| `skip_methylation_wf` | Skip methylation workflow | `boolean` | `False` | | |
-| `skip_repeat_calling` | Skip repeat calling workflow | `boolean` | `False` | | |
-| `skip_repeat_annotation` | Skip repeat annotation workflow | `boolean` | `False` | | |
-| `skip_phasing_wf` | Skip phasing workflow | `boolean` | `False` | | |
-| `skip_snv_annotation` | Skip SNV annotation | `boolean` | `False` | | |
-| `skip_cnv_calling` | Skip CNV workflow | `boolean` | `False` | | |
-| `skip_call_paralogs` | Skip call paralogs (Paraphase) | `boolean` | `False` | | |
+### Phasing (`--skip_phasing_wf`)
-## Input/output options
+This subworkflow phases variants and haplotags aligned BAM files, and as such relies on the mapping and short variant calling subworkflows, but requires no additional files.
-Define where the pipeline should find input data and save output data.
+### Methylation (`--skip_methylation_wf`)
-| Parameter | Description | Type | Default | Required | Hidden |
-| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | -------- | ------ |
-| `input` | Path to comma-separated file containing information about the samples in the experiment. HelpYou will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. | `string` | | True | |
-| `outdir` | The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. | `string` | | True | |
-| `email` | Email address for completion summary. HelpSet this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run. | `string` | | | |
-| `multiqc_title` | MultiQC report title. Printed as page header, used for filename if not otherwise specified. | `string` | | | |
+This subworkflow relies on mapping, short variant calling and phasing subworkflows, but requires no additional files.
-## Reference genome options
+### Repeat calling (`--skip_repeat_calling`)
-Reference genome related files and options required for the workflow.
+This subworkflow requires haplotagged BAM files, and as such relies on the mapping, short variant calling and phasing subworkflows, and requires the following additional files:
-| Parameter | Description | Type | Default | Required | Hidden |
-| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ |
-| `fasta` | Reference genome | `string` | | | |
-| `genome` | Name of iGenomes reference. HelpIf using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.
See the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details. | `string` | | | |
-| `igenomes_ignore` | Do not load the iGenomes reference config. HelpDo not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. | `boolean` | True | | True |
+| Parameter | Description |
+| -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `trgt_repeats` | a BED file with tandem repeats matching your reference genome (e.g. [pathogenic_repeats.hg38.bed](https://github.com/PacificBiosciences/trgt/raw/main/repeats/pathogenic_repeats.hg38.bed)) |
-## Institutional config options
+### Repeat annotation (`--skip_repeat_annotation`)
-Parameters used to describe centralised config profiles. These should not be edited.
+This subworkflow relies on the mapping, short variant calling, phasing and repeat calling subworkflows, and requires the following additional files:
-| Parameter | Description | Type | Default | Required | Hidden |
-| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | -------------------------------------------------------- | -------- | ------ |
-| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True |
-| `custom_config_base` | Base directory for Institutional configs. HelpIf you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter. | `string` | https://raw.githubusercontent.com/nf-core/configs/master | | True |
-| `config_profile_name` | Institutional config name. | `string` | | | True |
-| `config_profile_description` | Institutional config description. | `string` | | | True |
-| `config_profile_contact` | Institutional config contact information. | `string` | | | True |
-| `config_profile_url` | Institutional config URL link. | `string` | | | True |
+| Parameter | Description |
+| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `variant_catalog` | a variant catalog matching your reference (e.g. [variant_catalog_grch38.json](https://github.com/Clinical-Genomics/stranger/raw/main/stranger/resources/variant_catalog_grch38.json)) |
-## Max job request options
+### SNV annotation (`--skip_snv_annotation`)
-Set the top limit for requested resources for any single job.
+This subworkflow relies on the mapping and short variant calling subworkflows, and requires the following additional files:
-| Parameter | Description | Type | Default | Required | Hidden |
-| ------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ |
-| `max_cpus` | Maximum number of CPUs that can be requested for any single job. HelpUse to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1` | `integer` | 16 | | True |
-| `max_memory` | Maximum amount of memory that can be requested for any single job. HelpUse to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'` | `string` | 128.GB | | True |
-| `max_time` | Maximum amount of time that can be requested for any single job. HelpUse to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'` | `string` | 240.h | | True |
+
-## Generic options
+| Parameter | Description |
+| -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `vep_cache` | VEP cache matching your reference genome, either as a `.tar.gz` archive or path to a directory (e.g. [homo_sapiens_vep_110_GRCh38.tar.gz](https://ftp.ensembl.org/pub/release-110/variation/vep/homo_sapiens_vep_110_GRCh38.tar.gz)) |
+| `vep_plugins` 1 | A csv file with VEP plugin files, pLI and LoFtool are required. Example provided below. |
+| `snp_db` 2 | A csv file with annotation databases from ([`echtvar encode`](https://github.com/brentp/echtvar)) (e.g. [gnomad.v3.1.2.echtvar.popmax.v2.zip](https://surfdrive.surf.nl/files/index.php/s/LddbAYQAYPqtYu6/download)) |
+| `variant_consequences_snv` | A list of SO terms listed in the order of severity from most severe to least severe for annotating genomic and mitochondrial SNVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://ensembl.org/info/genome/variation/prediction/predicted_data.html) |
-Less common options for the pipeline, typically set in a config file.
+1 Example file for input with `--vep_plugins`
-| Parameter | Description | Type | Default | Required | Hidden |
-| ------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------- | ------- | -------- | ------ |
-| `help` | Display help text. | `boolean` | | | True |
-| `version` | Display version and exit. | `boolean` | | | True |
-| `publish_dir_mode` | Method used to save pipeline results to output directory. HelpThe Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details. | `string` | copy | | True |
-| `email_on_fail` | Email address for completion summary, only when pipeline fails. HelpAn email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully. | `string` | | | True |
-| `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True |
-| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | `string` | 25.MB | | True |
-| `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True |
-| `hook_url` | Incoming hook URL for messaging service HelpIncoming hook URL for messaging service. Currently, MS Teams and Slack are supported. | `string` | | | True |
-| `multiqc_config` | Custom config file to supply to MultiQC. | `string` | | | True |
-| `multiqc_logo` | Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file | `string` | | | True |
-| `multiqc_methods_description` | Custom MultiQC yaml file containing HTML including a methods description. | `string` | | | |
-| `validate_params` | Boolean whether to validate parameters against the schema at runtime | `boolean` | True | | True |
-| `validationShowHiddenParams` | Show all params when using `--help` HelpBy default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters. | `boolean` | | | True |
-| `validationSkipDuplicateCheck` | nf-validation related parameter | `boolean` | | | True |
-| `validationS3PathCheck` | Boolean whether to validate validate AWS S3 paths | `boolean` | | | True |
-| `monochromeLogs` | Boolean whether to color nf-validation logs | `boolean` | | | True |
-
-## Workflow options
-
-| Parameter | Description | Type | Default | Required | Hidden |
-| ---------------------- | ------------------------------------------- | --------- | ----------- | -------- | ------ |
-| `preset` | Choose a preset depending on data type | `string` | revio | True | |
-| `variant_caller` | Choose variant caller | `string` | deepvariant | | |
-| `phaser` | Choose phasing software | `string` | whatshap | | |
-| `hifiasm_mode` | Run hifiasm in hifi-only or hifi-trio mode | `string` | hifi-only | | |
-| `parallel_alignmentss` | Split alignment into n processes per sample | `integer` | 1 | | |
-| `parallel_snv` | Split SNV calling into n chunks | `integer` | 13 | | |
-
-## Extra file inputs
-
-Different processes may need extra input files
-
-| Parameter | Description | Type | Default | Required | Hidden |
-| ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ |
-| `par_regions` | Provide a bed file of chrX PAR regions for dipcall | `string` | | | |
-| `tandem_repeats` | Tandem repeat BED-file for sniffles | `string` | | | |
-| `trgt_repeats` | BED-file for repeats to be genotyped | `string` | | | |
-| `snp_db` | Extra echtvar-databases to annotate SNVs with | `string` | | | |
-| `vep_cache` | Path to directory of vep_cache | `string` | | | |
-| `vep_plugin_files` | A csv file with paths to vep plugin files, pLI, LoFtool and SpliceAI is required. | `string` | | | |
-| `bed` | BED file with regions of interest | `string` | | | |
-| `hificnv_xy` | | `string` | | | |
-| `hificnv_xx` | | `string` | | | |
-| `hificnv_exclude` | HiFiCNV BED file specifying regions to exclude | `string` | | | |
-| `somalier_sites` | A VCF of known polymorphic sites | `string` | | | |
-| `variant_catalog` | Variant catalog json-file for Stranger | `string` | | | |
-| `validationFailUnrecognisedParams` | Validation of parameters fails when an unrecognised parameter is found. HelpBy default, when an unrecognised parameter is found, it returns a warning. | `boolean` | | | True |
-| `validationLenientMode` | Validation of parameters in lenient more. HelpAllows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode). | `boolean` | | | True |
+- If running without `--skip_snv_annotation`, `--variant_consequences_snv` is also required (File containing list of SO terms listed in the order of severity from most severe to least severe for annotating genomic and mitochondrial SNVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html)).
-### Updating the pipeline
+```
+vep_files
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/LoFtool.pm
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/dbNSFP.pm
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/plugin_config.txt
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_indel_-v1.3-.vcf.gz.tbi
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_indel_-v1.3-.vcf.gz
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz.tbi
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/pLI_values.txt
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/pLI.pm
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/MaxEntScan.pm
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/LoFtool_scores.txt
+```
-```bash
-nextflow pull genomic-medicine-sweden/nallo
+2 Example file for input with `--vep_plugins`:
+
+```
+sample,file
+gnomad,/path/to/gnomad.v3.1.2.echtvar.popmax.v2.zip
+cadd,/path/to/cadd.v1.6.hg38.zip
```
-When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
+> [!NOTE]
+> Optionally, to calculate CADD scores for small indels, supply a path to a folder containing cadd annotations with `--cadd_resources` and prescored indels with `--cadd_prescored`. Equivalent of the `data/annotations/` and `data/prescored/` folders described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation). CADD scores for SNVs can be annotated through echtvar and `--snp_db`.
+
+### Rank variants (`--skip_rank_variants`)
+
+This subworkflow relies on the mapping, short variant calling and SNV annotation subworkflows, and requires the following additional files:
+
+| Parameter | Description |
+| -------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `score_config_snv` | Used by GENMOD when ranking variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rank_model_snv.ini). |
+| `reduced_penetrance` | A list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv). |
+
+### Other highlighted parameters
+
+- Limit SNV calling to regions in BED file (`--bed`).
+- By default SNV-calling is split into 13 parallel processes, which speeds up the variant calling significantly. Limit this by setting `--parallel_snv` to a different number.
+- By default the pipeline does not perform parallel alignment, but this can be changed by setting `--parallel_alignments` to split the alignment into multiple processes. This comes with some additional overhead, but speeds up the alignment significantly.
### Reproducibility
It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
-First, go to the [genomic-medicine-sweden/nallo releases page](https://github.com/genomic-medicine-sweden/nallo/releases) and find the latest pipeline version - numeric only (eg. `0.1.0`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 0.1.0`. Of course, you can switch to another version by changing the number after the `-r` flag.
+First, go to the [genomic-medicine-sweden/nallo releases page](https://github.com/genomic-medicine-sweden/nallo/releases) and find the latest pipeline version - numeric only (eg. `0.2.0`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 0.2.0`. Of course, you can switch to another version by changing the number after the `-r` flag.
This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports.
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 3626f253..8e83ba04 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -8,69 +8,82 @@
"workflow_skip_options": {
"title": "Workflow skip options",
"type": "object",
- "description": "Options to skip various steps within the workflow",
+ "description": "Allows skipping certain parts of the pipeline",
"default": "",
"properties": {
"skip_aligned_read_qc": {
"type": "boolean",
"fa_icon": "fas fa-fast-forward",
- "description": "Skip aligned read QC"
+ "description": "Skip QC of aligned reads",
+ "default": false
},
"skip_raw_read_qc": {
"type": "boolean",
"fa_icon": "fas fa-fast-forward",
- "description": "Skip raw read QC"
+ "description": "Skip QC of unaligned (raw) reads",
+ "default": false
},
"skip_short_variant_calling": {
"type": "boolean",
"description": "Skip short variant calling",
- "fa_icon": "fas fa-fast-forward"
+ "fa_icon": "fas fa-fast-forward",
+ "default": false
},
"skip_assembly_wf": {
"type": "boolean",
- "description": "Skip assembly and downstream processes",
- "fa_icon": "fas fa-fast-forward"
+ "description": "Skip genome assembly and assembly variant calling",
+ "fa_icon": "fas fa-fast-forward",
+ "default": false
},
"skip_mapping_wf": {
"type": "boolean",
- "description": "Skip read mapping and downstream processes",
- "fa_icon": "fas fa-fast-forward"
+ "description": "Skip read mapping (alignment)",
+ "fa_icon": "fas fa-fast-forward",
+ "default": false
},
"skip_methylation_wf": {
"type": "boolean",
- "description": "Skip methylation workflow",
- "fa_icon": "fas fa-fast-forward"
+ "description": "Skip generation of methylation pileups",
+ "fa_icon": "fas fa-fast-forward",
+ "default": false
},
"skip_repeat_calling": {
"type": "boolean",
- "description": "Skip repeat calling workflow",
- "fa_icon": "fas fa-fast-forward"
+ "description": "Skip tandem repeat calling",
+ "fa_icon": "fas fa-fast-forward",
+ "default": false
},
"skip_repeat_annotation": {
"type": "boolean",
- "description": "Skip repeat annotation workflow",
- "fa_icon": "fas fa-fast-forward"
+ "description": "Skip tandem repeat annotation",
+ "fa_icon": "fas fa-fast-forward",
+ "default": false
},
"skip_phasing_wf": {
"type": "boolean",
"fa_icon": "fas fa-fast-forward",
- "description": "Skip phasing workflow"
+ "description": "Skip phasing of variants and haplotagging of reads",
+ "default": false
},
"skip_snv_annotation": {
"type": "boolean",
- "description": "Skip SNV annotation"
+ "description": "Skip short variant annotation",
+ "default": false
},
"skip_cnv_calling": {
"type": "boolean",
- "description": "Skip CNV workflow"
+ "description": "Skip CNV calling",
+ "default": false
},
"skip_call_paralogs": {
"type": "boolean",
- "description": "Skip call paralogs (Paraphase)"
+ "description": "Skip the calling of specific paralogous genes",
+ "default": false
},
"skip_rank_variants": {
"type": "boolean",
- "description": "Skip rank variants workflow"
+ "description": "Skip ranking of short variants",
+ "default": false
}
},
"fa_icon": "fas fa-american-sign-language-interpreting"
@@ -339,30 +352,30 @@
"workflow_options": {
"title": "Workflow options",
"type": "object",
- "description": "",
+ "description": "Workflow options specific to genomic-medicine-sweden/nallo",
"default": "",
"properties": {
"preset": {
"type": "string",
"default": "revio",
- "description": "Choose a preset depending on data type",
+ "description": "Enable or disable certain parts of the pipeline by default, depending on data type (`revio`, `pacbio`, `ONT_R10`)",
"enum": ["revio", "pacbio", "ONT_R10"]
},
"variant_caller": {
"type": "string",
"default": "deepvariant",
- "description": "Choose variant caller",
+ "description": "Which short variant calling software to use (`deepvariant`)",
"enum": ["deepvariant"]
},
"phaser": {
"type": "string",
"default": "whatshap",
- "description": "Choose phasing software",
+ "description": "Which phasing software to use (`whatshap`, `hiphase_snv`, `hiphase_sv`)",
"enum": ["whatshap", "hiphase_snv", "hiphase_sv"]
},
"hifiasm_mode": {
"type": "string",
- "description": "Run hifiasm in hifi-only or hifi-trio mode",
+ "description": "Run hifiasm in hifi-only or hifi-trio mode (`hifi-only`, `trio-binning`)",
"enum": ["hifi-only", "trio-binning"],
"default": "hifi-only"
},
@@ -370,12 +383,12 @@
"type": "integer",
"minimum": 1,
"default": 1,
- "description": "Split alignment into n processes per sample"
+ "description": "If parallel_alignments is bigger than 1, input files will be split and aligned in parallel to reduce processing time."
},
"parallel_snv": {
"type": "integer",
"default": 13,
- "description": "Split SNV calling into n chunks",
+ "description": "If parallel_snv is bigger than 1, short variant calling will be done in parallel to reduce processing time.",
"minimum": 1
},
"vep_cache_version": {
@@ -386,23 +399,23 @@
"vep_plugin_files": {
"type": "string",
"mimetype": "text/csv",
- "description": "A csv file with paths to vep plugin files, pLI, LoFtool and SpliceAI is required.",
+ "description": "A csv file with paths to vep plugin files. pLI and LoFtool are required.",
"schema": "assets/vep_plugin_files_schema.json"
},
"deepvariant_model_type": {
"type": "string",
"default": "PACBIO",
- "description": "Sets the model type for DeepVariant",
+ "description": "Sets the model type used for DeepVariant. This is set automatically using `--preset` by default.",
"hidden": true,
"enum": ["PACBIO", "ONT_R104"]
}
},
"required": ["preset"]
},
- "extra_file_inputs": {
- "title": "Extra file inputs",
+ "file_inputs": {
+ "title": "File inputs",
"type": "object",
- "description": "Different processes may need extra input files",
+ "description": "The different files that are required. Some are only required by certain workflows, see the usage documentation.",
"default": "",
"fa_icon": "fas fa-copy",
"properties": {
@@ -411,7 +424,7 @@
"exists": true,
"format": "directory-path",
"fa_icon": "fas fa-file",
- "description": "Path to the directory containing cadd prescored indels.",
+ "description": "Path to a directory containing prescored indels for CADD.",
"help_text": "This folder contains the compressed files and indexes that would otherwise be in data/prescored folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation."
},
"cadd_resources": {
@@ -419,7 +432,7 @@
"exists": true,
"format": "directory-path",
"fa_icon": "fas fa-file",
- "description": "Path to the directory containing cadd annotations.",
+ "description": "Path to a directory containing CADD annotations.",
"help_text": "This folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation."
},
"par_regions": {
@@ -431,13 +444,13 @@
"tandem_repeats": {
"type": "string",
"format": "file-path",
- "description": "Tandem repeat BED-file for sniffles",
+ "description": "A tandem repeat BED file for sniffles",
"pattern": "^\\S+\\.bed$",
"exists": true
},
"trgt_repeats": {
"type": "string",
- "description": "BED-file for repeats to be genotyped",
+ "description": "A BED file with repeats to be genotyped with TRGT",
"format": "file-path",
"exists": true
},
@@ -447,12 +460,12 @@
"format": "file-path",
"mimetype": "text/csv",
"schema": "/assets/schema_snpdb.json",
- "description": "Extra echtvar-databases to annotate SNVs with",
+ "description": "A csv file with echtvar databases to annotate SNVs with",
"exists": true
},
"variant_catalog": {
"type": "string",
- "description": "Variant catalog json-file for Stranger",
+ "description": "A variant catalog json-file for stranger",
"format": "file-path",
"exists": true
},
@@ -464,7 +477,7 @@
},
"vep_cache": {
"type": "string",
- "description": "Path to directory of vep_cache",
+ "description": "A path to the VEP cache location",
"format": "path",
"exists": true
},
@@ -472,25 +485,25 @@
"type": "string",
"pattern": "^\\S+\\.bed$",
"format": "file-path",
- "description": "BED file with regions of interest",
+ "description": "A BED file with regions of interest, used to limit short variant calling.",
"exists": true
},
"hificnv_xy": {
"type": "string",
"format": "file-path",
- "description": "HiFiCNV BED file specifying expected copy number regions for XY samples.",
+ "description": "A BED file containing expected copy number regions for XY samples.",
"exists": true
},
"hificnv_xx": {
"type": "string",
"format": "file-path",
- "description": "HiFiCNV BED file specifying expected copy number regions for XX samples.",
+ "description": "A BED file containing expected copy number regions for XX samples.",
"exists": true
},
"hificnv_exclude": {
"type": "string",
"format": "file-path",
- "description": "HiFiCNV BED file specifying regions to exclude",
+ "description": "A BED file specifying regions to exclude with HiFiCNV, such as centromeres.",
"exists": true
},
"reduced_penetrance": {
@@ -498,19 +511,19 @@
"exists": true,
"format": "path",
"fa_icon": "fas fa-file-csv",
- "description": "File with gene ids that have reduced penetrance. For use with genmod."
+ "description": "A file with gene ids that have reduced penetrance. For use with genmod."
},
"score_config_snv": {
"type": "string",
"exists": true,
"format": "path",
"fa_icon": "fas fa-file",
- "description": "SNV rank model config file for genmod."
+ "description": "An SNV rank model config file for genmod."
},
"somalier_sites": {
"type": "string",
"pattern": "^\\S+\\.vcf(\\.gz)?$",
- "description": "A VCF of known polymorphic sites",
+ "description": "A VCF of known polymorphic sites for somalier",
"format": "file-path",
"exists": true
},
@@ -519,7 +532,7 @@
"fa_icon": "far fa-check-circle",
"description": "Validation of parameters fails when an unrecognised parameter is found.",
"hidden": true,
- "help_text": "By default, when an unrecognised parameter is found, it returns a warinig."
+ "help_text": "By default, when an unrecognised parameter is found, it returns a warning."
},
"validationLenientMode": {
"type": "boolean",
@@ -561,7 +574,7 @@
"$ref": "#/definitions/workflow_options"
},
{
- "$ref": "#/definitions/extra_file_inputs"
+ "$ref": "#/definitions/file_inputs"
}
]
}
From 7a855b1b0649ddcfe96824a4007db47eda86f3a0 Mon Sep 17 00:00:00 2001
From: fellen31
Date: Thu, 15 Aug 2024 16:17:49 +0200
Subject: [PATCH 52/59] ignore .prettierignore
---
.nf-core.yml | 2 ++
1 file changed, 2 insertions(+)
diff --git a/.nf-core.yml b/.nf-core.yml
index b5e2ed39..116dd7e0 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -9,6 +9,7 @@ lint:
- .github/workflows/awstest.yml
- .github/workflows/awsfulltest.yml
- conf/modules.config
+ - .prettierignore
files_unchanged:
- CODE_OF_CONDUCT.md
- assets/nf-core-nallo_logo_light.png
@@ -17,6 +18,7 @@ lint:
- docs/images/nf-core-nallo_logo_dark.png
- .github/ISSUE_TEMPLATE/bug_report.yml
- .github/CONTRIBUTING.md
+ - .prettierignore
multiqc_config:
- report_comment
nextflow_config:
From 107869fdc78906757df1a7807061f7e51747bc68 Mon Sep 17 00:00:00 2001
From: fellen31
Date: Thu, 15 Aug 2024 16:24:47 +0200
Subject: [PATCH 53/59] update parameters
---
docs/parameters.md | 328 ++++++++++++++++++++++-----------------------
1 file changed, 164 insertions(+), 164 deletions(-)
diff --git a/docs/parameters.md b/docs/parameters.md
index 848834c1..890af543 100644
--- a/docs/parameters.md
+++ b/docs/parameters.md
@@ -1,177 +1,177 @@
-# genomic-medicine-sweden/nallo pipeline parameters
-
-Long-read variant calling pipeline
-
-## Workflow skip options
-
-Allows skipping certain parts of the pipeline
-
-| Parameter | Description | Type | Default | Required | Hidden |
-|-----------|-----------|-----------|-----------|-----------|-----------|
-| `skip_aligned_read_qc` | Skip QC of aligned reads | `boolean` | False | | |
-| `skip_raw_read_qc` | Skip QC of unaligned (raw) reads | `boolean` | False | | |
-| `skip_short_variant_calling` | Skip short variant calling | `boolean` | False | | |
-| `skip_assembly_wf` | Skip genome assembly and assembly variant calling | `boolean` | False | | |
-| `skip_mapping_wf` | Skip read mapping (alignment) | `boolean` | False | | |
-| `skip_methylation_wf` | Skip generation of methylation pileups | `boolean` | False | | |
-| `skip_repeat_calling` | Skip tandem repeat calling | `boolean` | False | | |
-| `skip_repeat_annotation` | Skip tandem repeat annotation | `boolean` | False | | |
-| `skip_phasing_wf` | Skip phasing of variants and haplotagging of reads | `boolean` | False | | |
-| `skip_snv_annotation` | Skip short variant annotation | `boolean` | False | | |
-| `skip_cnv_calling` | Skip CNV calling | `boolean` | False | | |
-| `skip_call_paralogs` | Skip the calling of specific paralogous genes | `boolean` | False | | |
-| `skip_rank_variants` | Skip ranking of short variants | `boolean` | False | | |
-
-## Input/output options
-
-Define where the pipeline should find input data and save output data.
-
-| Parameter | Description | Type | Default | Required | Hidden |
-|-----------|-----------|-----------|-----------|-----------|-----------|
+# genomic-medicine-sweden/nallo pipeline parameters
+
+Long-read variant calling pipeline
+
+## Workflow skip options
+
+Allows skipping certain parts of the pipeline
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `skip_aligned_read_qc` | Skip QC of aligned reads | `boolean` | False | | |
+| `skip_raw_read_qc` | Skip QC of unaligned (raw) reads | `boolean` | False | | |
+| `skip_short_variant_calling` | Skip short variant calling | `boolean` | False | | |
+| `skip_assembly_wf` | Skip genome assembly and assembly variant calling | `boolean` | False | | |
+| `skip_mapping_wf` | Skip read mapping (alignment) | `boolean` | False | | |
+| `skip_methylation_wf` | Skip generation of methylation pileups | `boolean` | False | | |
+| `skip_repeat_calling` | Skip tandem repeat calling | `boolean` | False | | |
+| `skip_repeat_annotation` | Skip tandem repeat annotation | `boolean` | False | | |
+| `skip_phasing_wf` | Skip phasing of variants and haplotagging of reads | `boolean` | False | | |
+| `skip_snv_annotation` | Skip short variant annotation | `boolean` | False | | |
+| `skip_cnv_calling` | Skip CNV calling | `boolean` | False | | |
+| `skip_call_paralogs` | Skip the calling of specific paralogous genes | `boolean` | False | | |
+| `skip_rank_variants` | Skip ranking of short variants | `boolean` | False | | |
+
+## Input/output options
+
+Define where the pipeline should find input data and save output data.
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
| `input` | Path to comma-separated file containing information about the samples in the experiment. HelpYou will
-need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its
-location. It has to be a comma-separated file with 3 columns, and a header row. | `string` | | True | |
-| `outdir` | The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. | `string` |
-| True | |
-| `email` | Email address for completion summary. HelpSet this parameter to your e-mail address to get a summary
-e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to
-specify this on the command line for every run. | `string` | | | |
-| `multiqc_title` | MultiQC report title. Printed as page header, used for filename if not otherwise specified. | `string` | | | |
-
-## Reference genome options
-
-Reference genome related files and options required for the workflow.
-
-| Parameter | Description | Type | Default | Required | Hidden |
-|-----------|-----------|-----------|-----------|-----------|-----------|
-| `fasta` | Reference genome | `string` | | | |
-| `genome` | Name of iGenomes reference. HelpIf using a reference genome configured in the pipeline using
-iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files
-e.g. `--genome GRCh38`.
See the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details. |
-`string` | | | |
-| `igenomes_ignore` | Do not load the iGenomes reference config. HelpDo not load `igenomes.config` when running
-the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in
-`igenomes.config`. | `boolean` | True | | True |
-
-## Institutional config options
-
-Parameters used to describe centralised config profiles. These should not be edited.
-
-| Parameter | Description | Type | Default | Required | Hidden |
-|-----------|-----------|-----------|-----------|-----------|-----------|
-| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True |
-| `custom_config_base` | Base directory for Institutional configs. HelpIf you're running offline, Nextflow will
-not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you
-should download the files from the repo and tell Nextflow where to find them with this parameter. | `string` |
-https://raw.githubusercontent.com/nf-core/configs/master | | True |
-| `config_profile_name` | Institutional config name. | `string` | | | True |
-| `config_profile_description` | Institutional config description. | `string` | | | True |
-| `config_profile_contact` | Institutional config contact information. | `string` | | | True |
-| `config_profile_url` | Institutional config URL link. | `string` | | | True |
-
-## Max job request options
-
-Set the top limit for requested resources for any single job.
-
-| Parameter | Description | Type | Default | Required | Hidden |
-|-----------|-----------|-----------|-----------|-----------|-----------|
+need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its
+location. It has to be a comma-separated file with 3 columns, and a header row.| `string` | | True | |
+| `outdir` | The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. | `string` |
+| True | |
+| `email` | Email address for completion summary. HelpSet this parameter to your e-mail address to get a summary
+e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to
+specify this on the command line for every run. | `string` | | | |
+| `multiqc_title` | MultiQC report title. Printed as page header, used for filename if not otherwise specified. | `string` | | | |
+
+## Reference genome options
+
+Reference genome related files and options required for the workflow.
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `fasta` | Reference genome | `string` | | | |
+| `genome` | Name of iGenomes reference. HelpIf using a reference genome configured in the pipeline using
+iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files
+e.g. `--genome GRCh38`.
See the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details. |
+`string` | | | |
+| `igenomes_ignore` | Do not load the iGenomes reference config. HelpDo not load `igenomes.config` when running
+the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in
+`igenomes.config`. | `boolean` | True | | True |
+
+## Institutional config options
+
+Parameters used to describe centralised config profiles. These should not be edited.
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True |
+| `custom_config_base` | Base directory for Institutional configs. HelpIf you're running offline, Nextflow will
+not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you
+should download the files from the repo and tell Nextflow where to find them with this parameter. | `string` |
+https://raw.githubusercontent.com/nf-core/configs/master | | True |
+| `config_profile_name` | Institutional config name. | `string` | | | True |
+| `config_profile_description` | Institutional config description. | `string` | | | True |
+| `config_profile_contact` | Institutional config contact information. | `string` | | | True |
+| `config_profile_url` | Institutional config URL link. | `string` | | | True |
+
+## Max job request options
+
+Set the top limit for requested resources for any single job.
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
| `max_cpus` | Maximum number of CPUs that can be requested for any single job. HelpUse to set an upper-limit for
-the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1` | `integer` | 16 | | True |
+the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`| `integer` | 16 | | True |
| `max_memory` | Maximum amount of memory that can be requested for any single job. HelpUse to set an upper-limit
-for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'` | `string` |
-128.GB | | True |
+for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`| `string` |
+128.GB | | True |
| `max_time` | Maximum amount of time that can be requested for any single job. HelpUse to set an upper-limit for
-the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'` | `string` | 240.h |
-| True |
-
-## Generic options
-
-Less common options for the pipeline, typically set in a config file.
-
-| Parameter | Description | Type | Default | Required | Hidden |
-|-----------|-----------|-----------|-----------|-----------|-----------|
-| `help` | Display help text. | `boolean` | | | True |
-| `version` | Display version and exit. | `boolean` | | | True |
-| `publish_dir_mode` | Method used to save pipeline results to output directory. HelpThe Nextflow `publishDir`
+the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'` | `string` | 240.h |
+| True |
+
+## Generic options
+
+Less common options for the pipeline, typically set in a config file.
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `help` | Display help text. | `boolean` | | | True |
+| `version` | Display version and exit. | `boolean` | | | True |
+| `publish_dir_mode` | Method used to save pipeline results to output directory. HelpThe Nextflow `publishDir`
option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move
-these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details. | `string` | copy | |
-True |
-| `email_on_fail` | Email address for completion summary, only when pipeline fails. HelpAn email address to send
+these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details. | `string` | copy | |
+True |
+| `email_on_fail` | Email address for completion summary, only when pipeline fails. HelpAn email address to send
a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully. | `string` | | | True |
-| `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True |
-| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | `string` | 25.MB | | True |
-| `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True |
+| `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True |
+| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | `string` | 25.MB | | True |
+| `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True |
| `hook_url` | Incoming hook URL for messaging service HelpIncoming hook URL for messaging service. Currently, MS
-Teams and Slack are supported. | `string` | | | True |
-| `multiqc_config` | Custom config file to supply to MultiQC. | `string` | | | True |
-| `multiqc_logo` | Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file | `string` | | | True |
-| `multiqc_methods_description` | Custom MultiQC yaml file containing HTML including a methods description. | `string` | | | |
-| `validate_params` | Boolean whether to validate parameters against the schema at runtime | `boolean` | True | | True |
-| `validationShowHiddenParams` | Show all params when using `--help` HelpBy default, parameters set as _hidden_
-in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all
-parameters. | `boolean` | | | True |
-| `validationSkipDuplicateCheck` | nf-validation related parameter | `boolean` | | | True |
-| `validationS3PathCheck` | Boolean whether to validate validate AWS S3 paths | `boolean` | | | True |
-| `monochromeLogs` | Boolean whether to color nf-validation logs | `boolean` | | | True |
-
-## Workflow options
-
-Workflow options specific to genomic-medicine-sweden/nallo
-
-| Parameter | Description | Type | Default | Required | Hidden |
-|-----------|-----------|-----------|-----------|-----------|-----------|
-| `preset` | Enable or disable certain parts of the pipeline by default, depending on data type (`revio`, `pacbio`, `ONT_R10`) | `string` | revio |
-True | |
-| `variant_caller` | Which short variant software to use (`deepvariant`) | `string` | deepvariant | | |
-| `phaser` | Which phasing software to use (`whatshap`, `hiphase_snv`, `hiphase_sv`) | `string` | whatshap | | |
-| `hifiasm_mode` | Run hifiasm in hifi-only or hifi-trio mode (`hifi-only`, `trio-binning`) | `string` | hifi-only | | |
-| `parallel_alignments` | If parallel_alignments is bigger than 1, input files will be split and aligned in parallel to reduce processing time. |
-`integer` | 1 | | |
+Teams and Slack are supported.| `string` | | | True |
+| `multiqc_config` | Custom config file to supply to MultiQC. | `string` | | | True |
+| `multiqc_logo` | Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file | `string` | | | True |
+| `multiqc_methods_description` | Custom MultiQC yaml file containing HTML including a methods description. | `string` | | | |
+| `validate_params` | Boolean whether to validate parameters against the schema at runtime | `boolean` | True | | True |
+| `validationShowHiddenParams` | Show all params when using `--help` HelpBy default, parameters set as _hidden_
+in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all
+parameters. | `boolean` | | | True |
+| `validationSkipDuplicateCheck` | nf-validation related parameter | `boolean` | | | True |
+| `validationS3PathCheck` | Boolean whether to validate AWS S3 paths | `boolean` | | | True |
+| `monochromeLogs` | Boolean whether to color nf-validation logs | `boolean` | | | True |
+
+## Workflow options
+
+Workflow options specific to genomic-medicine-sweden/nallo
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `preset` | Enable or disable certain parts of the pipeline by default, depending on data type (`revio`, `pacbio`, `ONT_R10`) | `string` | revio |
+True | |
+| `variant_caller` | Which short variant software to use (`deepvariant`) | `string` | deepvariant | | |
+| `phaser` | Which phasing software to use (`whatshap`, `hiphase_snv`, `hiphase_sv`) | `string` | whatshap | | |
+| `hifiasm_mode` | Run hifiasm in hifi-only or hifi-trio mode (`hifi-only`, `trio-binning`) | `string` | hifi-only | | |
+| `parallel_alignments` | If parallel_alignments is bigger than 1, input files will be split and aligned in parallel to reduce processing time. |
+`integer` | 1 | | |
| `parallel_snv` | If parallel_snv is bigger than 1, short variant calling will be done in parallel to reduce processing time. | `integer` | 13 | |
-|
-| `vep_cache_version` | VEP cache version | `integer` | 110 | | |
-| `vep_plugin_files` | A csv file with paths to vep plugin files. pLI and LoFtool are required. | `string` | | | |
-| `deepvariant_model_type` | Sets the model type used for DeepVariant. This is set automatically using `--preset` by default. | `string` | PACBIO |
-| True |
-
-## File inputs
-
-The different files that are required. Some are only required by certain workflows, see the usage documentation.
-
-| Parameter | Description | Type | Default | Required | Hidden |
-|-----------|-----------|-----------|-----------|-----------|-----------|
-| `cadd_prescored` | Path to a directory containing prescored indels for CADD. HelpThis folder contains the
-compressed files and indexes that would otherwise be in data/prescored folder as described in
-https://github.com/kircherlab/CADD-scripts/#manual-installation. | `string` | | | |
-| `cadd_resources` | Path to a directory containing CADD annotations. HelpThis folder contains the uncompressed
-files that would otherwise be in data/annotation folder as described in
-https://github.com/kircherlab/CADD-scripts/#manual-installation. | `string` | | | |
-| `dipcall_par` | A BED file with PAR regions for dipcall and DeepVariant. | `string` | | | |
-| `tandem_repeats` | A tandem repeat BED file for sniffles | `string` | | | |
-| `trgt_repeats` | A BED file with repeats to be genotyped with TRGT | `string` | | | |
-| `snp_db` | A csv file with echtvar databases to annotate SNVs with | `string` | | | |
-| `variant_catalog` | A variant catalog json-file for stranger | `string` | | | |
-| `variant_consequences_snv` | File containing list of SO terms listed in the order of severity from most severe to lease severe for annotating
-genomic SNVs. HelpFor more information check
-https://ensembl.org/info/genome/variation/prediction/predicted_data.html | `string` | | | |
-| `vep_cache` | A path to the VEP cache location | `string` | | | |
-| `bed` | A BED file with regions of interest, used to limit short variant calling. | `string` | | | |
-| `hificnv_xy` | A BED file containing expected copy number regions for XY samples. | `string` | | | |
-| `hificnv_xx` | A BED file containing expected copy number regions for XX samples. | `string` | | | |
-| `hificnv_exclude` | A BED file specifying regions to exclude with HiFiCNV, such as centromeres. | `string` | | | |
-| `reduced_penetrance` | A file with gene ids that have reduced penetrance. For use with genmod. | `string` | | | |
-| `score_config_snv` | A SNV rank model config file for genmod. | `string` | | | |
-| `somalier_sites` | A VCF of known polymorphic sites for somalier | `string` | | | |
-| `validationFailUnrecognisedParams` | Validation of parameters fails when an unrecognised parameter is found.
-HelpBy default, when an unrecognised parameter is found, it returns a warning. | `boolean` | |
-| True |
+|
+| `vep_cache_version` | VEP cache version | `integer` | 110 | | |
+| `vep_plugin_files` | A csv file with paths to vep plugin files. pLI and LoFtool are required. | `string` | | | |
+| `deepvariant_model_type` | Sets the model type used for DeepVariant. This is set automatically using `--preset` by default. | `string` | PACBIO |
+| True |
+
+## File inputs
+
+The different files that are required. Some are only required by certain workflows, see the usage documentation.
+
+| Parameter | Description | Type | Default | Required | Hidden |
+|-----------|-----------|-----------|-----------|-----------|-----------|
+| `cadd_prescored` | Path to a directory containing prescored indels for CADD. HelpThis folder contains the
+compressed files and indexes that would otherwise be in data/prescored folder as described in
+https://github.com/kircherlab/CADD-scripts/#manual-installation. | `string` | | | |
+| `cadd_resources` | Path to a directory containing CADD annotations. HelpThis folder contains the uncompressed
+files that would otherwise be in data/annotation folder as described in
+https://github.com/kircherlab/CADD-scripts/#manual-installation. | `string` | | | |
+| `par_regions` | Provide a bed file of chrX and chrY PAR regions for dipcall and DeepVariant | `string` | | | |
+| `tandem_repeats` | A tandem repeat BED file for sniffles | `string` | | | |
+| `trgt_repeats` | A BED file with repeats to be genotyped with TRGT | `string` | | | |
+| `snp_db` | A csv file with echtvar databases to annotate SNVs with | `string` | | | |
+| `variant_catalog` | A variant catalog json-file for stranger | `string` | | | |
+| `variant_consequences_snv` | File containing list of SO terms listed in the order of severity from most severe to least severe for annotating
+genomic SNVs. HelpFor more information check
+https://ensembl.org/info/genome/variation/prediction/predicted_data.html | `string` | | | |
+| `vep_cache` | A path to the VEP cache location | `string` | | | |
+| `bed` | A BED file with regions of interest, used to limit short variant calling. | `string` | | | |
+| `hificnv_xy` | A BED file containing expected copy number regions for XY samples. | `string` | | | |
+| `hificnv_xx` | A BED file containing expected copy number regions for XX samples. | `string` | | | |
+| `hificnv_exclude` | A BED file specifying regions to exclude with HiFiCNV, such as centromeres. | `string` | | | |
+| `reduced_penetrance` | A file with gene ids that have reduced penetrance. For use with genmod. | `string` | | | |
+| `score_config_snv` | A SNV rank model config file for genmod. | `string` | | | |
+| `somalier_sites` | A VCF of known polymorphic sites for somalier | `string` | | | |
+| `validationFailUnrecognisedParams` | Validation of parameters fails when an unrecognised parameter is found.
+HelpBy default, when an unrecognised parameter is found, it returns a warning. | `boolean` | |
+| True |
| `validationLenientMode` | Validation of parameters in lenient more. HelpAllows string values that are parseable
-as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode). |
-`boolean` | | | True |
-| `pipelines_testdata_base_path` | Base URL or local path to location of pipeline test dataset files | `string` |
-https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/ | | True |
-
+as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).|
+`boolean` | | | True |
+| `pipelines_testdata_base_path` | Base URL or local path to location of pipeline test dataset files | `string` |
+https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/ | | True |
+
From b14b7adb8cbb2c504f2d3d3537c5e5bb9fc0a99e Mon Sep 17 00:00:00 2001
From: fellen31
Date: Thu, 15 Aug 2024 16:36:17 +0200
Subject: [PATCH 54/59] try again with linting
---
.editorconfig | 5 +++++
.nf-core.yml | 1 -
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/.editorconfig b/.editorconfig
index 72dda289..15d11df6 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -31,3 +31,8 @@ indent_size = unset
# ignore python and markdown
[*.{py,md}]
indent_style = unset
+
+# ignore parameters.md
+[parameters.md]
+trim_trailing_whitespace = false
+indent_style = unset
diff --git a/.nf-core.yml b/.nf-core.yml
index 116dd7e0..cb773092 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -9,7 +9,6 @@ lint:
- .github/workflows/awstest.yml
- .github/workflows/awsfulltest.yml
- conf/modules.config
- - .prettierignore
files_unchanged:
- CODE_OF_CONDUCT.md
- assets/nf-core-nallo_logo_light.png
From a6e4fee7777ac70e5e20a14b29236d2d52940f30 Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Tue, 27 Aug 2024 08:37:46 +0200
Subject: [PATCH 55/59] Update docs/README.md
Co-authored-by: Peter Pruisscher <57712924+peterpru@users.noreply.github.com>
---
docs/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/README.md b/docs/README.md
index 09d8b2fd..2208a308 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -3,7 +3,7 @@
The genomic-medicine-sweden/nallo documentation is split into the following pages:
- [Usage](usage.md)
- - An overview of how the pipeline works, how to run it and a description of command-line flags and pipeline parameters needed to run the pipeline.
+ - An overview of how the pipeline works, how to run it, and a description of command-line flags and pipeline parameters needed to run the pipeline.
- [Parameters](parameters.md)
- A description of all available pipeline parameters, including those not descibed in [Usage](usage.md).
- [Output](output.md)
From 8ed32d70e33645edf79fb0bc6c54144e97aefe5f Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Tue, 27 Aug 2024 08:40:59 +0200
Subject: [PATCH 56/59] Update output.md
---
docs/output.md | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/docs/output.md b/docs/output.md
index d288ad65..ae221835 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -298,8 +298,7 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
### SNV Annotation
-[echtvar](https://github.com/brentp/echtvar) and [VEP](https://www.ensembl.org/vep) is used to annotate small variants.
-In case of affected samples, [echtvar](https://github.com/brentp/echtvar) and [VEP](https://www.ensembl.org/vep) is used to annotate small variants.
+[echtvar](https://github.com/brentp/echtvar) and [VEP](https://www.ensembl.org/vep) are used to annotate small variants.
[bcftools](https://samtools.github.io/bcftools/) is used to generate statistics.
From 6d2c31f6fcc1eae40c666e41de7548423aeb2b99 Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Tue, 27 Aug 2024 09:27:04 +0200
Subject: [PATCH 57/59] version bump and fix missing stranger in readme (#330)
---
CHANGELOG.md | 3 ++-
README.md | 7 ++++---
assets/multiqc_config.yml | 2 +-
nextflow.config | 2 +-
4 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 19436299..ce4cdd2b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## v0.3.0dev - [xxxx-xx-xx]
+## v0.3.0 - [2024-08-27]
### `Added`
@@ -66,6 +66,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#318](https://github.com/genomic-medicine-sweden/nallo/pull/318) - Updated docs and schema to clarify pipeline usage
- [#321](https://github.com/genomic-medicine-sweden/nallo/pull/321) - Changed the input to BUILD_INTERVALS to have `meta.id` when building intervals from reference
- [#323](https://github.com/genomic-medicine-sweden/nallo/pull/323) - Changed `parallel_alignment` to `parallel_alignments` in CI tests as well
+- [#330](https://github.com/genomic-medicine-sweden/nallo/pull/330) - Updated README and version bump
### `Removed`
diff --git a/README.md b/README.md
index a557844f..f22658d0 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@
- SV calling and joint genotyping ([`sniffles2`](https://github.com/fritzsedlazeck/Sniffles))
- Tandem repeats (HiFi only) ([`TRGT`](https://github.com/PacificBiosciences/trgt/tree/main))
- Assembly based variant calls (HiFi only) ([`dipcall`](https://github.com/lh3/dipcall))
-- CNV-calling (HiFi only) ([`HiFiCNV`](https://github.com/PacificBiosciences/HiFiCNV))
+- CNV-calling ([`HiFiCNV`](https://github.com/PacificBiosciences/HiFiCNV))
- Call paralogous genes ([`Paraphase`](https://github.com/PacificBiosciences/paraphase))
##### Phasing and methylation
@@ -39,9 +39,10 @@
- Phase and haplotag reads ([`whatshap`](https://github.com/whatshap/whatshap) + [`hiphase`](https://github.com/PacificBiosciences/HiPhase))
- Methylation pileups ([`modkit`](https://github.com/nanoporetech/modkit))
-##### Annotation - SNV
+##### Annotation
-- Annotate variants with database(s) of choice, i.e. [gnomAD](https://gnomad.broadinstitute.org), [CADD](https://cadd.gs.washington.edu) etc. ([`echtvar`](https://github.com/brentp/echtvar) and [`VEP`](https://github.com/Ensembl/ensembl-vep))
+- Annotate SNVs and INDELs with database(s) of choice, e.g. [gnomAD](https://gnomad.broadinstitute.org), [CADD](https://cadd.gs.washington.edu) etc. ([`echtvar`](https://github.com/brentp/echtvar) and [`VEP`](https://github.com/Ensembl/ensembl-vep))
+- Annotate repeat expansions with [stranger](https://github.com/Clinical-Genomics/stranger)
##### Filtering and ranking
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 436be0b0..c829f8e3 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,6 +1,6 @@
report_comment: >
- This report has been generated by the genomic-medicine-sweden/nallo
+ This report has been generated by the genomic-medicine-sweden/nallo
analysis pipeline.
report_section_order:
diff --git a/nextflow.config b/nextflow.config
index 1f7d92d6..44e6630e 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -288,7 +288,7 @@ manifest {
description = """Long-read variant calling pipeline"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
- version = '0.3.0dev'
+ version = '0.3.0'
doi = ''
}
From 10d70813d2a4300789a71c248afc9125bfc2b04e Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Wed, 28 Aug 2024 14:53:54 +0200
Subject: [PATCH 58/59] Use updated sex in genmod PED-file (#332)
---
CHANGELOG.md | 3 +-
conf/modules/general.config | 8 ++-
tests/main.nf.test.snap | 4 +-
workflows/nallo.nf | 119 +++++++++++++++++++++---------------
4 files changed, 80 insertions(+), 54 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ce4cdd2b..c528759a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## v0.3.0 - [2024-08-27]
+## v0.3.0 - [2024-08-28]
### `Added`
@@ -67,6 +67,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#321](https://github.com/genomic-medicine-sweden/nallo/pull/321) - Changed the input to BUILD_INTERVALS to have `meta.id` when building intervals from reference
- [#323](https://github.com/genomic-medicine-sweden/nallo/pull/323) - Changed `parallel_alignment` to `parallel_alignments` in CI tests as well
- [#330](https://github.com/genomic-medicine-sweden/nallo/pull/330) - Updated README and version bump
+- [#332](https://github.com/genomic-medicine-sweden/nallo/pull/332) - Changed the PED file input to genmod to include inferred sex from somalier
### `Removed`
diff --git a/conf/modules/general.config b/conf/modules/general.config
index 6ac419d4..3f5ad9f1 100644
--- a/conf/modules/general.config
+++ b/conf/modules/general.config
@@ -93,7 +93,13 @@ process {
]
}
- withName: '.*:NALLO:CREATE_PEDIGREE_FILE' {
+ withName: '.*:NALLO:SAMPLESHEET_PED' {
+ publishDir = [
+ enabled: false
+ ]
+ }
+
+ withName: '.*:NALLO:SOMALIER_PED' {
publishDir = [
path: { "${params.outdir}/pedigree" },
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap
index d54cb854..3c5adbbe 100644
--- a/tests/main.nf.test.snap
+++ b/tests/main.nf.test.snap
@@ -57,7 +57,7 @@
},
"test profile - multisample": {
"content": [
- "test.ped:md5,a1e82af069bce823564e204c316d5500",
+ "test.ped:md5,db74c6787a92a70ffaab766fa4d7a873",
"multiqc_citations.txt:md5,a27affce20d456d20ed387097a4f0350",
"multiqc_fastqc.txt:md5,b74145ef9fbf8addcc2997ca26b3aa45",
"74b4822241bd8d1bc42f494f1f3e326c",
@@ -145,6 +145,6 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-12T09:49:33.754269424"
+ "timestamp": "2024-08-28T13:37:54.494547598"
}
}
\ No newline at end of file
diff --git a/workflows/nallo.nf b/workflows/nallo.nf
index 0597fb8b..ebf26251 100644
--- a/workflows/nallo.nf
+++ b/workflows/nallo.nf
@@ -6,24 +6,24 @@ include { fromSamplesheet } from 'plugin/nf-validation'
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/annotate_consequence_pli'
-include { ANNOTATE_REPEAT_EXPANSIONS } from '../subworkflows/local/annotate_repeat_expansions'
-include { ASSEMBLY } from '../subworkflows/local/genome_assembly'
-include { ASSEMBLY_VARIANT_CALLING } from '../subworkflows/local/assembly_variant_calling'
-include { CONVERT_INPUT_FILES } from '../subworkflows/local/convert_input_files'
-include { BAM_INFER_SEX } from '../subworkflows/local/bam_infer_sex'
-include { CALL_PARALOGS } from '../subworkflows/local/call_paralogs'
-include { CALL_REPEAT_EXPANSIONS } from '../subworkflows/local/call_repeat_expansions'
-include { CNV } from '../subworkflows/local/cnv'
-include { METHYLATION } from '../subworkflows/local/methylation'
-include { PHASING } from '../subworkflows/local/phasing'
-include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome'
-include { QC_ALIGNED_READS } from '../subworkflows/local/qc_aligned_reads'
-include { RANK_VARIANTS as RANK_VARIANTS_SNV } from '../subworkflows/local/rank_variants'
-include { SCATTER_GENOME } from '../subworkflows/local/scatter_genome'
-include { SHORT_VARIANT_CALLING } from '../subworkflows/local/short_variant_calling'
-include { SNV_ANNOTATION } from '../subworkflows/local/snv_annotation'
-include { STRUCTURAL_VARIANT_CALLING } from '../subworkflows/local/structural_variant_calling'
+include { ANNOTATE_CSQ_PLI as ANN_CSQ_PLI_SNV } from '../subworkflows/local/annotate_consequence_pli'
+include { ANNOTATE_REPEAT_EXPANSIONS } from '../subworkflows/local/annotate_repeat_expansions'
+include { ASSEMBLY } from '../subworkflows/local/genome_assembly'
+include { ASSEMBLY_VARIANT_CALLING } from '../subworkflows/local/assembly_variant_calling'
+include { CONVERT_INPUT_FILES } from '../subworkflows/local/convert_input_files'
+include { BAM_INFER_SEX } from '../subworkflows/local/bam_infer_sex'
+include { CALL_PARALOGS } from '../subworkflows/local/call_paralogs'
+include { CALL_REPEAT_EXPANSIONS } from '../subworkflows/local/call_repeat_expansions'
+include { CNV } from '../subworkflows/local/cnv'
+include { METHYLATION } from '../subworkflows/local/methylation'
+include { PHASING } from '../subworkflows/local/phasing'
+include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome'
+include { QC_ALIGNED_READS } from '../subworkflows/local/qc_aligned_reads'
+include { RANK_VARIANTS as RANK_VARIANTS_SNV } from '../subworkflows/local/rank_variants'
+include { SCATTER_GENOME } from '../subworkflows/local/scatter_genome'
+include { SHORT_VARIANT_CALLING } from '../subworkflows/local/short_variant_calling'
+include { SNV_ANNOTATION } from '../subworkflows/local/snv_annotation'
+include { STRUCTURAL_VARIANT_CALLING } from '../subworkflows/local/structural_variant_calling'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -32,24 +32,25 @@ include { STRUCTURAL_VARIANT_CALLING } from '../subworkflows/local/stru
*/
// local
-include { CREATE_PEDIGREE_FILE } from '../modules/local/create_pedigree_file'
-include { ECHTVAR_ENCODE } from '../modules/local/echtvar/encode/main'
-include { FQCRS } from '../modules/local/fqcrs'
-include { SAMTOOLS_MERGE } from '../modules/nf-core/samtools/merge/main'
+include { CREATE_PEDIGREE_FILE as SAMPLESHEET_PED } from '../modules/local/create_pedigree_file'
+include { CREATE_PEDIGREE_FILE as SOMALIER_PED } from '../modules/local/create_pedigree_file'
+include { ECHTVAR_ENCODE } from '../modules/local/echtvar/encode/main'
+include { FQCRS } from '../modules/local/fqcrs'
+include { SAMTOOLS_MERGE } from '../modules/nf-core/samtools/merge/main'
// nf-core
-include { BCFTOOLS_CONCAT } from '../modules/nf-core/bcftools/concat/main'
-include { BCFTOOLS_PLUGINSPLIT } from '../modules/nf-core/bcftools/pluginsplit/main'
-include { BCFTOOLS_STATS } from '../modules/nf-core/bcftools/stats/main'
-include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
-include { FASTQC } from '../modules/nf-core/fastqc/main'
-include { MINIMAP2_ALIGN } from '../modules/nf-core/minimap2/align/main'
-include { MULTIQC } from '../modules/nf-core/multiqc/main'
-include { SPLITUBAM } from '../modules/nf-core/splitubam/main'
-include { paramsSummaryMap } from 'plugin/nf-validation'
-include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
-include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
-include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_nallo_pipeline'
+include { BCFTOOLS_CONCAT } from '../modules/nf-core/bcftools/concat/main'
+include { BCFTOOLS_PLUGINSPLIT } from '../modules/nf-core/bcftools/pluginsplit/main'
+include { BCFTOOLS_STATS } from '../modules/nf-core/bcftools/stats/main'
+include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
+include { FASTQC } from '../modules/nf-core/fastqc/main'
+include { MINIMAP2_ALIGN } from '../modules/nf-core/minimap2/align/main'
+include { MULTIQC } from '../modules/nf-core/multiqc/main'
+include { SPLITUBAM } from '../modules/nf-core/splitubam/main'
+include { paramsSummaryMap } from 'plugin/nf-validation'
+include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_nallo_pipeline'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -109,20 +110,6 @@ workflow NALLO {
// Check parameter that doesn't conform to schema validation here
if (params.phaser.matches('hiphase_sv|hiphase_snv') && params.preset == 'ONT_R10') { error "The HiPhase license only permits analysis of data from PacBio. For details see: https://github.com/PacificBiosciences/HiPhase/blob/main/LICENSE.md" }
- // Create PED from samplesheet
- ch_input
- .map { meta, files -> [ meta.project, meta ] }
- .groupTuple()
- .set { ch_ped_in }
-
- ch_pedfile = CREATE_PEDIGREE_FILE ( ch_ped_in )
- ch_versions = ch_versions.mix(CREATE_PEDIGREE_FILE.out.versions)
-
- CREATE_PEDIGREE_FILE.out.ped
- .map { project, ped -> [ [ 'id': project ], ped ] }
- .collect()
- .set { ch_pedfile }
-
// Read and store paths in the vep_plugin_files file
if (params.vep_plugin_files) {
ch_vep_extra_files_unsplit.splitCsv ( header:true )
@@ -247,10 +234,26 @@ workflow NALLO {
.map { meta, bam, bai -> [ meta - meta.subMap('n_files'), bam, bai ] }
.set { bam_infer_sex_in }
+ //
+ // Create PED from samplesheet
+ //
+ ch_input
+ .map { meta, files -> [ meta.project, meta ] }
+ .groupTuple()
+ .set { ch_samplesheet_ped_in }
+
+ SAMPLESHEET_PED ( ch_samplesheet_ped_in )
+ ch_versions = ch_versions.mix(SAMPLESHEET_PED.out.versions)
+
+ SAMPLESHEET_PED.out.ped
+ .map { project, ped -> [ [ 'id': project ], ped ] }
+ .collect()
+ .set { ch_samplesheet_pedfile }
+
//
// Check sex and relatedness, and update with infered sex if the sex for a sample is unknown
//
- BAM_INFER_SEX ( bam_infer_sex_in, fasta, fai, ch_somalier_sites, ch_pedfile )
+ BAM_INFER_SEX ( bam_infer_sex_in, fasta, fai, ch_somalier_sites, ch_samplesheet_pedfile )
ch_versions = ch_versions.mix(BAM_INFER_SEX.out.versions)
ch_multiqc_files = ch_multiqc_files.mix(BAM_INFER_SEX.out.somalier_samples.map{it[1]}.collect().ifEmpty([]))
@@ -260,6 +263,22 @@ workflow NALLO {
bai = BAM_INFER_SEX.out.bai
bam_bai = BAM_INFER_SEX.out.bam_bai
+ //
+ // Create PED with updated sex
+ //
+ bam
+ .map { meta, files -> [ meta.project, meta ] }
+ .groupTuple()
+ .set { ch_somalier_ped_in }
+
+ SOMALIER_PED ( ch_somalier_ped_in )
+ ch_versions = ch_versions.mix(SOMALIER_PED.out.versions)
+
+ SOMALIER_PED.out.ped
+ .map { project, ped -> [ [ 'id': project ], ped ] }
+ .collect()
+ .set { ch_updated_pedfile }
+
//
// Run aligned read QC with mosdepth and cramino
//
@@ -379,7 +398,7 @@ workflow NALLO {
// Only run if we have affected individuals
RANK_VARIANTS_SNV (
ANN_CSQ_PLI_SNV.out.vcf_ann.filter { meta, vcf -> meta.contains_affected },
- ch_pedfile.map { meta, ped -> ped },
+ ch_updated_pedfile.map { meta, ped -> ped },
ch_reduced_penetrance,
ch_score_config_snv
)
From 14849a81fdc32db9a42b98a1d47cb16dcd7bfcf0 Mon Sep 17 00:00:00 2001
From: Felix Lenner <52530259+fellen31@users.noreply.github.com>
Date: Thu, 29 Aug 2024 11:17:20 +0200
Subject: [PATCH 59/59] Add sample name to TRGT output (#333)
---
CHANGELOG.md | 4 +++-
conf/modules/call_repeat_expansions.config | 13 ++++-------
modules/local/trgt/main.nf | 3 ++-
.../local/call_repeat_expansions/main.nf | 2 +-
tests/main.nf.test.snap | 22 +++++++++----------
5 files changed, 21 insertions(+), 23 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c528759a..d3277ba2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## v0.3.0 - [2024-08-28]
+## v0.3.0 - [2024-08-29]
### `Added`
@@ -68,6 +68,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#323](https://github.com/genomic-medicine-sweden/nallo/pull/323) - Changed `parallel_alignment` to `parallel_alignments` in CI tests as well
- [#330](https://github.com/genomic-medicine-sweden/nallo/pull/330) - Updated README and version bump
- [#332](https://github.com/genomic-medicine-sweden/nallo/pull/332) - Changed the PED file input to genmod to include inferred sex from somalier
+- [#333](https://github.com/genomic-medicine-sweden/nallo/pull/333) - Updated TRGT to 0.7.0 and added `meta.id` as output sample name
### `Removed`
@@ -117,6 +118,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
| tabix | 1.19.1 | 1.20 |
| echtvar | 0.1.7 | 0.2.0 |
| somalier | 0.2.15 | 0.2.18 |
+| TRGT | 0.4.0 | 0.7.0 |
| cadd | | 1.6.post1 |
| gawk | | 5.3.0 |
| add_most_severe_consequence | | v1.0 |
diff --git a/conf/modules/call_repeat_expansions.config b/conf/modules/call_repeat_expansions.config
index 67f7b81e..d46150df 100644
--- a/conf/modules/call_repeat_expansions.config
+++ b/conf/modules/call_repeat_expansions.config
@@ -24,10 +24,12 @@ process {
]
}
- withName: '.*:CALL_REPEAT_EXPANSIONS:SAMTOOLS_SORT_TRGT' {
+ withName: '.*:CALL_REPEAT_EXPANSIONS:TRGT' {
+ ext.args = { "--sample-name ${meta.id}" }
+ }
+ withName: '.*:CALL_REPEAT_EXPANSIONS:SAMTOOLS_SORT_TRGT' {
ext.prefix = { "${meta.id}_spanning_sorted" }
-
publishDir = [
path: { "${params.outdir}/repeat_calling/trgt/single_sample/${meta.id}" },
mode: params.publish_dir_mode,
@@ -36,7 +38,6 @@ process {
}
withName: '.*:CALL_REPEAT_EXPANSIONS:SAMTOOLS_INDEX_TRGT' {
-
publishDir = [
path: { "${params.outdir}/repeat_calling/trgt/single_sample/${meta.id}" },
mode: params.publish_dir_mode,
@@ -45,13 +46,11 @@ process {
}
withName: '.*:CALL_REPEAT_EXPANSIONS:BCFTOOLS_SORT_TRGT' {
-
ext.prefix = { "${meta.id}_sorted" }
ext.args = [
'--output-type z',
'--write-index=tbi'
].join(' ')
-
publishDir = [
path: { "${params.outdir}/repeat_calling/trgt/single_sample/${meta.id}" },
mode: params.publish_dir_mode,
@@ -60,13 +59,11 @@ process {
}
withName: '.*:CALL_REPEAT_EXPANSIONS:BCFTOOLS_MERGE' {
-
ext.args = [
'--output-type z',
'--write-index=tbi',
'--force-single'
].join(' ')
-
publishDir = [
path: { "${params.outdir}/repeat_calling/trgt/multi_sample/${meta.id}" },
mode: params.publish_dir_mode,
@@ -75,9 +72,7 @@ process {
}
withName: '.*:CALL_REPEAT_EXPANSIONS:BCFTOOLS_INDEX_MERGE' {
-
ext.args = '--tbi'
-
publishDir = [
path: { "${params.outdir}/repeat_calling/trgt/multi_sample/${meta.id}" },
mode: params.publish_dir_mode,
diff --git a/modules/local/trgt/main.nf b/modules/local/trgt/main.nf
index f195eda4..0c493bf4 100644
--- a/modules/local/trgt/main.nf
+++ b/modules/local/trgt/main.nf
@@ -3,11 +3,12 @@ process TRGT {
label 'process_medium'
conda "${moduleDir}/environment.yml"
- container "pacbio/trgt:0.4.0"
+ container "biocontainers/trgt:0.7.0--hdfd78af_0"
input:
tuple val(meta), path(bam), path(bai), val(sex)
tuple val(meta2), path(fasta)
+ tuple val(meta3), path(fai)
path(repeats)
output:
diff --git a/subworkflows/local/call_repeat_expansions/main.nf b/subworkflows/local/call_repeat_expansions/main.nf
index 4999e0bf..8b71453e 100644
--- a/subworkflows/local/call_repeat_expansions/main.nf
+++ b/subworkflows/local/call_repeat_expansions/main.nf
@@ -22,7 +22,7 @@ workflow CALL_REPEAT_EXPANSIONS {
.set { ch_trgt_input }
// Run TRGT
- TRGT ( ch_trgt_input, ch_fasta, ch_trgt_bed.map { it[1] } )
+ TRGT ( ch_trgt_input, ch_fasta, ch_fai, ch_trgt_bed.map { it[1] } )
// Sort and index bam
SAMTOOLS_SORT_TRGT ( TRGT.out.bam, [[],[]] )
diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap
index 3c5adbbe..a3412aba 100644
--- a/tests/main.nf.test.snap
+++ b/tests/main.nf.test.snap
@@ -40,9 +40,9 @@
"HG002_Revio.regions.bed.gz:md5,e051f64c7a780d67ea6727a327dd4281",
"HG002_Revio.regions.bed.gz.csi:md5,026eef1c69fb4aa3a1687463fe2088ab",
"HG002_PacBio_Revio.fastq.gz.tsv.zst:md5,4b073293b3e771d19b4cfdb07909571b",
- "HG002_Revio_sorted.vcf.gz:md5,bc06de08b8e36b3b48e0d7b9e21df389",
- "HG002_Revio_sorted.vcf.gz.tbi:md5,08a5c82838264c558eb30726906f47e0",
- "110181f29066158df34abbad9e3becc8",
+ "HG002_Revio_sorted.vcf.gz:md5,fbb5699b8f74fc105fb154e8fac7bfea",
+ "HG002_Revio_sorted.vcf.gz.tbi:md5,0466518ee265ba63160ed27cee0dec88",
+ "65999ab8f2bc7841de8172468bf23ab6",
[
"# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
"# The command line was:\tbcftools stats HG002_Revio.vcf.gz",
@@ -53,7 +53,7 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-12T09:45:54.652442138"
+ "timestamp": "2024-08-29T10:42:09.011660883"
},
"test profile - multisample": {
"content": [
@@ -92,9 +92,9 @@
"HG002_Revio_A.regions.bed.gz:md5,e051f64c7a780d67ea6727a327dd4281",
"HG002_Revio_A.regions.bed.gz.csi:md5,026eef1c69fb4aa3a1687463fe2088ab",
"HG002_PacBio_Revio.bam_other.fastq.gz.tsv.zst:md5,4b073293b3e771d19b4cfdb07909571b",
- "HG002_Revio_A_sorted.vcf.gz:md5,b95e709a27fe1df9ee1487b99f396bf4",
- "HG002_Revio_A_sorted.vcf.gz.tbi:md5,b1eb1f21f36782089b8e0bb0a54105ed",
- "110181f29066158df34abbad9e3becc8",
+ "HG002_Revio_A_sorted.vcf.gz:md5,680938d6ebeafe73d8df0b21c0310276",
+ "HG002_Revio_A_sorted.vcf.gz.tbi:md5,a6554ab817e7c232a1554ea85fa00151",
+ "65999ab8f2bc7841de8172468bf23ab6",
[
"# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
"# The command line was:\tbcftools stats HG002_Revio_A.vcf.gz",
@@ -132,9 +132,9 @@
"HG002_Revio_B.regions.bed.gz:md5,deaca22783bd058cdc8756efa25b5f53",
"HG002_Revio_B.regions.bed.gz.csi:md5,dd9a0d36d71da0d274d1c9ca6f8571ae",
"HG002_Revio_B.merged.fastq.gz.tsv.zst:md5,0641e175a07429b61710329a2eeef450",
- "HG002_Revio_B_sorted.vcf.gz:md5,05ae66b46d2f87a2133fcdf93d30f38c",
- "HG002_Revio_B_sorted.vcf.gz.tbi:md5,244a3f966e3434220cd69fcb04b08d01",
- "18e3bd1fe43fc17ace2f57db5861498c",
+ "HG002_Revio_B_sorted.vcf.gz:md5,ce617741468f4bc7f504f8f488332098",
+ "HG002_Revio_B_sorted.vcf.gz.tbi:md5,e255a5ea92885967f0c126bddc8ea3b2",
+ "6b0cf3f492ce898398835d1102afd369",
[
"# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.",
"# The command line was:\tbcftools stats HG002_Revio_B.vcf.gz",
@@ -145,6 +145,6 @@
"nf-test": "0.8.4",
"nextflow": "24.04.3"
},
- "timestamp": "2024-08-28T13:37:54.494547598"
+ "timestamp": "2024-08-29T10:45:44.41793623"
}
}
\ No newline at end of file
|