From c3c2ae28d5fe20c3c6d6a709544087374e04ddc5 Mon Sep 17 00:00:00 2001 From: Felix Lenner <52530259+fellen31@users.noreply.github.com> Date: Wed, 25 Sep 2024 17:16:04 +0200 Subject: [PATCH] Add longphase (#388) * Add longphase * Add tool to CHANGELOG * Update parameters.md * Update CHANGELOG.md * Update main.nf.test.snap * Revert back to using for run example --- CHANGELOG.md | 13 +- CITATIONS.md | 4 + README.md | 46 +- conf/base.config | 2 +- conf/modules/phasing.config | 49 +- conf/modules/short_variant_calling.config | 3 +- docs/output.md | 30 +- docs/parameters.md | 2 +- modules.json | 10 + modules/local/hiphase/main.nf | 4 +- .../longphase/haplotag/environment.yml | 7 + modules/nf-core/longphase/haplotag/main.nf | 66 +++ modules/nf-core/longphase/haplotag/meta.yml | 89 ++++ .../longphase/haplotag/tests/main.nf.test | 202 +++++++++ .../haplotag/tests/main.nf.test.snap | 159 +++++++ .../longphase/haplotag/tests/nextflow.config | 5 + .../nf-core/longphase/haplotag/tests/tags.yml | 2 + .../nf-core/longphase/phase/environment.yml | 8 + modules/nf-core/longphase/phase/main.nf | 69 +++ modules/nf-core/longphase/phase/meta.yml | 81 ++++ .../longphase/phase/tests/main.nf.test | 157 +++++++ .../longphase/phase/tests/main.nf.test.snap | 134 ++++++ .../longphase/phase/tests/nextflow.config | 5 + .../nf-core/longphase/phase/tests/tags.yml | 2 + nextflow.config | 2 +- nextflow_schema.json | 6 +- subworkflows/local/phasing.nf | 248 +++++----- .../local/short_variant_calling/main.nf | 3 +- .../tests/main.nf.test.snap | 426 ++++++++++++++---- .../tests/nextflow.config | 3 +- .../local/utils_nfcore_nallo_pipeline/main.nf | 7 +- tests/main.nf.test | 71 +-- tests/main.nf.test.snap | 18 +- workflows/nallo.nf | 10 +- 34 files changed, 1621 insertions(+), 322 deletions(-) create mode 100644 modules/nf-core/longphase/haplotag/environment.yml create mode 100644 modules/nf-core/longphase/haplotag/main.nf create mode 100644 modules/nf-core/longphase/haplotag/meta.yml create mode 100644 
modules/nf-core/longphase/haplotag/tests/main.nf.test create mode 100644 modules/nf-core/longphase/haplotag/tests/main.nf.test.snap create mode 100644 modules/nf-core/longphase/haplotag/tests/nextflow.config create mode 100644 modules/nf-core/longphase/haplotag/tests/tags.yml create mode 100644 modules/nf-core/longphase/phase/environment.yml create mode 100644 modules/nf-core/longphase/phase/main.nf create mode 100644 modules/nf-core/longphase/phase/meta.yml create mode 100644 modules/nf-core/longphase/phase/tests/main.nf.test create mode 100644 modules/nf-core/longphase/phase/tests/main.nf.test.snap create mode 100644 modules/nf-core/longphase/phase/tests/nextflow.config create mode 100644 modules/nf-core/longphase/phase/tests/tags.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index b38a88aa..048c5111 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#366](https://github.com/genomic-medicine-sweden/nallo/pull/366) - Added sorting of samples when creating PED files, so the output is always the same - [#367](https://github.com/genomic-medicine-sweden/nallo/pull/367) - Added Severus as the default SV caller, together with a `--sv_caller` parameter to choose caller - [#371](https://github.com/genomic-medicine-sweden/nallo/pull/371) - Added `FOUND_IN=caller` tags to SV output +- [#388](https://github.com/genomic-medicine-sweden/nallo/pull/388) - Added longphase as the default phaser +- [#388](https://github.com/genomic-medicine-sweden/nallo/pull/388) - Added single-sample tbi output to the short variant calling subworkflow - [#393](https://github.com/genomic-medicine-sweden/nallo/pull/393) - Added a new `--minimap2_read_mapping_preset` parameter ### `Changed` @@ -32,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#365](https://github.com/genomic-medicine-sweden/nallo/pull/365) - Changed CI to only use nf-test for pipeline tests 
- [#381](https://github.com/genomic-medicine-sweden/nallo/pull/381) - Updated CI nf-test version to 0.9.0 - [#382](https://github.com/genomic-medicine-sweden/nallo/pull/382) - Changed vep_plugin_files description in schema and docs +- [#388](https://github.com/genomic-medicine-sweden/nallo/pull/388) - Changed phasing output structure and naming, and updated docs - [#393](https://github.com/genomic-medicine-sweden/nallo/pull/393) - Changed the default minimap2 preset for PacBio data from `map-hifi` to `lr:hqae` - [#397](https://github.com/genomic-medicine-sweden/nallo/pull/397) - Changed `pipelines_testdata_base_path` to pin a specific commit @@ -40,6 +43,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#352](https://github.com/genomic-medicine-sweden/nallo/pull/352) - Removed the fqcrs module - [#356](https://github.com/genomic-medicine-sweden/nallo/pull/356) - Removed filter_vep section from output documentation since it is not in the pipeline - [#379](https://github.com/genomic-medicine-sweden/nallo/pull/379) - Removed VEP Plugins from testdata ([genomic-medicine-sweden/test-datasets#16](https://github.com/genomic-medicine-sweden/test-datasets/pull/16)) +- [#388](https://github.com/genomic-medicine-sweden/nallo/pull/388) - Removed support for co-phasing SVs with HiPhase, as the officially supported caller (pbsv) is not in the pipeline ### `Fixed` @@ -64,10 +68,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Module updates -| Tool | Old version | New version | -| ------- | ----------- | ----------- | -| fqcrs | 0.1.0 | -| severus | | 1.1 | +| Tool | Old version | New version | +| ---------- | ----------- | ----------- | +| fqcrs | 0.1.0 | +| severus | | 1.1 | +| longphase  |   | 1.7.3   | > [!NOTE] > Version has been updated if both old and new version information is present. 
diff --git a/CITATIONS.md b/CITATIONS.md index 9ae9e0e4..21c4c141 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -70,6 +70,10 @@ - [HiFiCNV](https://github.com/PacificBiosciences/HiFiCNV) +- [LongPhase](https://github.com/twolinin/longphase) + + > Jyun-Hong Lin, Liang-Chi Chen, Shu-Chi Yu, Yao-Ting Huang, LongPhase: an ultra-fast chromosome-scale phasing algorithm for small and large variants, Bioinformatics, Volume 38, Issue 7, March 2022, Pages 1816–1822, https://doi.org/10.1093/bioinformatics/btac058 + - [minimap2](https://academic.oup.com/bioinformatics/article/34/18/3094/4994778) > Heng Li, Minimap2: pairwise alignment for nucleotide sequences, Bioinformatics, Volume 34, Issue 18, September 2018, Pages 3094–3100, https://doi.org/10.1093/bioinformatics/bty191 diff --git a/README.md b/README.md index 7db1f97c..bc59d979 100644 --- a/README.md +++ b/README.md @@ -3,54 +3,53 @@ [![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.13748210.svg)](https://doi.org/10.5281/zenodo.13748210) [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/genomic-medicine-sweden/nallo) ## Introduction -**genomic-medicine-sweden/nallo** is a bioinformatics analysis pipeline for long-read rare disease SV/SNV identification using both PacBio and (targeted) ONT-data. 
Heavily influenced by best-practice pipelines such as [nf-core/nanoseq](https://github.com/nf-core/nanoseq), [nf-core/sarek](https://nf-co.re/sarek), [nf-core/raredisease](https://nf-co.re/raredisease), [PacBio Human WGS Workflow](https://github.com/PacificBiosciences/pb-human-wgs-workflow-snakemake), [epi2me-labs/wf-human-variation](https://github.com/epi2me-labs/wf-human-variation) and [brentp/rare-disease-wf](https://github.com/brentp/rare-disease-wf). +**genomic-medicine-sweden/nallo** is a bioinformatics analysis pipeline for long-read data from both PacBio and (targeted) ONT sequencing, focused on rare disease. It is heavily influenced by best-practice pipelines such as [nf-core/sarek](https://nf-co.re/sarek), [nf-core/raredisease](https://nf-co.re/raredisease), [nf-core/nanoseq](https://github.com/nf-core/nanoseq), [PacBio Human WGS Workflow](https://github.com/PacificBiosciences/pb-human-wgs-workflow-snakemake), [epi2me-labs/wf-human-variation](https://github.com/epi2me-labs/wf-human-variation) and [brentp/rare-disease-wf](https://github.com/brentp/rare-disease-wf). 
-## Pipeline summary +## Overview genomic-medicine-sweden/nallo workflow +## Pipeline summary + ##### QC -- FastQC ([`FastQC`](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -- Aligned read QC ([`cramino`](https://github.com/wdecoster/cramino)) -- Depth information ([`mosdepth`](https://github.com/brentp/mosdepth)) +- Read QC with [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [cramino](https://github.com/wdecoster/cramino) and [mosdepth](https://github.com/brentp/mosdepth) ##### Alignment & assembly -- Align reads to reference ([`minimap2`](https://github.com/lh3/minimap2)) -- Assemble (trio-binned) haploid genomes (HiFi only) ([`hifiasm`](https://github.com/chhylp123/hifiasm)) +- Align reads to reference with [minimap2](https://github.com/lh3/minimap2) +- Assemble (trio-binned) haploid genomes with [hifiasm](https://github.com/chhylp123/hifiasm) (HiFi only) ##### Variant calling -- Short variant calling & joint genotyping of SNVs ([`deepvariant`](https://github.com/google/deepvariant) + [`GLNexus`](https://github.com/dnanexus-rnd/GLnexus)) -- SV calling with [Severus](https://github.com/KolmogorovLab/Severus) or [Sniffles2](https://github.com/fritzsedlazeck/Sniffles) -- Tandem repeats (HiFi only) ([`TRGT`](https://github.com/PacificBiosciences/trgt/tree/main)) -- Assembly based variant calls (HiFi only) ([`dipcall`](https://github.com/lh3/dipcall)) -- CNV-calling ([`HiFiCNV`](https://github.com/PacificBiosciences/HiFiCNV)) -- Call paralogous genes ([`Paraphase`](https://github.com/PacificBiosciences/paraphase)) +- Call SNVs & joint genotyping with [deepvariant](https://github.com/google/deepvariant) and [GLNexus](https://github.com/dnanexus-rnd/GLnexus) +- Call SVs with [Severus](https://github.com/KolmogorovLab/Severus) or [Sniffles2](https://github.com/fritzsedlazeck/Sniffles) +- Call CNVs with [HiFiCNV](https://github.com/PacificBiosciences/HiFiCNV) +- Call tandem repeats with 
[TRGT](https://github.com/PacificBiosciences/trgt/tree/main) (HiFi only) +- Call paralogous genes with [Paraphase](https://github.com/PacificBiosciences/paraphase) +- Call variants from assembly with [dipcall](https://github.com/lh3/dipcall) (HiFi only) ##### Phasing and methylation -- Phase and haplotag reads ([`whatshap`](https://github.com/whatshap/whatshap) + [`hiphase`](https://github.com/PacificBiosciences/HiPhase)) -- Methylation pileups ([`modkit`](https://github.com/nanoporetech/modkit)) +- Phase and haplotag reads with [LongPhase](https://github.com/twolinin/longphase), [whatshap](https://github.com/whatshap/whatshap) or [HiPhase](https://github.com/PacificBiosciences/HiPhase) +- Create methylation pileups with [modkit](https://github.com/nanoporetech/modkit) ##### Annotation -- Annotate SNVs and INDELs with database(s) of choice, i.e. [gnomAD](https://gnomad.broadinstitute.org), [CADD](https://cadd.gs.washington.edu) etc. ([`echtvar`](https://github.com/brentp/echtvar) and [`VEP`](https://github.com/Ensembl/ensembl-vep)) +- Annotate SNVs and INDELs with databases of choice, e.g. [gnomAD](https://gnomad.broadinstitute.org), [CADD](https://cadd.gs.washington.edu) etc. 
with [echtvar](https://github.com/brentp/echtvar) and [VEP](https://github.com/Ensembl/ensembl-vep) - Annotate repeat expansions with [stranger](https://github.com/Clinical-Genomics/stranger) -##### Filtering and ranking +##### Ranking -- Rank variants ([`GENMOD`](https://github.com/Clinical-Genomics/genmod)) +- Rank SNVs with [GENMOD](https://github.com/Clinical-Genomics/genmod) ## Usage @@ -63,14 +62,15 @@ Prepare a samplesheet with input data: ``` project,sample,file,family_id,paternal_id,maternal_id,sex,phenotype -testrun,HG002,/path/to/HG002.fastq.gz,FAM1,HG003,HG004,1,2 -testrun,HG005,/path/to/HG005.bam,FAM1,HG003,HG004,2,1 +NIST,HG002,/path/to/HG002.fastq.gz,FAM1,HG003,HG004,1,2 +NIST,HG005,/path/to/HG005.bam,FAM1,HG003,HG004,2,1 ``` -Now, you can run the pipeline using: +Supply a reference genome with `--fasta` and choose a matching `--preset` for your data (`revio`, `pacbio`, `ONT_R10`). Now, you can run the pipeline using: ```bash -nextflow run genomic-medicine-sweden/nallo -profile YOURPROFILE \ +nextflow run genomic-medicine-sweden/nallo \ + -profile \ --input samplesheet.csv \ --preset \ --fasta \ diff --git a/conf/base.config b/conf/base.config index 2bc62420..50230189 100644 --- a/conf/base.config +++ b/conf/base.config @@ -61,7 +61,7 @@ process { maxRetries = 2 } - withName: '.*:SAMTOOLS_MERGE' { + withName: 'SAMTOOLS_MERGE|SAMTOOLS_INDEX' { label = 'process_medium' } } diff --git a/conf/modules/phasing.config b/conf/modules/phasing.config index 1342807a..136bb6a4 100644 --- a/conf/modules/phasing.config +++ b/conf/modules/phasing.config @@ -24,8 +24,7 @@ process { ] } - withName: '.*:PHASING:HIPHASE_SNV' { - ext.prefix = { "$meta.id}_phased" } + withName: '.*:PHASING:HIPHASE' { ext.args = { [ '--ignore-read-groups', "--stats-file ${meta.id}_phased.stats.tsv", @@ -35,22 +34,38 @@ process { publishDir = [ path: { "${params.outdir}/" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : ((filename.endsWith('bam') || filename.endsWith('bai')) ? "aligned_reads/${meta.id}/${filename}" : "phasing/hiphase/snv/${meta.id}/${filename}" ) } + saveAs: { filename -> filename.equals('versions.yml') ? null : ((filename.endsWith('bam') || filename.endsWith('bai')) ? "aligned_reads/${meta.id}/${filename}" : "phased_variants/${meta.id}/${filename}" ) } ] } - withName: '.*:PHASING:HIPHASE_SV' { - ext.prefix = { "$meta.id}_phased" } - ext.args = { [ - '--ignore-read-groups', - "--stats-file ${meta.id}_phased.stats.tsv", - "--blocks-file ${meta.id}_phased.blocks.tsv", - "--summary-file ${meta.id}_phased.summary.tsv" - ].join(' ') } + withName: '.*:PHASING:LONGPHASE_PHASE' { + ext.prefix = { "${meta.id}_phased" } + ext.args = [ + params.preset.equals('ONT_R10') ? "--ont" : "--pb", + '--indels' + ].join(' ') publishDir = [ - path: { "${params.outdir}/" }, + path: { "${params.outdir}/phased_variants/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : ((filename.endsWith('bam') || filename.endsWith('bai')) ? "aligned_reads/${meta.id}/${filename}" : "phasing/hiphase/sv/${meta.id}/${filename}" ) } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:PHASING:TABIX_LONGPHASE_PHASE' { + publishDir = [ + path: { "${params.outdir}/phased_variants/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + + withName: '.*:PHASING:LONGPHASE_HAPLOTAG' { + ext.prefix = { "${meta.id}_haplotagged" } + publishDir = [ + path: { "${params.outdir}/aligned_reads/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } @@ -61,7 +76,7 @@ process { '--indels' ].join(' ') publishDir = [ - path: { "${params.outdir}/phasing/whatshap/phase/${meta.id}" }, + path: { "${params.outdir}/phased_variants/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -70,14 +85,14 @@ process { withName: '.*:PHASING:WHATSHAP_STATS' { ext.prefix = { "${meta.id}_stats" } publishDir = [ - path: { "${params.outdir}/phasing/whatshap/stats/${meta.id}" }, + path: { "${params.outdir}/qc/phasing_stats/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: '.*:PHASING:WHATSHAP_HAPLOTAG' { - ext.prefix = { "${meta.id}_phased" } + ext.prefix = { "${meta.id}_haplotagged" } ext.args = [ '--ignore-read-groups', '--tag-supplementary' @@ -89,7 +104,7 @@ process { ] } - withName: '.*:PHASING:SAMTOOLS_INDEX_WHATSHAP' { + withName: '.*:PHASING:SAMTOOLS_INDEX_WHATSHAP|.*:PHASING:SAMTOOLS_INDEX_LONGPHASE' { publishDir = [ path: { "${params.outdir}/aligned_reads/${meta.id}" }, mode: params.publish_dir_mode, diff --git a/conf/modules/short_variant_calling.config b/conf/modules/short_variant_calling.config index 2559d947..4c6369b4 100644 --- a/conf/modules/short_variant_calling.config +++ b/conf/modules/short_variant_calling.config @@ -47,7 +47,8 @@ process { ext.args = [ '-m -', '-w 10000', - '--output-type u', + '--output-type z', + '--write-index=tbi' ].join(' ') } diff --git a/docs/output.md b/docs/output.md index 7a263bd5..3a457b0e 100644 --- a/docs/output.md +++ b/docs/output.md @@ -157,40 +157,22 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ ### Phasing -[WhatsHap](https://whatshap.readthedocs.io/en/latest/) or [HiPhase](https://github.com/PacificBiosciences/HiPhase) are used to phase variants and haplotag reads. 
+[LongPhase](https://github.com/twolinin/longphase), [WhatsHap](https://whatshap.readthedocs.io/en/latest/) or [HiPhase](https://github.com/PacificBiosciences/HiPhase) are used to phase variants and haplotag reads.
-Output files from WhatsHap +Output files from phasing - `{outputdir}/aligned_reads/{sample}/` - - `{sample}_phased.bam`: BAM file with haplotags - - `{sample}_phased.bam.bai`: Index of the corresponding bam file -- `{outputdir}/phasing/whatshap/phase/{sample}/` + - `{sample}_haplotagged.bam`: BAM file with haplotags + - `{sample}_haplotagged.bam.bai`: Index of the corresponding bam file +- `{outputdir}/phased_variants/{sample}/` - `*.vcf.gz`: VCF file with phased variants - `*.vcf.gz.tbi`: Index of the corresponding VCF file -- `{outputdir}/phasing/whatshap/stats/{sample}/` +- `{outputdir}/qc/phasing_stats/{sample}/` - `*.blocks.tsv`: File with phase blocks - `*.stats.tsv`: File with phasing statistics
-
-Output files from HiPhase - -- `{outputdir}/aligned_reads/{sample}/` - - - `{sample}_phased.bam`: BAM file with haplotags - - `{sample}_phased.bam.bai`: Index of the corresponding bam file - -- `{outputdir}/phasing/hiphase/{snv,sv}/{sample}/` - - - `*.blocks.tsv`: File with phase blocks - - `*.stats.tsv.gz`: File with phasing statistics - - `*.vcf.gz`: VCF file with phased variants - - `*.vcf.gz.tbi`: Index of the corresponding VCF file - - `*.summary.tsv`: HiPhase summary file - -
- ### Pipeline information [Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/parameters.md b/docs/parameters.md index ce5c816d..2f086356 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -97,7 +97,7 @@ Workflow options specific to genomic-medicine-sweden/nallo | `preset` | Enable or disable certain parts of the pipeline by default, depending on data type (`revio`, `pacbio`, `ONT_R10`) | `string` | revio | True | | | `variant_caller` | Which short variant software to use (`deepvariant`) | `string` | deepvariant | | | | `sv_caller` | Which structural variant caller to use (`severus`, `sniffles`) | `string` | severus | | | -| `phaser` | Which phasing software to use (`whatshap`, `hiphase_snv`, `hiphase_sv`) | `string` | whatshap | | | +| `phaser` | Which phasing software to use (`longphase`, `whatshap`, `hiphase`) | `string` | longphase | | | | `hifiasm_mode` | Run hifiasm in hifi-only or hifi-trio mode (`hifi-only`, `trio-binning`) | `string` | hifi-only | | | | `parallel_alignments` | If parallel_alignments is bigger than 1, input files will be split and aligned in parallel to reduce processing time. | `integer` | 1 | | | | `parallel_snv` | If parallel_snv is bigger than 1, short variant calling will be done in parallel to reduce processing time. 
| `integer` | 13 | | | diff --git a/modules.json b/modules.json index d03948e8..6d7b4b1a 100644 --- a/modules.json +++ b/modules.json @@ -141,6 +141,16 @@ "git_sha": "aecb06fcdb995ff3e3df7c7a1fd119367d6d1996", "installed_by": ["modules"] }, + "longphase/haplotag": { + "branch": "master", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["modules"] + }, + "longphase/phase": { + "branch": "master", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["modules"] + }, "minimap2/align": { "branch": "master", "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306", diff --git a/modules/local/hiphase/main.nf b/modules/local/hiphase/main.nf index 44b5d4bf..3de4359a 100644 --- a/modules/local/hiphase/main.nf +++ b/modules/local/hiphase/main.nf @@ -42,7 +42,7 @@ process HIPHASE { vcfInputs.add('--vcf') vcfInputs.add(vcf) vcfOutputs.add('--output-vcf') - vcfOutputs.add("${prefix}.vcf.gz") + vcfOutputs.add("${prefix}_phased.vcf.gz") vcfNames.add(vcf.getName()) } @@ -58,7 +58,7 @@ process HIPHASE { if(output_bam) { bamOutputs.add('--output-bam') - bamOutputs.add("${prefix}.bam") + bamOutputs.add("${prefix}_haplotagged.bam") } } diff --git a/modules/nf-core/longphase/haplotag/environment.yml b/modules/nf-core/longphase/haplotag/environment.yml new file mode 100644 index 00000000..06445a93 --- /dev/null +++ b/modules/nf-core/longphase/haplotag/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::longphase=1.7.3" diff --git a/modules/nf-core/longphase/haplotag/main.nf b/modules/nf-core/longphase/haplotag/main.nf new file mode 100644 index 00000000..701af31f --- /dev/null +++ b/modules/nf-core/longphase/haplotag/main.nf @@ -0,0 +1,66 @@ +process LONGPHASE_HAPLOTAG { /* Runs `longphase haplotag` on the input BAM/CRAM using the phased SNP VCF and, when supplied, the SV and modification VCFs */ + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/longphase:1.7.3--hf5e1c6e_0': + 'biocontainers/longphase:1.7.3--hf5e1c6e_0' }" + + input: + tuple val(meta), path(bam), path(bai), path(snps), path(svs), path(mods) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + + output: + tuple val(meta), path("*.{bam,cram}"), emit: bam + tuple val(meta), path("*.log") , emit: log , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def sv_file = svs ? "--sv-file ${svs}" : "" /* gate on the process input, not on params: svs is [] (falsy) when no SV VCF is supplied */ + def mod_file = mods ? "--mod-file ${mods}" : "" /* gate on the process input, not on params: mods is [] (falsy) when no modification VCF is supplied */ + + """ + longphase \\ + haplotag \\ + $args \\ + --threads $task.cpus \\ + -o ${prefix} \\ + --reference ${fasta} \\ + --snp-file ${snps} \\ + --bam ${bam} \\ + ${sv_file} \\ + ${mod_file} + + if [ -f "${prefix}.out" ]; then + mv ${prefix}.out ${prefix}.log + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = args.contains('--cram') ? "cram" : "bam" + def log = args.contains('--log') ? 
"touch ${prefix}.log" : '' + """ + touch ${prefix}.${suffix} + ${log} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/longphase/haplotag/meta.yml b/modules/nf-core/longphase/haplotag/meta.yml new file mode 100644 index 00000000..2fe7c569 --- /dev/null +++ b/modules/nf-core/longphase/haplotag/meta.yml @@ -0,0 +1,89 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "longphase_haplotag" +description: LongPhase is an ultra-fast program for simultaneously co-phasing SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms. +keywords: + - haplotag + - long-read + - genomics +tools: + - "longphase": + description: "LongPhase is an ultra-fast program for simultaneously co-phasing SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms." + homepage: "https://github.com/twolinin/longphase" + documentation: "https://github.com/twolinin/longphase" + tool_dev_url: "https://github.com/twolinin/longphase" + doi: "10.1093/bioinformatics/btac058" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - bam: + type: file + description: Sorted BAM/CRAM file + pattern: "*.{bam,cram}" + - bai: + type: file + description: Index of sorted BAM/CRAM file + pattern: "*.{bai,crai,csi}" + - snps: + type: file + description: VCF file with SNPs (and INDELs) + pattern: "*.{vcf,vcf.gz}" + - svs: + type: file + description: VCF file with SVs + pattern: "*.{vcf,vcf.gz}" + - mods: + type: file + description: modcall-generated VCF with modifications + pattern: "*.{vcf,vcf.gz}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
`[ id:'hg38' ]` + - fasta: + type: file + description: Reference fasta file + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'hg38' ]` + - fai: + type: file + description: Reference fai index + pattern: "*.fai" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: BAM file with haplotagged reads + pattern: "*.bam" + - cram: + type: file + description: CRAM file with haplotagged reads + pattern: "*.cram" + - log: + type: file + description: Log file + pattern: "*.log" + +authors: + - "@fellen31" +maintainers: + - "@fellen31" diff --git a/modules/nf-core/longphase/haplotag/tests/main.nf.test b/modules/nf-core/longphase/haplotag/tests/main.nf.test new file mode 100644 index 00000000..c80133c6 --- /dev/null +++ b/modules/nf-core/longphase/haplotag/tests/main.nf.test @@ -0,0 +1,202 @@ +nextflow_process { + + name "Test Process LONGPHASE_HAPLOTAG" + script "../main.nf" + process "LONGPHASE_HAPLOTAG" + + tag "modules" + tag "modules_nfcore" + tag "longphase" + tag "longphase/haplotag" + + test("[ bam, bai, snps, [], [] ], fasta, fai") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + bam(process.out.bam.get(0).get(1), stringency: 'silent').getHeader(), + bam(process.out.bam.get(0).get(1), stringency: 'silent').getReadsMD5(), + ).match() } + ) + } + + } + + test("[ bam, bai, snps, [], [] ], fasta, fai - log & cram") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.log, + bam(process.out.bam.get(0).get(1), 'https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/genome/genome.fasta', stringency: 'silent').getHeader()[2..5], + bam(process.out.bam.get(0).get(1), 'https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/genome/genome.fasta', stringency: 'silent').getReadsMD5(), + ).match() } + ) + } + + } + + test("[ bam, bai, snps, svs, [] ], fasta, fai") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + bam(process.out.bam.get(0).get(1), stringency: 'silent').getHeader(), + bam(process.out.bam.get(0).get(1), stringency: 'silent').getReadsMD5(), + ).match() } + ) + } + + } + + test("[ bam, bai, snps, [], [] ], fasta, fai -stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("[ bam, bai, snps, [], [] ], fasta, fai - log & cram -stub") { + + options 
"-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap b/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap new file mode 100644 index 00000000..225a4f7e --- /dev/null +++ b/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap @@ -0,0 +1,159 @@ +{ + "[ bam, bai, snps, [], [] ], fasta, fai - log & cram -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ], + "bam": [ + [ + { + "id": "test" + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-09T09:40:30.738831707" + }, + "[ bam, bai, snps, [], [] ], fasta, 
fai": { + "content": [ + [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ], + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:chr22\tLN:40001", + "@RG\tID:test\tSM:test", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:1.7.3\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " + ], + "721264eb2824a3146b331f2532d10180" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:12:34.848038423" + }, + "[ bam, bai, snps, [], [] ], fasta, fai -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + + ], + "versions": [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-09T09:40:20.836809553" + }, + "[ bam, bai, snps, svs, [] ], fasta, fai": { + "content": [ + [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ], + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:chr22\tLN:40001", + "@RG\tID:test\tSM:test", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:1.7.3\tCL:longphase haplotag --threads 2 -o test --reference 
genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " + ], + "721264eb2824a3146b331f2532d10180" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:13:39.901419316" + }, + "[ bam, bai, snps, [], [] ], fasta, fai - log & cram": { + "content": [ + [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ], + [ + [ + { + "id": "test" + }, + "test.log:md5,6203f10696f4b0909f0d327c021df773" + ] + ], + [ + "@RG\tID:test\tSM:test", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:1.7.3\tCL:longphase haplotag --log --cram --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " + ], + "721264eb2824a3146b331f2532d10180" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:45:54.254102844" + } +} \ No newline at end of file diff --git a/modules/nf-core/longphase/haplotag/tests/nextflow.config b/modules/nf-core/longphase/haplotag/tests/nextflow.config new file mode 100644 index 00000000..d50498b2 --- /dev/null +++ b/modules/nf-core/longphase/haplotag/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'LONGPHASE_HAPLOTAG' { + ext.args = '--log --cram' + } +} diff --git a/modules/nf-core/longphase/haplotag/tests/tags.yml b/modules/nf-core/longphase/haplotag/tests/tags.yml new file mode 100644 index 00000000..117e16a5 --- /dev/null +++ b/modules/nf-core/longphase/haplotag/tests/tags.yml @@ -0,0 +1,2 @@ +longphase/haplotag: + - "modules/nf-core/longphase/haplotag/**" diff --git a/modules/nf-core/longphase/phase/environment.yml b/modules/nf-core/longphase/phase/environment.yml new file mode 100644 index 00000000..96e04eb6 --- /dev/null +++ 
b/modules/nf-core/longphase/phase/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::longphase=1.7.3" + - "bioconda::htslib=1.20" diff --git a/modules/nf-core/longphase/phase/main.nf b/modules/nf-core/longphase/phase/main.nf new file mode 100644 index 00000000..ad29d997 --- /dev/null +++ b/modules/nf-core/longphase/phase/main.nf @@ -0,0 +1,69 @@ +process LONGPHASE_PHASE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-d626bb8ec5a659accfbd8490bc1ac4a940722258:682e8c0cc0ceebf9bd38371a58249aabce93b1b3-0': + 'biocontainers/mulled-v2-d626bb8ec5a659accfbd8490bc1ac4a940722258:682e8c0cc0ceebf9bd38371a58249aabce93b1b3-0' }" + + input: + tuple val(meta), path(bam), path(bai), path(snps), path(svs), path(mods) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def sv_file = svs ? "--sv-file ${svs}" : "" // gate on the staged input, not a global param; an absent input is passed as [] (falsy) + def mod_file = mods ?
"--mod-file ${mods}" : "" // same gating for the optional modification VCF + + def bamList = [] + for (file in bam) { + bamList.add("-b") + bamList.add(file) + } + """ + longphase \\ + phase \\ + $args \\ + --threads $task.cpus \\ + -o ${prefix} \\ + --reference ${fasta} \\ + --snp-file ${snps} \\ + ${bamList.join(" ")} \\ + ${sv_file} \\ + ${mod_file} + + bgzip \\ + --threads $task.cpus \\ + $args2 \\ + ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | bgzip -c > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/longphase/phase/meta.yml b/modules/nf-core/longphase/phase/meta.yml new file mode 100644 index 00000000..42899177 --- /dev/null +++ b/modules/nf-core/longphase/phase/meta.yml @@ -0,0 +1,81 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "longphase_phase" +description: LongPhase is an ultra-fast program for simultaneously co-phasing SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms. +keywords: + - phase + - long-read + - genomics +tools: + - "longphase": + description: "LongPhase is an ultra-fast program for simultaneously co-phasing SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms." + homepage: "https://github.com/twolinin/longphase" + documentation: "https://github.com/twolinin/longphase" + tool_dev_url: "https://github.com/twolinin/longphase" + doi: "10.1093/bioinformatics/btac058" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
`[ id:'sample1', single_end:false ]` + - bam: + type: file + description: Sorted BAM/CRAM file(s) + pattern: "*.{bam,cram}" + - bai: + type: file + description: Index of sorted BAM/CRAM file(s) + pattern: "*.{bai,crai,csi}" + - snps: + type: file + description: VCF file with SNPs (and INDELs) + pattern: "*.{vcf,vcf.gz}" + - svs: + type: file + description: VCF file with SVs + pattern: "*.{vcf,vcf.gz}" + - mods: + type: file + description: modcall-generated VCF with modifications + pattern: "*.{vcf,vcf.gz}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'hg38' ]` + - fasta: + type: file + description: Reference fasta file + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'hg38' ]` + - fai: + type: file + description: Reference fai index + pattern: "*.fai" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Compressed VCF file with phased variants + pattern: "*.vcf.gz" + +authors: + - "@fellen31" +maintainers: + - "@fellen31" diff --git a/modules/nf-core/longphase/phase/tests/main.nf.test b/modules/nf-core/longphase/phase/tests/main.nf.test new file mode 100644 index 00000000..3e303312 --- /dev/null +++ b/modules/nf-core/longphase/phase/tests/main.nf.test @@ -0,0 +1,157 @@ +nextflow_process { + + name "Test Process LONGPHASE_PHASE" + script "../main.nf" + process "LONGPHASE_PHASE" + + tag "modules" + tag "modules_nfcore" + tag "longphase" + tag "longphase/phase" + config "./nextflow.config" + + test("[ bam, bai, snps, [], [] ], fasta, fai") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +test("[ bam, bai, snps, svs, [] ], fasta, fai") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +test("[ bam x2, bai x2, snps, svs, [] ], fasta, fai") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test2.sorted.bam', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test2.sorted.bam.bai', checkIfExists: true) + ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("[ bam, bai, snps, [], [] ], fasta, fai - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) 
+ } + + } + +} diff --git a/modules/nf-core/longphase/phase/tests/main.nf.test.snap b/modules/nf-core/longphase/phase/tests/main.nf.test.snap new file mode 100644 index 00000000..02e2bb17 --- /dev/null +++ b/modules/nf-core/longphase/phase/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "[ bam, bai, snps, [], [] ], fasta, fai": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,fd2d21056b2de4722f12d5e883d9cb0a" + ] + ], + "1": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,fd2d21056b2de4722f12d5e883d9cb0a" + ] + ], + "versions": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T12:14:04.269956432" + }, + "[ bam, bai, snps, svs, [] ], fasta, fai": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,fd2d21056b2de4722f12d5e883d9cb0a" + ] + ], + "1": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,fd2d21056b2de4722f12d5e883d9cb0a" + ] + ], + "versions": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T12:14:39.961315592" + }, + "[ bam x2, bai x2, snps, svs, [] ], fasta, fai": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,3ced25dc8c1ec0a7c64481c8a163d687" + ] + ], + "1": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,3ced25dc8c1ec0a7c64481c8a163d687" + ] + ], + "versions": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T12:15:07.970597495" + }, + "[ bam, bai, snps, [], [] ], fasta, fai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + 
"test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T12:15:40.296227382" + } +} \ No newline at end of file diff --git a/modules/nf-core/longphase/phase/tests/nextflow.config b/modules/nf-core/longphase/phase/tests/nextflow.config new file mode 100644 index 00000000..317f040a --- /dev/null +++ b/modules/nf-core/longphase/phase/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'LONGPHASE_PHASE' { + ext.args = '--ont' + } +} diff --git a/modules/nf-core/longphase/phase/tests/tags.yml b/modules/nf-core/longphase/phase/tests/tags.yml new file mode 100644 index 00000000..260a0d08 --- /dev/null +++ b/modules/nf-core/longphase/phase/tests/tags.yml @@ -0,0 +1,2 @@ +longphase/phase: + - "modules/nf-core/longphase/phase/**" diff --git a/nextflow.config b/nextflow.config index f8e3ce96..ed5cc4ba 100644 --- a/nextflow.config +++ b/nextflow.config @@ -45,7 +45,7 @@ params { deepvariant_model_type = params.preset == 'ONT_R10' ? 'ONT_R104' : 'PACBIO' minimap2_read_mapping_preset = params.preset == 'ONT_R10' ? 
'lr:hq' : 'lr:hqae' - phaser = 'whatshap' + phaser = 'longphase' sv_caller = 'severus' preset = 'revio' hifiasm_mode = 'hifi-only' diff --git a/nextflow_schema.json b/nextflow_schema.json index 1bb0bbe3..6ab7a870 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -369,9 +369,9 @@ }, "phaser": { "type": "string", - "default": "whatshap", - "description": "Which phasing software to use (`whatshap`, `hiphase_snv`, `hiphase_sv`)", - "enum": ["whatshap", "hiphase_snv", "hiphase_sv"] + "default": "longphase", + "description": "Which phasing software to use (`longphase`, `whatshap`, `hiphase`)", + "enum": ["longphase", "whatshap", "hiphase"] }, "hifiasm_mode": { "type": "string", diff --git a/subworkflows/local/phasing.nf b/subworkflows/local/phasing.nf index fb87a27b..4d3757ff 100644 --- a/subworkflows/local/phasing.nf +++ b/subworkflows/local/phasing.nf @@ -1,126 +1,138 @@ -include { BCFTOOLS_FILLFROMFASTA } from '../../modules/local/bcftools/fillfromfasta/main' -include { BCFTOOLS_REHEADER } from '../../modules/nf-core/bcftools/reheader/main' -include { CRAMINO as CRAMINO_PHASED } from '../../modules/local/cramino' -include { HIPHASE as HIPHASE_SNV } from '../../modules/local/hiphase/main' -include { HIPHASE as HIPHASE_SV } from '../../modules/local/hiphase/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_WHATSHAP } from '../../modules/nf-core/samtools/index/main' -include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/main' -include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main' -include { WHATSHAP_HAPLOTAG } from '../../modules/local/whatshap/haplotag/main' -include { WHATSHAP_PHASE } from '../../modules/local/whatshap/phase/main' -include { WHATSHAP_STATS } from '../../modules/local/whatshap/stats/main' +include { CRAMINO as CRAMINO_PHASED } from '../../modules/local/cramino' +include { HIPHASE } from '../../modules/local/hiphase/main' +include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main' 
+include { LONGPHASE_PHASE } from '../../modules/nf-core/longphase/phase/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_LONGPHASE } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_WHATSHAP } from '../../modules/nf-core/samtools/index/main' +include { TABIX_TABIX as TABIX_LONGPHASE_PHASE } from '../../modules/nf-core/tabix/tabix/main' +include { WHATSHAP_HAPLOTAG } from '../../modules/local/whatshap/haplotag/main' +include { WHATSHAP_PHASE } from '../../modules/local/whatshap/phase/main' +include { WHATSHAP_STATS } from '../../modules/local/whatshap/stats/main' workflow PHASING { take: - ch_vcf // channel: [ val(meta), vcf ] - ch_sv_vcf // channel: [ val(meta), vcf ] - ch_bam_bai // channel: [ val(meta), bam, bai ] - fasta // channel: [ val(meta), fasta ] - fai // channel: [ val(meta), fai ] + ch_vcf // channel: [ val(meta), path(vcf) ] + ch_vcf_index // channel: [ val(meta), path(tbi) ] + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + fasta // channel: [ val(meta), path(fasta) ] + fai // channel: [ val(meta), path(fai) ] main: - ch_versions = Channel.empty() - ch_bam_bai_haplotagged = Channel.empty() - ch_vcf_index = Channel.empty() - - TABIX_TABIX(ch_vcf) - ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) - - if (params.phaser.equals("whatshap")) { - - WHATSHAP_PHASE( ch_vcf.join(ch_bam_bai), fasta, fai ) - ch_versions = ch_versions.mix(WHATSHAP_PHASE.out.versions) - - WHATSHAP_PHASE.out.vcf_tbi - .join(ch_bam_bai) - .set { ch_whatshap_haplotag_in } - - WHATSHAP_HAPLOTAG(ch_whatshap_haplotag_in, fasta, fai) - ch_versions = ch_versions.mix(WHATSHAP_HAPLOTAG.out.versions) - - SAMTOOLS_INDEX_WHATSHAP( WHATSHAP_HAPLOTAG.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_WHATSHAP.out.versions) - - WHATSHAP_HAPLOTAG - .out.bam - .join(SAMTOOLS_INDEX_WHATSHAP.out.bai) - .set { ch_bam_bai_haplotagged } - - ch_vcf_index = ch_vcf_index.mix( WHATSHAP_PHASE.out.vcf_tbi ) - - } else if 
(params.phaser.equals("hiphase_snv")) { - ch_vcf - .join(TABIX_TABIX.out.csi) - .join(ch_bam_bai) - .set { ch_hiphase_snv_in } - - HIPHASE_SNV( ch_hiphase_snv_in, fasta, fai, true ) - ch_versions = ch_versions.mix(HIPHASE_SNV.out.versions) - - HIPHASE_SNV.out.bams - .join(HIPHASE_SNV.out.bais) - .set { ch_bam_bai_haplotagged } - - ch_vcf_index = ch_vcf_index.mix( HIPHASE_SNV.out.vcfs.join(HIPHASE_SNV.out.vcfs_tbi) ) - - } else if (params.phaser.equals("hiphase_sv")) { - // Sniffles specific... - BCFTOOLS_REHEADER( - ch_sv_vcf - .map { meta, vcf -> [meta, vcf, [], []] }, - [[],[]] - ) - ch_versions = ch_versions.mix(BCFTOOLS_REHEADER.out.versions) - - // Might be that newer versions of HiPhase ignores certain SVs - // if BCFTOOLS_FILLFROMFASTA is not run, instead of craching - BCFTOOLS_FILLFROMFASTA(BCFTOOLS_REHEADER.out.vcf, fasta) - ch_versions = ch_versions.mix(BCFTOOLS_FILLFROMFASTA.out.versions) - - TABIX_BGZIPTABIX(BCFTOOLS_FILLFROMFASTA.out.vcf) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) - - TABIX_BGZIPTABIX.out.gz_tbi - .map { meta, gz, tbi -> [ meta, gz ] } - .set { ch_sv_vcf } - - TABIX_BGZIPTABIX.out.gz_tbi - .map { meta, gz, tbi -> [ meta, tbi ] } - .set { ch_sv_tbi } - - ch_vcf - .concat(ch_sv_vcf) - .groupTuple() - .set { ch_hiphase_vcf } - - TABIX_TABIX.out.csi - .concat(ch_sv_tbi) - .groupTuple() - .set { ch_hiphase_tbi } - - ch_hiphase_vcf - .join(ch_hiphase_tbi) - .join(ch_bam_bai) - .set { ch_hiphase_in } - - HIPHASE_SV( ch_hiphase_in, fasta, fai, true ) - ch_versions = ch_versions.mix(HIPHASE_SV.out.versions) - - HIPHASE_SV.out.bams - .join(HIPHASE_SV.out.bais) - .set { ch_bam_bai_haplotagged } - - ch_vcf_index = ch_vcf_index.mix( HIPHASE_SV.out.vcfs.join(HIPHASE_SV.out.vcfs_tbi) ) - } - - WHATSHAP_STATS( ch_vcf_index ) - ch_versions = ch_versions.mix(WHATSHAP_STATS.out.versions) - - CRAMINO_PHASED( ch_bam_bai_haplotagged ) - ch_versions = ch_versions.mix(CRAMINO_PHASED.out.versions) + ch_versions = Channel.empty() + + // 
Phase variants and haplotag reads with Longphase + if (params.phaser.equals("longphase")) { + + ch_bam_bai + .join( ch_vcf ) + .map { meta, bam, bai, snvs -> [ meta, bam, bai, snvs, [], [] ] } + .set { ch_longphase_phase_in } + + LONGPHASE_PHASE ( + ch_longphase_phase_in, + fasta, + fai + ) + ch_versions = ch_versions.mix(LONGPHASE_PHASE.out.versions) + + TABIX_LONGPHASE_PHASE ( + LONGPHASE_PHASE.out.vcf + ) + ch_versions = ch_versions.mix(TABIX_LONGPHASE_PHASE.out.versions) + + LONGPHASE_PHASE.out.vcf + .join( TABIX_LONGPHASE_PHASE.out.tbi ) + .set { ch_phased_vcf_index } + + ch_bam_bai + .join( LONGPHASE_PHASE.out.vcf ) + .map { meta, bam, bai, vcf -> [ meta, bam, bai, vcf, [], [] ] } + .set { ch_longphase_haplotag_in } + + LONGPHASE_HAPLOTAG ( + ch_longphase_haplotag_in, + fasta, + fai + ) + ch_versions = ch_versions.mix(LONGPHASE_HAPLOTAG.out.versions) + + SAMTOOLS_INDEX_LONGPHASE ( + LONGPHASE_HAPLOTAG.out.bam + ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_LONGPHASE.out.versions) + + LONGPHASE_HAPLOTAG.out.bam + .join( SAMTOOLS_INDEX_LONGPHASE.out.bai ) + .set { ch_bam_bai_haplotagged } + + // Phase variants and haplotag reads with whatshap + } else if (params.phaser.equals("whatshap")) { + + WHATSHAP_PHASE( + ch_vcf.join( ch_bam_bai ), + fasta, + fai + ) + ch_versions = ch_versions.mix(WHATSHAP_PHASE.out.versions) + + WHATSHAP_PHASE.out.vcf_tbi + .join( ch_bam_bai ) + .set { ch_whatshap_haplotag_in } + + WHATSHAP_HAPLOTAG ( + ch_whatshap_haplotag_in, + fasta, + fai + ) + ch_versions = ch_versions.mix(WHATSHAP_HAPLOTAG.out.versions) + + SAMTOOLS_INDEX_WHATSHAP ( + WHATSHAP_HAPLOTAG.out.bam + ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_WHATSHAP.out.versions) + + WHATSHAP_HAPLOTAG.out.bam + .join( SAMTOOLS_INDEX_WHATSHAP.out.bai ) + .set { ch_bam_bai_haplotagged } + + WHATSHAP_PHASE.out.vcf_tbi + .set { ch_phased_vcf_index } + + // Phase variants and haplotag reads with HiPhase + } else if (params.phaser.equals("hiphase")) { + ch_vcf + .join( 
ch_vcf_index ) + .join( ch_bam_bai ) + .set { ch_hiphase_snv_in } + + HIPHASE ( + ch_hiphase_snv_in, + fasta, + fai, + true + ) + ch_versions = ch_versions.mix(HIPHASE.out.versions) + + HIPHASE.out.bams + .join( HIPHASE.out.bais ) + .set { ch_bam_bai_haplotagged } + + HIPHASE.out.vcfs + .join( HIPHASE.out.vcfs_tbi ) + .set { ch_phased_vcf_index } + + } + + // Phasing stats + WHATSHAP_STATS ( ch_phased_vcf_index ) + ch_versions = ch_versions.mix(WHATSHAP_STATS.out.versions) + + // Phasing QC + CRAMINO_PHASED ( ch_bam_bai_haplotagged ) + ch_versions = ch_versions.mix(CRAMINO_PHASED.out.versions) emit: - haplotagged_bam_bai = ch_bam_bai_haplotagged // channel: [ val(meta), bam, bai ] - stats = WHATSHAP_STATS.out.stats // channel: [ val(meta), txt ] - versions = ch_versions // channel: [ versions.yml ] + haplotagged_bam_bai = ch_bam_bai_haplotagged // channel: [ val(meta), path(bam), path(bai) ] + stats = WHATSHAP_STATS.out.stats // channel: [ val(meta), path(txt) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/short_variant_calling/main.nf b/subworkflows/local/short_variant_calling/main.nf index 5395cd2e..18be861b 100644 --- a/subworkflows/local/short_variant_calling/main.nf +++ b/subworkflows/local/short_variant_calling/main.nf @@ -93,7 +93,8 @@ workflow SHORT_VARIANT_CALLING { ch_versions = ch_versions.mix(BCFTOOLS_NORM_MULTISAMPLE.out.versions) emit: - snp_calls_vcf = BCFTOOLS_NORM_SINGLESAMPLE.out.vcf // channel: [ val(meta), path(bcf) ] + snp_calls_vcf = BCFTOOLS_NORM_SINGLESAMPLE.out.vcf // channel: [ val(meta), path(vcf) ] + snp_calls_tbi = BCFTOOLS_NORM_SINGLESAMPLE.out.tbi // channel: [ val(meta), path(tbi) ] combined_bcf = BCFTOOLS_NORM_MULTISAMPLE.out.vcf // channel: [ val(meta), path(bcf) ] combined_csi = BCFTOOLS_NORM_MULTISAMPLE.out.csi // channel: [ val(meta), path(csi) ] versions = ch_versions // channel: [ path(versions.yml) ] diff --git a/subworkflows/local/short_variant_calling/tests/main.nf.test.snap 
b/subworkflows/local/short_variant_calling/tests/main.nf.test.snap index 5aa2cc19..9d6ea38e 100644 --- a/subworkflows/local/short_variant_calling/tests/main.nf.test.snap +++ b/subworkflows/local/short_variant_calling/tests/main.nf.test.snap @@ -8,10 +8,19 @@ "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + "test_norm_singlesample.vcf.gz:md5,8d1b64c0eed7eb66dd3294fcbc15b4a1" ] ], "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,41ab8f51ac55e08b2339feb5a6b40764" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -21,7 +30,7 @@ "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -31,7 +40,7 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", @@ -59,13 +68,22 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,41ab8f51ac55e08b2339feb5a6b40764" + ] + ], "snp_calls_vcf": [ [ { "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + "test_norm_singlesample.vcf.gz:md5,8d1b64c0eed7eb66dd3294fcbc15b4a1" ] ], "versions": [ @@ -82,7 +100,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:06:15.287122968" + "timestamp": "2024-09-20T09:13:58.152956123" }, "1 sample - 2 bed, fasta, fai, bed, [] - stub": { "content": [ @@ -93,10 +111,19 @@ "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -114,7 +141,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -132,7 +159,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", @@ -180,13 +207,22 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "snp_calls_vcf": [ [ { "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -207,7 +243,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:09:04.780658252" + "timestamp": "2024-09-20T09:16:49.165250859" }, "1 sample - 1 bed, fasta, fai, bed, []": { "content": [ @@ -218,10 +254,19 @@ "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + "test_norm_singlesample.vcf.gz:md5,8d1b64c0eed7eb66dd3294fcbc15b4a1" ] ], "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,41ab8f51ac55e08b2339feb5a6b40764" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -231,7 +276,7 @@ "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -241,7 +286,7 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", 
"versions.yml:md5,13101c9283d4a82e859574b0a981311c", "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", @@ -269,13 +314,22 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,41ab8f51ac55e08b2339feb5a6b40764" + ] + ], "snp_calls_vcf": [ [ { "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + "test_norm_singlesample.vcf.gz:md5,8d1b64c0eed7eb66dd3294fcbc15b4a1" ] ], "versions": [ @@ -292,7 +346,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:06:44.174749859" + "timestamp": "2024-09-20T09:14:27.319216407" }, "2 samples - 2 bed, fasta, fai, bed, par_bed": { "content": [ @@ -303,17 +357,33 @@ "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + "test_norm_singlesample.vcf.gz:md5,f8e492c9c4d6e0cb027b300bb3f7155a" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84" + "test2_norm_singlesample.vcf.gz:md5,0e42833e64e0f427ca09259e17e2765b" ] ], "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,f1d5f7e2321e7f0534d251c12b373221" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,2c4354a182ae1f4ad94f056b2fbf62a8" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -331,7 +401,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -349,7 +419,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", @@ -401,20 +471,36 @@ 
"genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,f1d5f7e2321e7f0534d251c12b373221" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,2c4354a182ae1f4ad94f056b2fbf62a8" + ] + ], "snp_calls_vcf": [ [ { "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + "test_norm_singlesample.vcf.gz:md5,f8e492c9c4d6e0cb027b300bb3f7155a" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84" + "test2_norm_singlesample.vcf.gz:md5,0e42833e64e0f427ca09259e17e2765b" ] ], "versions": [ @@ -439,7 +525,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:08:22.570429008" + "timestamp": "2024-09-20T09:16:06.366289968" }, "2 samples - 2 bed, fasta, fai, bed, [] - stub": { "content": [ @@ -450,17 +536,33 @@ "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test2_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -478,7 +580,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -496,7 +598,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ 
+ "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", @@ -548,20 +650,36 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "snp_calls_vcf": [ [ { "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test2_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -586,7 +704,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:09:16.982831625" + "timestamp": "2024-09-20T09:17:01.14095623" }, "2 samples - 2 bed, fasta, fai, bed, par_bed - stub": { "content": [ @@ -597,17 +715,33 @@ "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test2_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -625,7 +759,7 @@ 
"genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -643,7 +777,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", @@ -695,20 +829,36 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "snp_calls_vcf": [ [ { "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test2_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -733,7 +883,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:09:29.288580006" + "timestamp": "2024-09-20T09:17:13.216410221" }, "1 sample - 1 bed, fasta, fai, bed, [] - stub": { "content": [ @@ -744,10 +894,19 @@ "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -757,7 +916,7 @@ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ 
-767,7 +926,7 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", @@ -795,13 +954,22 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "snp_calls_vcf": [ [ { "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -818,7 +986,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:08:53.503322275" + "timestamp": "2024-09-20T09:16:38.070360001" }, "1 sample - no bed, fasta, fai, [], []": { "content": [ @@ -829,14 +997,23 @@ "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + "test_norm_singlesample.vcf.gz:md5,8d1b64c0eed7eb66dd3294fcbc15b4a1" ] ], "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,41ab8f51ac55e08b2339feb5a6b40764" + ] + ], + "2": [ [ { "id": [ - + ], "project": null, "contains_affected": false @@ -844,11 +1021,11 @@ "[]_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" ] ], - "2": [ + "3": [ [ { "id": [ - + ], "project": null, "contains_affected": false @@ -856,7 +1033,7 @@ "[]_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", @@ -868,7 +1045,7 @@ [ { "id": [ - + ], "project": null, "contains_affected": false @@ -880,7 +1057,7 @@ [ { "id": [ - + ], "project": null, 
"contains_affected": false @@ -888,13 +1065,22 @@ "[]_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,41ab8f51ac55e08b2339feb5a6b40764" + ] + ], "snp_calls_vcf": [ [ { "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + "test_norm_singlesample.vcf.gz:md5,8d1b64c0eed7eb66dd3294fcbc15b4a1" ] ], "versions": [ @@ -911,7 +1097,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:05:46.058241488" + "timestamp": "2024-09-20T09:13:29.038685117" }, "1 sample - 2 bed, fasta, fai, bed, []": { "content": [ @@ -922,10 +1108,19 @@ "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + "test_norm_singlesample.vcf.gz:md5,f8e492c9c4d6e0cb027b300bb3f7155a" ] ], "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,f1d5f7e2321e7f0534d251c12b373221" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -943,7 +1138,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -961,7 +1156,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", @@ -1009,13 +1204,22 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,f1d5f7e2321e7f0534d251c12b373221" + ] + ], "snp_calls_vcf": [ [ { "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + 
"test_norm_singlesample.vcf.gz:md5,f8e492c9c4d6e0cb027b300bb3f7155a" ] ], "versions": [ @@ -1036,7 +1240,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:07:15.005268156" + "timestamp": "2024-09-20T09:14:58.277461525" }, "2 samples - 2 bed, fasta, fai, bed, []": { "content": [ @@ -1047,17 +1251,33 @@ "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + "test_norm_singlesample.vcf.gz:md5,f8e492c9c4d6e0cb027b300bb3f7155a" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84" + "test2_norm_singlesample.vcf.gz:md5,0e42833e64e0f427ca09259e17e2765b" ] ], "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,f1d5f7e2321e7f0534d251c12b373221" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,2c4354a182ae1f4ad94f056b2fbf62a8" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -1075,7 +1295,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -1093,7 +1313,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", @@ -1145,20 +1365,36 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,f1d5f7e2321e7f0534d251c12b373221" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,2c4354a182ae1f4ad94f056b2fbf62a8" + ] + ], "snp_calls_vcf": [ [ { "id": "test", "single_end": false }, - 
"test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + "test_norm_singlesample.vcf.gz:md5,f8e492c9c4d6e0cb027b300bb3f7155a" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84" + "test2_norm_singlesample.vcf.gz:md5,0e42833e64e0f427ca09259e17e2765b" ] ], "versions": [ @@ -1183,7 +1419,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:07:48.515375217" + "timestamp": "2024-09-20T09:15:32.21563308" }, "1 sample - no bed, fasta, fai, [], [] - stub": { "content": [ @@ -1194,14 +1430,23 @@ "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ [ { "id": [ - + ], "project": null, "contains_affected": false @@ -1209,11 +1454,11 @@ "[]_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "2": [ + "3": [ [ { "id": [ - + ], "project": null, "contains_affected": false @@ -1221,7 +1466,7 @@ "[]_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", @@ -1233,7 +1478,7 @@ [ { "id": [ - + ], "project": null, "contains_affected": false @@ -1245,7 +1490,7 @@ [ { "id": [ - + ], "project": null, "contains_affected": false @@ -1253,13 +1498,22 @@ "[]_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "snp_calls_vcf": [ [ { "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + 
"test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -1276,7 +1530,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:08:32.905425335" + "timestamp": "2024-09-20T09:16:16.776853798" }, "1 sample - 1 bed, fasta, fai, [], [] - stub": { "content": [ @@ -1287,10 +1541,19 @@ "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -1300,7 +1563,7 @@ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -1310,7 +1573,7 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", @@ -1338,13 +1601,22 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "snp_calls_vcf": [ [ { "id": "test", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -1361,6 +1633,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:08:43.136478303" + "timestamp": "2024-09-20T09:16:27.732828098" } -} \ No newline at end of file +} diff --git a/subworkflows/local/short_variant_calling/tests/nextflow.config b/subworkflows/local/short_variant_calling/tests/nextflow.config index f9b1d1a5..e0f992af 100644 --- 
a/subworkflows/local/short_variant_calling/tests/nextflow.config +++ b/subworkflows/local/short_variant_calling/tests/nextflow.config @@ -37,7 +37,8 @@ process { '--no-version', '-m -', '-w 10000', - '--output-type u', + '--output-type z', + '--write-index=tbi' ].join(' ') } diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf index 7711da7f..c2973a9a 100644 --- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf @@ -390,14 +390,14 @@ def toolCitationText() { "WhatsHap (Martin et al. 2016)", ] } - if(params.phaser == 'hiphase_sv') { + if(params.phaser == 'hiphase') { citation_text = citation_text + [ "HiPhase (Holt et al. 2024)", ] } - if(params.phaser == 'hiphase_snv') { + if(params.phaser == 'longphase') { citation_text = citation_text + [ - "HiPhase (Holt et al. 2024)", + "LongPhase (Lin et al. 2022)", ] } if (!params.skip_methylation_wf) { @@ -445,6 +445,7 @@ def toolBibliographyText() { "
  • Yun T, Li H, Chang PC, Lin MF, Carroll A, McLean CY. Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Robinson P, ed. Bioinformatics. 2021;36(24):5582-5589. doi:10.1093/bioinformatics/btaa1081
  • ", "
  • Cheng, H., Concepcion, G.T., Feng, X. et al. Haplotype-resolved de novo assembly using phased assembly graphs with hifiasm. Nat Methods 18, 170–175 (2021). https://doi.org/10.1038/s41592-020-01056-5
  • ", "
  • James M Holt, Christopher T Saunders, William J Rowell, Zev Kronenberg, Aaron M Wenger, Michael Eberle, HiPhase: jointly phasing small, structural, and tandem repeat variants from HiFi sequencing, Bioinformatics, Volume 40, Issue 2, February 2024, btae042, https://doi.org/10.1093/bioinformatics/btae042
  • ", + "
  • Jyun-Hong Lin, Liang-Chi Chen, Shu-Chi Yu, Yao-Ting Huang, LongPhase: an ultra-fast chromosome-scale phasing algorithm for small and large variants, Bioinformatics, Volume 38, Issue 7, March 2022, Pages 1816–1822, https://doi.org/10.1093/bioinformatics/btac058
  • ", "
  • Heng Li, Minimap2: pairwise alignment for nucleotide sequences, Bioinformatics, Volume 34, Issue 18, September 2018, Pages 3094–3100, https://doi.org/10.1093/bioinformatics/bty191
  • ", "
  • Pedersen BS, Quinlan AR. Mosdepth: quick coverage calculation for genomes and exomes. Hancock J, ed. Bioinformatics. 2018;34(5):867-868. doi:10.1093/bioinformatics/btx699
  • ", "
  • Genome-wide profiling of highly similar paralogous genes using HiFi sequencing. Xiao Chen, Daniel Baker, Egor Dolzhenko, Joseph M Devaney, Jessica Noya, April S Berlyoung, Rhonda Brandon, Kathleen S Hruska, Lucas Lochovsky, Paul Kruszka, Scott Newman, Emily Farrow, Isabelle Thiffault, Tomi Pastinen, Dalia Kasperaviciute, Christian Gilissen, Lisenka Vissers, Alexander Hoischen, Seth Berger, Eric Vilain, Emmanuèle Délot, UCI Genomics Research to Elucidate the Genetics of Rare Diseases (UCI GREGoR) Consortium, Michael A Eberle. bioRxiv 2024.04.19.590294; doi: https://doi.org/10.1101/2024.04.19.590294
  • ", diff --git a/tests/main.nf.test b/tests/main.nf.test index 8aa188f5..40a5a178 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -30,7 +30,7 @@ nextflow_pipeline { file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), file("$outputDir/multiqc/multiqc_data/multiqc_somalier.txt"), // Assert with snapshot HG002_Revio - bam("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_phased.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_haplotagged.bam", stringency: 'silent').getReadsMD5(), file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap1.p_ctg.assembly_summary"), file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap2.p_ctg.assembly_summary"), bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bam", stringency: 'silent').getReadsMD5(), @@ -81,7 +81,7 @@ nextflow_pipeline { { assert new File("$outputDir/svs/multi_sample/test/test_svs.vcf.gz").exists() }, { assert new File("$outputDir/svs/multi_sample/test/test_svs.vcf.gz.tbi").exists() }, // Assert exists HG002_Revio - { assert new File("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_phased.bam.bai").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_haplotagged.bam.bai").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bam.bai").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.sam.gz").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.bam.bai").exists() }, @@ -99,10 +99,10 @@ nextflow_pipeline { { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio.paraphase.json").exists() }, { assert new File("$outputDir/qc/cramino/phased/HG002_Revio/HG002_Revio_cramino_aligned_phased.txt").exists() }, { assert new 
File("$outputDir/qc/cramino/unphased/HG002_Revio/HG002_Revio_cramino_aligned.txt").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio/HG002_Revio_phased.vcf.gz").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio/HG002_Revio_phased.vcf.gz.tbi").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio/HG002_Revio_stats.stats.tsv").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio/HG002_Revio_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/phased_variants/HG002_Revio/HG002_Revio_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phased_variants/HG002_Revio/HG002_Revio_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_Revio/HG002_Revio_stats.stats.tsv").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_Revio/HG002_Revio_stats.blocks.tsv").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_Revio/HG002_Revio_fastqc.html").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_Revio/HG002_Revio_fastqc.zip").exists() }, { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio/HG002_Revio_repeat_expansion_stranger.vcf.gz").exists() }, @@ -126,6 +126,8 @@ nextflow_pipeline { outdir = "$outputDir" parallel_snv = 2 preset = "revio" + phaser = "hiphase" + } } @@ -138,7 +140,7 @@ nextflow_pipeline { file("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), // Assert with snapshot HG002_Revio_A - bam("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_haplotagged.bam", stringency: 'silent').getReadsMD5(), file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap1.p_ctg.assembly_summary"), 
file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap2.p_ctg.assembly_summary"), bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bam", stringency: 'silent').getReadsMD5(), @@ -174,7 +176,7 @@ nextflow_pipeline { bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_spanning_sorted.bam", stringency: 'silent').getReadsMD5(), file("$outputDir/snvs/stats/single_sample/HG002_Revio_A.vcf.gz.bcftools_stats.txt").readLines()[0..2], // Assert with snapshot HG002_Revio_B - bam("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_haplotagged.bam", stringency: 'silent').getReadsMD5(), file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap1.p_ctg.assembly_summary"), file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap2.p_ctg.assembly_summary"), bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.bam", stringency: 'silent').getReadsMD5(), @@ -229,7 +231,7 @@ nextflow_pipeline { { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz").exists() }, { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz.tbi").exists() }, // Assert exists HG002_Revio_A - { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam.bai").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_haplotagged.bam.bai").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bam.bai").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.sam.gz").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.bam.bai").exists() }, @@ -247,11 +249,11 @@ nextflow_pipeline { { 
assert new File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A.paraphase.json").exists() }, { assert new File("$outputDir/qc/cramino/phased/HG002_Revio_A/HG002_Revio_A_cramino_aligned_phased.txt").exists() }, { assert new File("$outputDir/qc/cramino/unphased/HG002_Revio_A/HG002_Revio_A_cramino_aligned.txt").exists() }, - { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam.bai").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz.tbi").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_A/HG002_Revio_A_stats.stats.tsv").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_A/HG002_Revio_A_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_haplotagged.bam.bai").exists() }, + { assert new File("$outputDir/phased_variants/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phased_variants/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_Revio_A/HG002_Revio_A_stats.stats.tsv").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_Revio_A/HG002_Revio_A_stats.blocks.tsv").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_Revio_A/HG002_Revio_A_fastqc.html").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_Revio_A/HG002_Revio_A_fastqc.zip").exists() }, { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_A/HG002_Revio_A_repeat_expansion_stranger.vcf.gz").exists() }, @@ -262,7 +264,7 @@ nextflow_pipeline { { assert new File("$outputDir/svs/single_sample/HG002_Revio_A/HG002_Revio_A_severus.vcf.gz").exists() }, { assert new File("$outputDir/svs/single_sample/HG002_Revio_A/HG002_Revio_A_severus.vcf.gz.tbi").exists() }, // Assert 
exists HG002_Revio_B - { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam.bai").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_haplotagged.bam.bai").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.bam.bai").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.sam.gz").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.bam.bai").exists() }, @@ -280,11 +282,11 @@ nextflow_pipeline { { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B.paraphase.json").exists() }, { assert new File("$outputDir/qc/cramino/phased/HG002_Revio_B/HG002_Revio_B_cramino_aligned_phased.txt").exists() }, { assert new File("$outputDir/qc/cramino/unphased/HG002_Revio_B/HG002_Revio_B_cramino_aligned.txt").exists() }, - { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam.bai").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz.tbi").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_B/HG002_Revio_B_stats.stats.tsv").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_B/HG002_Revio_B_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_haplotagged.bam.bai").exists() }, + { assert new File("$outputDir/phased_variants/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phased_variants/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_Revio_B/HG002_Revio_B_stats.stats.tsv").exists() }, + { assert new 
File("$outputDir/qc/phasing_stats/HG002_Revio_B/HG002_Revio_B_stats.blocks.tsv").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_Revio_B/HG002_Revio_B_fastqc.html").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_Revio_B/HG002_Revio_B_fastqc.zip").exists() }, { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_B/HG002_Revio_B_repeat_expansion_stranger.vcf.gz").exists() }, @@ -309,6 +311,7 @@ nextflow_pipeline { preset = 'ONT_R10' parallel_alignments = 2 parallel_snv = 1 + phaser = "whatshap" } } @@ -321,7 +324,7 @@ nextflow_pipeline { file("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), // Assert with snapshot HG002_ONT_A - bam("$outputDir/aligned_reads/HG002_ONT_A/HG002_ONT_A_phased.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/aligned_reads/HG002_ONT_A/HG002_ONT_A_haplotagged.bam", stringency: 'silent').getReadsMD5(), file("$outputDir/cnv_calling/hificnv/HG002_ONT_A/HG002_ONT_A.HG002_ONT_A.copynum.bedgraph"), file("$outputDir/cnv_calling/hificnv/HG002_ONT_A/HG002_ONT_A.HG002_ONT_A.depth.bw"), file("$outputDir/cnv_calling/hificnv/HG002_ONT_A/HG002_ONT_A.HG002_ONT_A.maf.bw"), @@ -339,7 +342,7 @@ nextflow_pipeline { file("$outputDir/qc/mosdepth/HG002_ONT_A/HG002_ONT_A.regions.bed.gz.csi"), file("$outputDir/snvs/stats/single_sample/HG002_ONT_A.vcf.gz.bcftools_stats.txt").readLines()[0..2], // Assert with snapshot HG002_ONT_B - bam("$outputDir/aligned_reads/HG002_ONT_B/HG002_ONT_B_phased.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/aligned_reads/HG002_ONT_B/HG002_ONT_B_haplotagged.bam", stringency: 'silent').getReadsMD5(), file("$outputDir/cnv_calling/hificnv/HG002_ONT_B/HG002_ONT_B.HG002_ONT_B.copynum.bedgraph"), file("$outputDir/cnv_calling/hificnv/HG002_ONT_B/HG002_ONT_B.HG002_ONT_B.depth.bw"), file("$outputDir/cnv_calling/hificnv/HG002_ONT_B/HG002_ONT_B.HG002_ONT_B.maf.bw"), @@ -374,7 +377,7 @@ nextflow_pipeline { { assert new 
File("$outputDir/svs/multi_sample/test/test_svs.vcf.gz").exists() }, { assert new File("$outputDir/svs/multi_sample/test/test_svs.vcf.gz.tbi").exists() }, // Assert exists HG002_ONT_A - { assert new File("$outputDir/aligned_reads/HG002_ONT_A/HG002_ONT_A_phased.bam.bai").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_ONT_A/HG002_ONT_A_haplotagged.bam.bai").exists() }, { assert new File("$outputDir/cnv_calling/hificnv/HG002_ONT_A/HG002_ONT_A.log").exists() }, { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_ONT_A/HG002_ONT_A_modkit_pileup_phased_1.bed.gz").exists() }, { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_ONT_A/HG002_ONT_A_modkit_pileup_phased_1.bed.gz.tbi").exists() }, @@ -388,11 +391,11 @@ nextflow_pipeline { { assert new File("$outputDir/paraphase/HG002_ONT_A/HG002_ONT_A.paraphase.json").exists() }, { assert new File("$outputDir/qc/cramino/phased/HG002_ONT_A/HG002_ONT_A_cramino_aligned_phased.txt").exists() }, { assert new File("$outputDir/qc/cramino/unphased/HG002_ONT_A/HG002_ONT_A_cramino_aligned.txt").exists() }, - { assert new File("$outputDir/aligned_reads/HG002_ONT_A/HG002_ONT_A_phased.bam.bai").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_ONT_A/HG002_ONT_A_phased.vcf.gz").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_ONT_A/HG002_ONT_A_phased.vcf.gz.tbi").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_ONT_A/HG002_ONT_A_stats.stats.tsv").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_ONT_A/HG002_ONT_A_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_ONT_A/HG002_ONT_A_haplotagged.bam.bai").exists() }, + { assert new File("$outputDir/phased_variants/HG002_ONT_A/HG002_ONT_A_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phased_variants/HG002_ONT_A/HG002_ONT_A_phased.vcf.gz.tbi").exists() }, + { assert new 
File("$outputDir/qc/phasing_stats/HG002_ONT_A/HG002_ONT_A_stats.stats.tsv").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_ONT_A/HG002_ONT_A_stats.blocks.tsv").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_ONT_A/HG002_ONT_A_fastqc.html").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_ONT_A/HG002_ONT_A_fastqc.zip").exists() }, { assert new File("$outputDir/snvs/single_sample/HG002_ONT_A/HG002_ONT_A_snv_annotated_ranked.vcf.gz").exists() }, @@ -400,7 +403,7 @@ nextflow_pipeline { { assert new File("$outputDir/svs/single_sample/HG002_ONT_A/HG002_ONT_A_severus.vcf.gz").exists() }, { assert new File("$outputDir/svs/single_sample/HG002_ONT_A/HG002_ONT_A_severus.vcf.gz.tbi").exists() }, // Assert exists HG002_ONT_B - { assert new File("$outputDir/aligned_reads/HG002_ONT_B/HG002_ONT_B_phased.bam.bai").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_ONT_B/HG002_ONT_B_haplotagged.bam.bai").exists() }, { assert new File("$outputDir/cnv_calling/hificnv/HG002_ONT_B/HG002_ONT_B.log").exists() }, { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_ONT_B/HG002_ONT_B_modkit_pileup_phased_1.bed.gz").exists() }, { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_ONT_B/HG002_ONT_B_modkit_pileup_phased_1.bed.gz.tbi").exists() }, @@ -414,11 +417,11 @@ nextflow_pipeline { { assert new File("$outputDir/paraphase/HG002_ONT_B/HG002_ONT_B.paraphase.json").exists() }, { assert new File("$outputDir/qc/cramino/phased/HG002_ONT_B/HG002_ONT_B_cramino_aligned_phased.txt").exists() }, { assert new File("$outputDir/qc/cramino/unphased/HG002_ONT_B/HG002_ONT_B_cramino_aligned.txt").exists() }, - { assert new File("$outputDir/aligned_reads/HG002_ONT_B/HG002_ONT_B_phased.bam.bai").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_ONT_B/HG002_ONT_B_phased.vcf.gz").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_ONT_B/HG002_ONT_B_phased.vcf.gz.tbi").exists() }, - { 
assert new File("$outputDir/phasing/whatshap/stats/HG002_ONT_B/HG002_ONT_B_stats.stats.tsv").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_ONT_B/HG002_ONT_B_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_ONT_B/HG002_ONT_B_haplotagged.bam.bai").exists() }, + { assert new File("$outputDir/phased_variants/HG002_ONT_B/HG002_ONT_B_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phased_variants/HG002_ONT_B/HG002_ONT_B_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_ONT_B/HG002_ONT_B_stats.stats.tsv").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_ONT_B/HG002_ONT_B_stats.blocks.tsv").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_ONT_B/HG002_ONT_B_fastqc.html").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_ONT_B/HG002_ONT_B_fastqc.zip").exists() }, { assert new File("$outputDir/snvs/single_sample/HG002_ONT_B/HG002_ONT_B_snv_annotated_ranked.vcf.gz").exists() }, diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap index 119a60a8..114b751e 100644 --- a/tests/main.nf.test.snap +++ b/tests/main.nf.test.snap @@ -39,8 +39,8 @@ "HG002_Revio_cramino_aligned.arrow:md5,a76219e9046db32c4b3d6d78425c5d78", "HG002_Revio.regions.bed.gz:md5,31db0f14146b65e339b54b963c304947", "HG002_Revio.regions.bed.gz.csi:md5,026eef1c69fb4aa3a1687463fe2088ab", - "HG002_Revio_sorted.vcf.gz:md5,fbb5699b8f74fc105fb154e8fac7bfea", - "HG002_Revio_sorted.vcf.gz.tbi:md5,0466518ee265ba63160ed27cee0dec88", + "HG002_Revio_sorted.vcf.gz:md5,735d8ba586b01fb27e009e3964112ce9", + "HG002_Revio_sorted.vcf.gz.tbi:md5,d4c7838d72a4200e790227ba4d7b25d5", "65999ab8f2bc7841de8172468bf23ab6", [ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", @@ -52,7 +52,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-20T12:30:55.088135056" + "timestamp": "2024-09-20T11:17:02.410377719" }, "test 
profile - multisample": { "content": [ @@ -90,8 +90,8 @@ "HG002_Revio_A_cramino_aligned.arrow:md5,a76219e9046db32c4b3d6d78425c5d78", "HG002_Revio_A.regions.bed.gz:md5,31db0f14146b65e339b54b963c304947", "HG002_Revio_A.regions.bed.gz.csi:md5,026eef1c69fb4aa3a1687463fe2088ab", - "HG002_Revio_A_sorted.vcf.gz:md5,680938d6ebeafe73d8df0b21c0310276", - "HG002_Revio_A_sorted.vcf.gz.tbi:md5,a6554ab817e7c232a1554ea85fa00151", + "HG002_Revio_A_sorted.vcf.gz:md5,e7f53d86c027a7778e0e642cb2da8884", + "HG002_Revio_A_sorted.vcf.gz.tbi:md5,d4c7838d72a4200e790227ba4d7b25d5", "65999ab8f2bc7841de8172468bf23ab6", [ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", @@ -129,8 +129,8 @@ "HG002_Revio_B_cramino_aligned.arrow:md5,3bb08ac5958c6cb0801f319066c3a1b2", "HG002_Revio_B.regions.bed.gz:md5,837978e7b90ef81cb5aa015fbe7bb4a6", "HG002_Revio_B.regions.bed.gz.csi:md5,d0640255d527e36655281f64e184b02c", - "HG002_Revio_B_sorted.vcf.gz:md5,a8aaec5870a60a4fabf6aff849ba1e61", - "HG002_Revio_B_sorted.vcf.gz.tbi:md5,e255a5ea92885967f0c126bddc8ea3b2", + "HG002_Revio_B_sorted.vcf.gz:md5,8703d5602d79e9fe4d70aced838f518f", + "HG002_Revio_B_sorted.vcf.gz.tbi:md5,01f65612542a83475217568bd50d4efd", "65999ab8f2bc7841de8172468bf23ab6", [ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", @@ -142,7 +142,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-20T12:32:28.96463908" + "timestamp": "2024-09-20T09:48:22.654535823" }, "test profile - multisample - ont - parallel_alignments 2 - parallel_snv 1": { "content": [ @@ -198,4 +198,4 @@ }, "timestamp": "2024-09-20T12:33:56.873568079" } -} \ No newline at end of file +} diff --git a/workflows/nallo.nf b/workflows/nallo.nf index 3eecb23d..0bb337e5 100644 --- a/workflows/nallo.nf +++ b/workflows/nallo.nf @@ -106,7 +106,7 @@ workflow NALLO { : '' // Check parameter that doesn't conform to schema validation here - if 
(params.phaser.matches('hiphase_sv|hiphase_snv') && params.preset == 'ONT_R10') { error "The HiPhase license only permits analysis of data from PacBio. For details see: https://github.com/PacificBiosciences/HiPhase/blob/main/LICENSE.md" } + if (params.phaser.matches('hiphase') && params.preset == 'ONT_R10') { error "The HiPhase license only permits analysis of data from PacBio. For details see: https://github.com/PacificBiosciences/HiPhase/blob/main/LICENSE.md" } // Read and store paths in the vep_plugin_files file if (params.vep_plugin_files) { @@ -451,7 +451,13 @@ workflow NALLO { // if(!params.skip_phasing_wf) { - PHASING( SHORT_VARIANT_CALLING.out.snp_calls_vcf, CALL_SVS.out.ch_sv_calls_vcf, bam_bai, fasta, fai) + PHASING ( + SHORT_VARIANT_CALLING.out.snp_calls_vcf, + SHORT_VARIANT_CALLING.out.snp_calls_tbi, + bam_bai, + fasta, + fai + ) ch_versions = ch_versions.mix(PHASING.out.versions) ch_multiqc_files = ch_multiqc_files.mix(PHASING.out.stats.collect{it[1]}.ifEmpty([]))