From d56bcf0d3abae027e9e47864cb6b27d770c3ce25 Mon Sep 17 00:00:00 2001 From: fellen31 Date: Tue, 17 Sep 2024 08:51:17 +0200 Subject: [PATCH] Add longphase --- CHANGELOG.md | 4 + CITATIONS.md | 4 + conf/base.config | 2 +- conf/modules/phasing.config | 37 +- conf/modules/short_variant_calling.config | 3 +- conf/test.config | 2 +- docs/output.md | 30 +- docs/parameters.md | 270 +++++------ modules.json | 10 + modules/local/hiphase/main.nf | 4 +- .../longphase/haplotag/environment.yml | 7 + modules/nf-core/longphase/haplotag/main.nf | 66 +++ modules/nf-core/longphase/haplotag/meta.yml | 89 ++++ .../longphase/haplotag/tests/main.nf.test | 202 ++++++++ .../haplotag/tests/main.nf.test.snap | 159 ++++++ .../longphase/haplotag/tests/nextflow.config | 5 + .../nf-core/longphase/haplotag/tests/tags.yml | 2 + .../nf-core/longphase/phase/environment.yml | 8 + modules/nf-core/longphase/phase/main.nf | 69 +++ modules/nf-core/longphase/phase/meta.yml | 81 ++++ .../longphase/phase/tests/main.nf.test | 157 ++++++ .../longphase/phase/tests/main.nf.test.snap | 134 +++++ .../longphase/phase/tests/nextflow.config | 5 + .../nf-core/longphase/phase/tests/tags.yml | 2 + nextflow.config | 2 +- nextflow_schema.json | 6 +- subworkflows/local/phasing.nf | 248 +++++----- .../local/short_variant_calling/main.nf | 3 +- .../tests/main.nf.test.snap | 456 ++++++++++++++---- .../tests/nextflow.config | 3 +- .../local/utils_nfcore_nallo_pipeline/main.nf | 7 +- tests/main.nf.test | 70 +-- tests/main.nf.test.snap | 24 +- workflows/nallo.nf | 10 +- 34 files changed, 1731 insertions(+), 450 deletions(-) create mode 100644 modules/nf-core/longphase/haplotag/environment.yml create mode 100644 modules/nf-core/longphase/haplotag/main.nf create mode 100644 modules/nf-core/longphase/haplotag/meta.yml create mode 100644 modules/nf-core/longphase/haplotag/tests/main.nf.test create mode 100644 modules/nf-core/longphase/haplotag/tests/main.nf.test.snap create mode 100644 
modules/nf-core/longphase/haplotag/tests/nextflow.config create mode 100644 modules/nf-core/longphase/haplotag/tests/tags.yml create mode 100644 modules/nf-core/longphase/phase/environment.yml create mode 100644 modules/nf-core/longphase/phase/main.nf create mode 100644 modules/nf-core/longphase/phase/meta.yml create mode 100644 modules/nf-core/longphase/phase/tests/main.nf.test create mode 100644 modules/nf-core/longphase/phase/tests/main.nf.test.snap create mode 100644 modules/nf-core/longphase/phase/tests/nextflow.config create mode 100644 modules/nf-core/longphase/phase/tests/tags.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 67e3c037..d12b18d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#366](https://github.com/genomic-medicine-sweden/nallo/pull/366) - Added sorting of samples when creating PED files, so the output is always the same - [#367](https://github.com/genomic-medicine-sweden/nallo/pull/367) - Added Severus as the default SV caller, together with a `--sv_caller` parameter to choose caller - [#371](https://github.com/genomic-medicine-sweden/nallo/pull/371) - Added `FOUND_IN=caller` tags to SV output +- [#388](https://github.com/genomic-medicine-sweden/nallo/pull/388) - Added longphase as the default phaser +- [#388](https://github.com/genomic-medicine-sweden/nallo/pull/388) - Added single-sample tbi output to the short variant calling subworkflow ### `Changed` @@ -31,12 +33,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#365](https://github.com/genomic-medicine-sweden/nallo/pull/365) - Changed CI to only use nf-test for pipeline tests - [#381](https://github.com/genomic-medicine-sweden/nallo/pull/381) - Updated CI nf-test version to 0.9.0 - [#382](https://github.com/genomic-medicine-sweden/nallo/pull/382) - Changed vep_plugin_files description in schema and docs +- 
[#388](https://github.com/genomic-medicine-sweden/nallo/pull/388) - Changed phasing output structure and naming, and updated docs ### `Removed` - [#352](https://github.com/genomic-medicine-sweden/nallo/pull/352) - Removed the fqcrs module - [#356](https://github.com/genomic-medicine-sweden/nallo/pull/356) - Removed filter_vep section from output documentation since it is not in the pipeline - [#379](https://github.com/genomic-medicine-sweden/nallo/pull/379) - Removed VEP Plugins from testdata ([genomic-medicine-sweden/test-datasets#16](https://github.com/genomic-medicine-sweden/test-datasets/pull/16)) +- [#388](https://github.com/genomic-medicine-sweden/nallo/pull/388) - Removed support for co-phasing SVs with HiPhase, as the officially supported caller (pbsv) is not in the pipeline ### `Fixed` diff --git a/CITATIONS.md b/CITATIONS.md index 9ae9e0e4..21c4c141 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -70,6 +70,10 @@ - [HiFiCNV](https://github.com/PacificBiosciences/HiFiCNV) +- [LongPhase](https://github.com/twolinin/longphase) + + > Jyun-Hong Lin, Liang-Chi Chen, Shu-Chi Yu, Yao-Ting Huang, LongPhase: an ultra-fast chromosome-scale phasing algorithm for small and large variants, Bioinformatics, Volume 38, Issue 7, March 2022, Pages 1816–1822, https://doi.org/10.1093/bioinformatics/btac058 + - [minimap2](https://academic.oup.com/bioinformatics/article/34/18/3094/4994778) > Heng Li, Minimap2: pairwise alignment for nucleotide sequences, Bioinformatics, Volume 34, Issue 18, September 2018, Pages 3094–3100, https://doi.org/10.1093/bioinformatics/bty191 diff --git a/conf/base.config b/conf/base.config index 2bc62420..50230189 100644 --- a/conf/base.config +++ b/conf/base.config @@ -61,7 +61,7 @@ process { maxRetries = 2 } - withName: '.*:SAMTOOLS_MERGE' { + withName: 'SAMTOOLS_MERGE|SAMTOOLS_INDEX' { label = 'process_medium' } } diff --git a/conf/modules/phasing.config b/conf/modules/phasing.config index 1342807a..749de352 100644 --- a/conf/modules/phasing.config 
+++ b/conf/modules/phasing.config @@ -24,8 +24,7 @@ process { ] } - withName: '.*:PHASING:HIPHASE_SNV' { - ext.prefix = { "$meta.id}_phased" } + withName: '.*:PHASING:HIPHASE' { ext.args = { [ '--ignore-read-groups', "--stats-file ${meta.id}_phased.stats.tsv", @@ -35,22 +34,28 @@ process { publishDir = [ path: { "${params.outdir}/" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : ((filename.endsWith('bam') || filename.endsWith('bai')) ? "aligned_reads/${meta.id}/${filename}" : "phasing/hiphase/snv/${meta.id}/${filename}" ) } + saveAs: { filename -> filename.equals('versions.yml') ? null : ((filename.endsWith('bam') || filename.endsWith('bai')) ? "aligned_reads/${meta.id}/${filename}" : "phased_variants/${meta.id}/${filename}" ) } ] } - withName: '.*:PHASING:HIPHASE_SV' { - ext.prefix = { "$meta.id}_phased" } - ext.args = { [ - '--ignore-read-groups', - "--stats-file ${meta.id}_phased.stats.tsv", - "--blocks-file ${meta.id}_phased.blocks.tsv", - "--summary-file ${meta.id}_phased.summary.tsv" - ].join(' ') } + withName: '.*:PHASING:LONGPHASE_PHASE' { + ext.prefix = { "${meta.id}_phased" } + ext.args = [ + params.preset.equals('ONT_R10') ? "--ont" : "--pb", + '--indels' + ].join(' ') publishDir = [ - path: { "${params.outdir}/" }, + path: { "${params.outdir}/phased_variants/${meta.id}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : ((filename.endsWith('bam') || filename.endsWith('bai')) ? "aligned_reads/${meta.id}/${filename}" : "phasing/hiphase/sv/${meta.id}/${filename}" ) } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*:PHASING:LONGPHASE_HAPLOTAG' { + ext.prefix = { "${meta.id}_haplotagged" } + publishDir = [ + path: { "${params.outdir}/aligned_reads/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } @@ -61,7 +66,7 @@ process { '--indels' ].join(' ') publishDir = [ - path: { "${params.outdir}/phasing/whatshap/phase/${meta.id}" }, + path: { "${params.outdir}/phased_variants/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -70,14 +75,14 @@ process { withName: '.*:PHASING:WHATSHAP_STATS' { ext.prefix = { "${meta.id}_stats" } publishDir = [ - path: { "${params.outdir}/phasing/whatshap/stats/${meta.id}" }, + path: { "${params.outdir}/qc/phasing_stats/${meta.id}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: '.*:PHASING:WHATSHAP_HAPLOTAG' { - ext.prefix = { "${meta.id}_phased" } + ext.prefix = { "${meta.id}_haplotagged" } ext.args = [ '--ignore-read-groups', '--tag-supplementary' diff --git a/conf/modules/short_variant_calling.config b/conf/modules/short_variant_calling.config index 2559d947..4c6369b4 100644 --- a/conf/modules/short_variant_calling.config +++ b/conf/modules/short_variant_calling.config @@ -47,7 +47,8 @@ process { ext.args = [ '-m -', '-w 10000', - '--output-type u', + '--output-type z', + '--write-index=tbi' ].join(' ') } diff --git a/conf/test.config b/conf/test.config index cad5f2ef..534bb276 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,7 +26,7 @@ params { // References fasta = params.pipelines_testdata_base_path + 'nallo/reference/hg38.test.fa.gz' - input = 'https://github.com/genomic-medicine-sweden/test-datasets/raw/2948776ddf24ea131f527aa1f2dc23a43bb7b952/testdata/samplesheet.csv' + input = params.pipelines_testdata_base_path + 'nallo/testdata/samplesheet.csv' bed = params.pipelines_testdata_base_path + 'nallo/reference/test_data.bed' hificnv_xy = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XY.bed' hificnv_xx = params.pipelines_testdata_base_path + 'nallo/reference/expected_cn.hg38.XX.bed' diff --git a/docs/output.md 
b/docs/output.md index 7a263bd5..3a457b0e 100644 --- a/docs/output.md +++ b/docs/output.md @@ -157,40 +157,22 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ ### Phasing -[WhatsHap](https://whatshap.readthedocs.io/en/latest/) or [HiPhase](https://github.com/PacificBiosciences/HiPhase) are used to phase variants and haplotag reads. +[LongPhase](https://github.com/twolinin/longphase), [WhatsHap](https://whatshap.readthedocs.io/en/latest/) or [HiPhase](https://github.com/PacificBiosciences/HiPhase) are used to phase variants and haplotag reads.
-Output files from WhatsHap +Output files from phasing - `{outputdir}/aligned_reads/{sample}/` - - `{sample}_phased.bam`: BAM file with haplotags - - `{sample}_phased.bam.bai`: Index of the corresponding bam file -- `{outputdir}/phasing/whatshap/phase/{sample}/` + - `{sample}_haplotagged.bam`: BAM file with haplotags + - `{sample}_haplotagged.bam.bai`: Index of the corresponding bam file +- `{outputdir}/phased_variants/{sample}/` - `*.vcf.gz`: VCF file with phased variants - `*.vcf.gz.tbi`: Index of the corresponding VCF file -- `{outputdir}/phasing/whatshap/stats/{sample}/` +- `{outputdir}/qc/phasing_stats/{sample}/` - `*.blocks.tsv`: File with phase blocks - `*.stats.tsv`: File with phasing statistics
-
-Output files from HiPhase - -- `{outputdir}/aligned_reads/{sample}/` - - - `{sample}_phased.bam`: BAM file with haplotags - - `{sample}_phased.bam.bai`: Index of the corresponding bam file - -- `{outputdir}/phasing/hiphase/{snv,sv}/{sample}/` - - - `*.blocks.tsv`: File with phase blocks - - `*.stats.tsv.gz`: File with phasing statistics - - `*.vcf.gz`: VCF file with phased variants - - `*.vcf.gz.tbi`: Index of the corresponding VCF file - - `*.summary.tsv`: HiPhase summary file - -
- ### Pipeline information [Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. diff --git a/docs/parameters.md b/docs/parameters.md index 7a34e4dd..c549bdb8 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -1,142 +1,136 @@ +# genomic-medicine-sweden/nallo pipeline parameters +Long-read variant calling pipeline -# genomic-medicine-sweden/nallo pipeline parameters - -Long-read variant calling pipeline - -## Workflow skip options - -Allows skipping certain parts of the pipeline - -| Parameter | Description | Type | Default | Required | Hidden | -|-----------|-----------|-----------|-----------|-----------|-----------| -| `skip_qc` | Skip QC of reads | `boolean` | False | | | -| `skip_short_variant_calling` | Skip short variant calling | `boolean` | False | | | -| `skip_assembly_wf` | Skip genome assembly and assembly variant calling | `boolean` | False | | | -| `skip_mapping_wf` | Skip read mapping (alignment) | `boolean` | False | | | -| `skip_methylation_wf` | Skip generation of methylation pileups | `boolean` | False | | | -| `skip_repeat_calling` | Skip tandem repeat calling | `boolean` | False | | | -| `skip_repeat_annotation` | Skip tandem repeat annotation | `boolean` | False | | | -| `skip_phasing_wf` | Skip phasing of variants and haplotagging of reads | `boolean` | False | | | -| `skip_snv_annotation` | Skip short variant annotation | `boolean` | False | | | -| `skip_cnv_calling` | Skip CNV calling | `boolean` | False | | | -| `skip_call_paralogs` | Skip the calling of specific paralogous genes | `boolean` | False | | | -| `skip_rank_variants` | Skip ranking of short variants | `boolean` | False | | | - -## Input/output options - -Define where the 
pipeline should find input data and save output data. - -| Parameter | Description | Type | Default | Required | Hidden | -|-----------|-----------|-----------|-----------|-----------|-----------| -| `input` | Path to comma-separated file containing information about the samples in the experiment.
HelpYou will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.
| `string` | | True | | -| `outdir` | The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. | `string` | | True | | -| `email` | Email address for completion summary.
HelpSet this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.
| `string` | | | | -| `multiqc_title` | MultiQC report title. Printed as page header, used for filename if not otherwise specified. | `string` | | | | - -## Reference genome options - -Reference genome related files and options required for the workflow. - -| Parameter | Description | Type | Default | Required | Hidden | -|-----------|-----------|-----------|-----------|-----------|-----------| -| `fasta` | Reference genome | `string` | | | | -| `genome` | Name of iGenomes reference.
HelpIf using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.

See the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.
| `string` | | | | -| `igenomes_ignore` | Do not load the iGenomes reference config.
HelpDo not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.
| `boolean` | True | | True | - -## Institutional config options - -Parameters used to describe centralised config profiles. These should not be edited. - -| Parameter | Description | Type | Default | Required | Hidden | -|-----------|-----------|-----------|-----------|-----------|-----------| -| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True | -| `custom_config_base` | Base directory for Institutional configs.
HelpIf you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.
| `string` | https://raw.githubusercontent.com/nf-core/configs/master | | True | -| `config_profile_name` | Institutional config name. | `string` | | | True | -| `config_profile_description` | Institutional config description. | `string` | | | True | -| `config_profile_contact` | Institutional config contact information. | `string` | | | True | -| `config_profile_url` | Institutional config URL link. | `string` | | | True | - -## Max job request options - -Set the top limit for requested resources for any single job. - -| Parameter | Description | Type | Default | Required | Hidden | -|-----------|-----------|-----------|-----------|-----------|-----------| -| `max_cpus` | Maximum number of CPUs that can be requested for any single job.
HelpUse to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`
| `integer` | 16 | | True | -| `max_memory` | Maximum amount of memory that can be requested for any single job.
HelpUse to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`
| `string` | 128.GB | | True | -| `max_time` | Maximum amount of time that can be requested for any single job.
HelpUse to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`
| `string` | 240.h | | True | - -## Generic options - -Less common options for the pipeline, typically set in a config file. - -| Parameter | Description | Type | Default | Required | Hidden | -|-----------|-----------|-----------|-----------|-----------|-----------| -| `help` | Display help text. | `boolean` | | | True | -| `version` | Display version and exit. | `boolean` | | | True | -| `publish_dir_mode` | Method used to save pipeline results to output directory.
HelpThe Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.
| `string` | copy | | True | -| `email_on_fail` | Email address for completion summary, only when pipeline fails.
HelpAn email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.
| `string` | | | True | -| `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True | -| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | `string` | 25.MB | | True | -| `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True | -| `hook_url` | Incoming hook URL for messaging service
HelpIncoming hook URL for messaging service. Currently, MS Teams and Slack are supported.
| `string` | | | True | -| `multiqc_config` | Custom config file to supply to MultiQC. | `string` | | | True | -| `multiqc_logo` | Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file | `string` | | | True | -| `multiqc_methods_description` | Custom MultiQC yaml file containing HTML including a methods description. | `string` | | | | -| `validate_params` | Boolean whether to validate parameters against the schema at runtime | `boolean` | True | | True | -| `validationShowHiddenParams` | Show all params when using `--help`
HelpBy default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters.
| `boolean` | | | True | -| `validationSkipDuplicateCheck` | nf-validation related parameter | `boolean` | | | True | -| `validationS3PathCheck` | Boolean whether to validate validate AWS S3 paths | `boolean` | | | True | -| `monochromeLogs` | Boolean whether to color nf-validation logs | `boolean` | | | True | - -## Workflow options - -Workflow options specific to genomic-medicine-sweden/nallo - -| Parameter | Description | Type | Default | Required | Hidden | -|-----------|-----------|-----------|-----------|-----------|-----------| -| `preset` | Enable or disable certain parts of the pipeline by default, depending on data type (`revio`, `pacbio`, `ONT_R10`) | `string` | revio | True | | -| `variant_caller` | Which short variant software to use (`deepvariant`) | `string` | deepvariant | | | -| `sv_caller` | Which structural variant caller to use (`severus`, `sniffles`) | `string` | severus | | | -| `phaser` | Which phasing software to use (`whatshap`, `hiphase_snv`, `hiphase_sv`) | `string` | whatshap | | | -| `hifiasm_mode` | Run hifiasm in hifi-only or hifi-trio mode (`hifi-only`, `trio-binning`) | `string` | hifi-only | | | -| `parallel_alignments` | If parallel_alignments is bigger than 1, input files will be split and aligned in parallel to reduce processing time. | `integer` | 1 | | | -| `parallel_snv` | If parallel_snv is bigger than 1, short variant calling will be done in parallel to reduce processing time. | `integer` | 13 | | | -| `vep_cache_version` | VEP cache version | `integer` | 110 | | | -| `vep_plugin_files` | A csv file with vep_plugins as header, and then paths to vep plugin files. Paths to pLI_values.txt and LoFtool_scores.txt are required. | `string` | | -| | -| `deepvariant_model_type` | Sets the model type used for DeepVariant. This is set automatically using `--preset` by default. | `string` | PACBIO | | True | -| `extra_modkit_options` | Extra options to modkit, used for test profile. 
| `string` | | | True | -| `extra_vep_options` | Extra options to VEP, used for test profile. | `string` | | | True | -| `extra_paraphase_options` | Extra options to Paraphase, used for test profile. | `string` | | | True | -| `extra_hifiasm_options` | Extra options to hifiasm, used for test profile. | `string` | | | True | - -## File inputs - -The different files that are required. Some are only required by certain workflows, see the usage documentation. - -| Parameter | Description | Type | Default | Required | Hidden | -|-----------|-----------|-----------|-----------|-----------|-----------| -| `cadd_prescored` | Path to a directory containing prescored indels for CADD.
HelpThis folder contains the compressed files and indexes that would otherwise be in data/prescored folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation.
| `string` | | | | -| `cadd_resources` | Path to a directory containing CADD annotations.
HelpThis folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation.
| `string` | | | | -| `par_regions` | Provide a bed file of chrX and chrY PAR regions for dipcall and DeepVariant | `string` | | | | -| `tandem_repeats` | A tandem repeat BED file for sniffles | `string` | | | | -| `trgt_repeats` | A BED file with repeats to be genotyped with TRGT | `string` | | | | -| `snp_db` | A csv file with echtvar databases to annotate SNVs with | `string` | | | | -| `variant_catalog` | A variant catalog json-file for stranger | `string` | | | | -| `variant_consequences_snv` | File containing list of SO terms listed in the order of severity from most severe to lease severe for annotating genomic SNVs. For more information check https://ensembl.org/info/genome/variation/prediction/predicted_data.html | `string` | | | | -| `vep_cache` | A path to the VEP cache location | `string` | | | | -| `bed` | A BED file with regions of interest, used to limit short variant calling. | `string` | | | | -| `hificnv_xy` | A BED file containing expected copy number regions for XY samples. | `string` | | | | -| `hificnv_xx` | A BED file containing expected copy number regions for XX samples. | `string` | | | | -| `hificnv_exclude` | A BED file specifying regions to exclude with HiFiCNV, such as centromeres. | `string` | | | | -| `reduced_penetrance` | A file with gene ids that have reduced penetrance. For use with genmod. | `string` | | | | -| `score_config_snv` | A SNV rank model config file for genmod. | `string` | | | | -| `somalier_sites` | A VCF of known polymorphic sites for somalier | `string` | | | | -| `validationFailUnrecognisedParams` | Validation of parameters fails when an unrecognised parameter is found.
HelpBy default, when an unrecognised parameter is found, it returns a warning.
| `boolean` | | | True | -| `validationLenientMode` | Validation of parameters in lenient more.
HelpAllows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).
| `boolean` | | | True | -| `pipelines_testdata_base_path` | Base URL or local path to location of pipeline test dataset files | `string` | https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/ | | True | - +## Workflow skip options +Allows skipping certain parts of the pipeline +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `skip_qc` | Skip QC of reads | `boolean` | False | | | +| `skip_short_variant_calling` | Skip short variant calling | `boolean` | False | | | +| `skip_assembly_wf` | Skip genome assembly and assembly variant calling | `boolean` | False | | | +| `skip_mapping_wf` | Skip read mapping (alignment) | `boolean` | False | | | +| `skip_methylation_wf` | Skip generation of methylation pileups | `boolean` | False | | | +| `skip_repeat_calling` | Skip tandem repeat calling | `boolean` | False | | | +| `skip_repeat_annotation` | Skip tandem repeat annotation | `boolean` | False | | | +| `skip_phasing_wf` | Skip phasing of variants and haplotagging of reads | `boolean` | False | | | +| `skip_snv_annotation` | Skip short variant annotation | `boolean` | False | | | +| `skip_cnv_calling` | Skip CNV calling | `boolean` | False | | | +| `skip_call_paralogs` | Skip the calling of specific paralogous genes | `boolean` | False | | | +| `skip_rank_variants` | Skip ranking of short variants | `boolean` | False | | | + +## Input/output options + +Define where the pipeline should find input data and save output data. + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `input` | Path to comma-separated file containing information about the samples in the experiment.
HelpYou will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.
| `string` | | True | | +| `outdir` | The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. | `string` | | True | | +| `email` | Email address for completion summary.
HelpSet this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.
| `string` | | | | +| `multiqc_title` | MultiQC report title. Printed as page header, used for filename if not otherwise specified. | `string` | | | | + +## Reference genome options + +Reference genome related files and options required for the workflow. + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `fasta` | Reference genome | `string` | | | | +| `genome` | Name of iGenomes reference.
HelpIf using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`.

See the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details.
| `string` | | | | +| `igenomes_ignore` | Do not load the iGenomes reference config.
HelpDo not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.
| `boolean` | True | | True | + +## Institutional config options + +Parameters used to describe centralised config profiles. These should not be edited. + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True | +| `custom_config_base` | Base directory for Institutional configs.
HelpIf you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.
| `string` | https://raw.githubusercontent.com/nf-core/configs/master | | True | +| `config_profile_name` | Institutional config name. | `string` | | | True | +| `config_profile_description` | Institutional config description. | `string` | | | True | +| `config_profile_contact` | Institutional config contact information. | `string` | | | True | +| `config_profile_url` | Institutional config URL link. | `string` | | | True | + +## Max job request options + +Set the top limit for requested resources for any single job. + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `max_cpus` | Maximum number of CPUs that can be requested for any single job.
HelpUse to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`
| `integer` | 16 | | True | +| `max_memory` | Maximum amount of memory that can be requested for any single job.
HelpUse to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`
| `string` | 128.GB | | True | +| `max_time` | Maximum amount of time that can be requested for any single job.
HelpUse to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`
| `string` | 240.h | | True | + +## Generic options + +Less common options for the pipeline, typically set in a config file. + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `help` | Display help text. | `boolean` | | | True | +| `version` | Display version and exit. | `boolean` | | | True | +| `publish_dir_mode` | Method used to save pipeline results to output directory.
HelpThe Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.
| `string` | copy | | True | +| `email_on_fail` | Email address for completion summary, only when pipeline fails.
HelpAn email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.
| `string` | | | True | +| `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True | +| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | `string` | 25.MB | | True | +| `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True | +| `hook_url` | Incoming hook URL for messaging service
HelpIncoming hook URL for messaging service. Currently, MS Teams and Slack are supported.
| `string` | | | True | +| `multiqc_config` | Custom config file to supply to MultiQC. | `string` | | | True | +| `multiqc_logo` | Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file | `string` | | | True | +| `multiqc_methods_description` | Custom MultiQC yaml file containing HTML including a methods description. | `string` | | | | +| `validate_params` | Boolean whether to validate parameters against the schema at runtime | `boolean` | True | | True | +| `validationShowHiddenParams` | Show all params when using `--help`
HelpBy default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters.
| `boolean` | | | True | +| `validationSkipDuplicateCheck` | nf-validation related parameter | `boolean` | | | True | +| `validationS3PathCheck` | Boolean whether to validate AWS S3 paths | `boolean` | | | True | +| `monochromeLogs` | Boolean whether to color nf-validation logs | `boolean` | | | True | + +## Workflow options + +Workflow options specific to genomic-medicine-sweden/nallo + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `preset` | Enable or disable certain parts of the pipeline by default, depending on data type (`revio`, `pacbio`, `ONT_R10`) | `string` | revio | True | | +| `variant_caller` | Which short variant software to use (`deepvariant`) | `string` | deepvariant | | | +| `sv_caller` | Which structural variant caller to use (`severus`, `sniffles`) | `string` | severus | | | +| `phaser` | Which phasing software to use (`longphase`, `whatshap`, `hiphase`) | `string` | longphase | | | +| `hifiasm_mode` | Run hifiasm in hifi-only or hifi-trio mode (`hifi-only`, `trio-binning`) | `string` | hifi-only | | | +| `parallel_alignments` | If parallel_alignments is bigger than 1, input files will be split and aligned in parallel to reduce processing time. | `integer` | 1 | | | +| `parallel_snv` | If parallel_snv is bigger than 1, short variant calling will be done in parallel to reduce processing time. | `integer` | 13 | | | +| `vep_cache_version` | VEP cache version | `integer` | 110 | | | +| `vep_plugin_files` | A csv file with vep_plugins as header, and then paths to vep plugin files. Paths to pLI_values.txt and LoFtool_scores.txt are required. | `string` | | | | +| `deepvariant_model_type` | Sets the model type used for DeepVariant. This is set automatically using `--preset` by default. | `string` | PACBIO | | True | +| `extra_modkit_options` | Extra options to modkit, used for test profile.
| `string` | | | True | +| `extra_vep_options` | Extra options to VEP, used for test profile. | `string` | | | True | +| `extra_paraphase_options` | Extra options to Paraphase, used for test profile. | `string` | | | True | +| `extra_hifiasm_options` | Extra options to hifiasm, used for test profile. | `string` | | | True | + +## File inputs + +The different files that are required. Some are only required by certain workflows, see the usage documentation. + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `cadd_prescored` | Path to a directory containing prescored indels for CADD.
HelpThis folder contains the compressed files and indexes that would otherwise be in data/prescored folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation.
| `string` | | | | +| `cadd_resources` | Path to a directory containing CADD annotations.
HelpThis folder contains the uncompressed files that would otherwise be in data/annotation folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation.
| `string` | | | | +| `par_regions` | Provide a bed file of chrX and chrY PAR regions for dipcall and DeepVariant | `string` | | | | +| `tandem_repeats` | A tandem repeat BED file for sniffles | `string` | | | | +| `trgt_repeats` | A BED file with repeats to be genotyped with TRGT | `string` | | | | +| `snp_db` | A csv file with echtvar databases to annotate SNVs with | `string` | | | | +| `variant_catalog` | A variant catalog json-file for stranger | `string` | | | | +| `variant_consequences_snv` | File containing list of SO terms listed in the order of severity from most severe to least severe for annotating genomic SNVs. For more information check https://ensembl.org/info/genome/variation/prediction/predicted_data.html | `string` | | | | +| `vep_cache` | A path to the VEP cache location | `string` | | | | +| `bed` | A BED file with regions of interest, used to limit short variant calling. | `string` | | | | +| `hificnv_xy` | A BED file containing expected copy number regions for XY samples. | `string` | | | | +| `hificnv_xx` | A BED file containing expected copy number regions for XX samples. | `string` | | | | +| `hificnv_exclude` | A BED file specifying regions to exclude with HiFiCNV, such as centromeres. | `string` | | | | +| `reduced_penetrance` | A file with gene ids that have reduced penetrance. For use with genmod. | `string` | | | | +| `score_config_snv` | A SNV rank model config file for genmod. | `string` | | | | +| `somalier_sites` | A VCF of known polymorphic sites for somalier | `string` | | | | +| `validationFailUnrecognisedParams` | Validation of parameters fails when an unrecognised parameter is found.
HelpBy default, when an unrecognised parameter is found, it returns a warning.
| `boolean` | | | True | +| `validationLenientMode` | Validation of parameters in lenient more.
HelpAllows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).
| `boolean` | | | True | +| `pipelines_testdata_base_path` | Base URL or local path to location of pipeline test dataset files | `string` | https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/ | | True | diff --git a/modules.json b/modules.json index d03948e8..6d7b4b1a 100644 --- a/modules.json +++ b/modules.json @@ -141,6 +141,16 @@ "git_sha": "aecb06fcdb995ff3e3df7c7a1fd119367d6d1996", "installed_by": ["modules"] }, + "longphase/haplotag": { + "branch": "master", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["modules"] + }, + "longphase/phase": { + "branch": "master", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["modules"] + }, "minimap2/align": { "branch": "master", "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306", diff --git a/modules/local/hiphase/main.nf b/modules/local/hiphase/main.nf index 44b5d4bf..3de4359a 100644 --- a/modules/local/hiphase/main.nf +++ b/modules/local/hiphase/main.nf @@ -42,7 +42,7 @@ process HIPHASE { vcfInputs.add('--vcf') vcfInputs.add(vcf) vcfOutputs.add('--output-vcf') - vcfOutputs.add("${prefix}.vcf.gz") + vcfOutputs.add("${prefix}_phased.vcf.gz") vcfNames.add(vcf.getName()) } @@ -58,7 +58,7 @@ process HIPHASE { if(output_bam) { bamOutputs.add('--output-bam') - bamOutputs.add("${prefix}.bam") + bamOutputs.add("${prefix}_haplotagged.bam") } } diff --git a/modules/nf-core/longphase/haplotag/environment.yml b/modules/nf-core/longphase/haplotag/environment.yml new file mode 100644 index 00000000..06445a93 --- /dev/null +++ b/modules/nf-core/longphase/haplotag/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::longphase=1.7.3" diff --git a/modules/nf-core/longphase/haplotag/main.nf b/modules/nf-core/longphase/haplotag/main.nf new file mode 100644 index 00000000..701af31f --- 
/dev/null +++ b/modules/nf-core/longphase/haplotag/main.nf @@ -0,0 +1,66 @@ +process LONGPHASE_HAPLOTAG { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/longphase:1.7.3--hf5e1c6e_0': + 'biocontainers/longphase:1.7.3--hf5e1c6e_0' }" + + input: + tuple val(meta), path(bam), path(bai), path(snps), path(svs), path(mods) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + + output: + tuple val(meta), path("*.{bam,cram}"), emit: bam + tuple val(meta), path("*.log") , emit: log , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + /* NOTE(review): the two conditions below test pipeline-level params.svs / params.mods, not the svs / mods inputs declared above, so --sv-file / --mod-file are emitted only when those params happen to be set. Presumably `svs ? ...` / `mods ? ...` was intended - confirm upstream in nf-core/modules; the test snapshots need regenerating if this changes. */ def sv_file = params.svs ? "--sv-file ${svs}" : "" + def mod_file = params.mods ? "--mod-file ${mods}" : "" + + """ + longphase \\ + haplotag \\ + $args \\ + --threads $task.cpus \\ + -o ${prefix} \\ + --reference ${fasta} \\ + --snp-file ${snps} \\ + --bam ${bam} \\ + ${sv_file} \\ + ${mod_file} + + if [ -f "${prefix}.out" ]; then + mv ${prefix}.out ${prefix}.log + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = args.contains('--cram') ? "cram" : "bam" + def log = args.contains('--log') ? 
"touch ${prefix}.log" : '' + """ + touch ${prefix}.${suffix} + ${log} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/longphase/haplotag/meta.yml b/modules/nf-core/longphase/haplotag/meta.yml new file mode 100644 index 00000000..2fe7c569 --- /dev/null +++ b/modules/nf-core/longphase/haplotag/meta.yml @@ -0,0 +1,89 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "longphase_haplotag" +description: LongPhase is an ultra-fast program for simultaneously co-phasing SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms. +keywords: + - haplotag + - long-read + - genomics +tools: + - "longphase": + description: "LongPhase is an ultra-fast program for simultaneously co-phasing SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms." + homepage: "https://github.com/twolinin/longphase" + documentation: "https://github.com/twolinin/longphase" + tool_dev_url: "https://github.com/twolinin/longphase" + doi: "10.1093/bioinformatics/btac058" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - bam: + type: file + description: Sorted BAM/CRAM file + pattern: "*.{bam,cram}" + - bai: + type: file + description: Index of sorted BAM/CRAM file + pattern: "*.{bai,crai,csi}" + - snps: + type: file + description: VCF file with SNPs (and INDELs) + pattern: "*.{vcf,vcf.gz}" + - svs: + type: file + description: VCF file with SVs + pattern: "*.{vcf,vcf.gz}" + - mods: + type: file + description: modcall-generated VCF with modifications + pattern: "*.{vcf,vcf.gz}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g.
`[ id:'hg38' ]` + - fasta: + type: file + description: Reference fasta file + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'hg38' ]` + - fai: + type: file + description: Reference fai index + pattern: "*.fai" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: BAM file with haplotagged reads + pattern: "*.bam" + - cram: + type: file + description: CRAM file with haplotagged reads + pattern: "*.cram" + - log: + type: file + description: Log file + pattern: "*.log" + +authors: + - "@fellen31" +maintainers: + - "@fellen31" diff --git a/modules/nf-core/longphase/haplotag/tests/main.nf.test b/modules/nf-core/longphase/haplotag/tests/main.nf.test new file mode 100644 index 00000000..c80133c6 --- /dev/null +++ b/modules/nf-core/longphase/haplotag/tests/main.nf.test @@ -0,0 +1,202 @@ +nextflow_process { + + name "Test Process LONGPHASE_HAPLOTAG" + script "../main.nf" + process "LONGPHASE_HAPLOTAG" + + tag "modules" + tag "modules_nfcore" + tag "longphase" + tag "longphase/haplotag" + + test("[ bam, bai, snps, [], [] ], fasta, fai") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + bam(process.out.bam.get(0).get(1), stringency: 'silent').getHeader(), + bam(process.out.bam.get(0).get(1), stringency: 'silent').getReadsMD5(), + ).match() } + ) + } + + } + + test("[ bam, bai, snps, [], [] ], fasta, fai - log & cram") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + process.out.log, + bam(process.out.bam.get(0).get(1), 'https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/genome/genome.fasta', stringency: 'silent').getHeader()[2..5], + bam(process.out.bam.get(0).get(1), 'https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/genome/genome.fasta', stringency: 'silent').getReadsMD5(), + ).match() } + ) + } + + } + + test("[ bam, bai, snps, svs, [] ], fasta, fai") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + bam(process.out.bam.get(0).get(1), stringency: 'silent').getHeader(), + bam(process.out.bam.get(0).get(1), stringency: 'silent').getReadsMD5(), + ).match() } + ) + } + + } + + test("[ bam, bai, snps, [], [] ], fasta, fai -stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("[ bam, bai, snps, [], [] ], fasta, fai - log & cram -stub") { + + options 
"-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap b/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap new file mode 100644 index 00000000..225a4f7e --- /dev/null +++ b/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap @@ -0,0 +1,159 @@ +{ + "[ bam, bai, snps, [], [] ], fasta, fai - log & cram -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ], + "bam": [ + [ + { + "id": "test" + }, + "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-09T09:40:30.738831707" + }, + "[ bam, bai, snps, [], [] ], fasta, 
fai": { + "content": [ + [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ], + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:chr22\tLN:40001", + "@RG\tID:test\tSM:test", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:1.7.3\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " + ], + "721264eb2824a3146b331f2532d10180" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:12:34.848038423" + }, + "[ bam, bai, snps, [], [] ], fasta, fai -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ], + "bam": [ + [ + { + "id": "test" + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + + ], + "versions": [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-08-09T09:40:20.836809553" + }, + "[ bam, bai, snps, svs, [] ], fasta, fai": { + "content": [ + [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ], + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:chr22\tLN:40001", + "@RG\tID:test\tSM:test", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:1.7.3\tCL:longphase haplotag --threads 2 -o test --reference 
genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " + ], + "721264eb2824a3146b331f2532d10180" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:13:39.901419316" + }, + "[ bam, bai, snps, [], [] ], fasta, fai - log & cram": { + "content": [ + [ + "versions.yml:md5,3c169b598f6e15332b6d7ec1d6d96810" + ], + [ + [ + { + "id": "test" + }, + "test.log:md5,6203f10696f4b0909f0d327c021df773" + ] + ], + [ + "@RG\tID:test\tSM:test", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:1.7.3\tCL:longphase haplotag --log --cram --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " + ], + "721264eb2824a3146b331f2532d10180" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:45:54.254102844" + } +} \ No newline at end of file diff --git a/modules/nf-core/longphase/haplotag/tests/nextflow.config b/modules/nf-core/longphase/haplotag/tests/nextflow.config new file mode 100644 index 00000000..d50498b2 --- /dev/null +++ b/modules/nf-core/longphase/haplotag/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'LONGPHASE_HAPLOTAG' { + ext.args = '--log --cram' + } +} diff --git a/modules/nf-core/longphase/haplotag/tests/tags.yml b/modules/nf-core/longphase/haplotag/tests/tags.yml new file mode 100644 index 00000000..117e16a5 --- /dev/null +++ b/modules/nf-core/longphase/haplotag/tests/tags.yml @@ -0,0 +1,2 @@ +longphase/haplotag: + - "modules/nf-core/longphase/haplotag/**" diff --git a/modules/nf-core/longphase/phase/environment.yml b/modules/nf-core/longphase/phase/environment.yml new file mode 100644 index 00000000..96e04eb6 --- /dev/null +++ 
b/modules/nf-core/longphase/phase/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::longphase=1.7.3" + - "bioconda::htslib=1.20" diff --git a/modules/nf-core/longphase/phase/main.nf b/modules/nf-core/longphase/phase/main.nf new file mode 100644 index 00000000..ad29d997 --- /dev/null +++ b/modules/nf-core/longphase/phase/main.nf @@ -0,0 +1,69 @@ +process LONGPHASE_PHASE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-d626bb8ec5a659accfbd8490bc1ac4a940722258:682e8c0cc0ceebf9bd38371a58249aabce93b1b3-0': + 'biocontainers/mulled-v2-d626bb8ec5a659accfbd8490bc1ac4a940722258:682e8c0cc0ceebf9bd38371a58249aabce93b1b3-0' }" + + input: + tuple val(meta), path(bam), path(bai), path(snps), path(svs), path(mods) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + /* NOTE(review): the two conditions below test pipeline-level params.svs / params.mods, not the svs / mods inputs declared above, so --sv-file / --mod-file are emitted only when those params happen to be set. Presumably `svs ? ...` / `mods ? ...` was intended - confirm upstream in nf-core/modules; the test snapshots need regenerating if this changes. */ def sv_file = params.svs ? "--sv-file ${svs}" : "" + def mod_file = params.mods ? 
"--mod-file ${mods}" : "" + + def bamList = [] + for (file in bam) { + bamList.add("-b") + bamList.add(file) + } + """ + longphase \\ + phase \\ + $args \\ + --threads $task.cpus \\ + -o ${prefix} \\ + --reference ${fasta} \\ + --snp-file ${snps} \\ + ${bamList.join(" ")} \\ + ${sv_file} \\ + ${mod_file} \\ + + bgzip \\ + --threads $task.cpus \\ + $args2 \\ + ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | bgzip -c > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/longphase/phase/meta.yml b/modules/nf-core/longphase/phase/meta.yml new file mode 100644 index 00000000..42899177 --- /dev/null +++ b/modules/nf-core/longphase/phase/meta.yml @@ -0,0 +1,81 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "longphase_phase" +description: LongPhase is an ultra-fast program for simultaneously co-phasing SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms. +keywords: + - phase + - long-read + - genomics +tools: + - "longphase": + description: "LongPhase is an ultra-fast program for simultaneously co-phasing SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms." + homepage: "https://github.com/twolinin/longphase" + documentation: "https://github.com/twolinin/longphase" + tool_dev_url: "https://github.com/twolinin/longphase" + doi: "10.1093/bioinformatics/btac058" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
`[ id:'sample1', single_end:false ]` + - bam: + type: file + description: Sorted BAM/CRAM file(s) + pattern: "*.{bam,cram}" + - bai: + type: file + description: Index of sorted BAM/CRAM file(s) + pattern: "*.{bai,crai,csi}" + - snps: + type: file + description: VCF file with SNPs (and INDELs) + pattern: "*.{vcf,vcf.gz}" + - svs: + type: file + description: VCF file with SVs + pattern: "*.{vcf,vcf.gz}" + - mods: + type: file + description: modcall-generated VCF with modifications + pattern: "*.{vcf,vcf.gz}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'hg38' ]` + - fasta: + type: file + description: Reference fasta file + pattern: "*.fasta" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'hg38' ]` + - fai: + type: file + description: Reference fai index + pattern: "*.fai" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Compressed VCF file with phased variants + pattern: "*.vcf.gz" + +authors: + - "@fellen31" +maintainers: + - "@fellen31" diff --git a/modules/nf-core/longphase/phase/tests/main.nf.test b/modules/nf-core/longphase/phase/tests/main.nf.test new file mode 100644 index 00000000..3e303312 --- /dev/null +++ b/modules/nf-core/longphase/phase/tests/main.nf.test @@ -0,0 +1,157 @@ +nextflow_process { + + name "Test Process LONGPHASE_PHASE" + script "../main.nf" + process "LONGPHASE_PHASE" + + tag "modules" + tag "modules_nfcore" + tag "longphase" + tag "longphase/phase" + config "./nextflow.config" + + test("[ bam, bai, snps, [], [] ], fasta, fai") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +test("[ bam, bai, snps, svs, [] ], fasta, fai") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +test("[ bam x2, bai x2, snps, svs, [] ], fasta, fai") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test2.sorted.bam', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test2.sorted.bam.bai', checkIfExists: true) + ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("[ bam, bai, snps, [], [] ], fasta, fai - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/nanopore/bam/test.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'reference' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) 
+ } + + } + +} diff --git a/modules/nf-core/longphase/phase/tests/main.nf.test.snap b/modules/nf-core/longphase/phase/tests/main.nf.test.snap new file mode 100644 index 00000000..02e2bb17 --- /dev/null +++ b/modules/nf-core/longphase/phase/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "[ bam, bai, snps, [], [] ], fasta, fai": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,fd2d21056b2de4722f12d5e883d9cb0a" + ] + ], + "1": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,fd2d21056b2de4722f12d5e883d9cb0a" + ] + ], + "versions": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T12:14:04.269956432" + }, + "[ bam, bai, snps, svs, [] ], fasta, fai": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,fd2d21056b2de4722f12d5e883d9cb0a" + ] + ], + "1": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,fd2d21056b2de4722f12d5e883d9cb0a" + ] + ], + "versions": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T12:14:39.961315592" + }, + "[ bam x2, bai x2, snps, svs, [] ], fasta, fai": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,3ced25dc8c1ec0a7c64481c8a163d687" + ] + ], + "1": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,3ced25dc8c1ec0a7c64481c8a163d687" + ] + ], + "versions": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T12:15:07.970597495" + }, + "[ bam, bai, snps, [], [] ], fasta, fai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + 
"test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,8d8d82510dd1fbe01a91c575c472897f" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-22T12:15:40.296227382" + } +} \ No newline at end of file diff --git a/modules/nf-core/longphase/phase/tests/nextflow.config b/modules/nf-core/longphase/phase/tests/nextflow.config new file mode 100644 index 00000000..317f040a --- /dev/null +++ b/modules/nf-core/longphase/phase/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'LONGPHASE_PHASE' { + ext.args = '--ont' + } +} diff --git a/modules/nf-core/longphase/phase/tests/tags.yml b/modules/nf-core/longphase/phase/tests/tags.yml new file mode 100644 index 00000000..260a0d08 --- /dev/null +++ b/modules/nf-core/longphase/phase/tests/tags.yml @@ -0,0 +1,2 @@ +longphase/phase: + - "modules/nf-core/longphase/phase/**" diff --git a/nextflow.config b/nextflow.config index 031db0ef..407ccc40 100644 --- a/nextflow.config +++ b/nextflow.config @@ -44,7 +44,7 @@ params { skip_snv_annotation = false deepvariant_model_type = params.preset == 'ONT_R10' ? 
'ONT_R104' : 'PACBIO' - phaser = 'whatshap' + phaser = 'longphase' sv_caller = 'severus' preset = 'revio' hifiasm_mode = 'hifi-only' diff --git a/nextflow_schema.json b/nextflow_schema.json index 77448f74..f76d7872 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -369,9 +369,9 @@ }, "phaser": { "type": "string", - "default": "whatshap", - "description": "Which phasing software to use (`whatshap`, `hiphase_snv`, `hiphase_sv`)", - "enum": ["whatshap", "hiphase_snv", "hiphase_sv"] + "default": "longphase", + "description": "Which phasing software to use (`longphase`, `whatshap`, `hiphase`)", + "enum": ["longphase", "whatshap", "hiphase"] }, "hifiasm_mode": { "type": "string", diff --git a/subworkflows/local/phasing.nf b/subworkflows/local/phasing.nf index fb87a27b..4d3757ff 100644 --- a/subworkflows/local/phasing.nf +++ b/subworkflows/local/phasing.nf @@ -1,126 +1,138 @@ -include { BCFTOOLS_FILLFROMFASTA } from '../../modules/local/bcftools/fillfromfasta/main' -include { BCFTOOLS_REHEADER } from '../../modules/nf-core/bcftools/reheader/main' -include { CRAMINO as CRAMINO_PHASED } from '../../modules/local/cramino' -include { HIPHASE as HIPHASE_SNV } from '../../modules/local/hiphase/main' -include { HIPHASE as HIPHASE_SV } from '../../modules/local/hiphase/main' -include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_WHATSHAP } from '../../modules/nf-core/samtools/index/main' -include { TABIX_BGZIPTABIX } from '../../modules/nf-core/tabix/bgziptabix/main' -include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main' -include { WHATSHAP_HAPLOTAG } from '../../modules/local/whatshap/haplotag/main' -include { WHATSHAP_PHASE } from '../../modules/local/whatshap/phase/main' -include { WHATSHAP_STATS } from '../../modules/local/whatshap/stats/main' +include { CRAMINO as CRAMINO_PHASED } from '../../modules/local/cramino' +include { HIPHASE } from '../../modules/local/hiphase/main' +include { LONGPHASE_HAPLOTAG } from 
'../../modules/nf-core/longphase/haplotag/main' +include { LONGPHASE_PHASE } from '../../modules/nf-core/longphase/phase/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_LONGPHASE } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_WHATSHAP } from '../../modules/nf-core/samtools/index/main' +include { TABIX_TABIX as TABIX_LONGPHASE_PHASE } from '../../modules/nf-core/tabix/tabix/main' +include { WHATSHAP_HAPLOTAG } from '../../modules/local/whatshap/haplotag/main' +include { WHATSHAP_PHASE } from '../../modules/local/whatshap/phase/main' +include { WHATSHAP_STATS } from '../../modules/local/whatshap/stats/main' workflow PHASING { take: - ch_vcf // channel: [ val(meta), vcf ] - ch_sv_vcf // channel: [ val(meta), vcf ] - ch_bam_bai // channel: [ val(meta), bam, bai ] - fasta // channel: [ val(meta), fasta ] - fai // channel: [ val(meta), fai ] + ch_vcf // channel: [ val(meta), path(vcf) ] + ch_vcf_index // channel: [ val(meta), path(tbi) ] + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + fasta // channel: [ val(meta), path(fasta) ] + fai // channel: [ val(meta), path(fai) ] main: - ch_versions = Channel.empty() - ch_bam_bai_haplotagged = Channel.empty() - ch_vcf_index = Channel.empty() - - TABIX_TABIX(ch_vcf) - ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) - - if (params.phaser.equals("whatshap")) { - - WHATSHAP_PHASE( ch_vcf.join(ch_bam_bai), fasta, fai ) - ch_versions = ch_versions.mix(WHATSHAP_PHASE.out.versions) - - WHATSHAP_PHASE.out.vcf_tbi - .join(ch_bam_bai) - .set { ch_whatshap_haplotag_in } - - WHATSHAP_HAPLOTAG(ch_whatshap_haplotag_in, fasta, fai) - ch_versions = ch_versions.mix(WHATSHAP_HAPLOTAG.out.versions) - - SAMTOOLS_INDEX_WHATSHAP( WHATSHAP_HAPLOTAG.out.bam ) - ch_versions = ch_versions.mix(SAMTOOLS_INDEX_WHATSHAP.out.versions) - - WHATSHAP_HAPLOTAG - .out.bam - .join(SAMTOOLS_INDEX_WHATSHAP.out.bai) - .set { ch_bam_bai_haplotagged } - - ch_vcf_index = ch_vcf_index.mix( 
WHATSHAP_PHASE.out.vcf_tbi ) - - } else if (params.phaser.equals("hiphase_snv")) { - ch_vcf - .join(TABIX_TABIX.out.csi) - .join(ch_bam_bai) - .set { ch_hiphase_snv_in } - - HIPHASE_SNV( ch_hiphase_snv_in, fasta, fai, true ) - ch_versions = ch_versions.mix(HIPHASE_SNV.out.versions) - - HIPHASE_SNV.out.bams - .join(HIPHASE_SNV.out.bais) - .set { ch_bam_bai_haplotagged } - - ch_vcf_index = ch_vcf_index.mix( HIPHASE_SNV.out.vcfs.join(HIPHASE_SNV.out.vcfs_tbi) ) - - } else if (params.phaser.equals("hiphase_sv")) { - // Sniffles specific... - BCFTOOLS_REHEADER( - ch_sv_vcf - .map { meta, vcf -> [meta, vcf, [], []] }, - [[],[]] - ) - ch_versions = ch_versions.mix(BCFTOOLS_REHEADER.out.versions) - - // Might be that newer versions of HiPhase ignores certain SVs - // if BCFTOOLS_FILLFROMFASTA is not run, instead of craching - BCFTOOLS_FILLFROMFASTA(BCFTOOLS_REHEADER.out.vcf, fasta) - ch_versions = ch_versions.mix(BCFTOOLS_FILLFROMFASTA.out.versions) - - TABIX_BGZIPTABIX(BCFTOOLS_FILLFROMFASTA.out.vcf) - ch_versions = ch_versions.mix(TABIX_BGZIPTABIX.out.versions) - - TABIX_BGZIPTABIX.out.gz_tbi - .map { meta, gz, tbi -> [ meta, gz ] } - .set { ch_sv_vcf } - - TABIX_BGZIPTABIX.out.gz_tbi - .map { meta, gz, tbi -> [ meta, tbi ] } - .set { ch_sv_tbi } - - ch_vcf - .concat(ch_sv_vcf) - .groupTuple() - .set { ch_hiphase_vcf } - - TABIX_TABIX.out.csi - .concat(ch_sv_tbi) - .groupTuple() - .set { ch_hiphase_tbi } - - ch_hiphase_vcf - .join(ch_hiphase_tbi) - .join(ch_bam_bai) - .set { ch_hiphase_in } - - HIPHASE_SV( ch_hiphase_in, fasta, fai, true ) - ch_versions = ch_versions.mix(HIPHASE_SV.out.versions) - - HIPHASE_SV.out.bams - .join(HIPHASE_SV.out.bais) - .set { ch_bam_bai_haplotagged } - - ch_vcf_index = ch_vcf_index.mix( HIPHASE_SV.out.vcfs.join(HIPHASE_SV.out.vcfs_tbi) ) - } - - WHATSHAP_STATS( ch_vcf_index ) - ch_versions = ch_versions.mix(WHATSHAP_STATS.out.versions) - - CRAMINO_PHASED( ch_bam_bai_haplotagged ) - ch_versions = ch_versions.mix(CRAMINO_PHASED.out.versions) 
+ ch_versions = Channel.empty() + + // Phase variants and haplotag reads with Longphase + if (params.phaser.equals("longphase")) { + + ch_bam_bai + .join( ch_vcf ) + .map { meta, bam, bai, snvs -> [ meta, bam, bai, snvs, [], [] ] } + .set { ch_longphase_phase_in } + + LONGPHASE_PHASE ( + ch_longphase_phase_in, + fasta, + fai + ) + ch_versions = ch_versions.mix(LONGPHASE_PHASE.out.versions) + + TABIX_LONGPHASE_PHASE ( + LONGPHASE_PHASE.out.vcf + ) + ch_versions = ch_versions.mix(TABIX_LONGPHASE_PHASE.out.versions) + + LONGPHASE_PHASE.out.vcf + .join( TABIX_LONGPHASE_PHASE.out.tbi ) + .set { ch_phased_vcf_index } + + ch_bam_bai + .join( LONGPHASE_PHASE.out.vcf ) + .map { meta, bam, bai, vcf -> [ meta, bam, bai, vcf, [], [] ] } + .set { ch_longphase_haplotag_in } + + LONGPHASE_HAPLOTAG ( + ch_longphase_haplotag_in, + fasta, + fai + ) + ch_versions = ch_versions.mix(LONGPHASE_HAPLOTAG.out.versions) + + SAMTOOLS_INDEX_LONGPHASE ( + LONGPHASE_HAPLOTAG.out.bam + ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_LONGPHASE.out.versions) + + LONGPHASE_HAPLOTAG.out.bam + .join( SAMTOOLS_INDEX_LONGPHASE.out.bai ) + .set { ch_bam_bai_haplotagged } + + // Phase variants and haplotag reads with whatshap + } else if (params.phaser.equals("whatshap")) { + + WHATSHAP_PHASE( + ch_vcf.join( ch_bam_bai ), + fasta, + fai + ) + ch_versions = ch_versions.mix(WHATSHAP_PHASE.out.versions) + + WHATSHAP_PHASE.out.vcf_tbi + .join( ch_bam_bai ) + .set { ch_whatshap_haplotag_in } + + WHATSHAP_HAPLOTAG ( + ch_whatshap_haplotag_in, + fasta, + fai + ) + ch_versions = ch_versions.mix(WHATSHAP_HAPLOTAG.out.versions) + + SAMTOOLS_INDEX_WHATSHAP ( + WHATSHAP_HAPLOTAG.out.bam + ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX_WHATSHAP.out.versions) + + WHATSHAP_HAPLOTAG.out.bam + .join( SAMTOOLS_INDEX_WHATSHAP.out.bai ) + .set { ch_bam_bai_haplotagged } + + WHATSHAP_PHASE.out.vcf_tbi + .set { ch_phased_vcf_index } + + // Phase variants and haplotag reads with HiPhase + } else if 
(params.phaser.equals("hiphase")) { + ch_vcf + .join( ch_vcf_index ) + .join( ch_bam_bai ) + .set { ch_hiphase_snv_in } + + HIPHASE ( + ch_hiphase_snv_in, + fasta, + fai, + true + ) + ch_versions = ch_versions.mix(HIPHASE.out.versions) + + HIPHASE.out.bams + .join( HIPHASE.out.bais ) + .set { ch_bam_bai_haplotagged } + + HIPHASE.out.vcfs + .join( HIPHASE.out.vcfs_tbi ) + .set { ch_phased_vcf_index } + + } + + // Phasing stats + WHATSHAP_STATS ( ch_phased_vcf_index ) + ch_versions = ch_versions.mix(WHATSHAP_STATS.out.versions) + + // Phasing QC + CRAMINO_PHASED ( ch_bam_bai_haplotagged ) + ch_versions = ch_versions.mix(CRAMINO_PHASED.out.versions) emit: - haplotagged_bam_bai = ch_bam_bai_haplotagged // channel: [ val(meta), bam, bai ] - stats = WHATSHAP_STATS.out.stats // channel: [ val(meta), txt ] - versions = ch_versions // channel: [ versions.yml ] + haplotagged_bam_bai = ch_bam_bai_haplotagged // channel: [ val(meta), path(bam), path(bai) ] + stats = WHATSHAP_STATS.out.stats // channel: [ val(meta), path(txt) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/short_variant_calling/main.nf b/subworkflows/local/short_variant_calling/main.nf index 5395cd2e..18be861b 100644 --- a/subworkflows/local/short_variant_calling/main.nf +++ b/subworkflows/local/short_variant_calling/main.nf @@ -93,7 +93,8 @@ workflow SHORT_VARIANT_CALLING { ch_versions = ch_versions.mix(BCFTOOLS_NORM_MULTISAMPLE.out.versions) emit: - snp_calls_vcf = BCFTOOLS_NORM_SINGLESAMPLE.out.vcf // channel: [ val(meta), path(bcf) ] + snp_calls_vcf = BCFTOOLS_NORM_SINGLESAMPLE.out.vcf // channel: [ val(meta), path(vcf) ] + snp_calls_tbi = BCFTOOLS_NORM_SINGLESAMPLE.out.tbi // channel: [ val(meta), path(tbi) ] combined_bcf = BCFTOOLS_NORM_MULTISAMPLE.out.vcf // channel: [ val(meta), path(bcf) ] combined_csi = BCFTOOLS_NORM_MULTISAMPLE.out.csi // channel: [ val(meta), path(csi) ] versions = ch_versions // channel: [ path(versions.yml) ] diff --git 
a/subworkflows/local/short_variant_calling/tests/main.nf.test.snap b/subworkflows/local/short_variant_calling/tests/main.nf.test.snap index 5aa2cc19..ef9754dc 100644 --- a/subworkflows/local/short_variant_calling/tests/main.nf.test.snap +++ b/subworkflows/local/short_variant_calling/tests/main.nf.test.snap @@ -5,13 +5,22 @@ "0": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + "test_norm_singlesample.vcf.gz:md5,8d1b64c0eed7eb66dd3294fcbc15b4a1" ] ], "1": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,41ab8f51ac55e08b2339feb5a6b40764" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -21,7 +30,7 @@ "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -31,7 +40,7 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", @@ -59,13 +68,22 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,41ab8f51ac55e08b2339feb5a6b40764" + ] + ], "snp_calls_vcf": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + "test_norm_singlesample.vcf.gz:md5,8d1b64c0eed7eb66dd3294fcbc15b4a1" ] ], "versions": [ @@ -82,7 +100,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:06:15.287122968" + "timestamp": "2024-09-20T09:13:58.152956123" }, "1 sample - 2 bed, fasta, fai, bed, [] - stub": { "content": [ @@ 
-90,13 +108,22 @@ "0": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -114,7 +141,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -132,7 +159,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", @@ -180,13 +207,22 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "snp_calls_vcf": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -207,7 +243,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:09:04.780658252" + "timestamp": "2024-09-20T09:16:49.165250859" }, "1 sample - 1 bed, fasta, fai, bed, []": { "content": [ @@ -215,13 +251,22 @@ "0": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + "test_norm_singlesample.vcf.gz:md5,8d1b64c0eed7eb66dd3294fcbc15b4a1" 
] ], "1": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,41ab8f51ac55e08b2339feb5a6b40764" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -231,7 +276,7 @@ "genome.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -241,7 +286,7 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", @@ -269,13 +314,22 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,41ab8f51ac55e08b2339feb5a6b40764" + ] + ], "snp_calls_vcf": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + "test_norm_singlesample.vcf.gz:md5,8d1b64c0eed7eb66dd3294fcbc15b4a1" ] ], "versions": [ @@ -292,7 +346,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:06:44.174749859" + "timestamp": "2024-09-20T09:14:27.319216407" }, "2 samples - 2 bed, fasta, fai, bed, par_bed": { "content": [ @@ -300,20 +354,36 @@ "0": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + "test_norm_singlesample.vcf.gz:md5,f8e492c9c4d6e0cb027b300bb3f7155a" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84" + "test2_norm_singlesample.vcf.gz:md5,0e42833e64e0f427ca09259e17e2765b" ] ], "1": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + 
"test_norm_singlesample.vcf.gz.tbi:md5,f1d5f7e2321e7f0534d251c12b373221" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,2c4354a182ae1f4ad94f056b2fbf62a8" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -331,7 +401,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -349,7 +419,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", @@ -401,20 +471,36 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,f1d5f7e2321e7f0534d251c12b373221" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,2c4354a182ae1f4ad94f056b2fbf62a8" + ] + ], "snp_calls_vcf": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + "test_norm_singlesample.vcf.gz:md5,f8e492c9c4d6e0cb027b300bb3f7155a" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84" + "test2_norm_singlesample.vcf.gz:md5,0e42833e64e0f427ca09259e17e2765b" ] ], "versions": [ @@ -439,7 +525,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:08:22.570429008" + "timestamp": "2024-09-20T09:16:06.366289968" }, "2 samples - 2 bed, fasta, fai, bed, [] - stub": { "content": [ @@ -447,20 +533,36 @@ "0": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - 
"test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test2_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -478,7 +580,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -496,7 +598,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", @@ -548,20 +650,36 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "snp_calls_vcf": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + 
"test2_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -586,7 +704,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:09:16.982831625" + "timestamp": "2024-09-20T09:17:01.14095623" }, "2 samples - 2 bed, fasta, fai, bed, par_bed - stub": { "content": [ @@ -594,20 +712,36 @@ "0": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test2_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -625,7 +759,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -643,7 +777,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", @@ -695,20 +829,36 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "single_end": false + }, + 
"test2_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "snp_calls_vcf": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test2_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -733,7 +883,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:09:29.288580006" + "timestamp": "2024-09-20T09:17:13.216410221" }, "1 sample - 1 bed, fasta, fai, bed, [] - stub": { "content": [ @@ -741,13 +891,22 @@ "0": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -757,7 +916,7 @@ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -767,7 +926,7 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", @@ -795,13 +954,22 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "snp_calls_vcf": [ [ { - "id": 
"test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -818,7 +986,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:08:53.503322275" + "timestamp": "2024-09-20T09:16:38.070360001" }, "1 sample - no bed, fasta, fai, [], []": { "content": [ @@ -826,13 +994,22 @@ "0": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + "test_norm_singlesample.vcf.gz:md5,8d1b64c0eed7eb66dd3294fcbc15b4a1" ] ], "1": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,41ab8f51ac55e08b2339feb5a6b40764" + ] + ], + "2": [ [ { "id": [ @@ -844,7 +1021,7 @@ "[]_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" ] ], - "2": [ + "3": [ [ { "id": [ @@ -856,7 +1033,7 @@ "[]_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", @@ -888,13 +1065,22 @@ "[]_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,41ab8f51ac55e08b2339feb5a6b40764" + ] + ], "snp_calls_vcf": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,94d38d8fd3c50a7ede72a080fbd4992c" + "test_norm_singlesample.vcf.gz:md5,8d1b64c0eed7eb66dd3294fcbc15b4a1" ] ], "versions": [ @@ -911,7 +1097,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": 
"2024-09-19T14:05:46.058241488" + "timestamp": "2024-09-20T09:13:29.038685117" }, "1 sample - 2 bed, fasta, fai, bed, []": { "content": [ @@ -919,13 +1105,22 @@ "0": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + "test_norm_singlesample.vcf.gz:md5,f8e492c9c4d6e0cb027b300bb3f7155a" ] ], "1": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,f1d5f7e2321e7f0534d251c12b373221" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -943,7 +1138,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,56d662f0a527803ae72e40625c01014b" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -961,7 +1156,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", @@ -1009,13 +1204,22 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,53e24fb59fef3bee6291c474e20200b9" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,f1d5f7e2321e7f0534d251c12b373221" + ] + ], "snp_calls_vcf": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + "test_norm_singlesample.vcf.gz:md5,f8e492c9c4d6e0cb027b300bb3f7155a" ] ], "versions": [ @@ -1036,7 +1240,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:07:15.005268156" + "timestamp": "2024-09-20T09:14:58.277461525" }, "2 samples - 2 bed, fasta, fai, bed, []": { "content": [ @@ -1044,20 +1248,36 @@ "0": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", 
"single_end": false }, - "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + "test_norm_singlesample.vcf.gz:md5,f8e492c9c4d6e0cb027b300bb3f7155a" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84" + "test2_norm_singlesample.vcf.gz:md5,0e42833e64e0f427ca09259e17e2765b" ] ], "1": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,f1d5f7e2321e7f0534d251c12b373221" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,2c4354a182ae1f4ad94f056b2fbf62a8" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -1075,7 +1295,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz:md5,ec12cca4d6849a5f47823331c4b9a8f3" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -1093,7 +1313,7 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", @@ -1145,20 +1365,36 @@ "genome.multi_intervals.bed_norm_multisample.vcf.gz.csi:md5,12d9802de9b9e071408e526930050626" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,f1d5f7e2321e7f0534d251c12b373221" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2_norm_singlesample.vcf.gz.tbi:md5,2c4354a182ae1f4ad94f056b2fbf62a8" + ] + ], "snp_calls_vcf": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,f978f9f75d4892499156218a300fd246" + "test_norm_singlesample.vcf.gz:md5,f8e492c9c4d6e0cb027b300bb3f7155a" ], [ { "id": "test2", "single_end": false }, - "test2_norm_singlesample.bcf:md5,562e4ab7cea09c4f296550784e02ca84" + 
"test2_norm_singlesample.vcf.gz:md5,0e42833e64e0f427ca09259e17e2765b" ] ], "versions": [ @@ -1183,7 +1419,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:07:48.515375217" + "timestamp": "2024-09-20T09:15:32.21563308" }, "1 sample - no bed, fasta, fai, [], [] - stub": { "content": [ @@ -1191,13 +1427,22 @@ "0": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ [ { "id": [ @@ -1209,7 +1454,7 @@ "[]_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "2": [ + "3": [ [ { "id": [ @@ -1221,7 +1466,7 @@ "[]_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", @@ -1253,13 +1498,22 @@ "[]_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "snp_calls_vcf": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -1276,7 +1530,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:08:32.905425335" + "timestamp": "2024-09-20T09:16:16.776853798" }, "1 sample - 1 bed, fasta, fai, [], [] - stub": { "content": [ @@ -1284,13 +1538,22 @@ 
"0": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ [ { "id": "genome.bed", @@ -1300,7 +1563,7 @@ "genome.bed_norm_multisample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "2": [ + "3": [ [ { "id": "genome.bed", @@ -1310,7 +1573,7 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "3": [ + "4": [ "versions.yml:md5,0ca81f5e441a48c6c650a07c7045ed34", "versions.yml:md5,13101c9283d4a82e859574b0a981311c", "versions.yml:md5,77dbd5f16ae8b59d09563a07be6faa44", @@ -1338,13 +1601,22 @@ "genome.bed_norm_multisample.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "snp_calls_tbi": [ + [ + { + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", + "single_end": false + }, + "test_norm_singlesample.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], "snp_calls_vcf": [ [ { - "id": "test", + "id": "test:md5,d41d8cd98f00b204e9800998ecf8427e", "single_end": false }, - "test_norm_singlesample.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + "test_norm_singlesample.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -1361,6 +1633,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-09-19T14:08:43.136478303" + "timestamp": "2024-09-20T09:16:27.732828098" } } \ No newline at end of file diff --git a/subworkflows/local/short_variant_calling/tests/nextflow.config b/subworkflows/local/short_variant_calling/tests/nextflow.config index f9b1d1a5..e0f992af 100644 --- a/subworkflows/local/short_variant_calling/tests/nextflow.config +++ b/subworkflows/local/short_variant_calling/tests/nextflow.config @@ -37,7 +37,8 @@ process { 
'--no-version', '-m -', '-w 10000', - '--output-type u', + '--output-type z', + '--write-index=tbi' ].join(' ') } diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf index 7711da7f..c2973a9a 100644 --- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf @@ -390,14 +390,14 @@ def toolCitationText() { "WhatsHap (Martin et al. 2016)", ] } - if(params.phaser == 'hiphase_sv') { + if(params.phaser == 'hiphase') { citation_text = citation_text + [ "HiPhase (Holt et al. 2024)", ] } - if(params.phaser == 'hiphase_snv') { + if(params.phaser == 'longphase') { citation_text = citation_text + [ - "HiPhase (Holt et al. 2024)", + "LongPhase (Lin et al. 2022)", ] } if (!params.skip_methylation_wf) { @@ -445,6 +445,7 @@ def toolBibliographyText() { "
  • Yun T, Li H, Chang PC, Lin MF, Carroll A, McLean CY. Accurate, scalable cohort variant calls using DeepVariant and GLnexus. Robinson P, ed. Bioinformatics. 2021;36(24):5582-5589. doi:10.1093/bioinformatics/btaa1081
  • ", "
  • Cheng, H., Concepcion, G.T., Feng, X. et al. Haplotype-resolved de novo assembly using phased assembly graphs with hifiasm. Nat Methods 18, 170–175 (2021). https://doi.org/10.1038/s41592-020-01056-5
  • ", "
  • James M Holt, Christopher T Saunders, William J Rowell, Zev Kronenberg, Aaron M Wenger, Michael Eberle, HiPhase: jointly phasing small, structural, and tandem repeat variants from HiFi sequencing, Bioinformatics, Volume 40, Issue 2, February 2024, btae042, https://doi.org/10.1093/bioinformatics/btae042
  • ", + "
  • Jyun-Hong Lin, Liang-Chi Chen, Shu-Chi Yu, Yao-Ting Huang, LongPhase: an ultra-fast chromosome-scale phasing algorithm for small and large variants, Bioinformatics, Volume 38, Issue 7, March 2022, Pages 1816–1822, https://doi.org/10.1093/bioinformatics/btac058
  • ", "
  • Heng Li, Minimap2: pairwise alignment for nucleotide sequences, Bioinformatics, Volume 34, Issue 18, September 2018, Pages 3094–3100, https://doi.org/10.1093/bioinformatics/bty191
  • ", "
  • Pedersen BS, Quinlan AR. Mosdepth: quick coverage calculation for genomes and exomes. Hancock J, ed. Bioinformatics. 2018;34(5):867-868. doi:10.1093/bioinformatics/btx699
  • ", "
  • Genome-wide profiling of highly similar paralogous genes using HiFi sequencing. Xiao Chen, Daniel Baker, Egor Dolzhenko, Joseph M Devaney, Jessica Noya, April S Berlyoung, Rhonda Brandon, Kathleen S Hruska, Lucas Lochovsky, Paul Kruszka, Scott Newman, Emily Farrow, Isabelle Thiffault, Tomi Pastinen, Dalia Kasperaviciute, Christian Gilissen, Lisenka Vissers, Alexander Hoischen, Seth Berger, Eric Vilain, Emmanuèle Délot, UCI Genomics Research to Elucidate the Genetics of Rare Diseases (UCI GREGoR) Consortium, Michael A Eberle. bioRxiv 2024.04.19.590294; doi: https://doi.org/10.1101/2024.04.19.590294
  • ", diff --git a/tests/main.nf.test b/tests/main.nf.test index 975ebbbf..ec5ade6f 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -29,7 +29,7 @@ nextflow_pipeline { file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), file("$outputDir/multiqc/multiqc_data/multiqc_somalier.txt"), // Assert with snapshot HG002_Revio - bam("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_phased.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_haplotagged.bam", stringency: 'silent').getReadsMD5(), file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap1.p_ctg.assembly_summary"), file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio/HG002_Revio.asm.bp.hap2.p_ctg.assembly_summary"), bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bam", stringency: 'silent').getReadsMD5(), @@ -80,7 +80,7 @@ nextflow_pipeline { { assert new File("$outputDir/svs/multi_sample/test/test_svs.vcf.gz").exists() }, { assert new File("$outputDir/svs/multi_sample/test/test_svs.vcf.gz.tbi").exists() }, // Assert exists HG002_Revio - { assert new File("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_phased.bam.bai").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio/HG002_Revio_haplotagged.bam.bai").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.bam.bai").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap1.sam.gz").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio/HG002_Revio.hap2.bam.bai").exists() }, @@ -98,10 +98,10 @@ nextflow_pipeline { { assert new File("$outputDir/paraphase/HG002_Revio/HG002_Revio.paraphase.json").exists() }, { assert new File("$outputDir/qc/cramino/phased/HG002_Revio/HG002_Revio_cramino_aligned_phased.txt").exists() }, { assert new 
File("$outputDir/qc/cramino/unphased/HG002_Revio/HG002_Revio_cramino_aligned.txt").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio/HG002_Revio_phased.vcf.gz").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio/HG002_Revio_phased.vcf.gz.tbi").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio/HG002_Revio_stats.stats.tsv").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio/HG002_Revio_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/phased_variants/HG002_Revio/HG002_Revio_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phased_variants/HG002_Revio/HG002_Revio_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_Revio/HG002_Revio_stats.stats.tsv").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_Revio/HG002_Revio_stats.blocks.tsv").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_Revio/HG002_Revio_fastqc.html").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_Revio/HG002_Revio_fastqc.zip").exists() }, { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio/HG002_Revio_repeat_expansion_stranger.vcf.gz").exists() }, @@ -122,6 +122,7 @@ nextflow_pipeline { params { input = 'https://github.com/genomic-medicine-sweden/test-datasets/raw/e2266a34c14d1e0a9ef798de3cd81a76c9216fc1/testdata/samplesheet_multisample_bam.csv' outdir = "$outputDir" + phaser = 'hiphase' } } @@ -134,7 +135,7 @@ nextflow_pipeline { file("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), // Assert with snapshot HG002_Revio_A - bam("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_haplotagged.bam", stringency: 'silent').getReadsMD5(), 
file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap1.p_ctg.assembly_summary"), file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_A/HG002_Revio_A.asm.bp.hap2.p_ctg.assembly_summary"), bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bam", stringency: 'silent').getReadsMD5(), @@ -170,7 +171,7 @@ nextflow_pipeline { bam("$outputDir/repeat_calling/trgt/single_sample/HG002_Revio_A/HG002_Revio_A_spanning_sorted.bam", stringency: 'silent').getReadsMD5(), file("$outputDir/snvs/stats/single_sample/HG002_Revio_A.vcf.gz.bcftools_stats.txt").readLines()[0..2], // Assert with snapshot HG002_Revio_B - bam("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_haplotagged.bam", stringency: 'silent').getReadsMD5(), file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap1.p_ctg.assembly_summary"), file("$outputDir/assembly_haplotypes/gfastats/HG002_Revio_B/HG002_Revio_B.asm.bp.hap2.p_ctg.assembly_summary"), bam("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.bam", stringency: 'silent').getReadsMD5(), @@ -225,7 +226,7 @@ nextflow_pipeline { { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz").exists() }, { assert new File("$outputDir/repeat_calling/trgt/multi_sample/test/test.vcf.gz.tbi").exists() }, // Assert exists HG002_Revio_A - { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam.bai").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_haplotagged.bam.bai").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.bam.bai").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap1.sam.gz").exists() }, { assert new 
File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_A/HG002_Revio_A.hap2.bam.bai").exists() }, @@ -243,11 +244,11 @@ nextflow_pipeline { { assert new File("$outputDir/paraphase/HG002_Revio_A/HG002_Revio_A.paraphase.json").exists() }, { assert new File("$outputDir/qc/cramino/phased/HG002_Revio_A/HG002_Revio_A_cramino_aligned_phased.txt").exists() }, { assert new File("$outputDir/qc/cramino/unphased/HG002_Revio_A/HG002_Revio_A_cramino_aligned.txt").exists() }, - { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_phased.bam.bai").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz.tbi").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_A/HG002_Revio_A_stats.stats.tsv").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_A/HG002_Revio_A_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio_A/HG002_Revio_A_haplotagged.bam.bai").exists() }, + { assert new File("$outputDir/phased_variants/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phased_variants/HG002_Revio_A/HG002_Revio_A_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_Revio_A/HG002_Revio_A_stats.stats.tsv").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_Revio_A/HG002_Revio_A_stats.blocks.tsv").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_Revio_A/HG002_Revio_A_fastqc.html").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_Revio_A/HG002_Revio_A_fastqc.zip").exists() }, { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_A/HG002_Revio_A_repeat_expansion_stranger.vcf.gz").exists() }, @@ -258,7 +259,7 @@ nextflow_pipeline { { assert new 
File("$outputDir/svs/single_sample/HG002_Revio_A/HG002_Revio_A_severus.vcf.gz").exists() }, { assert new File("$outputDir/svs/single_sample/HG002_Revio_A/HG002_Revio_A_severus.vcf.gz.tbi").exists() }, // Assert exists HG002_Revio_B - { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam.bai").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_haplotagged.bam.bai").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.bam.bai").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap1.sam.gz").exists() }, { assert new File("$outputDir/assembly_variant_calling/dipcall/HG002_Revio_B/HG002_Revio_B.hap2.bam.bai").exists() }, @@ -276,11 +277,11 @@ nextflow_pipeline { { assert new File("$outputDir/paraphase/HG002_Revio_B/HG002_Revio_B.paraphase.json").exists() }, { assert new File("$outputDir/qc/cramino/phased/HG002_Revio_B/HG002_Revio_B_cramino_aligned_phased.txt").exists() }, { assert new File("$outputDir/qc/cramino/unphased/HG002_Revio_B/HG002_Revio_B_cramino_aligned.txt").exists() }, - { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_phased.bam.bai").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz.tbi").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_B/HG002_Revio_B_stats.stats.tsv").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_Revio_B/HG002_Revio_B_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_Revio_B/HG002_Revio_B_haplotagged.bam.bai").exists() }, + { assert new File("$outputDir/phased_variants/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz").exists() }, + { assert new 
File("$outputDir/phased_variants/HG002_Revio_B/HG002_Revio_B_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_Revio_B/HG002_Revio_B_stats.stats.tsv").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_Revio_B/HG002_Revio_B_stats.blocks.tsv").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_Revio_B/HG002_Revio_B_fastqc.html").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_Revio_B/HG002_Revio_B_fastqc.zip").exists() }, { assert new File("$outputDir/repeat_annotation/stranger/HG002_Revio_B/HG002_Revio_B_repeat_expansion_stranger.vcf.gz").exists() }, @@ -303,6 +304,7 @@ nextflow_pipeline { preset = 'ONT_R10' parallel_alignments = 2 parallel_snv = 1 + phaser = 'whatshap' outdir = "$outputDir" } } @@ -316,7 +318,7 @@ nextflow_pipeline { file("$outputDir/multiqc/multiqc_data/multiqc_citations.txt"), file("$outputDir/multiqc/multiqc_data/multiqc_fastqc.txt"), // Assert with snapshot HG002_ONT_A - bam("$outputDir/aligned_reads/HG002_ONT_A/HG002_ONT_A_phased.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/aligned_reads/HG002_ONT_A/HG002_ONT_A_haplotagged.bam", stringency: 'silent').getReadsMD5(), file("$outputDir/cnv_calling/hificnv/HG002_ONT_A/HG002_ONT_A.HG002_ONT_A.copynum.bedgraph"), file("$outputDir/cnv_calling/hificnv/HG002_ONT_A/HG002_ONT_A.HG002_ONT_A.depth.bw"), file("$outputDir/cnv_calling/hificnv/HG002_ONT_A/HG002_ONT_A.HG002_ONT_A.maf.bw"), @@ -334,7 +336,7 @@ nextflow_pipeline { file("$outputDir/qc/mosdepth/HG002_ONT_A/HG002_ONT_A.regions.bed.gz.csi"), file("$outputDir/snvs/stats/single_sample/HG002_ONT_A.vcf.gz.bcftools_stats.txt").readLines()[0..2], // Assert with snapshot HG002_ONT_B - bam("$outputDir/aligned_reads/HG002_ONT_B/HG002_ONT_B_phased.bam", stringency: 'silent').getReadsMD5(), + bam("$outputDir/aligned_reads/HG002_ONT_B/HG002_ONT_B_haplotagged.bam", stringency: 'silent').getReadsMD5(), 
file("$outputDir/cnv_calling/hificnv/HG002_ONT_B/HG002_ONT_B.HG002_ONT_B.copynum.bedgraph"), file("$outputDir/cnv_calling/hificnv/HG002_ONT_B/HG002_ONT_B.HG002_ONT_B.depth.bw"), file("$outputDir/cnv_calling/hificnv/HG002_ONT_B/HG002_ONT_B.HG002_ONT_B.maf.bw"), @@ -369,7 +371,7 @@ nextflow_pipeline { { assert new File("$outputDir/svs/multi_sample/test/test_svs.vcf.gz").exists() }, { assert new File("$outputDir/svs/multi_sample/test/test_svs.vcf.gz.tbi").exists() }, // Assert exists HG002_ONT_A - { assert new File("$outputDir/aligned_reads/HG002_ONT_A/HG002_ONT_A_phased.bam.bai").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_ONT_A/HG002_ONT_A_haplotagged.bam.bai").exists() }, { assert new File("$outputDir/cnv_calling/hificnv/HG002_ONT_A/HG002_ONT_A.log").exists() }, { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_ONT_A/HG002_ONT_A_modkit_pileup_phased_1.bed.gz").exists() }, { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_ONT_A/HG002_ONT_A_modkit_pileup_phased_1.bed.gz.tbi").exists() }, @@ -383,11 +385,11 @@ nextflow_pipeline { { assert new File("$outputDir/paraphase/HG002_ONT_A/HG002_ONT_A.paraphase.json").exists() }, { assert new File("$outputDir/qc/cramino/phased/HG002_ONT_A/HG002_ONT_A_cramino_aligned_phased.txt").exists() }, { assert new File("$outputDir/qc/cramino/unphased/HG002_ONT_A/HG002_ONT_A_cramino_aligned.txt").exists() }, - { assert new File("$outputDir/aligned_reads/HG002_ONT_A/HG002_ONT_A_phased.bam.bai").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_ONT_A/HG002_ONT_A_phased.vcf.gz").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_ONT_A/HG002_ONT_A_phased.vcf.gz.tbi").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_ONT_A/HG002_ONT_A_stats.stats.tsv").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_ONT_A/HG002_ONT_A_stats.blocks.tsv").exists() }, + { assert new 
File("$outputDir/aligned_reads/HG002_ONT_A/HG002_ONT_A_haplotagged.bam.bai").exists() }, + { assert new File("$outputDir/phased_variants/HG002_ONT_A/HG002_ONT_A_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phased_variants/HG002_ONT_A/HG002_ONT_A_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_ONT_A/HG002_ONT_A_stats.stats.tsv").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_ONT_A/HG002_ONT_A_stats.blocks.tsv").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_ONT_A/HG002_ONT_A_fastqc.html").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_ONT_A/HG002_ONT_A_fastqc.zip").exists() }, { assert new File("$outputDir/snvs/single_sample/HG002_ONT_A/HG002_ONT_A_snv_annotated_ranked.vcf.gz").exists() }, @@ -395,7 +397,7 @@ nextflow_pipeline { { assert new File("$outputDir/svs/single_sample/HG002_ONT_A/HG002_ONT_A_severus.vcf.gz").exists() }, { assert new File("$outputDir/svs/single_sample/HG002_ONT_A/HG002_ONT_A_severus.vcf.gz.tbi").exists() }, // Assert exists HG002_ONT_B - { assert new File("$outputDir/aligned_reads/HG002_ONT_B/HG002_ONT_B_phased.bam.bai").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_ONT_B/HG002_ONT_B_haplotagged.bam.bai").exists() }, { assert new File("$outputDir/cnv_calling/hificnv/HG002_ONT_B/HG002_ONT_B.log").exists() }, { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_ONT_B/HG002_ONT_B_modkit_pileup_phased_1.bed.gz").exists() }, { assert new File("$outputDir/methylation/modkit/pileup/phased/HG002_ONT_B/HG002_ONT_B_modkit_pileup_phased_1.bed.gz.tbi").exists() }, @@ -409,11 +411,11 @@ nextflow_pipeline { { assert new File("$outputDir/paraphase/HG002_ONT_B/HG002_ONT_B.paraphase.json").exists() }, { assert new File("$outputDir/qc/cramino/phased/HG002_ONT_B/HG002_ONT_B_cramino_aligned_phased.txt").exists() }, { assert new File("$outputDir/qc/cramino/unphased/HG002_ONT_B/HG002_ONT_B_cramino_aligned.txt").exists() }, - { assert 
new File("$outputDir/aligned_reads/HG002_ONT_B/HG002_ONT_B_phased.bam.bai").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_ONT_B/HG002_ONT_B_phased.vcf.gz").exists() }, - { assert new File("$outputDir/phasing/whatshap/phase/HG002_ONT_B/HG002_ONT_B_phased.vcf.gz.tbi").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_ONT_B/HG002_ONT_B_stats.stats.tsv").exists() }, - { assert new File("$outputDir/phasing/whatshap/stats/HG002_ONT_B/HG002_ONT_B_stats.blocks.tsv").exists() }, + { assert new File("$outputDir/aligned_reads/HG002_ONT_B/HG002_ONT_B_haplotagged.bam.bai").exists() }, + { assert new File("$outputDir/phased_variants/HG002_ONT_B/HG002_ONT_B_phased.vcf.gz").exists() }, + { assert new File("$outputDir/phased_variants/HG002_ONT_B/HG002_ONT_B_phased.vcf.gz.tbi").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_ONT_B/HG002_ONT_B_stats.stats.tsv").exists() }, + { assert new File("$outputDir/qc/phasing_stats/HG002_ONT_B/HG002_ONT_B_stats.blocks.tsv").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_ONT_B/HG002_ONT_B_fastqc.html").exists() }, { assert new File("$outputDir/qc/fastqc/HG002_ONT_B/HG002_ONT_B_fastqc.zip").exists() }, { assert new File("$outputDir/snvs/single_sample/HG002_ONT_B/HG002_ONT_B_snv_annotated_ranked.vcf.gz").exists() }, diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap index ec5566aa..a944d17d 100644 --- a/tests/main.nf.test.snap +++ b/tests/main.nf.test.snap @@ -39,8 +39,8 @@ "HG002_Revio_cramino_aligned.arrow:md5,72df2934ff8aa7e1bf8cf8a4881a0d2a", "HG002_Revio.regions.bed.gz:md5,e051f64c7a780d67ea6727a327dd4281", "HG002_Revio.regions.bed.gz.csi:md5,026eef1c69fb4aa3a1687463fe2088ab", - "HG002_Revio_sorted.vcf.gz:md5,fbb5699b8f74fc105fb154e8fac7bfea", - "HG002_Revio_sorted.vcf.gz.tbi:md5,0466518ee265ba63160ed27cee0dec88", + "HG002_Revio_sorted.vcf.gz:md5,735d8ba586b01fb27e009e3964112ce9", + "HG002_Revio_sorted.vcf.gz.tbi:md5,d4c7838d72a4200e790227ba4d7b25d5", 
"65999ab8f2bc7841de8172468bf23ab6", [ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", @@ -49,10 +49,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-09-12T12:19:02.681485167" + "timestamp": "2024-09-20T09:18:35.001197778" }, "test profile - multisample": { "content": [ @@ -90,8 +90,8 @@ "HG002_Revio_A_cramino_aligned.arrow:md5,72df2934ff8aa7e1bf8cf8a4881a0d2a", "HG002_Revio_A.regions.bed.gz:md5,e051f64c7a780d67ea6727a327dd4281", "HG002_Revio_A.regions.bed.gz.csi:md5,026eef1c69fb4aa3a1687463fe2088ab", - "HG002_Revio_A_sorted.vcf.gz:md5,680938d6ebeafe73d8df0b21c0310276", - "HG002_Revio_A_sorted.vcf.gz.tbi:md5,a6554ab817e7c232a1554ea85fa00151", + "HG002_Revio_A_sorted.vcf.gz:md5,e7f53d86c027a7778e0e642cb2da8884", + "HG002_Revio_A_sorted.vcf.gz.tbi:md5,d4c7838d72a4200e790227ba4d7b25d5", "65999ab8f2bc7841de8172468bf23ab6", [ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", @@ -129,8 +129,8 @@ "HG002_Revio_B_cramino_aligned.arrow:md5,544a6513e7604d6425a664855212eced", "HG002_Revio_B.regions.bed.gz:md5,9c4c037139c752c60385d3c0b905af7f", "HG002_Revio_B.regions.bed.gz.csi:md5,d0640255d527e36655281f64e184b02c", - "HG002_Revio_B_sorted.vcf.gz:md5,a8aaec5870a60a4fabf6aff849ba1e61", - "HG002_Revio_B_sorted.vcf.gz.tbi:md5,e255a5ea92885967f0c126bddc8ea3b2", + "HG002_Revio_B_sorted.vcf.gz:md5,8703d5602d79e9fe4d70aced838f518f", + "HG002_Revio_B_sorted.vcf.gz.tbi:md5,01f65612542a83475217568bd50d4efd", "65999ab8f2bc7841de8172468bf23ab6", [ "# This file was produced by bcftools stats (1.20+htslib-1.20) and can be plotted using plot-vcfstats.", @@ -139,10 +139,10 @@ ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-09-12T12:22:11.460202495" + "timestamp": "2024-09-20T09:48:22.654535823" }, "test profile - 
multisample - ont - parallel_alignments 2 - parallel_snv 1": { "content": [ diff --git a/workflows/nallo.nf b/workflows/nallo.nf index 3eecb23d..0bb337e5 100644 --- a/workflows/nallo.nf +++ b/workflows/nallo.nf @@ -106,7 +106,7 @@ workflow NALLO { : '' // Check parameter that doesn't conform to schema validation here - if (params.phaser.matches('hiphase_sv|hiphase_snv') && params.preset == 'ONT_R10') { error "The HiPhase license only permits analysis of data from PacBio. For details see: https://github.com/PacificBiosciences/HiPhase/blob/main/LICENSE.md" } + if (params.phaser.matches('hiphase') && params.preset == 'ONT_R10') { error "The HiPhase license only permits analysis of data from PacBio. For details see: https://github.com/PacificBiosciences/HiPhase/blob/main/LICENSE.md" } // Read and store paths in the vep_plugin_files file if (params.vep_plugin_files) { @@ -451,7 +451,13 @@ workflow NALLO { // if(!params.skip_phasing_wf) { - PHASING( SHORT_VARIANT_CALLING.out.snp_calls_vcf, CALL_SVS.out.ch_sv_calls_vcf, bam_bai, fasta, fai) + PHASING ( + SHORT_VARIANT_CALLING.out.snp_calls_vcf, + SHORT_VARIANT_CALLING.out.snp_calls_tbi, + bam_bai, + fasta, + fai + ) ch_versions = ch_versions.mix(PHASING.out.versions) ch_multiqc_files = ch_multiqc_files.mix(PHASING.out.stats.collect{it[1]}.ifEmpty([]))