diff --git a/README.md b/README.md index 411674c6..a152939f 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool - SV calling and joint genotyping ([`sniffles2`](https://github.com/fritzsedlazeck/Sniffles)) - Tandem repeats ([`TRGT`](https://github.com/PacificBiosciences/trgt/tree/main)) - Assembly based variant calls (HiFi only) ([`dipcall`](https://github.com/lh3/dipcall)) +- CNV-calling (HiFi only) ([`HiFiCNV`](https://github.com/PacificBiosciences/HiFiCNV)) ##### Phasing and methylation - Phase and haplotag reads ([`whatshap`](https://github.com/whatshap/whatshap) + [`hiphase`](https://github.com/PacificBiosciences/HiPhase)) @@ -66,6 +67,7 @@ HG005,/path/to/HG005.fastq.gz,FAM1,HG003,HG004,2,1 - If running dipcall, download a BED file with PAR regions ([hg38](https://raw.githubusercontent.com/lh3/dipcall/master/data/hs38.PAR.bed)) - If running TRGT, download a BED file with tandem repeats ([TRGT](https://github.com/PacificBiosciences/trgt/tree/main/repeats)) matching your reference genome. - If running SNV annotation, download [VEP cache](https://ftp.ensembl.org/pub/release-110/variation/vep/homo_sapiens_vep_110_GRCh38.tar.gz) and prepare a samplesheet with annotation databases ([`echtvar encode`](https://github.com/brentp/echtvar)): +- If running CNV-calling, expected CN regions for your reference genome can be downloaded from [HiFiCNV GitHub](https://github.com/PacificBiosciences/HiFiCNV/tree/main/data/excluded_regions) `snp_dbs.csv` ``` diff --git a/modules/local/hificnv.nf b/modules/local/pacbio/hificnv/main.nf similarity index 64% rename from modules/local/hificnv.nf rename to modules/local/pacbio/hificnv/main.nf index 12ef146a..3c25fe60 100644 --- a/modules/local/hificnv.nf +++ b/modules/local/pacbio/hificnv/main.nf @@ -1,17 +1,15 @@ process HIFICNV { tag "$meta.id" - label 'process_low' + label 'process_medium' + + conda "bioconda::hificnv=0.1.7" + container "quay.io/biocontainers/hificnv:0.1.7--h9ee0642_0" - conda "bioconda::hificnv=0.1.6b" - container "quay.io/biocontainers/hificnv:0.1.6b--h9ee0642_0" input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(bam), path(bai), path(maf_vcf), path(expected_cn_bed) tuple val(meta2), path(fasta) - // Take care of this later - tuple val(meta3), path(maf_vcf_gz) path(exclude_bed) - path(expected_cn) output: tuple val(meta), path("*.vcf.gz") , emit: vcf @@ -28,13 +26,19 @@ process HIFICNV { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - // TODO: add exclude, expected + def expected_cn = expected_cn_bed ? "--expected-cn ${expected_cn_bed}" : "" + def exclude = exclude_bed ? "--exclude ${exclude_bed}" : "" + def maf = maf_vcf ? "--maf ${maf_vcf}" : "" + """ hificnv \\ $args \\ --bam ${bam} \\ + $expected_cn \\ + $exclude \\ + $maf \\ --ref ${fasta} \\ - --threads $task.cpus \\ + --threads ${task.cpus} \\ --output-prefix ${prefix} cat <<-END_VERSIONS > versions.yml diff --git a/nextflow.config b/nextflow.config index 94a6bc47..2b6cc24c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,6 +27,7 @@ params { skip_phasing_wf = false skip_short_variant_calling = false skip_snv_annotation = false + skip_cnv_calling = false tandem_repeats = null preset = null @@ -36,10 +37,16 @@ params { snp_db = null vep_cache = null + // HiFiCNV + hificnv_xy = null + hificnv_xx = null + hificnv_exclude = null + bed = null - parallel_snv = 13 - //Preprocessing + // Preprocessing/parallelisation + + parallel_snv = 13 split_fastq = 0 // References diff --git a/nextflow_schema.json b/nextflow_schema.json index f2542182..c38726fb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -49,6 +49,10 @@ "skip_snv_annotation": { "type": "boolean", "description": "Skip SNV annotation" + }, + "skip_cnv_calling": { + "type": "boolean", + "description": "Skip CNV workflow" } }, "fa_icon": "fas fa-american-sign-language-interpreting" @@ -112,6 +116,12 @@ "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", "fa_icon": "far fa-file-code" }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, "igenomes_base": { "type": "string", "format": "directory-path", @@ -165,12 +175,6 @@ "hidden": true, "fa_icon": "fas fa-users-cog" }, - "config_profile_contact": { - "type": "string", - "description": "Institutional config contact information.", - "hidden": true, - "fa_icon": "fas fa-users-cog" - }, "config_profile_url": { "type": "string", "description": "Institutional config URL link.", @@ -403,7 +407,6 @@ }, "vep_cache": { "type": "string", - "default": "None", "description": "Path to directory of vep_cache", "format": "directory-path" }, @@ -412,6 +415,18 @@ "pattern": "^\\S+\\.bed$", "format": "file-path", "description": "BED file with regions of interest" + }, + "hificnv_xy": { + "type": "string", + "format": "file-path" + }, + "hificnv_xx": { + "type": "string", + "format": "file-path" + }, + "hificnv_exclude": { + "type": "string", + "description": "HiFiCNV BED file specifying regions to exclude" } } } diff --git a/subworkflows/local/cnv.nf b/subworkflows/local/cnv.nf index 250ea5e7..63219f8d 100644 --- a/subworkflows/local/cnv.nf +++ b/subworkflows/local/cnv.nf @@ -1,19 +1,48 @@ -include { HIFICNV } from '../../modules/local/hificnv' +include { HIFICNV } from '../../modules/local/pacbio/hificnv' workflow CNV { take: - ch_bam_bai // channel: [ val(meta), [[ bam ], [bai]] ] + ch_bam_bai_vcf // channel: [ val(meta), [[ bam ], [bai], [vcf] ] ch_fasta + ch_expected_xy_bed + ch_expected_xx_bed + ch_exclude_bed main: ch_versions = Channel.empty() - HIFICNV(ch_bam_bai, ch_fasta, [[],[]], [], []) + // Split samples if male or female + ch_bam_bai_vcf + .branch{ meta, bam, bai, vcf -> + male: meta.sex == '1' + female: meta.sex == '2' + } + .set{branched_bam} + + // Then merge male with XY-bed + branched_bam + .male + .combine(ch_expected_xy_bed) + .set {branched_bam_male} + + // And female with XX-bed + branched_bam + .female + .combine(ch_expected_xx_bed) + .set {branched_bam_female} + + // Concatenate the two channels + branched_bam_male + .concat(branched_bam_female) + .set{ ch_hificnv_in } + + // Run HiFiCNV + HIFICNV(ch_hificnv_in, ch_fasta, ch_exclude_bed) ch_versions = ch_versions.mix(HIFICNV.out.versions) emit: - versions = ch_versions // channel: [ versions.yml ] + versions = ch_versions // channel: [ versions.yml ] } diff --git a/workflows/skierfe.nf b/workflows/skierfe.nf index 81fc483c..48568c17 100644 --- a/workflows/skierfe.nf +++ b/workflows/skierfe.nf @@ -23,7 +23,10 @@ def checkPathParamList = [ params.input, params.trgt_repeats, params.snp_db, params.vep_cache, - params.bed + params.bed, + params.hificnv_exclude, + params.hificnv_xx, + params.hificnv_xy, ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } @@ -54,6 +57,8 @@ def checkUnsupportedCombinations() { exit 1, 'Cannot run repeat analysis without short variant calling' } else if (!params.skip_snv_annotation ) { exit 1, 'Cannot run snv annotation without short variant calling' + } else if (!params.skip_cnv_calling) { + exit 1, 'Cannot run CNV-calling without short variant calling' } } if (!params.skip_assembly_wf) { @@ -68,6 +73,11 @@ def checkUnsupportedCombinations() { if(params.snp_db) { ch_databases = Channel.fromSamplesheet('snp_db', immutable_meta: false).map{it[1]}.collect() } else { exit 1, 'Not skipping SNV Annotation: Missing Echtvar-DB samplesheet (--snp_db)'} if(params.vep_cache) { ch_vep_cache = Channel.fromPath(params.vep_cache).collect() } else { exit 1, 'Not skipping SNV Annotation: missing path to VEP cache-dir (--vep_cache)'} } + if (!params.skip_short_variant_calling & !params.skip_cnv_calling) { + if(params.hificnv_xy) { ch_expected_xy_bed = Channel.fromPath(params.hificnv_xy).collect() } else { exit 1, 'Not skipping CNV-calling: Missing --hificnv_xy'} + if(params.hificnv_xx) { ch_expected_xx_bed = Channel.fromPath(params.hificnv_xx).collect() } else { exit 1, 'Not skipping CNV-calling: Missing --hificnv_xx'} + if(params.hificnv_exclude) { ch_exclude_bed = Channel.fromPath(params.hificnv_exclude).collect() } else { ch_exclude_bed = Channel.value([]) } + } } // Check and set input files that are mandatory for some analyses @@ -89,11 +99,14 @@ def getValidWorkflows(preset) { switch(preset) { case "pacbio": return ["skip_methylation_wf"] + case "ONT_R10": + return ["skip_cnv_calling", "skip_assembly_wf"] } } -if(params.preset == "pacbio" & !params.skip_methylation_wf) { - exit 1, "Preset \'$params.preset\' cannot be run without: " + getValidWorkflows(params.preset) +if( (params.preset == "pacbio" & !params.skip_methylation_wf) | + (params.preset == "ONT_R10" & (!params.skip_cnv_calling | !params.skip_assembly_wf))) { + exit 1, "Preset \'$params.preset\' cannot be run wih: " + getValidWorkflows(params.preset) } /* @@ -232,10 +245,6 @@ workflow SKIERFE { // Call SVs with Sniffles2 STRUCTURAL_VARIANT_CALLING( bam_bai , ch_extra_snfs, fasta, fai, ch_tandem_repeats ) -<<<<<<< HEAD - -======= ->>>>>>> dev // Gather versions ch_versions = ch_versions.mix(ALIGN_READS.out.versions) @@ -244,7 +253,6 @@ workflow SKIERFE { if(!params.skip_short_variant_calling) { // Call SNVs with DeepVariant/DeepTrio - //SHORT_VARIANT_CALLING( bam_bai.map{ meta, bam, bai -> [meta, bam, bai, ''] }, ch_extra_gvcfs, fasta, fai ) SHORT_VARIANT_CALLING( ch_snv_calling_in , ch_extra_gvcfs, fasta, fai, ch_bed ) ch_versions = ch_versions.mix(SHORT_VARIANT_CALLING.out.versions) @@ -252,6 +260,19 @@ workflow SKIERFE { SNV_ANNOTATION(SHORT_VARIANT_CALLING.out.combined_bcf, SHORT_VARIANT_CALLING.out.snp_calls_vcf, ch_databases, fasta, ch_vep_cache) } + if(params.preset != 'ONT_R10') { + + bam_bai + .join(SHORT_VARIANT_CALLING.out.snp_calls_vcf) + .groupTuple() + .set { cnv_workflow_in } + + CNV(cnv_workflow_in, fasta, ch_expected_xy_bed, ch_expected_xx_bed, ch_exclude_bed) + ch_versions = ch_versions.mix(CNV.out.versions) + } + + + if(!params.skip_phasing_wf) { // Phase variants with WhatsHap PHASING( SHORT_VARIANT_CALLING.out.snp_calls_vcf, STRUCTURAL_VARIANT_CALLING.out.ch_sv_calls_vcf, bam_bai, fasta, fai) @@ -260,9 +281,6 @@ workflow SKIERFE { // Gather versions ch_versions = ch_versions.mix(PHASING.out.versions) - // Call CNVs - CNV( hap_bam_bai, fasta) - ch_versions = ch_versions.mix(CNV.out.versions) if(!params.skip_methylation_wf) { // Pileup methylation with modkit