Skip to content

Commit

Permalink
Add expected CN-files (for XX/XY), MAF-vcf and option to use an exclu…
Browse files Browse the repository at this point in the history
…de regions file. Also update HiFiCNV version to v.0.1.7
  • Loading branch information
fellen31 committed Dec 5, 2023
1 parent 5d58573 commit 90fa4cd
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 33 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool
- SV calling and joint genotyping ([`sniffles2`](https://github.com/fritzsedlazeck/Sniffles))
- Tandem repeats ([`TRGT`](https://github.com/PacificBiosciences/trgt/tree/main))
- Assembly based variant calls (HiFi only) ([`dipcall`](https://github.com/lh3/dipcall))
- CNV-calling (HiFi only) ([`HiFiCNV`](https://github.com/PacificBiosciences/HiFiCNV))

##### Phasing and methylation
- Phase and haplotag reads ([`whatshap`](https://github.com/whatshap/whatshap) + [`hiphase`](https://github.com/PacificBiosciences/HiPhase))
Expand Down Expand Up @@ -66,6 +67,7 @@ HG005,/path/to/HG005.fastq.gz,FAM1,HG003,HG004,2,1
- If running dipcall, download a BED file with PAR regions ([hg38](https://raw.githubusercontent.com/lh3/dipcall/master/data/hs38.PAR.bed))
- If running TRGT, download a BED file with tandem repeats ([TRGT](https://github.com/PacificBiosciences/trgt/tree/main/repeats)) matching your reference genome.
- If running SNV annotation, download [VEP cache](https://ftp.ensembl.org/pub/release-110/variation/vep/homo_sapiens_vep_110_GRCh38.tar.gz) and prepare a samplesheet with annotation databases ([`echtvar encode`](https://github.com/brentp/echtvar)):
- If running CNV-calling, expected CN regions for your reference genome can be downloaded from [HiFiCNV GitHub](https://github.com/PacificBiosciences/HiFiCNV/tree/main/data/excluded_regions)

`snp_dbs.csv`
```
Expand Down
22 changes: 13 additions & 9 deletions modules/local/hificnv.nf → modules/local/pacbio/hificnv/main.nf
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
process HIFICNV {
tag "$meta.id"
label 'process_low'
label 'process_medium'

conda "bioconda::hificnv=0.1.7"
container "quay.io/biocontainers/hificnv:0.1.7--h9ee0642_0"

conda "bioconda::hificnv=0.1.6b"
container "quay.io/biocontainers/hificnv:0.1.6b--h9ee0642_0"

input:
tuple val(meta), path(bam), path(bai)
tuple val(meta), path(bam), path(bai), path(maf_vcf), path(expected_cn_bed)
tuple val(meta2), path(fasta)
// Take care of this later
tuple val(meta3), path(maf_vcf_gz)
path(exclude_bed)
path(expected_cn)

output:
tuple val(meta), path("*.vcf.gz") , emit: vcf
Expand All @@ -28,13 +26,19 @@ process HIFICNV {
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}"

// TODO: add exclude, expected
def expected_cn = expected_cn_bed ? "--expected-cn ${expected_cn_bed}" : ""
def exclude = exclude_bed ? "--exclude ${exclude_bed}" : ""
def maf = maf_vcf ? "--maf ${maf_vcf}" : ""

"""
hificnv \\
$args \\
--bam ${bam} \\
$expected_cn \\
$exclude \\
$maf \\
--ref ${fasta} \\
--threads $task.cpus \\
--threads ${task.cpus} \\
--output-prefix ${prefix}
cat <<-END_VERSIONS > versions.yml
Expand Down
11 changes: 9 additions & 2 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ params {
skip_phasing_wf = false
skip_short_variant_calling = false
skip_snv_annotation = false
skip_cnv_calling = false

tandem_repeats = null
preset = null
Expand All @@ -36,10 +37,16 @@ params {
snp_db = null
vep_cache = null

// HiFiCNV
hificnv_xy = null
hificnv_xx = null
hificnv_exclude = null

bed = null
parallel_snv = 13

//Preprocessing
// Preprocessing/parallelisation

parallel_snv = 13
split_fastq = 0

// References
Expand Down
29 changes: 22 additions & 7 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@
"skip_snv_annotation": {
"type": "boolean",
"description": "Skip SNV annotation"
},
"skip_cnv_calling": {
"type": "boolean",
"description": "Skip CNV workflow"
}
},
"fa_icon": "fas fa-american-sign-language-interpreting"
Expand Down Expand Up @@ -112,6 +116,12 @@
"help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.",
"fa_icon": "far fa-file-code"
},
"config_profile_contact": {
"type": "string",
"description": "Institutional config contact information.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"igenomes_base": {
"type": "string",
"format": "directory-path",
Expand Down Expand Up @@ -165,12 +175,6 @@
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"config_profile_contact": {
"type": "string",
"description": "Institutional config contact information.",
"hidden": true,
"fa_icon": "fas fa-users-cog"
},
"config_profile_url": {
"type": "string",
"description": "Institutional config URL link.",
Expand Down Expand Up @@ -403,7 +407,6 @@
},
"vep_cache": {
"type": "string",
"default": "None",
"description": "Path to directory of vep_cache",
"format": "directory-path"
},
Expand All @@ -412,6 +415,18 @@
"pattern": "^\\S+\\.bed$",
"format": "file-path",
"description": "BED file with regions of interest"
},
"hificnv_xy": {
"type": "string",
"format": "file-path"
},
"hificnv_xx": {
"type": "string",
"format": "file-path"
},
"hificnv_exclude": {
"type": "string",
"description": "HiFiCNV BED file specifying regions to exclude"
}
}
}
Expand Down
37 changes: 33 additions & 4 deletions subworkflows/local/cnv.nf
Original file line number Diff line number Diff line change
@@ -1,19 +1,48 @@
include { HIFICNV } from '../../modules/local/hificnv'
include { HIFICNV } from '../../modules/local/pacbio/hificnv'

workflow CNV {

take:
ch_bam_bai // channel: [ val(meta), [[ bam ], [bai]] ]
ch_bam_bai_vcf // channel: [ val(meta), [[ bam ], [bai], [vcf] ]
ch_fasta
ch_expected_xy_bed
ch_expected_xx_bed
ch_exclude_bed

main:
ch_versions = Channel.empty()

HIFICNV(ch_bam_bai, ch_fasta, [[],[]], [], [])
// Split samples if male or female
ch_bam_bai_vcf
.branch{ meta, bam, bai, vcf ->
male: meta.sex == '1'
female: meta.sex == '2'
}
.set{branched_bam}

// Then merge male with XY-bed
branched_bam
.male
.combine(ch_expected_xy_bed)
.set {branched_bam_male}

// And female with XX-bed
branched_bam
.female
.combine(ch_expected_xx_bed)
.set {branched_bam_female}

// Concatenate the two channels
branched_bam_male
.concat(branched_bam_female)
.set{ ch_hificnv_in }

// Run HiFiCNV
HIFICNV(ch_hificnv_in, ch_fasta, ch_exclude_bed)

ch_versions = ch_versions.mix(HIFICNV.out.versions)

emit:
versions = ch_versions // channel: [ versions.yml ]
versions = ch_versions // channel: [ versions.yml ]
}

40 changes: 29 additions & 11 deletions workflows/skierfe.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ def checkPathParamList = [ params.input,
params.trgt_repeats,
params.snp_db,
params.vep_cache,
params.bed
params.bed,
params.hificnv_exclude,
params.hificnv_xx,
params.hificnv_xy,
]

for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
Expand Down Expand Up @@ -54,6 +57,8 @@ def checkUnsupportedCombinations() {
exit 1, 'Cannot run repeat analysis without short variant calling'
} else if (!params.skip_snv_annotation ) {
exit 1, 'Cannot run snv annotation without short variant calling'
} else if (!params.skip_cnv_calling) {
exit 1, 'Cannot run CNV-calling without short variant calling'
}
}
if (!params.skip_assembly_wf) {
Expand All @@ -68,6 +73,11 @@ def checkUnsupportedCombinations() {
if(params.snp_db) { ch_databases = Channel.fromSamplesheet('snp_db', immutable_meta: false).map{it[1]}.collect() } else { exit 1, 'Not skipping SNV Annotation: Missing Echtvar-DB samplesheet (--snp_db)'}
if(params.vep_cache) { ch_vep_cache = Channel.fromPath(params.vep_cache).collect() } else { exit 1, 'Not skipping SNV Annotation: missing path to VEP cache-dir (--vep_cache)'}
}
if (!params.skip_short_variant_calling & !params.skip_cnv_calling) {
if(params.hificnv_xy) { ch_expected_xy_bed = Channel.fromPath(params.hificnv_xy).collect() } else { exit 1, 'Not skipping CNV-calling: Missing --hificnv_xy'}
if(params.hificnv_xx) { ch_expected_xx_bed = Channel.fromPath(params.hificnv_xx).collect() } else { exit 1, 'Not skipping CNV-calling: Missing --hificnv_xx'}
if(params.hificnv_exclude) { ch_exclude_bed = Channel.fromPath(params.hificnv_exclude).collect() } else { ch_exclude_bed = Channel.value([]) }
}
}

// Check and set input files that are mandatory for some analyses
Expand All @@ -89,11 +99,14 @@ def getValidWorkflows(preset) {
switch(preset) {
case "pacbio":
return ["skip_methylation_wf"]
case "ONT_R10":
return ["skip_cnv_calling", "skip_assembly_wf"]
}
}

if(params.preset == "pacbio" & !params.skip_methylation_wf) {
exit 1, "Preset \'$params.preset\' cannot be run without: " + getValidWorkflows(params.preset)
if( (params.preset == "pacbio" & !params.skip_methylation_wf) |
(params.preset == "ONT_R10" & (!params.skip_cnv_calling | !params.skip_assembly_wf))) {
exit 1, "Preset \'$params.preset\' cannot be run wih: " + getValidWorkflows(params.preset)
}

/*
Expand Down Expand Up @@ -232,10 +245,6 @@ workflow SKIERFE {

// Call SVs with Sniffles2
STRUCTURAL_VARIANT_CALLING( bam_bai , ch_extra_snfs, fasta, fai, ch_tandem_repeats )
<<<<<<< HEAD

=======
>>>>>>> dev

// Gather versions
ch_versions = ch_versions.mix(ALIGN_READS.out.versions)
Expand All @@ -244,14 +253,26 @@ workflow SKIERFE {

if(!params.skip_short_variant_calling) {
// Call SNVs with DeepVariant/DeepTrio
//SHORT_VARIANT_CALLING( bam_bai.map{ meta, bam, bai -> [meta, bam, bai, ''] }, ch_extra_gvcfs, fasta, fai )
SHORT_VARIANT_CALLING( ch_snv_calling_in , ch_extra_gvcfs, fasta, fai, ch_bed )
ch_versions = ch_versions.mix(SHORT_VARIANT_CALLING.out.versions)

if(!params.skip_snv_annotation) {
SNV_ANNOTATION(SHORT_VARIANT_CALLING.out.combined_bcf, SHORT_VARIANT_CALLING.out.snp_calls_vcf, ch_databases, fasta, ch_vep_cache)
}

if(params.preset != 'ONT_R10') {

bam_bai
.join(SHORT_VARIANT_CALLING.out.snp_calls_vcf)
.groupTuple()
.set { cnv_workflow_in }

CNV(cnv_workflow_in, fasta, ch_expected_xy_bed, ch_expected_xx_bed, ch_exclude_bed)
ch_versions = ch_versions.mix(CNV.out.versions)
}



if(!params.skip_phasing_wf) {
// Phase variants with WhatsHap
PHASING( SHORT_VARIANT_CALLING.out.snp_calls_vcf, STRUCTURAL_VARIANT_CALLING.out.ch_sv_calls_vcf, bam_bai, fasta, fai)
Expand All @@ -260,9 +281,6 @@ workflow SKIERFE {
// Gather versions
ch_versions = ch_versions.mix(PHASING.out.versions)

// Call CNVs
CNV( hap_bam_bai, fasta)
ch_versions = ch_versions.mix(CNV.out.versions)

if(!params.skip_methylation_wf) {
// Pileup methylation with modkit
Expand Down

0 comments on commit 90fa4cd

Please sign in to comment.