Skip to content

Commit

Permalink
Rank variants in parallel (#278)
Browse files Browse the repository at this point in the history
  • Loading branch information
fellen31 authored Jul 26, 2024
1 parent 4ee898e commit ef6a9a2
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 68 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#268](https://github.com/genomic-medicine-sweden/nallo/pull/268) - Changed alignment output file names and directory structure
- [#270](https://github.com/genomic-medicine-sweden/nallo/pull/270) - Changed whatshap stats to always run, regardless of phasing software, and changed the output from `*.stats.tsv.gz` to `*.stats.tsv` to allow being picked up by MultiQC
- [#277](https://github.com/genomic-medicine-sweden/nallo/pull/277) - Allowed CNV calling as soon as SNV calling for a sample is finished
- [#278](https://github.com/genomic-medicine-sweden/nallo/pull/278) - Changed the SNV ranking to run in parallel per region

### `Removed`

Expand Down
4 changes: 2 additions & 2 deletions conf/modules/general.config
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ process {
}

withName: '.*:NALLO:BCFTOOLS_CONCAT' {
ext.prefix = { params.skip_snv_annotation ? "${meta.id}_snv" : "${meta.id}_snv_annotated" }
ext.prefix = { params.skip_snv_annotation ? "${meta.id}_snv" : (params.skip_rank_variants ? "${meta.id}_snv_annotated" : "${meta.id}_snv_annotated_ranked") }
ext.args = { [
'--allow-overlaps',
'--output-type z',
Expand All @@ -43,7 +43,7 @@ process {
publishDir = [
path: { "${params.outdir}/snvs/multi_sample/${meta.id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : (!params.skip_rank_variants && !params.skip_snv_annotation ? null : filename) }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

Expand Down
22 changes: 6 additions & 16 deletions conf/modules/rank_variants.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@

process {

withName: '.*:RANK_VARIANTS_SNV:.*' {
publishDir = [
enabled: false,
]
}

withName: '.*:RANK_VARIANTS_SNV:GENMOD_ANNOTATE' {
ext.prefix = { "${meta.id}_snv_genmod_annotate" }
ext.args = { [
Expand Down Expand Up @@ -45,20 +51,4 @@ process {
ext.when = false
}

withName: '.*:RANK_VARIANTS_SNV:TABIX_BGZIP' {
ext.prefix = { "${meta.id}_snv_annotated_ranked" }
publishDir = [
path: { "${params.outdir}/snvs/multi_sample/${meta.id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: '.*:RANK_VARIANTS_SNV:TABIX_TABIX' {
publishDir = [
path: { "${params.outdir}/snvs/multi_sample/${meta.id}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}
2 changes: 1 addition & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ params {
// Somalier
somalier_sites = params.pipelines_testdata_base_path + 'nallo/reference/somalier_sites.vcf.gz'

parallel_snv = 3 // Create 3 parallel DeepVariant processes
parallel_snv = 2 // Create 2 parallel DeepVariant processes
preset = "revio"

}
Expand Down
4 changes: 2 additions & 2 deletions tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ nextflow_pipeline {
variant_consequences_snv = params.pipelines_testdata_base_path + 'nallo/reference/variant_consequences_v2.txt'

// Parameters
parallel_snv = 3
parallel_snv = 2
preset = "revio"
outdir = "$outputDir"
}
Expand Down Expand Up @@ -162,7 +162,7 @@ nextflow_pipeline {
variant_consequences_snv = params.pipelines_testdata_base_path + 'nallo/reference/variant_consequences_v2.txt'

// Parameters
parallel_snv = 3
parallel_snv = 2
preset = "revio"
outdir = "$outputDir"
}
Expand Down
79 changes: 32 additions & 47 deletions workflows/nallo.nf
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ workflow NALLO {
if(!params.skip_snv_annotation) {

//
// Annotate one multisample VCF per variant call region
// Annotates one multisample VCF per variant call region
//
SNV_ANNOTATION(
SHORT_VARIANT_CALLING.out.combined_bcf,
Expand All @@ -310,68 +310,53 @@ workflow NALLO {
)
ch_versions = ch_versions.mix(ANN_CSQ_PLI_SNV.out.versions)

ANN_CSQ_PLI_SNV.out.vcf_ann
.join( ANN_CSQ_PLI_SNV.out.tbi_ann )
.set { ch_vcf_tbi }
//
// Ranks one multisample VCF per variant call region
//
if(!params.skip_rank_variants) {
// Only run if we have affected individuals
RANK_VARIANTS_SNV (
ANN_CSQ_PLI_SNV.out.vcf_ann.filter { meta, vcf -> meta.contains_affected },
ch_pedfile,
ch_reduced_penetrance,
ch_score_config_snv
)
ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions)

// If there are affected individuals and RANK_VARIANTS has been run,
// input that to VCF concatenation
RANK_VARIANTS_SNV.out.vcf
.join( RANK_VARIANTS_SNV.out.tbi )
.set { ch_vcf_tbi_per_region }
} else {
// otherwise grab the VCF that should have gone into RANK_VARIANTS
ANN_CSQ_PLI_SNV.out.vcf_ann
.join( ANN_CSQ_PLI_SNV.out.tbi_ann )
.set { ch_vcf_tbi_per_region }
}
} else {
// If neither snv_annotation nor rank_variants was run, take the output from
// SHORT_VARIANT_CALLING
SHORT_VARIANT_CALLING.out.combined_bcf
.join( SHORT_VARIANT_CALLING.out.combined_csi )
.set { ch_vcf_tbi }
.set { ch_vcf_tbi_per_region }
}

ch_vcf_tbi
.map { meta, vcf, tbi ->
new_meta = [
id:'multisample',
contains_affected: meta.contains_affected.any()
]
[ new_meta, vcf, tbi ]
}
ch_vcf_tbi_per_region
.map { meta, vcf, tbi -> [ [ id: 'multisample' ], vcf, tbi ] }
.groupTuple()
.set { ch_bcftools_concat_in }

// Concat into a multisample VCF with all regions
// Publish from here if we don't run rank variants
// Concat into a multisample VCF with all regions and publish
BCFTOOLS_CONCAT ( ch_bcftools_concat_in )
ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions)

// Make an echtvar database of all samples
ECHTVAR_ENCODE ( BCFTOOLS_CONCAT.out.vcf )
ch_versions = ch_versions.mix(ECHTVAR_ENCODE.out.versions)

if(!params.skip_snv_annotation && !params.skip_rank_variants) {
// Only run if we have affected individuals
// Publish from here if we have affected individuals
// Not sure if this could be run in parallel or not
RANK_VARIANTS_SNV (
BCFTOOLS_CONCAT.out.vcf.filter { meta, vcf -> meta.contains_affected },
ch_pedfile,
ch_reduced_penetrance,
ch_score_config_snv
)
ch_versions = ch_versions.mix(RANK_VARIANTS_SNV.out.versions)

split_multisample_in = Channel.empty()

// If there are affected individuals and RANK_VARIANTS has been run,
// split that, otherwise grab the VCF that should have gone into RANK_VARIANTS
split_multisample_in = split_multisample_in
.mix(
RANK_VARIANTS_SNV.out.vcf
.join( RANK_VARIANTS_SNV.out.tbi )
.filter { meta, vcf, tbi -> meta.contains_affected }
)
.mix( BCFTOOLS_CONCAT.out.vcf
.join( BCFTOOLS_CONCAT.out.tbi )
.filter { meta, vcf, tbi -> !meta.contains_affected }
)
} else {
BCFTOOLS_CONCAT.out.vcf
.join( BCFTOOLS_CONCAT.out.tbi )
.set { split_multisample_in }
}
// Split multisample VCF to also publish a VCF per sample
BCFTOOLS_PLUGINSPLIT ( split_multisample_in, [], [], [], [] )
BCFTOOLS_PLUGINSPLIT ( BCFTOOLS_CONCAT.out.vcf.join(BCFTOOLS_CONCAT.out.tbi ), [], [], [], [] )
ch_versions = ch_versions.mix(BCFTOOLS_PLUGINSPLIT.out.versions)

BCFTOOLS_PLUGINSPLIT.out.vcf
Expand Down

0 comments on commit ef6a9a2

Please sign in to comment.