From cc6d26a4859f2a4e2996caea6bcd0b3566434e8f Mon Sep 17 00:00:00 2001 From: Felix Lenner <52530259+fellen31@users.noreply.github.com> Date: Thu, 15 Aug 2024 16:19:24 +0200 Subject: [PATCH] Fix file requirements (#317) * Split vep plugins into vep cache and vep plugins * Fix file requirements --- CHANGELOG.md | 1 + docs/usage.md | 5 ++++- subworkflows/local/utils_nfcore_nallo_pipeline/main.nf | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f7cf2390..6039f0b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -62,6 +62,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#312](https://github.com/genomic-medicine-sweden/nallo/pull/312) - Changed echtvar encode database creation to use dynamic `${project}` from samplesheet - [#313](https://github.com/genomic-medicine-sweden/nallo/pull/313) - Updated calling of variants in non-autosomal contigs for DeepVariant - [#314](https://github.com/genomic-medicine-sweden/nallo/pull/314) - Changed VEP annotation added in #244 to not include SpliceAI +- [#317](https://github.com/genomic-medicine-sweden/nallo/pull/317) - Changed so that `--reduced_penetrance` and `--score_config_snv` are required by rank variants and not SNV annotation - [#321](https://github.com/genomic-medicine-sweden/nallo/pull/321) - Changed the input to BUILD_INTERVALS to have `meta.id` when building intervals from reference ### `Removed` diff --git a/docs/usage.md b/docs/usage.md index 9aeec0df..1e443525 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -112,8 +112,11 @@ Some workflows require additional files: - If running without `--skip_repeat_annotation`, download a json variant catalog, (e.g. [variant_catalog_grch38.json](https://github.com/Clinical-Genomics/stranger/raw/main/stranger/resources/variant_catalog_grch38.json)) matching your reference genome to supply with `--variant_catalog`. 
- If running without `--skip_snv_annotation`, download [VEP cache](https://ftp.ensembl.org/pub/release-110/variation/vep/homo_sapiens_vep_110_GRCh38.tar.gz) to supply with `--vep_cache` and prepare a samplesheet with annotation databases ([`echtvar encode`](https://github.com/brentp/echtvar)) to supply with `--snp_db`: + - If running without `--skip_snv_annotation`, you will also need to download VEP plugin files to supply with `--vep_plugin_files` see [example](https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugin_files.csv). PLI and LoFtool. +- If running without `--skip_snv_annotation`, `--variant_consequences_snv` is also required (File containing list of SO terms listed in the order of severity from most severe to least severe for annotating genomic and mitochondrial SNVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html)). + ``` vep_files https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/SpliceAI.pm @@ -136,7 +139,7 @@ gnomad,/path/to/gnomad.v3.1.2.echtvar.popmax.v2.zip cadd,/path/to/cadd.v1.6.hg38.zip ``` -- If your samplesheet contains at least one affected sample (phenotype = 2), `--reduced_penetrance` (Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv)), `--score_config_snv` (Used by GENMOD for ranking the variants. 
Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rank_model_snv.ini)) and `--variant_consequences_snv` (File containing list of SO terms listed in the order of severity from most severe to lease severe for annotating genomic and mitochondrial SNVs. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/variant_consequences_v2.txt). You can learn more about these terms [here](https://grch37.ensembl.org/info/genome/variation/prediction/predicted_data.html)) is also required. +- If running without `--skip_rank_variants` and your samplesheet contains at least one affected sample (phenotype = 2), `--reduced_penetrance` (Used by GENMOD while modeling the variants. Contains a list of loci that show [reduced penetrance](https://medlineplus.gov/genetics/understanding/inheritance/penetranceexpressivity/) in people. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/reduced_penetrance.tsv)) and `--score_config_snv` (Used by GENMOD for ranking the variants. Sample file [here](https://github.com/nf-core/test-datasets/blob/raredisease/reference/rank_model_snv.ini)) are also required. - Optionally, if running without `--skip_snv_annotation`, supply a path to a folder containing cadd annotations with `--cadd_resources` and prescored indels with `--cadd_prescored`. Equivalent of the data/annotations/ and data/prescored/ folders described [here](https://github.com/kircherlab/CADD-scripts/#manual-installation), and it is used to calculate CADD scores for small indels. 
diff --git a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf index c315bf96..91cb7ac5 100644 --- a/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_nallo_pipeline/main.nf @@ -74,8 +74,9 @@ def fileDependencies = [ mapping : ["fasta", "somalier_sites"], assembly : ["fasta", "par_regions"], // The assembly workflow should be split into two - assembly and variant calling (requires ref) snv_calling : ["fasta", "par_regions"], - snv_annotation : ["snp_db", "vep_cache", "vep_plugin_files", "reduced_penetrance", "score_config_snv", "variant_consequences_snv"], + snv_annotation : ["snp_db", "vep_cache", "vep_plugin_files", "variant_consequences_snv"], cnv_calling : ["hificnv_xy", "hificnv_xx", "hificnv_exclude"], + rank_variants : ["reduced_penetrance", "score_config_snv"], repeat_calling : ["trgt_repeats"], repeat_annotation: ["variant_catalog"], ]