diff --git a/.tests/integration/config/config.yaml b/.tests/integration/config/config.yaml index e52a5d7..05d6797 100644 --- a/.tests/integration/config/config.yaml +++ b/.tests/integration/config/config.yaml @@ -11,7 +11,7 @@ default_container: "docker://hydragenetics/common:1.11.1" modules: alignment: "v0.6.0" - annotation: "v0.3.0" + annotation: "v1.0.1" compression: "v2.0.0" cnv_sv: "v0.5.0" filtering: "v0.3.0" @@ -57,16 +57,19 @@ bcftools_split_vep: bcftools_view_biallelic: extra: "-m2 -M2 -v snps" -bcftools_filter_cnvpytor: - exclude: "'pytorp1>0.0001 | pytorDG<100000 | pytorpN>0.5 | pytorQ0>0.5'" +bcftools_softfilter_cnvpytor: + exclude: "'pytorP1>0.0001 | pytorDG<100000 | pytorPN>0.5 | ( pytorQ0>0.5 && FORMAT/CN != 0)'" extra: "-s LowQual " +bcftools_hardfilter_cnvpytor: + exclude: "'pytorP1>0.0001 | pytorDG<100000 | pytorPN>0.5 | ( pytorQ0>0.5 && FORMAT/CN != 0)'" + bcftools_view_pass: container: "docker://hydragenetics/common:1.11.1" extra: "--apply-filter PASS " bcftools_view_svdb: - extra: "-f 'PASS' -e 'tiddit_AF>0.05 | manta_AF>0.05 | cnvpytor_AF>0.05 | gnomad_AF>0.1'" + extra: "-f 'PASS' -e 'manta_AF>0.05 | cnvpytor_AF>0.05 | gnomad_AF>0.1'" bwa_mem: container: "docker://hydragenetics/bwa:0.7.15" @@ -319,8 +322,8 @@ stranger: svdb_merge: container: "docker://hydragenetics/svdb:2.8.2" overlap: 0.6 - priority: "tiddit,manta,cnvpytor" - extra: "--pass_only " + priority: "manta,cnvpytor" + extra: "--pass_only --no_intra " svdb_query: container: "docker://hydragenetics/svdb:2.6.0" @@ -332,7 +335,7 @@ tabix: container: "docker://hydragenetics/common:0.1.9" tiddit: - container: "docker://hydragenetics/tiddit:3.3.2" + container: "docker://hydragenetics/tiddit:3.7.0" upd: container: "docker://hydragenetics/upd:0.1.1" @@ -341,16 +344,19 @@ upd: vep: container: "docker://ensemblorg/ensembl-vep:release_109.3" vep_cache: "reference/VEP/" + mode: "--offline --cache --refseq " extra: "--assembly GRCh38 --check_existing --pick --sift b --polyphen b --ccds --symbol --af --af_1kg --af_gnomad --max_af" vep_trio: container: "docker://ensemblorg/ensembl-vep:release_109.3" vep_cache: "reference/VEP/" + mode: "--offline --cache --refseq " extra: "--assembly GRCh38 --check_existing --pick --max_af --buffer_size 50000" vep_svdb: container: "docker://ensemblorg/ensembl-vep:release_110.1" vep_cache: "reference/VEP/" + mode: "--offline --cache --refseq " extra: "--assembly GRCh38 --check_existing --pick --custom file=/data/ref_data/gnomad/gnomad_sv/gnomad.v4.0.sv.vcf.gz,short_name=gnomad,fields=AF%CN_NONREF_FREQ%FILTER%ALGORITHMS%EVIDENCE%SVTYPE,format=vcf,reciprocal=1,overlap_cutoff=60,distance=1000,same_type=1" verifybamid2: diff --git a/.tests/integration/config/config_hard_filter_germline.yaml b/.tests/integration/config/config_hard_filter_germline.yaml index 09c8d9d..288283f 100644 --- a/.tests/integration/config/config_hard_filter_germline.yaml +++ b/.tests/integration/config/config_hard_filter_germline.yaml @@ -1,5 +1,5 @@ filters: germline: description: "Hard filter germline" - expression: "(VEP:MAX_AF > 0.10)" + expression: "(VEP:MAX_AF > 0.10 and VEP:SYMBOL != POLG)" soft_filter: "False" diff --git a/.tests/integration/config/output_list.json b/.tests/integration/config/output_list.json index f0b864a..b8a8556 100644 --- a/.tests/integration/config/output_list.json +++ b/.tests/integration/config/output_list.json @@ -3,7 +3,7 @@ "results/{sample}/{sample}_snv_indels.vcf.gz.tbi": {"name": "_copy_snv_indels_tbi", "file": "vcf_final/{sample}_N.vcf.gz.tbi", "types": ["N"]}, "results/{sample}/{sample}_snv_indels.filtered.vcf.gz": {"name": "_copy_snv_indels_filtered_vcf", "file": "vcf_final/{sample}_N.vep_annotated.filter.germline.vcf.gz", "types": ["N"]}, "results/{sample}/{sample}_snv_indels.filtered.vcf.gz.tbi": {"name": "_copy_snv_indels_filtered_tbi", "file": "vcf_final/{sample}_N.vep_annotated.filter.germline.vcf.gz.tbi", "types": ["N"]}, - "results/{sample}/cnv_sv/{sample}.cnvpytor_filtered.vcf.gz": {"name": "_copy_cnvpytor_filtered_vcf", "file": "cnv_sv/cnvpytor/{sample}_N.filtered.vcf.gz", "types": ["N"]}, + "results/{sample}/cnv_sv/{sample}.cnvpytor_filtered.vcf.gz": {"name": "_copy_cnvpytor_filtered_vcf", "file": "cnv_sv/cnvpytor/{sample}_N.hardfiltered.vcf.gz", "types": ["N"]}, "results/{sample}/cnv_sv/{sample}.cnvpytor.vcf.gz": {"name": "_copy_cnvpytor_vcf", "file": "cnv_sv/cnvpytor/{sample}_N.softfiltered.vcf.gz", "types": ["N"]}, "results/{sample}/cnv_sv/{sample}.cnvpytor_filtered.aed": {"name": "_copy_cnvpytor_filtered_aed", "file": "cnv_sv/cnvpytor/{sample}_N_filtered.aed", "types": ["N"]}, "results/{sample}/cnv_sv/{sample}.cnvpytor.aed": {"name": "_copy_cnvpytor_aed", "file": "cnv_sv/cnvpytor/{sample}_N.aed", "types": ["N"]}, diff --git a/.tests/integration/results/versions/software_Poirot/Poirot__update_svdb_merge_mqc_versions.yaml b/.tests/integration/results/versions/software_Poirot/Poirot__update_svdb_merge_mqc_versions.yaml new file mode 100644 index 0000000..e69de29 diff --git a/.tests/integration/results/versions/software_Poirot/softwares_mqc_versions.yaml b/.tests/integration/results/versions/software_Poirot/softwares_mqc_versions.yaml new file mode 100644 index 0000000..e69de29 diff --git a/README.md b/README.md index 9e67638..3eb9938 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ The workflow repository contains a dry run test of the pipeline in `.tests/inte ```bash $ cd .tests/integration -$ snakemake -n -s ../../workflow/Snakefile --configfiles ../../config/config.yaml config.yaml +$ snakemake -n -s ../../workflow/Snakefile --configfile config/config.yaml ``` ## :rocket: [Usage](https://poirot-rd-wgs.readthedocs.io/en/latest/running/) diff --git a/config/config.yaml b/config/config.yaml index 7052665..95d8000 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,5 +1,7 @@ --- +aligner: "bwa_gpu" +snp_caller: "deepvariant_gpu" resources: "config/resources.yaml" samples: "samples_with_info.tsv" @@ -11,7 +13,7 @@ default_container: "docker://hydragenetics/common:1.11.1" modules: alignment: "v0.6.0" - annotation: "v0.3.0" + annotation: "v1.0.1" compression: "v2.0.0" cnv_sv: "v0.5.0" filtering: "v0.3.0" @@ -23,18 +25,18 @@ modules: snv_indels: "v1.0.0" reference: - coverage_bed: "/beegfs-storage/data/ref_data/refseq/refseq_select_mane_with_MT_20240102.bed" #Created bed with https://github.com/zezzipa/UCSCtable2bed + coverage_bed: "/beegfs-storage/data/ref_data/refseq/refseq_select_mane_with_MT_with_non_coding_20240613.bed" # see /beegfs-storage/data/ref_data/refseq/README design_bed: "/beegfs-storage/data/ref_genomes/GRCh38/GRCmasked/homo_sapiens.hg38.wgs.bed" exome_bed: "/beegfs-storage/data/ref_genomes/GRCh38/GRCmasked/Twist_Comprehensive_Exome_Covered_Targets_hg38.bed" exome_intervals: "/beegfs-storage/data/ref_genomes/GRCh38/GRCmasked/Twist_Comprehensive_Exome_Covered_Targets_hg38.interval_list" fasta: "/beegfs-storage/data/ref_genomes/GRCh38/GRCmasked/GCA_000001405.15_GRCh38_no_alt_analysis_set_GRCmasked.fasta" fai: "/beegfs-storage/data/ref_genomes/GRCh38/GRCmasked/GCA_000001405.15_GRCh38_no_alt_analysis_set_GRCmasked.fasta.fai" - genepanels: "/beegfs-storage/projects/wp3/Reference_files/Manifest/Clinical_research_exome/Gene_panels/genepanels_WGS.list" + genepanels: "config/gene_panels/genepanels_WGS.list" sites: "/beegfs-storage/data/ref_genomes/GRCh38/GRCmasked/Homo_sapiens_assembly38.known_indels.vcf.gz" wgs_intervals: "/beegfs-storage/data/ref_genomes/GRCh38/GRCmasked/homo_sapiens.hg38.wgs.interval_list" str_panels_dir: "config/str_panels" str_panels: - - "ataxia.list" + - "ataxi.list" merge_contigs: # contigs to be merged to a single BAM for mark duplicates - ".*_random" - "chrUn_.*" @@ -42,9 +44,6 @@ reference: trimmer_software: "fastp_pe" -aligner: "bwa_gpu" # "bwa_cpu" or "bwa_gpu" -snp_caller: "deepvariant_gpu" # "deepvariant_cpu" or "deepvariant_gpu" - automap: container: "docker://hydragenetics/automap:1.2" build: "hg38" @@ -57,15 +56,18 @@ bcftools_split_vep: bcftools_view_biallelic: extra: "-m2 -M2 -v snps" -bcftools_filter_cnvpytor: - exclude: "'pytorP1>0.0001 | pytorDG<100000 | pytorPN>0.5 | pytorQ0>0.5'" +bcftools_softfilter_cnvpytor: + exclude: "'pytorP1>0.0001 | pytorDG<100000 | pytorPN>0.5 | ( pytorQ0>0.5 && FORMAT/CN != 0)'" extra: "-s LowQual " +bcftools_hardfilter_cnvpytor: + exclude: "'pytorP1>0.0001 | pytorDG<100000 | pytorPN>0.5 | ( pytorQ0>0.5 && FORMAT/CN != 0)'" + bcftools_view_pass: extra: "--apply-filter PASS " bcftools_view_svdb: - extra: "-f 'PASS' -e 'tiddit_AF>0.05 | manta_AF>0.05 | cnvpytor_AF>0.05 | gnomad_AF>0.1'" + extra: "-f 'PASS' -e 'manta_AF>0.05 | cnvpytor_AF>0.05 | gnomad_AF>0.1'" bwa_mem: container: "docker://hydragenetics/bwa:0.7.15" @@ -109,7 +111,6 @@ cnvpytor_filter: view: "500" create_cov_excel: - container: "docker://hydragenetics/common:1.11.1" covLimits: "10 20 30" deepvariant: @@ -119,7 +120,7 @@ deepvariant: expansionhunter: container: "docker://hydragenetics/expansionhunter:5.0.0" - variant_catalog: "/beegfs-storage/data/ref_data/stranger/v0.8.1/variant_catalog_hg38.json" + variant_catalog: "config/str_catalog/variant_catalog_hg38.json" fastp_pe: container: "docker://hydragenetics/fastp:0.20.1" @@ -313,25 +314,25 @@ spring: stranger: container: "docker://hydragenetics/stranger:0.8.1" - catalog: "/beegfs-storage/data/ref_data/stranger/v0.8.1/variant_catalog_hg38.json" + catalog: "config/str_catalog/variant_catalog_hg38.json" svdb_merge: container: "docker://hydragenetics/svdb:2.8.2" overlap: 0.6 - priority: "tiddit,manta,cnvpytor" - extra: "--pass_only " + priority: "manta,cnvpytor" + extra: "--pass_only --no_intra " svdb_query: container: "docker://hydragenetics/svdb:2.8.2" db_string: - "--db /beegfs-storage/data/ref_data/wp3/svdb/svdb_create/tiddit_wgs_hg38_1.0.vcf.gz,/beegfs-storage/data/ref_data/wp3/svdb/svdb_create/manta_wgs_hg38_1.0.vcf.gz,/beegfs-storage/data/ref_data/wp3/svdb/svdb_create/cnvpytor_wgs_hg38_1.0.vcf.gz" - extra: "--in_frq FRQ,FRQ,FRQ --in_occ OCC,OCC,OCC --out_frq tiddit_AF,manta_AF,cnvpytor_AF --out_occ tiddit_OCC,manta_OCC,cnpytor_OCC " + "--db /beegfs-storage/data/ref_data/wp3/svdb/svdb_create/manta_wgs_hg38_1.0.vcf.gz,/beegfs-storage/data/ref_data/wp3/svdb/svdb_create/cnvpytor_wgs_hg38_1.0.vcf.gz" + extra: "--in_frq FRQ,FRQ --in_occ OCC,OCC --out_frq manta_AF,cnvpytor_AF --out_occ manta_OCC,cnvpytor_OCC " tabix: container: "docker://hydragenetics/common:1.11.1" tiddit: - container: "docker://hydragenetics/tiddit:3.3.2" + container: "docker://hydragenetics/tiddit:3.7.0" upd: container: "docker://hydragenetics/upd:0.1.1" @@ -340,16 +341,19 @@ upd: vep: container: "docker://ensemblorg/ensembl-vep:release_110.1" vep_cache: "/beegfs-storage/data/ref_genomes/VEP/" + mode: "--offline --cache --refseq " extra: "--assembly GRCh38 --check_existing --pick --sift b --polyphen b --ccds --symbol --af --af_1kg --af_gnomad --max_af" vep_trio: container: "docker://ensemblorg/ensembl-vep:release_110.1" vep_cache: "/beegfs-storage/data/ref_genomes/VEP/" + mode: "--offline --cache --refseq " extra: "--assembly GRCh38 --check_existing --pick --max_af --buffer_size 50000" vep_svdb: container: "docker://ensemblorg/ensembl-vep:release_110.1" vep_cache: "/beegfs-storage/data/ref_genomes/VEP/" + mode: "--offline --cache --refseq " extra: "--assembly GRCh38 --check_existing --pick --custom file=/data/ref_data/gnomad/gnomad_sv/gnomad.v4.0.sv.vcf.gz,short_name=gnomad,fields=AF%CN_NONREF_FREQ%FILTER%ALGORITHMS%EVIDENCE%SVTYPE,format=vcf,reciprocal=1,overlap_cutoff=60,distance=1000,same_type=1" verifybamid2: diff --git a/config/config_bianca.yaml b/config/config_bianca.yaml index 49b6f8d..817907d 100644 --- a/config/config_bianca.yaml +++ b/config/config_bianca.yaml @@ -1,26 +1,28 @@ -resources: config/resources_bianca.yaml -samples: config/samples.tsv -units: config/units.tsv +aligner: bwa_gpu +snp_caller: deepvariant_gpu +resources: config/resources_bianca.yaml +samples: config/samples.tsv +units: config/units.tsv hydra_local_path: "/castor/project/proj/poirot_pipeline/" - output: config/output_list.json -default_container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_common_1.10.2.sif +default_container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_common_1.11.1.sif modules: - alignment: "v0.4.0" - annotation: "v0.3.0" + alignment: "v0.6.0" + annotation: "v1.0.1" compression: "v2.0.0" - cnv_sv: "b549266" + cnv_sv: "v0.5.0" filtering: "v0.3.0" - parabricks: "074a4dc" + parabricks: "v1.2.0" prealignment: "v1.2.0" - qc: "da66130" + qc: "v0.5.0" + misc: "v0.2.0" mitochondrial: "v0.1.0" - snv_indels: "v0.5.0" + snv_indels: "v1.0.0" reference: coverage_bed: /castor/project/proj/poirot_pipeline/reference_files/refseq_select_mane_with_MT_20240102.bed @@ -32,25 +34,41 @@ reference: genepanels: /castor/project/proj/poirot_pipeline/reference_files/gene_panels/genepanels_WGS.list sites: /castor/project/proj/poirot_pipeline/reference_files/homo_sapiens.known_indels.vcf.gz wgs_intervals: /castor/project/proj/poirot_pipeline/reference_files/homo_sapiens.wgs.interval_list + str_panels_dir: "config/str_panels" + str_panels: + - "ataxi.list" + merge_contigs: # contigs to be merged to a single BAM for mark duplicates + - ".*_random" + - "chrUn_.*" + - "chrEBV" trimmer_software: fastp_pe -aligner: bwa_gpu - -snp_caller: deepvariant_gpu - automap: build: hg38 container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_automap_1.2.sif extra: --DP 10 --minsize 3 --chrX outdir: cnv_sv/automap +bcftools_split_vep: + columns: "gnomad_AF:Float" + bcftools_view_biallelic: extra: -m2 -M2 -v snps +bcftools_softfilter_cnvpytor: + exclude: "'pytorP1>0.0001 | pytorDG<100000 | pytorPN>0.5 | ( pytorQ0>0.5 && FORMAT/CN != 0)'" + extra: "-s LowQual " + +bcftools_hardfilter_cnvpytor: + exclude: "'pytorP1>0.0001 | pytorDG<100000 | pytorPN>0.5 | ( pytorQ0>0.5 && FORMAT/CN != 0)'" + bcftools_view_pass: extra: '--apply-filter PASS ' +bcftools_view_svdb: + extra: "-f 'PASS' -e 'manta_AF>0.05 | cnvpytor_AF>0.05 | gnomad_AF>0.1'" + bwa_mem: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_bwa_0.7.15.sif amb: /castor/project/proj/poirot_pipeline/reference_files/GCA_000001405.15_GRCh38_no_alt_analysis_set_GRCmasked.fasta.amb @@ -80,7 +98,8 @@ bwa_mem_mito: cnvpytor_readdepth: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_cnvpytor_1.3.1.sif - length_list: 500 10000 100000 + length_list: "500" + extra: "-chrom chr1 chr2 chr3 chr4 chr5 chr6 chr7 chr8 chr9 chr10 chr11 chr12 chr13 chr14 chr15 chr16 chr17 chr18 chr19 chr20 chr21 chr22 chrX chrY chrM " cnvpytor_filter: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_cnvpytor_1.3.1.sif @@ -94,18 +113,10 @@ cnvpytor_filter: create_cov_excel: covLimits: 10 20 30 -deepvariant_call_variants: - container: /castor/project/proj/poirot_pipeline/singularity_files/google_deepvariant_1.5.0.sif - model: /opt/models/wgs/model.ckpt - -deepvariant_make_examples: - container: /castor/project/proj/poirot_pipeline/singularity_files/google_deepvariant_1.5.0.sif - extra: --channels insert_size - n_shards: 80 - -deepvariant_postprocess_variants: - container: /castor/project/proj/poirot_pipeline/singularity_files/google_deepvariant_1.5.0.sif - vcf_type: "gvcf" +deepvariant: + container: "docker://google/deepvariant:1.6.1" + model_type: "WGS" + output_gvcf: true expansionhunter: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_expansionhunter_5.0.0.sif @@ -113,6 +124,7 @@ expansionhunter: fastp_pe: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_fastp_0.20.1.sif + # Default enabled trimming parameters for fastp. Specified for clarity. extra: --trim_poly_g --qualified_quality_phred 15 --unqualified_percent_limit 40 --n_base_limit 5 --length_required 15 fastqc: @@ -180,9 +192,9 @@ gatk_split_multi_allelic_sites: gatk_variant_filtration: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_gatk4_4.2.2.0.sif -glnexus: - configfile: DeepVariantWGS - container: /castor/project/proj/poirot_pipeline/singularity_files/ghcr.io_dnanexus-rnd_glnexus_v1.4.1.sif +glnexus_trio: + container: "docker://ghcr.io/dnanexus-rnd/glnexus:v1.4.1" + configfile: "DeepVariantWGS" glnexus_peddy: configfile: DeepVariantWGS @@ -202,7 +214,7 @@ manta_run_workflow_n: mosdepth: by: '500' container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_mosdepth_0.3.2.sif - extra: '--fast-mode --no-per-base ' + extra: "--fast-mode --use-median --no-per-base " mosdepth_bed: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_mosdepth_0.3.2.sif @@ -243,13 +255,15 @@ mt_reference: mt_shifted: /castor/project/proj/poirot_pipeline/reference_files/gatk_mito/Homo_sapiens_assembly38.chrM.shifted_by_8000_bases.fasta pbrun_deepvariant: - container: /castor/project/proj/poirot_pipeline/singularity_files/nvcr.io_nvidia_clara_clara-parabricks_4.2.1-1.sif + container: /castor/project/proj/poirot_pipeline/singularity_files/nvcr.io_nvidia_clara_clara-parabricks_4.3.0-1.sif extra: "--num-cpu-threads-per-stream 8 --disable-use-window-selector-model --gvcf " pbrun_fq2bam: - container: /castor/project/proj/poirot_pipeline/singularity_files/nvcr.io_nvidia_clara_clara-parabricks_4.2.1-1.sif + container: /castor/project/proj/poirot_pipeline/singularity_files/nvcr.io_nvidia_clara_clara-parabricks_4.3.0-1.sif extra: "--gpusort --gpuwrite " + + peddy: config: config/peddy_mqc.yaml container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_peddy_0.4.8.sif @@ -270,9 +284,6 @@ picard_collect_hs_metrics: picard_collect_insert_size_metrics: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_picard_2.25.0.sif -picard_collect_multiple_metrics: - container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_picard_2.25.0.sif - picard_collect_wgs_metrics: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_picard_2.25.0.sif @@ -283,13 +294,13 @@ reviewer: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_reviewer_0.2.7.sif samtools_stats: - container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_common_1.10.2.sif + container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_common_1.11.1.sif samtools_idxstats: - container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_common_1.10.2.sif + container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_common_1.11.1.sif samtools: - container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_common_1.10.2.sif + container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_common_1.11.1.sif smn_caller: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_smncopynumbercaller_1.1.2.sif @@ -302,42 +313,53 @@ spring: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_spring_1.0.1.sif stranger: - catalog: /castor/project/proj/poirot_pipeline/reference_files/stranger/v0.8.1/variant_catalog_grch38.json container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_stranger_0.8.1.sif + catalog: "config/str_catalog/variant_catalog_hg38.json" svdb_merge: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_svdb_2.6.0.sif overlap: 0.6 - extra: "--pass-only " + priority: "manta,cnvpytor" + extra: "--pass_only --no_intra " svdb_query: container: "docker://hydragenetics/svdb:2.8.2" db_string: - "--db /castor/project/proj/poirot_pipeline/reference_files/tiddit_wgs_hg38_1.0.vcf.gz,/castor/project/proj/poirot_pipeline/reference_files/manta_wgs_hg38_1.0.vcf.gz,/castor/project/proj/poirot_pipeline/reference_files/cnvpytor_wgs_hg38_1.0.vcf.gz" - extra: "--in_frq FRQ,FRQ,FRQ --in_occ OCC,OCC,OCC --out_frq tiddit_AF,manta_AF,cnvpytor_AF --out_occ tiddit_OCC,manta_OCC,cnpytor_OCC " + "--db /castor/project/proj/poirot_pipeline/reference_files/manta_wgs_hg38_1.0.vcf.gz,/castor/project/proj/poirot_pipeline/reference_files/cnvpytor_wgs_hg38_1.0.vcf.gz" + extra: "--in_frq FRQ,FRQ --in_occ OCC,OCC --out_frq manta_AF,cnvpytor_AF --out_occ manta_OCC,cnvpytor_OCC " tabix: - container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_common_1.10.2.sif + container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_common_1.11.1.sif tiddit: - container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_tiddit_3.3.2.sif + container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_tiddit_3.7.0.sif upd: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_upd_0.1.1.sif extra: '--vep ' vep: - container: /castor/project/proj/poirot_pipeline/singularity_files/ensemblorg_ensembl-vep_release_109.3.sif + container: /castor/project/proj/poirot_pipeline/singularity_files/ensemblorg_ensembl-vep_release_110.1.sif + mode: "--offline --cache --refseq " extra: --assembly GRCh38 --check_existing --pick --sift b --polyphen b --ccds --symbol --af --af_1kg --af_gnomad --max_af vep_cache: /castor/project/proj/poirot_pipeline/reference_files/VEP/ vep_trio: - container: /castor/project/proj/poirot_pipeline/singularity_files/ensemblorg_ensembl-vep_release_109.3.sif + container: /castor/project/proj/poirot_pipeline/singularity_files/ensemblorg_ensembl-vep_release_110.1.sif + mode: "--offline --cache --refseq " extra: --assembly GRCh38 --check_existing --pick --max_af --buffer_size 50000 vep_cache: /castor/project/proj/poirot_pipeline/reference_files/VEP/ +vep_svdb: + container: "docker://ensemblorg/ensembl-vep:release_110.1" + vep_cache: "/beegfs-storage/data/ref_genomes/VEP/" + mode: "--offline --cache --refseq " + extra: "--assembly GRCh38 --check_existing --pick --custom file=/castor/project/proj/poirot_pipeline/reference_files/gnomad/gnomad_sv/gnomad.v4.0.sv.vcf.gz,short_name=gnomad,fields=AF%CN_NONREF_FREQ%FILTER%ALGORITHMS%EVIDENCE%SVTYPE,format=vcf,reciprocal=1,overlap_cutoff=60,distance=1000,same_type=1" + + verifybamid2: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_verifybamid2_2.0.1.sif" + mode: "--offline --cache --refseq " svd_mu: "/castor/project/proj/poirot_pipeline/reference_files/1000g.phase3.100k.b38.vcf.gz.dat.mu" vt_decompose: diff --git a/config/config_hard_filter_germline.yaml b/config/config_hard_filter_germline.yaml index 09c8d9d..e5f4c5b 100644 --- a/config/config_hard_filter_germline.yaml +++ b/config/config_hard_filter_germline.yaml @@ -1,5 +1,5 @@ filters: germline: description: "Hard filter germline" - expression: "(VEP:MAX_AF > 0.10)" + expression: "(VEP:MAX_AF > 0.10 and VEP:NA_TRUE:SYMBOL != POLG)" soft_filter: "False" diff --git a/config/output_list.json b/config/output_list.json index f0b864a..b8a8556 100644 --- a/config/output_list.json +++ b/config/output_list.json @@ -3,7 +3,7 @@ "results/{sample}/{sample}_snv_indels.vcf.gz.tbi": {"name": "_copy_snv_indels_tbi", "file": "vcf_final/{sample}_N.vcf.gz.tbi", "types": ["N"]}, "results/{sample}/{sample}_snv_indels.filtered.vcf.gz": {"name": "_copy_snv_indels_filtered_vcf", "file": "vcf_final/{sample}_N.vep_annotated.filter.germline.vcf.gz", "types": ["N"]}, "results/{sample}/{sample}_snv_indels.filtered.vcf.gz.tbi": {"name": "_copy_snv_indels_filtered_tbi", "file": "vcf_final/{sample}_N.vep_annotated.filter.germline.vcf.gz.tbi", "types": ["N"]}, - "results/{sample}/cnv_sv/{sample}.cnvpytor_filtered.vcf.gz": {"name": "_copy_cnvpytor_filtered_vcf", "file": "cnv_sv/cnvpytor/{sample}_N.filtered.vcf.gz", "types": ["N"]}, + "results/{sample}/cnv_sv/{sample}.cnvpytor_filtered.vcf.gz": {"name": "_copy_cnvpytor_filtered_vcf", "file": "cnv_sv/cnvpytor/{sample}_N.hardfiltered.vcf.gz", "types": ["N"]}, "results/{sample}/cnv_sv/{sample}.cnvpytor.vcf.gz": {"name": "_copy_cnvpytor_vcf", "file": "cnv_sv/cnvpytor/{sample}_N.softfiltered.vcf.gz", "types": ["N"]}, "results/{sample}/cnv_sv/{sample}.cnvpytor_filtered.aed": {"name": "_copy_cnvpytor_filtered_aed", "file": "cnv_sv/cnvpytor/{sample}_N_filtered.aed", "types": ["N"]}, "results/{sample}/cnv_sv/{sample}.cnvpytor.aed": {"name": "_copy_cnvpytor_aed", "file": "cnv_sv/cnvpytor/{sample}_N.aed", "types": ["N"]}, diff --git a/config/resources.yaml b/config/resources.yaml index 034972f..6301ce0 100644 --- a/config/resources.yaml +++ b/config/resources.yaml @@ -149,6 +149,7 @@ spring: tiddit: threads: 8 + time: "20:00:00" vep: threads: 4 diff --git a/images/dag.svg b/images/dag.svg index cc652cd..db65df0 100644 --- a/images/dag.svg +++ b/images/dag.svg @@ -1,1962 +1,2017 @@ - - + snakemake_dag - + 0 - -all + +all 1 - -_copy_spring + +_copy_cnvpytor_aed 1->0 - - + + 2 - -compression_spring -barcode: CAAGCTAG+ACATAGCG -flowcell: HVWHGDSXX -lane: L001 -sample: NA12878 -type: N + +vcf_to_aed - + 2->1 - - + + 3 - -_copy_reviewer + +cnv_sv_cnvpytor_filter - - -3->0 - - + + +3->2 + + + + + +67 + +filtering_bcftools_softfilter_cnvpytor + + + +3->67 + + + + + +96 + +filtering_bcftools_hardfilter_cnvpytor + + + +3->96 + + 4 - -cnv_sv_reviewer + +cnv_sv_cnvpytor_readdepth - + 4->3 - - + + 5 - -alignment_samtools_sort_eh_bam + +parabricks_fq2bam - + 5->4 - - - - - -15 - -misc_samtools_index + + - - -5->15 - - + + +7 + +parabricks_deepvariant - - -6 - -cnv_sv_expansionhunter + + +5->7 + + - - -6->4 - - + + +11 + +cnv_sv_smn_manifest - - -6->5 - - + + +5->11 + + 16 - -cnv_sv_reviewer_generate_locus_list - - - -6->16 - - + +compression_samtools_view +file: NA12878_N - - -62 - -annotation_stranger - - - -6->62 - - - - - -7 - -parabricks_fq2bam + + +5->16 + + - - -7->6 - - + + +32 + +mitochondrial_gatk_print_reads - - -12 - -parabricks_deepvariant + + +5->32 + + - - -7->12 - - + + +41 + +qc_samtools_idxstats - - -40 - -mitochondrial_gatk_print_reads + + +5->41 + + - - -7->40 - - + + +42 + +qc_mosdepth - - -59 - -cnv_sv_cnvpytor_readdepth + + +5->42 + + - - -7->59 - - + + +43 + +qc_samtools_stats - - -69 - -cnv_sv_smn_manifest + + +5->43 + + - - -7->69 - - + + +50 + +qc_pic_wgs_met - - -80 - -cnv_sv_tiddit + + +5->50 + + - - -7->80 - - + + +51 + +qc_pic_hs_met - - -81 - -cnv_sv_manta_run_workflow_n + + +5->51 + + - - -7->81 - - + + +52 + +qc_pic_ins_size - - -82 - -cnv_sv_manta_config_n + + +5->52 + + - - -7->82 - - + + +53 + +qc_pic_align_sum_met - - -89 - -qc_mosdepth_bed + + +5->53 + + - - -7->89 - - + + +54 + +qc_pic_gc_met - - -90 - -qc_pic_dup_met + + +5->54 + + - - -7->90 - - + + +55 + +qc_verifybamid2 - - -106 - -compression_samtools_view -file: NA12878_N + + +5->55 + + - - -7->106 - - + + +57 + +qc_pic_dup_met - - -109 - -qc_pic_ins_size + + +5->57 + + - - -7->109 - - + + +62 + +cnv_sv_expansionhunter - - -110 - -qc_samtools_idxstats + + +5->62 + + - - -7->110 - - + + +78 + +cnv_sv_manta_run_workflow_n - - -111 - -qc_mosdepth + + +5->78 + + - - -7->111 - - + + +79 + +cnv_sv_manta_config_n - - -112 - -qc_pic_gc_met + + +5->79 + + - - -7->112 - - + + +117 + +qc_mosdepth_bed - - -114 - -qc_verifybamid2 + + +5->117 + + - - -7->114 - - + + +125 + +cnv_sv_tiddit - - -115 - -qc_pic_hs_met + + +5->125 + + - - -7->115 - - + + +6 + +prealignment_fastp_pe +barcode: CAAGCTAG+ACATAGCG +flowcell: HVWHGDSXX +lane: L001 +sample: NA12878 +type: N - - -117 - -qc_pic_align_sum_met + + +6->5 + + - - -7->117 - - + + +7->4 + + - - -118 - -qc_samtools_stats + + +46 + +snv_indels_glnexus_peddy - - -7->118 - - + + +7->46 + + - - -119 - -qc_pic_wgs_met + + +89 + +snv_indels_fix_af - - -7->119 - - + + +7->89 + + 8 - -prealignment_fastp_pe -barcode: CAAGCTAG+ACATAGCG -flowcell: HVWHGDSXX -lane: L001 -sample: NA12878 -type: N - - - -8->7 - - + +_copy_smn_pdf + + + +8->0 + + 9 - -qc_peddy - - - -9->6 - - - - - -108 - -qc_multiqc -report: DNA - - - -9->108 - - - - - -120 - -create_peddy_mqc_tsv + +cnv_sv_smn_charts - - -9->120 - - + + +9->8 + + 10 - -snv_indels_bcftools_view + +cnv_sv_smn_caller 10->9 - - + + - - -13 - -misc_tabix -file: qc/peddy/all.vcf + + +82 + +_copy_smn_json - - -10->13 - - + + +10->82 + + - - -11 - -snv_indels_glnexus_peddy + + +100 + +_copy_smn_tsv + + + +10->100 + + - + 11->10 - - + + - - -12->11 - - - - - -23 - -snv_indels_fix_af - - - -12->23 - - + + +12 + +_copy_spring - - -12->59 - - + + +12->0 + + - - -13->9 - - + + +13 + +compression_spring +barcode: CAAGCTAG+ACATAGCG +flowcell: HVWHGDSXX +lane: L001 +sample: NA12878 +type: N + + + +13->12 + + 14 - -create_ped + +_copy_samtools_crai +sample: NA12878 - - -14->9 - - + + +14->0 + + - - -14->120 - - + + +15 + +compression_samtools_index +file: compression/samtools_view/NA12878_N - - -15->4 - - + + +15->14 + + - - -16->4 - - + + +16->15 + + + + + +93 + +_copy_samtools_cram +sample: NA12878 + + + +16->93 + + 17 - -_copy_snv_indels_vcf -sample: NA12878 + +_copy_haplochack_contamination_report - + 17->0 - - + + 18 - -misc_bgzip -file: vcf_final/NA12878_N.vcf + +mitochondrial_haplocheck - + 18->17 - - - - - -54 - -filtering_bcftools_view_pass -file: vcf_final/NA12878_N + + - - -18->54 - - - - - -71 - -misc_tabix + + +92 + +mitochondrial_gatk_filter_contamination - - -18->71 - - + + +18->92 + + 19 - -bcftools_concat + +mitochondrial_gatk_select_variants - + 19->18 - - + + - - -65 - -cnv_sv_automap - - - -19->65 - - + + +19->92 + + 20 - -exclude_chrM + +mitochondrial_gatk_left_align_and_trim_variants - + 20->19 - - + + 21 - -deepvariant_add_ref -sample: NA12878 -type: N + +mitochondrial_gatk_variant_filtration - + 21->20 - - + + 22 - -misc_bgzip -file: vcf_final/NA12878_N.fix_af.vcf + +mitochondrial_gatk_filter_mutect_calls_mt - + 22->21 - - + + + + + +23 + +mitochondrial_gatk_merge_vcfs - + 23->22 - - + + 24 - -snv_indels_chrM_fix_af + +mitochondrial_gatk_lift_over_vcf - - -24->19 - - + + +24->23 + + 25 - -mitochondrial_gatk_select_variants_final + +mitochondrial_gatk_mutect2 - + 25->24 - - + + + + + +38 + +mitochondrial_gatk_merge_stats + + + +25->38 + + 26 - -mitochondrial_gatk_filter_contamination + +mitochondrial_gatk_sort_sam - + 26->25 - - + + 27 - -mitochondrial_gatk_select_variants + +mitochondrial_gatk_mark_duplicates - + 27->26 - - - - - -47 - -mitochondrial_haplocheck - - - -27->47 - - + + 28 - -mitochondrial_gatk_left_align_and_trim_variants + +mitochondrial_gatk_merge_bam_alignment - + 28->27 - - + + 29 - -mitochondrial_gatk_variant_filtration + +mitochondrial_bwa_mem_mito +mt_ref: mt_shifted - + 29->28 - - + + 30 - -mitochondrial_gatk_filter_mutect_calls_mt + +mitochondrial_gatk_sam_to_fastq - + 30->29 - - + + + + + +37 + +mitochondrial_bwa_mem_mito +mt_ref: mt + + + +30->37 + + 31 - -mitochondrial_gatk_merge_vcfs + +mitochondrial_gatk_revert_sam + + + +31->28 + + - + 31->30 - - + + - - -32 - -mitochondrial_gatk_lift_over_vcf + + +36 + +mitochondrial_gatk_merge_bam_alignment + + + +31->36 + + - + 32->31 - - + + 33 - -mitochondrial_gatk_mutect2 + +mitochondrial_gatk_mutect2 - - -33->32 - - - - - -46 - -mitochondrial_gatk_merge_stats + + +33->23 + + - - -33->46 - - + + +33->38 + + 34 - -mitochondrial_gatk_sort_sam + +mitochondrial_gatk_sort_sam - + 34->33 - - + + 35 - -mitochondrial_gatk_mark_duplicates + +mitochondrial_gatk_mark_duplicates - + 35->34 - - - - - -36 - -mitochondrial_gatk_merge_bam_alignment + + - + 36->35 - - - - - -37 - -mitochondrial_bwa_mem_mito -mt_ref: mt_shifted + + - + 37->36 - - - - - -38 - -mitochondrial_gatk_sam_to_fastq - - - -38->37 - - + + - - -45 - -mitochondrial_bwa_mem_mito -mt_ref: mt + + +38->22 + + - - -38->45 - - + + +38->92 + + - -39 - -mitochondrial_gatk_revert_sam - - - -39->36 - - - - - -39->38 - - - - - -44 - -mitochondrial_gatk_merge_bam_alignment - - - -39->44 - - - - - -40->39 - - - - - -41 - -mitochondrial_gatk_mutect2 - - - -41->31 - - - - - -41->46 - - - - - -42 - -mitochondrial_gatk_sort_sam - - - -42->41 - - - - - -43 - -mitochondrial_gatk_mark_duplicates - - - -43->42 - - - - - -44->43 - - + +39 + +_copy_multiqc_html - - -45->44 - - + + +39->0 + + - - -46->26 - - + + +40 + +qc_multiqc +report: DNA - - -46->30 - - + + +40->39 + + - - -47->26 - - + + +41->40 + + - - -63 - -_copy_haplochack_contamination_report + + +42->40 + + - - -47->63 - - + + +43->40 + + - - -48 - -_copy_snv_indels_filtered_vcf -sample: NA12878 + + +44 + +qc_peddy - - -48->0 - - + + +44->40 + + 49 - -misc_bgzip -file: vcf_final/NA12878_N.vep_annotated.filter.germline.vcf - - - -49->48 - - + +create_peddy_mqc_tsv - - -96 - -misc_tabix + + +44->49 + + - - -49->96 - - + + +44->62 + + - - -50 - -filtering_filter_vcf -file: vcf_final/NA12878_N.vep_annotated -tag: germline + + +113 + +_copy_peddy_html - - -50->49 - - + + +44->113 + + - - -51 - -misc_bgzip -file: vcf_final/NA12878_N.vep_annotated.vcf + + +45 + +snv_indels_bcftools_view_peddy - - -51->50 - - + + +45->44 + + - - -55 - -misc_tabix + + +47 + +misc_tabix +file: qc/peddy/all.vcf - + -51->55 - - - - - -52 - -annotation_vep -sample: NA12878 -type: N - - - -52->51 - - +45->47 + + - - -53 - -misc_tabix -file: vcf_final/NA12878_N.bcftools_view_pass.vcf + + +46->45 + + - + -53->52 - - +47->44 + + - + + +48 + +create_ped + + -54->52 - - +48->44 + + - - -54->53 - - + + +48->49 + + - - -55->50 - - + + +49->40 + + - - -56 - -_copy_cnvpytor_aed + + +50->40 + + - - -56->0 - - + + +51->40 + + - - -57 - -vcf_to_aed + + +52->40 + + - - -57->56 - - + + +53->40 + + - - -58 - -cnv_sv_cnvpytor_filter + + +54->40 + + - - -58->57 - - + + +55->40 + + - - -83 - -filtering_bcftools_filter_cnvpytor + + +56 + +qc_fastqc +barcode: CAAGCTAG+ACATAGCG +flowcell: HVWHGDSXX +lane: L001 +read: fastq2 +sample: NA12878 +type: N + + + +56->40 + + - - -58->83 - - + + +57->40 + + - - -85 - -misc_bgzip -file: cnv_sv/cnvpytor/NA12878_N.filtered.vcf + + +116 + +create_cov_excel - - -58->85 - - + + +57->116 + + - - -93 - -vcf_to_aed_filtered + + +58 + +qc_fastqc +barcode: CAAGCTAG+ACATAGCG +flowcell: HVWHGDSXX +lane: L001 +read: fastq1 +sample: NA12878 +type: N + + + +58->40 + + - - -58->93 - - + + +59 + +_copy_reviewer - - -59->58 - - + + +59->0 + + 60 - -_copy_stranger_vcf -sample: NA12878 + +cnv_sv_reviewer - - -60->0 - - + + +60->59 + + 61 - -misc_bgzip -file: cnv_sv/stranger/NA12878_N.stranger.vcf + +alignment_samtools_sort_eh_bam - + 61->60 - - + + - - -62->61 - - + + +63 + +misc_samtools_index - - -104 - -extract_str_bed + + +61->63 + + - - -62->104 - - + + +62->60 + + - - -63->0 - - + + +62->61 + + 64 - -_copy_automap_tsv + +cnv_sv_reviewer_generate_locus_list - - -64->0 - - + + +62->64 + + - - -65->64 - - + + +112 + +annotation_stranger - - -124 - -_copy_automap_pdf + + +62->112 + + - - -65->124 - - + + +63->60 + + + + + +64->60 + + + + + +65 + +_copy_cnvpytor_vcf +sample: NA12878 + + + +65->0 + + 66 - -_copy_smn_pdf + +misc_bgzip +file: cnv_sv/cnvpytor/NA12878_N.softfiltered.vcf - - -66->0 - - - - - -67 - -cnv_sv_smn_charts + + +66->65 + + - + 67->66 - - + + + + + +77 + +cnv_sv_svdb_merge + + + +67->77 + + 68 - -cnv_sv_smn_caller - - - -68->67 - - + +_copy_svdb_merged_filtered_vcf - - -86 - -_copy_smn_tsv - - - -68->86 - - - - - -121 - -_copy_smn_json + + +68->0 + + - - -68->121 - - + + +69 + +filtering_bcftools_view_svdb - + 69->68 - - + + 70 - -_copy_snv_indels_tbi -sample: NA12878 + +bcftools_split_vep - - -70->0 - - + + +70->69 + + + + + +71 + +fix_sv_header - + 71->70 - - + + 72 - -_copy_svdb_merged_vcf -sample: NA12878 + +annotation_vep_svdb +sample: NA12878 +type: N - - -72->0 - - + + +72->71 + + + + + +81 + +misc_bgzip +file: annotate/vep_svdb/NA12878_N.merged.svdb_query.vep_annotated.vcf + + + +72->81 + + 73 - -misc_bgzip -file: annotate/vep_svdb/NA12878_N.merged.svdb_query.vep_annotated.vcf + +misc_bgzip +file: cnv_sv/svdb_query/NA12878_N.merged.svdb_query_ref.vcf - + 73->72 - - + + 74 - -annotation_vep_svdb -sample: NA12878 -type: N + +svdb_add_ref +sample: NA12878 +type: N - + 74->73 - - - - - -102 - -fix_sv_header - - - -74->102 - - + + 75 - -misc_bgzip -file: cnv_sv/svdb_query/NA12878_N.merged.svdb_query_ref.vcf + +misc_bgzip +file: cnv_sv/svdb_query/NA12878_N.merged.svdb_query.vcf - + 75->74 - - + + 76 - -svdb_add_ref -sample: NA12878 -type: N + +cnv_sv_svdb_query - + 76->75 - - - - - -77 - -misc_bgzip -file: cnv_sv/svdb_query/NA12878_N.merged.svdb_query.vcf + + - + 77->76 - - - - - -78 - -cnv_sv_svdb_query + + - + 78->77 - - - - - -79 - -cnv_sv_svdb_merge - - - -79->78 - - - - - -80->79 - - + + - - -128 - -misc_bgzip -file: cnv_sv/tiddit/NA12878_N.vcf + + +127 + +_copy_manta_vcf - + -80->128 - - +78->127 + + - - -81->79 - - + + +79->78 + + - - -94 - -_copy_manta_vcf + + +80 + +_copy_svdb_merged_vcf +sample: NA12878 - - -81->94 - - + + +80->0 + + - - -82->81 - - + + +81->80 + + - - -83->79 - - + + +82->0 + + - - -98 - -misc_bgzip -file: cnv_sv/cnvpytor/NA12878_N.softfiltered.vcf + + +83 + +_copy_automap_pdf - - -83->98 - - + + +83->0 + + 84 - -_copy_cnvpytor_filtered_vcf -sample: NA12878 + +cnv_sv_automap - - -84->0 - - + + +84->83 + + + + + +126 + +_copy_automap_tsv + + + +84->126 + + + + + +85 + +bcftools_concat - + 85->84 - - + + - - -86->0 - - + + +99 + +misc_bgzip +file: vcf_final/NA12878_N.vcf + + + +85->99 + + + + + +86 + +exclude_chrM + + + +86->85 + + 87 - -_copy_coverage_excel + +deepvariant_add_ref +sample: NA12878 +type: N - - -87->0 - - + + +87->86 + + 88 - -create_cov_excel + +misc_bgzip +file: vcf_final/NA12878_N.fix_af.vcf - + 88->87 - - + + - + 89->88 - - + + + + + +90 + +snv_indels_chrM_fix_af + + + +90->85 + + 91 - -mosdepth_bedtools + +mitochondrial_gatk_select_variants_final - - -89->91 - - - - - -90->88 - - - - - -90->108 - - - - - -91->88 - - + + +91->90 + + - - -92 - -_copy_cnvpytor_filtered_aed + + +92->91 + + - - -92->0 - - + + +93->0 + + - - -93->92 - - + + +94 + +_copy_cnvpytor_filtered_aed - + 94->0 - - + + 95 - -_copy_snv_indels_filtered_tbi -sample: NA12878 + +vcf_to_aed_filtered - - -95->0 - - + + +95->94 + + - + 96->95 - - + + + + + +120 + +misc_bgzip +file: cnv_sv/cnvpytor/NA12878_N.hardfiltered.vcf + + + +96->120 + + 97 - -_copy_cnvpytor_vcf -sample: NA12878 + +_copy_snv_indels_tbi +sample: NA12878 - + 97->0 - - + + + + + +98 + +misc_tabix - + 98->97 - - - - - -99 - -_copy_svdb_merged_filtered_vcf + + - - -99->0 - - + + +99->98 + + - - -100 - -filtering_bcftools_view_svdb + + +108 + +filtering_bcftools_view_pass +file: vcf_final/NA12878_N - - -100->99 - - + + +99->108 + + + + + +132 + +_copy_snv_indels_vcf +sample: NA12878 + + + +99->132 + + + + + +100->0 + + 101 - -bcftools_split_vep + +_copy_snv_indels_filtered_tbi +sample: NA12878 - - -101->100 - - + + +101->0 + + + + + +102 + +misc_tabix - + 102->101 - - + + 103 - -_copy_stranger_bed + +misc_bgzip +file: vcf_final/NA12878_N.vep_annotated.filter.germline.vcf - - -103->0 - - + + +103->102 + + + + + +114 + +_copy_snv_indels_filtered_vcf +sample: NA12878 + + + +103->114 + + + + + +104 + +filtering_filter_vcf +file: vcf_final/NA12878_N.vep_annotated +tag: germline - + 104->103 - - + + 105 - -_copy_samtools_cram -sample: NA12878 + +misc_bgzip +file: vcf_final/NA12878_N.vep_annotated.vcf - - -105->0 - - + + +105->104 + + - - -106->105 - - + + +109 + +misc_tabix - - -123 - -compression_samtools_index -file: compression/samtools_view/NA12878_N + + +105->109 + + - - -106->123 - - + + +106 + +annotation_vep +sample: NA12878 +type: N + + + +106->105 + + 107 - -_copy_multiqc_html + +misc_tabix +file: vcf_final/NA12878_N.bcftools_view_pass.vcf - - -107->0 - - + + +107->106 + + + + + +108->106 + + - + 108->107 - - + + - - -109->108 - - + + +109->104 + + - - -110->108 - - + + +110 + +_copy_stranger_bed - - -111->108 - - + + +110->0 + + - - -112->108 - - + + +111 + +extract_str_bed - - -113 - -qc_fastqc -barcode: CAAGCTAG+ACATAGCG -flowcell: HVWHGDSXX -lane: L001 -read: fastq2 -sample: NA12878 -type: N - - - -113->108 - - + + +111->110 + + - - -114->108 - - + + +112->111 + + + + + +129 + +misc_bgzip +file: cnv_sv/stranger/NA12878_N.stranger.vcf + + + +112->129 + + + + + +131 + +extract_str_bed_panel +panel: ataxia + + + +112->131 + + + + + +113->0 + + - - -115->108 - - + + +114->0 + + - - -116 - -qc_fastqc -barcode: CAAGCTAG+ACATAGCG -flowcell: HVWHGDSXX -lane: L001 -read: fastq1 -sample: NA12878 -type: N - - - -116->108 - - + + +115 + +_copy_coverage_excel - - -117->108 - - + + +115->0 + + - - -118->108 - - + + +116->115 + + - - -119->108 - - + + +117->116 + + - - -120->108 - - + + +118 + +mosdepth_bedtools + + + +117->118 + + + + + +118->116 + + + + + +119 + +_copy_cnvpytor_filtered_vcf +sample: NA12878 + + + +119->0 + + + + + +120->119 + + + + + +121 + +_copy_tiddit_vcf +sample: NA12878 121->0 - - + + 122 - -_copy_samtools_crai -sample: NA12878 + +misc_bgzip +file: cnv_sv/tiddit/NA12878_N_ref.vcf - - -122->0 - - + + +122->121 + + + + + +123 + +tiddit_add_ref +sample: NA12878 +type: N - + 123->122 - - + + - - -124->0 - - + + +124 + +misc_bgzip +file: cnv_sv/tiddit/NA12878_N.vcf - - -125 - -_copy_tiddit_vcf -sample: NA12878 + + +124->123 + + - - -125->0 - - + + +125->124 + + - - -126 - -misc_bgzip -file: cnv_sv/tiddit/NA12878_N_ref.vcf + + +126->0 + + - - -126->125 - - + + +127->0 + + - - -127 - -tiddit_add_ref -sample: NA12878 -type: N + + +128 + +_copy_stranger_vcf +sample: NA12878 - - -127->126 - - + + +128->0 + + + + + +129->128 + + + + + +130 + +_copy_stranger_bed_panel + + + +130->0 + + - - -128->127 - - + + +131->130 + + + + + +132->0 + + diff --git a/workflow/Snakefile b/workflow/Snakefile index 8a6cd9e..114804d 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -74,10 +74,15 @@ use rule vep from annotation as annotation_vep with: vcf="vcf_final/{sample}_{type}.bcftools_view_pass.vcf.gz", output: vcf=temp("vcf_final/{sample}_{type}.vep_annotated.vcf"), + params: + extra=config.get("vep", {}).get("extra", "--pick"), + mode=config.get("vep", {}).get("mode", "--offline --cache"), log: "vcf_final/{sample}_{type}.vep_annotated.vcf.log", benchmark: repeat("vcf_final/{sample}_{type}.vep_annotated.vcf.benchmark.tsv", config.get("vep", {}).get("benchmark_repeats", 1)) + container: + config.get("vep", {}).get("container", config["default_container"]) use rule vep from annotation as annotation_vep_trio with: @@ -97,6 +102,8 @@ use rule vep from annotation as annotation_vep_trio with: "snv_indels/glnexus/{sample}_{type}.vep_annotated.vcf.benchmark.tsv", config.get("vep", {}).get("benchmark_repeats", 1), ) + container: + config.get("vep_trio", {}).get("container", config["default_container"]) use rule vep from annotation as annotation_vep_svdb with: @@ -116,6 +123,8 @@ use rule vep from annotation as annotation_vep_svdb with: "annotate/vep_svdb/{sample}_{type}.merged.svdb_query.vep_annotated.vcf.benchmark.tsv", config.get("vep_svdb", {}).get("benchmark_repeats", 1), ) + container: + config.get("vep_svdb", {}).get("container", config["default_container"]) module cnv_sv: @@ -187,7 +196,6 @@ use rule smn_manifest from cnv_sv as cnv_sv_smn_manifest with: use rule svdb_merge from cnv_sv as cnv_sv_svdb_merge with: input: - tiddit="cnv_sv/tiddit/{sample}_{type}.vcf", manta="cnv_sv/manta_run_workflow_n/{sample}/results/variants/diploidSV.vcf.gz", cnvpytor="cnv_sv/cnvpytor/{sample}_{type}.softfiltered.vcf", output: @@ -263,20 +271,37 @@ use rule bcftools_view from filtering as filtering_bcftools_view_pass with: extra=config.get("bcftools_view_pass", {}).get("extra", ""), -use rule bcftools_filter_exclude_region from filtering as filtering_bcftools_filter_cnvpytor with: +use rule bcftools_filter_exclude_region from filtering as filtering_bcftools_softfilter_cnvpytor with: input: vcf="cnv_sv/cnvpytor/{sample}_{type}.vcf", output: vcf="cnv_sv/cnvpytor/{sample}_{type}.softfiltered.vcf", params: - filter=lambda wildcards: "-e {}".format(config["bcftools_filter_cnvpytor"]["exclude"]), - extra=config.get("bcftools_filter_cnvpytor", {}).get("extra", ""), + filter=lambda wildcards: "-e {}".format(config["bcftools_softfilter_cnvpytor"]["exclude"]), + extra=config.get("bcftools_softfilter_cnvpytor", {}).get("extra", ""), + log: + "cnv_sv/cnvpytor/{sample}_{type}.softfiltered.vcf.log", + benchmark: + repeat( + "cnv_sv/cnvpytor/{sample}_{type}.softfiltered.vcf.benchmark.tsv", + config.get("bcftools_softfilter_cnvpytor", {}).get("benchmark_repeats", 1), + ) + + +use rule bcftools_filter_exclude_region from filtering as filtering_bcftools_hardfilter_cnvpytor with: + input: + vcf="cnv_sv/cnvpytor/{sample}_{type}.vcf", + output: + vcf="cnv_sv/cnvpytor/{sample}_{type}.hardfiltered.vcf", + params: + filter=lambda wildcards: "-e {}".format(config["bcftools_hardfilter_cnvpytor"]["exclude"]), + extra=config.get("bcftools_hardfilter_cnvpytor", {}).get("extra", ""), log: - "cnv_sv/cnvpytor/{sample}_{type}.softfilered.vcf.log", + "cnv_sv/cnvpytor/{sample}_{type}.hardfiltered.vcf.log", benchmark: repeat( - "cnv_sv/cnvpytor/{sample}_{type}.softfilered.vcf.benchmark.tsv", - config.get("bcftools_filter_cnvpytor", {}).get("benchmark_repeats", 1), + "cnv_sv/cnvpytor/{sample}_{type}.hardfiltered.vcf.benchmark.tsv", + config.get("bcftools_hardfilter_cnvpytor", {}).get("benchmark_repeats", 1), ) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 4670764..710b922 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -260,7 +260,6 @@ def get_glnexus_input(wildcards, input): def get_vcfs_for_svdb_merge(wildcards, input): vcfs_with_suffix = [] - vcfs_with_suffix.append(f"{input.tiddit}:tiddit") vcfs_with_suffix.append(f"{input.manta}:manta") vcfs_with_suffix.append(f"{input.cnvpytor}:cnvpytor") diff --git a/workflow/rules/coverage.smk b/workflow/rules/coverage.smk index 4cb40b3..9bc16a1 100644 --- a/workflow/rules/coverage.smk +++ b/workflow/rules/coverage.smk @@ -34,7 +34,6 @@ rule mosdepth_bedtools: rule create_cov_excel: input: - config="config/config.yaml", cov_regions="qc/mosdepth_bed/{sample}_{type}.regions.bed.gz", cov_thresh="qc/mosdepth_bed/{sample}_{type}.thresholds.bed.gz", duplication_file="qc/picard_collect_duplication_metrics/{sample}_{type}.duplication_metrics.txt", diff --git a/workflow/rules/vcf_to_aed.smk b/workflow/rules/vcf_to_aed.smk index c2ac003..7c0d3a6 100644 --- a/workflow/rules/vcf_to_aed.smk +++ b/workflow/rules/vcf_to_aed.smk @@ -32,7 +32,7 @@ rule vcf_to_aed: rule vcf_to_aed_filtered: input: - vcf="cnv_sv/cnvpytor/{sample}_{type}.filtered.vcf", + vcf="cnv_sv/cnvpytor/{sample}_{type}.hardfiltered.vcf", output: aed="cnv_sv/cnvpytor/{sample}_{type}_filtered.aed", params: diff --git a/workflow/scripts/create_excel.py b/workflow/scripts/create_excel.py index c3e547b..543d24f 100644 --- a/workflow/scripts/create_excel.py +++ b/workflow/scripts/create_excel.py @@ -17,7 +17,6 @@ log = logging.getLogger() # Specify input files -configfile = snakemake.input[0] duplicationFile = snakemake.input.duplication_file mosdepth = snakemake.input.summary covRegionsFile = snakemake.input.cov_regions @@ -40,15 +39,15 @@ italicFormat = workbook.add_format({'italic': True}) redFormat = workbook.add_format({'font_color': 'red'}) -with open(configfile, 'r') as file: - config_list = yaml.load(file, Loader=yaml.FullLoader) +# with open(configfile, 'r') as file: +# config_list = yaml.load(file, Loader=yaml.FullLoader) # runID = config_list['seqID']['sequencerun'] # sys.argv[5] -minCov = int(config_list['create_cov_excel']['covLimits'].split(' ')[0]) -medCov = int(config_list['create_cov_excel']['covLimits'].split(' ')[1]) -maxCov = int(config_list['create_cov_excel']['covLimits'].split(' ')[2]) -bedfile = config_list["reference"]["coverage_bed"] -genepanels = config_list["reference"]["genepanels"] +minCov = int(snakemake.config['create_cov_excel']['covLimits'].split(' ')[0]) +medCov = int(snakemake.config['create_cov_excel']['covLimits'].split(' ')[1]) +maxCov = int(snakemake.config['create_cov_excel']['covLimits'].split(' ')[2]) +bedfile = snakemake.config["reference"]["coverage_bed"] +genepanels = snakemake.config["reference"]["genepanels"] worksheetOver = workbook.add_worksheet('Overview') worksheetOver.write(0, 0, sample, headingFormat)