From 75d62969a51f29debf059933d7259f001d6bfa90 Mon Sep 17 00:00:00 2001 From: Padraic Corcoran Date: Tue, 2 Jan 2024 14:12:10 +0100 Subject: [PATCH 1/4] feat: handle when the coverage bed file has mitochondria genes --- config/config.yaml | 2 +- config/config_bianca.yaml | 2 +- workflow/scripts/create_excel.py | 8 ++++++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index f5a3191..91e71a0 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -22,7 +22,7 @@ modules: snv_indels: "v0.5.0" reference: - coverage_bed: "/beegfs-storage/data/ref_data/refseq/refseq_select_mane_20230828.bed" #Created bed with https://github.com/zezzipa/UCSCtable2bed + coverage_bed: "/beegfs-storage/data/ref_data/refseq/refseq_select_mane_with_MT_20240102.bed" #Created bed with https://github.com/zezzipa/UCSCtable2bed design_bed: "/beegfs-storage/data/ref_genomes/GRCh38/reference_grasnatter/homo_sapiens.wgs.bed" exome_bed: "/beegfs-storage/projects/wp3/Reference_files/Manifest/Clinical_research_exome/TWIST/Twist_Comprehensive_Exome_Covered_Targets_hg38.bed" exome_intervals: "/beegfs-storage/projects/wp3/Reference_files/Manifest/Clinical_research_exome/TWIST/Twist_Comprehensive_Exome_Covered_Targets_hg38_interval_list.bed" diff --git a/config/config_bianca.yaml b/config/config_bianca.yaml index baa0959..06e0d84 100644 --- a/config/config_bianca.yaml +++ b/config/config_bianca.yaml @@ -23,7 +23,7 @@ modules: snv_indels: "v0.5.0" reference: - coverage_bed: /castor/project/proj/poirot_pipeline/reference_files/refseq_select_mane_20230828.bed + coverage_bed: /castor/project/proj/poirot_pipeline/reference_files/refseq_select_mane_with_MT_20240102.bed design_bed: /castor/project/proj/poirot_pipeline/reference_files/homo_sapiens.wgs.bed exome_bed: /castor/project/proj/poirot_pipeline/reference_files/Twist_Comprehensive_Exome_Covered_Targets_hg38.bed exome_intervals: 
/castor/project/proj/poirot_pipeline/reference_files/Twist_Comprehensive_Exome_Covered_Targets_hg38_interval_list.bed diff --git a/workflow/scripts/create_excel.py b/workflow/scripts/create_excel.py index 569ff18..c3e547b 100644 --- a/workflow/scripts/create_excel.py +++ b/workflow/scripts/create_excel.py @@ -76,8 +76,12 @@ line = lline.strip().split('\t') length = int(line[2])-int(line[1]) gene = line[3].split("_")[0] - exon = line[3].split("_")[3] - transcript = "NM_"+line[3].split("_")[2] + if line[3].split("_")[1] == "NM": + transcript = "NM_"+line[3].split("_")[2] + exon = line[3].split("_")[3] + else: + transcript = line[3].split("_")[1] + exon = line[3].split("_")[2] covRow = [gene, transcript, exon, line[4], length] tableLinesCov_unsorted.append(covRow) bedfile.append(line[0:4]) From f4e384e38b5919a905c7bbeccb899b4216205504 Mon Sep 17 00:00:00 2001 From: Padraic Corcoran Date: Tue, 2 Jan 2024 14:13:21 +0100 Subject: [PATCH 2/4] ci: run manta with multiple threads --- config/resources.yaml | 6 ++++++ config/resources_bianca.yaml | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/config/resources.yaml b/config/resources.yaml index fecde4f..ea9a7d4 100644 --- a/config/resources.yaml +++ b/config/resources.yaml @@ -92,6 +92,12 @@ gatk_sort_sam: glnexus: threads: 20 +manta_run_workflow_n: + threads: 20 + mem_mb: 122880 + mem_per_cpu: 6144 + time: "20:00:00" + mosdepth: mem_mb: 36864 threads: 4 diff --git a/config/resources_bianca.yaml b/config/resources_bianca.yaml index f4dfbd8..66db7bc 100644 --- a/config/resources_bianca.yaml +++ b/config/resources_bianca.yaml @@ -86,6 +86,11 @@ glnexus: mem_per_cpu: 7168 threads: 16 +manta_run_workflow_n: + mem_mb: 57344 + mem_per_cpu: 7168 + threads: 16 + mosdepth: mem_mb: 57344 mem_per_cpu: 7168 From 540b6d66d996f0cd6bd3947bfc6c9335e19d09ba Mon Sep 17 00:00:00 2001 From: Padraic Corcoran Date: Tue, 2 Jan 2024 17:00:35 +0100 Subject: [PATCH 3/4] feat: add verifybamid2 to the QC --- 
.tests/integration/config/config.yaml | 4 ++++ .../reference/1000g.phase3.100k.b38.vcf.gz.dat.mu | 0 config/config.yaml | 5 +++++ config/config_bianca.yaml | 4 ++++ config/resources.yaml | 5 +++++ config/resources_bianca.yaml | 5 +++++ workflow/Snakefile | 7 +++++++ 7 files changed, 30 insertions(+) create mode 100644 .tests/integration/reference/1000g.phase3.100k.b38.vcf.gz.dat.mu diff --git a/.tests/integration/config/config.yaml b/.tests/integration/config/config.yaml index 1fcdfeb..6f6aaff 100644 --- a/.tests/integration/config/config.yaml +++ b/.tests/integration/config/config.yaml @@ -332,5 +332,9 @@ vep_trio: vep_cache: "reference/VEP/" extra: "--assembly GRCh38 --check_existing --pick --max_af --buffer_size 50000" +verifybamid2: + container: "docker://hydragenetics/verifybamid2:2.0.1" + svd_mu: "reference/1000g.phase3.100k.b38.vcf.gz.dat.mu" + vt_decompose: container: "docker://hydragenetics/vt:2015.11.10" \ No newline at end of file diff --git a/.tests/integration/reference/1000g.phase3.100k.b38.vcf.gz.dat.mu b/.tests/integration/reference/1000g.phase3.100k.b38.vcf.gz.dat.mu new file mode 100644 index 0000000..e69de29 diff --git a/config/config.yaml b/config/config.yaml index 91e71a0..a843f30 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -236,6 +236,7 @@ multiqc: - "qc/picard_collect_wgs_metrics/{sample}_{type}.txt" - "qc/samtools_stats/{sample}_{type}.samtools-stats.txt" - "qc/samtools_idxstats/{sample}_{type}.samtools-idxstats.txt" + - "qc/verifybamid2/{sample}_{type}.selfSM" mt_reference: mt: "/beegfs-storage/data/ref_data/gatk_mito/Homo_sapiens_assembly38.chrM.fasta" @@ -332,5 +333,9 @@ vep_trio: vep_cache: "/beegfs-storage/data/ref_genomes/VEP/" extra: "--assembly GRCh38 --check_existing --pick --max_af --buffer_size 50000" +verifybamid2: + container: "docker://hydragenetics/verifybamid2:2.0.1" + svd_mu: "/beegfs-storage/data/ref_data/verifybamid2/1000g.phase3.100k.b38.vcf.gz.dat.mu" + vt_decompose: container: 
"docker://hydragenetics/vt:2015.11.10" diff --git a/config/config_bianca.yaml b/config/config_bianca.yaml index 06e0d84..35605bd 100644 --- a/config/config_bianca.yaml +++ b/config/config_bianca.yaml @@ -344,6 +344,10 @@ vep_trio: extra: --assembly GRCh38 --check_existing --pick --max_af --buffer_size 50000 vep_cache: /castor/project/proj/poirot_pipeline/reference_files/VEP/ +verifybamid2: + container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_verifybamid2_2.0.1.sif + svd_mu: /castor/project/proj/poirot_pipeline/reference_files/verifybamid2/1000g.phase3.100k.b38.vcf.gz.dat.mu + vt_decompose: container: /castor/project/proj/poirot_pipeline/singularity_files/hydragenetics_vt_2015.11.10.sif diff --git a/config/resources.yaml b/config/resources.yaml index ea9a7d4..2cc0a5f 100644 --- a/config/resources.yaml +++ b/config/resources.yaml @@ -160,3 +160,8 @@ tiddit: vep: threads: 4 + +verifybamid2: + mem_mb: 49152 + mem_per_cpu: 6144 + threads: 8 diff --git a/config/resources_bianca.yaml b/config/resources_bianca.yaml index 66db7bc..c6d3cd4 100644 --- a/config/resources_bianca.yaml +++ b/config/resources_bianca.yaml @@ -164,3 +164,8 @@ vep: mem_mb: 28672 mem_per_cpu: 7168 threads: 4 + +verifybamid2: + mem_mb: 49152 + mem_per_cpu: 6144 + threads: 8 \ No newline at end of file diff --git a/workflow/Snakefile b/workflow/Snakefile index 218d0aa..b3fbd6a 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -419,6 +419,13 @@ use rule samtools_idxstats from qc as qc_samtools_idxstats with: temp("qc/samtools_idxstats/{sample}_{type}.samtools-idxstats.txt"), +use rule verifybamid2 from qc as qc_verifybamid2 with: + input: + bam=lambda wildcards: get_bam_input(wildcards)[0], + bai=lambda wildcards: get_bam_input(wildcards)[1], + ref=config.get("reference", {}).get("fasta", ""), + svd_mu=config.get("verifybamid2", {}).get("svd_mu", ""), + module snv_indels: snakefile: get_module_snakefile(config, "hydra-genetics/snv_indels", path="workflow/Snakefile", 
tag=config["modules"]["snv_indels"]) From d3d5ac88d3f33d0a01e74b353619606925af9016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draic=20Corcoran?= Date: Wed, 3 Jan 2024 10:16:54 +0100 Subject: [PATCH 4/4] style: Update Snakefile --- workflow/Snakefile | 1 + 1 file changed, 1 insertion(+) diff --git a/workflow/Snakefile b/workflow/Snakefile index b3fbd6a..25d1c3b 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -426,6 +426,7 @@ use rule verifybamid2 from qc as qc_verifybamid2 with: ref=config.get("reference", {}).get("fasta", ""), svd_mu=config.get("verifybamid2", {}).get("svd_mu", ""), + module snv_indels: snakefile: get_module_snakefile(config, "hydra-genetics/snv_indels", path="workflow/Snakefile", tag=config["modules"]["snv_indels"])