From 8ec19866f7ddafee9e249f37940661cd8682be7c Mon Sep 17 00:00:00 2001 From: Arielle R Munters Date: Tue, 15 Oct 2024 10:40:23 +0200 Subject: [PATCH 1/7] refactor update multiqc version: --- config/config.yaml | 6 +- config/multiqc_dna_config.yaml | 106 ++++++++++++----------- config/multiqc_rna_config.yaml | 100 +++++++++++++++------ workflow/Snakefile | 8 +- workflow/scripts/sample_order_multiqc.py | 2 +- 5 files changed, 140 insertions(+), 82 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 2dde122..02c0c41 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -14,7 +14,7 @@ modules: fusions: "12f8354" parabricks: "v1.1.0" prealignment: "v1.1.0" - qc: "v0.3.0" + qc: "53c3a82" #"v0.3.0" reports: "7c8b8c5" misc: "v0.1.0" sentieon: "b002d39" @@ -164,7 +164,7 @@ mosdepth_bed: design_bed: "" multiqc: - container: "docker://hydragenetics/multiqc:1.11" + container: "docker://hydragenetics/multiqc:1.21" reports: DNA: config: "config/multiqc_dna_config.yaml" @@ -214,6 +214,8 @@ multiqc: - "prealignment/sortmerna/{sample}_{type}.rrna.log" - "qc/multiqc/RNA_number.table.tsv" - "qc/picard_collect_alignment_summary_metrics/{sample}_{type}.alignment_summary_metrics.txt" + - "alignment/star/{sample}_{type}.Log.final.out" + - "alignment/star/{sample}_{type}.ReadsPerGene.out.tab" pbrun_fq2bam: container: "docker://nvcr.io/nvidia/clara/clara-parabricks:4.0.0-1" diff --git a/config/multiqc_dna_config.yaml b/config/multiqc_dna_config.yaml index 534bb35..94130a0 100644 --- a/config/multiqc_dna_config.yaml +++ b/config/multiqc_dna_config.yaml @@ -3,23 +3,15 @@ extra_fn_clean_exts: ##from this until end - '.duplication_metrics' - type: regex pattern: '_fastq[12]' -extra_fn_clean_trim: - - 'Sample_WA-3560_' report_header_info: - Contact E-mail: "igp-klinsek-bioinfo@lists.uu.se" - Application Type: "TrueSeq PCR Free WGS" - Project Type: "Whole Genome" - -custom content: - order: - - fastqc - - mosdepth - - fastp - - peddy - - samtools - - picard +sp: + 
dna_number_table: + fn: "*DNA_number.table.tsv" custom_data: dna_number_table: @@ -32,10 +24,30 @@ custom_data: title: "DNA number" description: "DNA number based on SampleSheet" -sp: - dna_number_table: - fn: "*DNA_number.table.tsv" - +custom_table_header_config: + general_stats_table: + raw_total_sequences: + suffix: "" + title: "Total seqs M" + reads_mapped: + suffix: "" + title: "Reads mapped M" + reads_mapped_percent: + suffix: "" + reads_properly_paired_percent: + suffix: "" + median_coverage: + suffix: "" + 10_x_pc: + suffix: "" + 30_x_pc: + suffix: "" + 50_x_pc: + suffix: "" + PERCENT_DUPLICATION: + suffix: "" + summed_mean: + suffix: "" table_columns_visible: FastQC: @@ -45,7 +57,7 @@ table_columns_visible: percent_fails: False total_sequences: False fastp: - pct_adapter: True + pct_adapter: False pct_surviving: False after_filtering_gc_content: False filtering_result_passed_filter_reads: False @@ -68,26 +80,30 @@ table_columns_visible: sex_het_ratio: False error_sex_check: True predicted_sex_sex_check: True - Picard: - PCT_PF_READS_ALIGNED: False + "Picard: HsMetrics": + FOLD_ENRICHMENT: False + MEDIAN_TARGET_COVERAGE: False + PCT_TARGET_BASES_30X: False + ZERO_CVG_TARGETS_PCT: Falses + "Picard: InsertSizeMetrics": summed_median: False summed_mean: True + "Picard: Mark Duplicates": PERCENT_DUPLICATION: True + "Picard: WgsMetrics": MEDIAN_COVERAGE: False MEAN_COVERAGE: False SD_COVERAGE: False PCT_30X: False - PCT_TARGET_BASES_30X: False - FOLD_ENRICHMENT: False - TOTAL_READS: True - Samtools: + + "Samtools: stats": error_rate: False non-primary_alignments: False reads_mapped: False reads_mapped_percent: True reads_properly_paired_percent: True reads_MQ0_percent: False - raw_total_sequences: True + raw_total_sequences: True #tidigare from picard # mosdepth custom thresholds mosdepth_config: @@ -100,9 +116,9 @@ mosdepth_config: - 30 - 50 -# Patriks plug in, addera egna columner till general stats +# Custom columns to general stats multiqc_cgs: - Picard: + 
"Picard: HsMetrics": FOLD_80_BASE_PENALTY: title: "Fold80" description: "Fold80 penalty from picard hs metrics" @@ -121,25 +137,17 @@ multiqc_cgs: max: 100 scale: "RdYlGn-rev" format: "{:.2%}" - Samtools: + "Samtools: stats": average_quality: title: "Average Quality" description: "Ratio between the sum of base qualities and total length from Samtools stats" min: 0 max: 60 scale: "RdYlGn" - mosdepth: - 20_x_pc: #Cant get it to work - title: "20x percent" - description: "Fraction of genome with at least 20X coverage" - max: 100 - min: 0 - suffix: "%" - scale: "RdYlGn" # Galler alla kolumner oberoende pa module! table_columns_placement: - dna_number_table: + "Custom content: dna_number_table": dna_number: 300 mosdepth: median_coverage: 601 @@ -150,7 +158,7 @@ table_columns_placement: 20_x_pc: 604 30_x_pc: 605 50_x_pc: 606 - Samtools: + "Samtools: stats": raw_total_sequences: 500 reads_mapped: 501 reads_mapped_percent: 502 @@ -166,19 +174,15 @@ table_columns_placement: error_sex_check: 701 predicted_sex_sex_check: 702 family_id: 703 - Picard: - TOTAL_READS: 500 - PCT_SELECTED_BASES: 801 - FOLD_80_BASE_PENALTY: 802 - PCT_PF_READS_ALIGNED: 888 - summed_median: 888 - PERCENT_DUPLICATION: 803 - summed_mean: 804 - STANDARD_DEVIATION: 805 - ZERO_CVG_TARGETS_PCT: 888 - MEDIAN_COVERAGE: 888 - MEAN_COVERAGE: 888 - SD_COVERAGE: 888 - PCT_30X: 888 - PCT_TARGET_BASES_30X: 888 + "Picard: HsMetrics": FOLD_ENRICHMENT: 888 + MEDIAN_TARGET_COVERAGE: 888 + PCT_TARGET_BASES_30X: 888 + FOLD_80_BASE_PENALTY: 801 + PCT_SELECTED_BASES: 800 + ZERO_CVG_TARGETS_PCT: 805 + "Picard: InsertSizeMetrics": + summed_median: 803 + summed_mean: 803 + "Picard: Mark Duplicates": + PERCENT_DUPLICATION: 802 \ No newline at end of file diff --git a/config/multiqc_rna_config.yaml b/config/multiqc_rna_config.yaml index f298f48..407c9fe 100644 --- a/config/multiqc_rna_config.yaml +++ b/config/multiqc_rna_config.yaml @@ -2,22 +2,27 @@ decimalPoint_format: ',' extra_fn_clean_exts: ##from this until end - 
'.duplication_metrics' - type: regex - pattern: '_fastq[12]' -extra_fn_clean_trim: - - 'Sample_WA-3560_' + pattern: '^IHT[0-9]+-WP2_' + - type: regex + pattern: "_fastq[12]" + +table_sample_merge: + "R1": "_R1_001" + "R2": "_R2_001" + "L008": + - type: "regex" + pattern: "S[0-9]{1,2}_L008" + + report_header_info: - Contact E-mail: "igp-klinsek-bioinfo@lists.uu.se" - Application Type: "Whole Transcriptome Sequencing" - Project Type: "WTS" -custom content: - order: - - mosdepth - - rseqc - - sortmerna - - fastp - - fastqc +sp: + rna_number_table: + fn: "*RNA_number.table.tsv" custom_data: rna_number_table: @@ -31,10 +36,32 @@ custom_data: description: "RNA number based on SampleSheet" placement: 300 -sp: - rna_number_table: - fn: "*RNA_number.table.tsv" - +custom_table_header_config: + general_stats_table: + reads_mapped: + suffix: "" + title: "Reads mapped M" + reads_mapped_percent: + suffix: "" + reads_properly_paired_percent: + suffix: "" + median_coverage: + title: "Median Coverage" + suffix: "" + 10_x_pc: + suffix: "" + 30_x_pc: + suffix: "" + 50_x_pc: + suffix: "" + PERCENT_DUPLICATION: + suffix: "" + summed_mean: + suffix: "" + PCT_SELECTED_BASES: + suffix: "" + ZERO_CVG_TARGETS_PCT: + suffix: "" table_columns_visible: FastQC: @@ -42,14 +69,14 @@ table_columns_visible: percent_gc: False avg_sequence_length: False percent_fails: False - total_sequences: False + total_sequences: True fastp: pct_adapter: False pct_surviving: False after_filtering_gc_content: False filtering_result_passed_filter_reads: False - after_filtering_q30_bases: False - after_filtering_q30_rate: False + after_filtering_q30_bases: True + after_filtering_q30_rate: True pct_duplication: False mosdepth: median_coverage: True @@ -60,16 +87,35 @@ table_columns_visible: 20_x_pc: False 30_x_pc: False 50_x_pc: False + STAR: + star-total_reads: True + star-mapped: False + star-mapped_percent: True + star-uniquely_mapped: False + star-uniquely_mapped_percent: False + star-multimapped: False 
SortMeRNA: rRNA_pct: True - Picard: - PCT_PF_READS_ALIGNED: True + # Picard: + # PCT_PF_READS_ALIGNED: True + "Picard: Mark Duplicates": PERCENT_DUPLICATION: True - Samtools: - error_rate: False - non-primary_alignments: False - reads_mapped: True - reads_mapped_percent: True - reads_properly_paired_percent: True - reads_MQ0_percent: False - raw_total_sequences: True + + +# mosdepth custom thresholds +mosdepth_config: + general_stats_coverage: + - 1 + - 5 + - 10 + - 15 + - 20 + - 30 + - 50 + +# multiqc_cgs: +# "Picard: Alignment Summary": +# MEAN_READ_LENGTH: +# title: "Mean Read Length" +# description: "Mean read length from Picard Alignment Summary Metrics" +# format: "{:.1f}" \ No newline at end of file diff --git a/workflow/Snakefile b/workflow/Snakefile index cc56c70..1ab64f2 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -179,7 +179,13 @@ module alignment: config -use rule star from alignment as alignment_star +use rule star from alignment as alignment_star with: + output: + bam=temp("alignment/star/{sample}_{type}.bam"), + sj=temp("alignment/star/{sample}_{type}.SJ.out.tab"), + reads_per_gene=temp("alignment/star/{sample}_{type}.ReadsPerGene.out.tab"), + log_final=temp("alignment/star/{sample}_{type}.Log.final.out"), + use rule samtools_index from alignment as alignment_samtools_index diff --git a/workflow/scripts/sample_order_multiqc.py b/workflow/scripts/sample_order_multiqc.py index 6feb337..8b084d1 100644 --- a/workflow/scripts/sample_order_multiqc.py +++ b/workflow/scripts/sample_order_multiqc.py @@ -10,7 +10,7 @@ sample_order_index = ["sample", "s_index", "lab_id", "type"] for sample, type, fastq_path in snakemake.params.filelist: fastq = fastq_path.split("/")[-1] - lab_id = fastq.split("_")[0] + lab_id = fastq.split("_")[1] s_pattern = re.compile("_S([0-9]+)_") # In case of missing S-index in fastq1-filename set s_index to 99 (last) try: From fe942e1db279442d6db84a6e09d25da42850420a Mon Sep 17 00:00:00 2001 From: Arielle R Munters Date: 
Wed, 16 Oct 2024 15:11:21 +0200 Subject: [PATCH 2/7] feat: update multiqc version --- config/multiqc_dna_config.yaml | 62 ++++++++++++++++++++++++++-------- config/multiqc_rna_config.yaml | 58 ++++++++++++++++++------------- 2 files changed, 81 insertions(+), 39 deletions(-) diff --git a/config/multiqc_dna_config.yaml b/config/multiqc_dna_config.yaml index 94130a0..adf50b8 100644 --- a/config/multiqc_dna_config.yaml +++ b/config/multiqc_dna_config.yaml @@ -1,9 +1,39 @@ decimalPoint_format: ',' extra_fn_clean_exts: ##from this until end - '.duplication_metrics' + - type: regex + pattern: '^HG[0-9]+-[A-Za-z0-9-]+_' - type: regex pattern: '_fastq[12]' +table_sample_merge: + "R1": "_R1_001" + "R2": "_R2_001" + "L008": # to remove SX_L00X from ending to enable grouping + - type: "regex" + pattern: "_S[0-9]{1,2}_L008" + "L007": # to remove SX_L00X from ending to enable grouping + - type: "regex" + pattern: "_S[0-9]{1,2}_L007" + "L006": # to remove SX_L00X from ending to enable grouping + - type: "regex" + pattern: "_S[0-9]{1,2}_L006" + "L005": # to remove SX_L00X from ending to enable grouping + - type: "regex" + pattern: "_[0-9]{1,2}_L005" + "L004": # to remove SX_L00X from ending to enable grouping + - type: "regex" + pattern: "_S[0-9]{1,2}_L004" + "L003": # to remove SX_L00X from ending to enable grouping + - type: "regex" + pattern: "_S[0-9]{1,2}_L003" + "L002": # to remove SX_L00X from ending to enable grouping + - type: "regex" + pattern: "_S[0-9]{1,2}_L002" + "L001": # to remove SX_L00X from ending to enable grouping + - type: "regex" + pattern: "_S[0-9]{1,2}_L001" + report_header_info: - Contact E-mail: "igp-klinsek-bioinfo@lists.uu.se" - Application Type: "TrueSeq PCR Free WGS" @@ -24,14 +54,26 @@ custom_data: title: "DNA number" description: "DNA number based on SampleSheet" +# mosdepth custom thresholds +mosdepth_config: + general_stats_coverage: + - 1 + - 5 + - 10 + - 15 + - 20 + - 30 + - 50 + +# Remove suffix in general stats custom_table_header_config: 
general_stats_table: raw_total_sequences: suffix: "" - title: "Total seqs M" + title: "Total seqs [M]" reads_mapped: suffix: "" - title: "Reads mapped M" + title: "Reads mapped [M]" reads_mapped_percent: suffix: "" reads_properly_paired_percent: @@ -46,9 +88,11 @@ custom_table_header_config: suffix: "" PERCENT_DUPLICATION: suffix: "" + title: "Duplication [%]" summed_mean: suffix: "" +# General stats column visibility table_columns_visible: FastQC: percent_duplicates: False @@ -84,7 +128,7 @@ table_columns_visible: FOLD_ENRICHMENT: False MEDIAN_TARGET_COVERAGE: False PCT_TARGET_BASES_30X: False - ZERO_CVG_TARGETS_PCT: Falses + ZERO_CVG_TARGETS_PCT: False "Picard: InsertSizeMetrics": summed_median: False summed_mean: True @@ -95,7 +139,6 @@ table_columns_visible: MEAN_COVERAGE: False SD_COVERAGE: False PCT_30X: False - "Samtools: stats": error_rate: False non-primary_alignments: False @@ -105,17 +148,6 @@ table_columns_visible: reads_MQ0_percent: False raw_total_sequences: True #tidigare from picard -# mosdepth custom thresholds -mosdepth_config: - general_stats_coverage: - - 1 - - 5 - - 10 - - 15 - - 20 - - 30 - - 50 - # Custom columns to general stats multiqc_cgs: "Picard: HsMetrics": diff --git a/config/multiqc_rna_config.yaml b/config/multiqc_rna_config.yaml index 407c9fe..d0374b7 100644 --- a/config/multiqc_rna_config.yaml +++ b/config/multiqc_rna_config.yaml @@ -9,12 +9,10 @@ extra_fn_clean_exts: ##from this until end table_sample_merge: "R1": "_R1_001" "R2": "_R2_001" - "L008": + "L008": # to remove SX_L00X from ending to enable grouping - type: "regex" pattern: "S[0-9]{1,2}_L008" - - report_header_info: - Contact E-mail: "igp-klinsek-bioinfo@lists.uu.se" - Application Type: "Whole Transcriptome Sequencing" @@ -36,13 +34,20 @@ custom_data: description: "RNA number based on SampleSheet" placement: 300 +# mosdepth custom thresholds +mosdepth_config: + general_stats_coverage: + - 1 + - 5 + - 10 + - 15 + - 20 + - 30 + - 50 + +# Remove suffix from General stats 
columns custom_table_header_config: general_stats_table: - reads_mapped: - suffix: "" - title: "Reads mapped M" - reads_mapped_percent: - suffix: "" reads_properly_paired_percent: suffix: "" median_coverage: @@ -62,21 +67,37 @@ custom_table_header_config: suffix: "" ZERO_CVG_TARGETS_PCT: suffix: "" + star-total_reads: + suffix: "" + title: "Total readpairs [M]" + star-mapped_percent: + suffix: "" + title: "Aligned [%]" + star-uniquely_mapped_percent: + suffix: "" + title: "Uniq aligned [%]" + star-multimapped: + suffix: "" + title: "Multimapped [M]" + sortmerna-rRNA_pct: + suffix: "" + title: "rRNA [%]" +# General stats column visibility table_columns_visible: FastQC: percent_duplicates: False percent_gc: False avg_sequence_length: False percent_fails: False - total_sequences: True + total_sequences: False fastp: pct_adapter: False pct_surviving: False after_filtering_gc_content: False filtering_result_passed_filter_reads: False - after_filtering_q30_bases: True - after_filtering_q30_rate: True + after_filtering_q30_bases: False + after_filtering_q30_rate: False pct_duplication: False mosdepth: median_coverage: True @@ -92,8 +113,8 @@ table_columns_visible: star-mapped: False star-mapped_percent: True star-uniquely_mapped: False - star-uniquely_mapped_percent: False - star-multimapped: False + star-uniquely_mapped_percent: True + star-multimapped: True SortMeRNA: rRNA_pct: True # Picard: @@ -102,17 +123,6 @@ table_columns_visible: PERCENT_DUPLICATION: True -# mosdepth custom thresholds -mosdepth_config: - general_stats_coverage: - - 1 - - 5 - - 10 - - 15 - - 20 - - 30 - - 50 - # multiqc_cgs: # "Picard: Alignment Summary": # MEAN_READ_LENGTH: From c65b1673ce413b2a2db6e21c9f86fbea3aca14e4 Mon Sep 17 00:00:00 2001 From: Arielle R Munters Date: Thu, 17 Oct 2024 11:14:38 +0200 Subject: [PATCH 3/7] chore: add versions to multiqc and Results-folder --- workflow/rules/common.smk | 42 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) 
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index e006eaf..05fb024 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -3,15 +3,33 @@ __copyright__ = "Copyright 2022, Martin Rippin" __email__ = "arielle.munters@scilifelab.uu.se, nina.hollfelder@scilifelab.uu.se" __license__ = "GPL-3" + +import itertools +import numpy as np import pandas as pd -from snakemake.io import Wildcards -from typing import List, Union +import pathlib +import re from snakemake.utils import validate from snakemake.utils import min_version +import yaml +from datetime import datetime from hydra_genetics.utils.resources import load_resources from hydra_genetics.utils.samples import * from hydra_genetics.utils.units import * +from hydra_genetics import min_version as hydra_min_version + +from hydra_genetics.utils.misc import replace_dict_variables +from hydra_genetics.utils.misc import export_config_as_file +from hydra_genetics.utils.software_versions import add_version_files_to_multiqc +from hydra_genetics.utils.software_versions import add_software_version_to_config +from hydra_genetics.utils.software_versions import export_pipeline_version_as_file +from hydra_genetics.utils.software_versions import export_software_version_as_file +from hydra_genetics.utils.software_versions import get_pipeline_version +from hydra_genetics.utils.software_versions import use_container +from hydra_genetics.utils.software_versions import touch_software_version_file +from hydra_genetics.utils.software_versions import touch_pipeline_version_file_name + min_version("7.8.0") @@ -40,6 +58,26 @@ config = load_resources(config, config["resources"]) validate(config, schema="../schemas/resources.schema.yaml") +## get version information on pipeline, containers and software +pipeline_name = "fluffy_hematology_wgs" +pipeline_version = get_pipeline_version(workflow, pipeline_name=pipeline_name) +version_files = touch_pipeline_version_file_name( + pipeline_version, 
date_string=pipeline_name, directory="Results/versions/software" +) +if use_container(workflow): + version_files.append(touch_software_version_file(config, date_string=pipeline_name, directory="Results/versions/software")) +add_version_files_to_multiqc(config, version_files) + + +onstart: + export_pipeline_version_as_file(pipeline_version, date_string=pipeline_name, directory="Results/versions/software") + if use_container(workflow): + update_config, software_info = add_software_version_to_config(config, workflow, False) + export_software_version_as_file(software_info, date_string=pipeline_name, directory="Results/versions/software") + date_string = datetime.now().strftime("%Y%m%d") + export_config_as_file(update_config, date_string=date_string, directory="Results/versions") + + ### Read and validate samples file samples = pd.read_table(config["samples"], dtype=str).set_index("sample", drop=False) validate(samples, schema="../schemas/samples.schema.yaml") From ab1ab54e7a2217f699122f5065d4ad9bbe9884f2 Mon Sep 17 00:00:00 2001 From: Arielle R Munters Date: Mon, 21 Oct 2024 15:41:08 +0200 Subject: [PATCH 4/7] chore: update output_files to yaml --- .tests/integration/config.yaml | 2 +- .tests/integration/input/HD829-T_S3_R1.fq.gz | 0 .tests/integration/input/HD829-T_S3_R2.fq.gz | 0 .tests/integration/samples.tsv | 3 +- .tests/integration/units.tsv | 1 + config/config.yaml | 2 +- config/output_files.json | 84 ----- config/output_files.yaml | 374 +++++++++++++++++++ workflow/Snakefile | 71 ++-- workflow/rules/common.smk | 114 +++--- 10 files changed, 480 insertions(+), 171 deletions(-) create mode 100644 .tests/integration/input/HD829-T_S3_R1.fq.gz create mode 100644 .tests/integration/input/HD829-T_S3_R2.fq.gz delete mode 100644 config/output_files.json create mode 100644 config/output_files.yaml diff --git a/.tests/integration/config.yaml b/.tests/integration/config.yaml index dac8efe..a1f2479 100644 --- a/.tests/integration/config.yaml +++ 
b/.tests/integration/config.yaml @@ -1,4 +1,4 @@ -output: "../../config/output_files.json" +output: "../../config/output_files.yaml" reference: design_bed: "reference/homo_sapiens.wgs.bed" diff --git a/.tests/integration/input/HD829-T_S3_R1.fq.gz b/.tests/integration/input/HD829-T_S3_R1.fq.gz new file mode 100644 index 0000000..e69de29 diff --git a/.tests/integration/input/HD829-T_S3_R2.fq.gz b/.tests/integration/input/HD829-T_S3_R2.fq.gz new file mode 100644 index 0000000..e69de29 diff --git a/.tests/integration/samples.tsv b/.tests/integration/samples.tsv index f8cae6f..1d29cb7 100644 --- a/.tests/integration/samples.tsv +++ b/.tests/integration/samples.tsv @@ -1,2 +1,3 @@ sample tumor_content sex -NA12878 1.0 O +NA12878 1.0 K +HD829 1.0 O diff --git a/.tests/integration/units.tsv b/.tests/integration/units.tsv index 70eaa15..60b5267 100644 --- a/.tests/integration/units.tsv +++ b/.tests/integration/units.tsv @@ -2,3 +2,4 @@ sample type platform machine flowcell lane barcode fastq1 fastq2 adapter NA12878 T illumina NovaSeq 1FLOWCELL L001 ACGGAACA input/NA12878-T_S2_R1.fq.gz input/NA12878-T_S2_R2.fq.gz AAAA,TTTT NA12878 N illumina NovaSeq 1FLOWCELL L001 ACGGAACA input/NA12878-N_S1_R1.fq.gz input/NA12878-N_S1_R2.fq.gz CCCC,GGGG NA12878 R illumina NovaSeq 1FLOWCELL L001 ACGGAACA input/NA12878-R_S10_R1.fq.gz input/NA12878-R_S10_R2.fq.gz CCCC,GGGG +HD829 T illumina NovaSeq 1FLOWCELL L001 ACGGAACA input/HD829-T_S3_R1.fq.gz input/HD829-T_S3_R2.fq.gz AAAA,TTTT diff --git a/config/config.yaml b/config/config.yaml index 02c0c41..51853f5 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -1,7 +1,7 @@ resources: "resources.yaml" samples: "samples.tsv" units: "units.tsv" -output: "config/output_files.json" +output: "config/output_files.yaml" aligner: "bwa_gpu" # bwa_gpu or bwa_sentieon diff --git a/config/output_files.json b/config/output_files.json deleted file mode 100644 index e5d9ab9..0000000 --- a/config/output_files.json +++ /dev/null @@ -1,84 +0,0 @@ -{ - 
"Results/MultiQC_TN.html": {"name": "_results_multiqc_dna", "file": "qc/multiqc/multiqc_DNA.html", "types": ["T", "N"]}, - "Results/MultiQC_R.html": {"name": "_results_multiqc_rna", "file": "qc/multiqc/multiqc_RNA.html", "types": ["R"]}, - - "Results/{sample}/DNA_fusions/{sample}_T.dux4_igh_read_count.txt": {"name": "_results_dux4_igh", "file": "reports/dux_read_counts/{sample}_T.dux4_igh.txt", "types": ["T"]}, - "Results/{sample}/DNA_fusions/{sample}_T.dux4_erg_read_count.txt": {"name": "_results_dux4_erg", "file": "reports/dux_read_counts/{sample}_T.dux4_erg.txt", "types": ["T"]}, - - "Results/{sample}/RNA_fusions/{sample}_R.arriba.tsv": {"name": "_results_arriba", "file": "fusions/arriba/{sample}_R.fusions.tsv", "types": ["R"]}, - "Results/{sample}/RNA_fusions/{sample}_R.arriba.plot.pdf": {"name": "_results_arriba_draw_fusion", "file": "fusions/arriba_draw_fusion/{sample}_R.pdf", "types": ["R"]}, - "Results/{sample}/RNA_fusions/{sample}_R.star_fusion.tsv": {"name": "_results_star_fusion", "file": "fusions/star_fusion/{sample}_R/star-fusion.fusion_predictions.tsv", "types": ["R"]}, - "Results/{sample}/RNA_fusions/{sample}_R.fusioncatcher.tsv": {"name": "_results_fusioncatcher", "file": "fusions/fusioncatcher/{sample}_R/final-list_candidate-fusion-genes.txt", "types": ["R"]}, - "Results/{sample}/Cram/{sample}_R.bam": {"name": "_results_star_cram", "file": "alignment/star/{sample}_R.bam", "types": ["R"]}, - "Results/{sample}/Cram/{sample}_R.bam.bai": {"name": "_results_star_crai", "file": "alignment/star/{sample}_R.bam.bai", "types": ["R"]}, - "Results/{sample}/RNA_fusions/{sample}_R.fusioncatcher_dux4-igh_counts.txt": {"name": "_results_fusioncatcher_dux4_igh_counts", "file": "fusions/fusioncatcher/{sample}_R/dux4-igh_counts.txt", "types": ["R"]}, - "Results/{sample}/RNA_fusions/{sample}_R.fusioncatcher_dux4-igh_filtered-results.txt": {"name": "_results_fusioncatcher_dux4_igh_hits", "file": "fusions/fusioncatcher/{sample}_R/dux4-igh_hits.txt", "types": ["R"]}, - - 
"Results/{sample}/Cram/{sample}_{type}.crumble.cram": {"name": "_results_cram", "file": "compression/crumble/{sample}_{type}.crumble.cram", "types": ["T", "N"]}, - "Results/{sample}/Cram/{sample}_{type}.crumble.cram.crai": {"name": "_results_crai", "file": "compression/crumble/{sample}_{type}.crumble.cram.crai", "types": ["T", "N"]}, - "Results/{sample}/SNV_indels/{sample}_T.vep.vcf.gz": {"name": "_results_vcf_t", "file": "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.vcf.gz", "types": ["T"]}, - "Results/{sample}/SNV_indels/{sample}_T.vep.vcf.gz.tbi": {"name": "_results_tbi_t", "file": "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.vcf.gz.tbi", "types": ["T"]}, - "Results/{sample}/SNV_indels/{sample}_T.vep.all.vcf.gz": {"name": "_results_vcf_all_t", "file": "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.include.all.vcf.gz", "types": ["T"]}, - "Results/{sample}/SNV_indels/{sample}_T.vep.all.vcf.gz.tbi": {"name": "_results_tbi_all_t", "file": "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.include.all.vcf.gz.tbi", "types": ["T"]}, - "Results/{sample}/SNV_indels/{sample}_T.vep.aml.vcf.gz": {"name": "_results_vcf_aml_t", "file": "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.include.aml.vcf.gz", "types": ["T"]}, - "Results/{sample}/SNV_indels/{sample}_T.vep.aml.vcf.gz.tbi": {"name": "_results_tbi_aml_t", "file": "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.include.aml.vcf.gz.tbi", "types": ["T"]}, - "Results/{sample}/SNV_indels/{sample}_T.vep.tm.vcf.gz": {"name": "_results_vcf_tm_t", "file": "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.include.tm.vcf.gz", "types": ["T"]}, - "Results/{sample}/SNV_indels/{sample}_T.vep.tm.vcf.gz.tbi": {"name": "_results_tbi_tm_t", "file": "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.include.tm.vcf.gz.tbi", "types": ["T"]}, - "Results/{sample}/SNV_indels/{sample}_T.xlsx": {"name": "_results_xlsx_t", "file": "export_to_xlsx/t/{sample}_T.snvs.xlsx", "types": ["T"]}, - - "Results/{sample}/SNV_indels/{sample}_TN.vep.vcf.gz": 
{"name": "_results_vcf_tn", "file": "parabricks/pbrun_mutectcaller_tn/{sample}.vep.vcf.gz", "types": ["TN"]}, - "Results/{sample}/SNV_indels/{sample}_TN.vep.vcf.gz.tbi": {"name": "_results_tbi_tn", "file": "parabricks/pbrun_mutectcaller_tn/{sample}.vep.vcf.gz.tbi", "types": ["TN"]}, - "Results/{sample}/SNV_indels/{sample}_TN.vep.all.vcf.gz": {"name": "_results_vcf_all", "file": "parabricks/pbrun_mutectcaller_tn/{sample}.vep.include.all.vcf.gz", "types": ["TN"]}, - "Results/{sample}/SNV_indels/{sample}_TN.vep.all.vcf.gz.tbi": {"name": "_results_tbi_all", "file": "parabricks/pbrun_mutectcaller_tn/{sample}.vep.include.all.vcf.gz.tbi", "types": ["TN"]}, - "Results/{sample}/SNV_indels/{sample}_TN.vep.aml.vcf.gz": {"name": "_results_vcf_aml", "file": "parabricks/pbrun_mutectcaller_tn/{sample}.vep.include.aml.vcf.gz", "types": ["TN"]}, - "Results/{sample}/SNV_indels/{sample}_TN.vep.aml.vcf.gz.tbi": {"name": "_results_tbi_aml", "file": "parabricks/pbrun_mutectcaller_tn/{sample}.vep.include.aml.vcf.gz.tbi", "types": ["TN"]}, - "Results/{sample}/SNV_indels/{sample}_TN.vep.tm.vcf.gz": {"name": "_results_vcf_tm", "file": "parabricks/pbrun_mutectcaller_tn/{sample}.vep.include.tm.vcf.gz", "types": ["TN"]}, - "Results/{sample}/SNV_indels/{sample}_TN.vep.tm.vcf.gz.tbi": {"name": "_results_tbi_tm", "file": "parabricks/pbrun_mutectcaller_tn/{sample}.vep.include.tm.vcf.gz.tbi", "types": ["TN"]}, - "Results/{sample}/SNV_indels/{sample}_TN.xlsx": {"name": "_results_xlsx_tn", "file": "export_to_xlsx/tn/{sample}.snvs.xlsx", "types": ["TN"]}, - - "Results/{sample}/SNV_indels/{sample}_mutectcaller_T.all.tsv": {"name": "_results_tsv_all_t", "file": "tsv_files/{sample}_mutectcaller_t.all.tsv", "types": ["T"]}, - "Results/{sample}/SNV_indels/{sample}_mutectcaller_T.aml.tsv": {"name": "_results_tsv_aml_t", "file": "tsv_files/{sample}_mutectcaller_t.aml.tsv", "types": ["T"]}, - - "Results/{sample}/SNV_indels/{sample}_mutectcaller_TN.all.tsv": {"name": "_results_tsv_all", "file": 
"tsv_files/{sample}_mutectcaller_tn.all.tsv", "types": ["TN"]}, - "Results/{sample}/SNV_indels/{sample}_mutectcaller_TN.aml.tsv": {"name": "_results_tsv_aml", "file": "tsv_files/{sample}_mutectcaller_tn.aml.tsv", "types": ["TN"]}, - "Results/{sample}/SNV_indels/{sample}.pindel.vcf": {"name": "_results_pindel_vcf", "file": "cnv_sv/pindel_vcf/{sample}_T.no_tc.vcf", "types": ["T"]}, - "Results/{sample}/CNV/{sample}_T.pathology.vcf.gz": {"name": "_results_cnvkit_vcf", "file": "cnv_sv/cnvkit_vcf/{sample}_T.pathology.vcf.gz", "types": ["T"]}, - "Results/{sample}/CNV/{sample}_T.pathology.vcf.gz.tbi": {"name": "_results_cnvkit_tbi", "file": "cnv_sv/cnvkit_vcf/{sample}_T.pathology.vcf.gz.tbi", "types": ["T"]}, - "Results/{sample}/CNV/{sample}_T.CNV.xlsx": {"name": "_results_cnvkit_table", "file": "cnv_sv/cnvkit_table/{sample}_T.CNV.xlsx", "types": ["T"]}, - "Results/{sample}/CNV/{sample}_T.png": {"name": "_results_cnvkit_scatter_whole", "file": "cnv_sv/cnvkit_scatter/{sample}_T.png", "types": ["T"]}, - "Results/{sample}/CNV/{sample}_T_chr{chr}.png": {"name": "_results_cnvkit_scatter", "file": "cnv_sv/cnvkit_scatter/{sample}_T_chr{chr}.png", "types": ["T"]}, - "Results/{sample}/CNV/GATK/{sample}_T.pathology.vcf.gz": {"name": "_results_gatk_vcf", "file": "cnv_sv/gatk_vcf/{sample}_T.pathology.vcf.gz", "types": ["T"]}, - "Results/{sample}/CNV/GATK/{sample}_T.pathology.vcf.gz.tbi": {"name": "_results_gatk_vcf_tbi", "file": "cnv_sv/gatk_vcf/{sample}_T.pathology.vcf.gz.tbi", "types": ["T"]}, - "Results/{sample}/CNV/cnv_html_report/{sample}_T.pathology.chr{chr}.cnv_report.html": {"name": "_results_cnv_html_report_chr", "file": "reports/cnv_html_report/{sample}_T.pathology.chr{chr}.cnv_report.html", "types": ["T"]}, - - "Results/{sample}/SV/{sample}_manta_T.ssa.vcf.gz": {"name": "_results_manta_t_vcf", "file": "cnv_sv/manta_run_workflow_t/{sample}.ssa.vcf.gz", "types": ["T"]}, - "Results/{sample}/SV/{sample}_manta_T.ssa.vcf.gz.tbi": {"name": "_results_manta_t_tbi", "file": 
"cnv_sv/manta_run_workflow_t/{sample}.ssa.vcf.gz.tbi", "types": ["T"]}, - "Results/{sample}/SV/{sample}_manta_T.ssa.all.vcf.gz": {"name": "_results_manta_t_vcf_all", "file": "cnv_sv/manta_run_workflow_t/{sample}.ssa.include.all.vcf.gz", "types": ["T"]}, - "Results/{sample}/SV/{sample}_manta_T.ssa.all.vcf.gz.tbi": {"name": "_results_manta_t_tbi_all", "file": "cnv_sv/manta_run_workflow_t/{sample}.ssa.include.all.vcf.gz.tbi", "types": ["T"]}, - "Results/{sample}/SV/{sample}_manta_T.ssa.aml.vcf.gz": {"name": "_results_manta_t_vcf_aml", "file": "cnv_sv/manta_run_workflow_t/{sample}.ssa.include.aml.vcf.gz", "types": ["T"]}, - "Results/{sample}/SV/{sample}_manta_T.ssa.aml.vcf.gz.tbi": {"name": "_results_manta_t_tbi_aml", "file": "cnv_sv/manta_run_workflow_t/{sample}.ssa.include.aml.vcf.gz.tbi", "types": ["T"]}, - "Results/{sample}/SV/{sample}_manta_T.tsv": {"name": "_results_manta_t_tsv", "file": "tsv_files/{sample}_manta_t.tsv", "types": ["T"]}, - "Results/{sample}/SV/{sample}_manta_T.del.tsv": {"name": "_results_manta_t_tsv_del", "file": "tsv_files/{sample}_manta_t.del.tsv", "types": ["T"]}, - "Results/{sample}/SV/{sample}_manta_T.ins.tsv": {"name": "_results_manta_t_tsv_ins", "file": "tsv_files/{sample}_manta_t.ins.tsv", "types": ["T"]}, - "Results/{sample}/SV/{sample}_manta_T.dup.tsv": {"name": "_results_manta_t_tsv_dup", "file": "tsv_files/{sample}_manta_t.dup.tsv", "types": ["T"]}, - "Results/{sample}/SV/{sample}_manta_T.bnd.tsv": {"name": "_results_manta_t_tsv_bnd", "file": "tsv_files/{sample}_manta_t.tsv", "types": ["T"]}, - "Results/{sample}/SV/{sample}_manta_T.bnd.all.tsv": {"name": "_results_manta_t_tsv_bnd_all", "file": "tsv_files/{sample}_manta_t.all.tsv", "types": ["T"]}, - "Results/{sample}/SV/{sample}_manta_T.bnd.aml.tsv": {"name": "_results_manta_t_tsv_bnd_aml", "file": "tsv_files/{sample}_manta_t.aml.tsv", "types": ["T"]}, - - "Results/{sample}/SV/{sample}_manta_TN.ssa.vcf.gz": {"name": "_results_manta_tn_vcf", "file": 
"cnv_sv/manta_run_workflow_tn/{sample}.ssa.vcf.gz", "types": ["TN"]}, - "Results/{sample}/SV/{sample}_manta_TN.ssa.vcf.gz.tbi": {"name": "_results_manta_tn_tbi", "file": "cnv_sv/manta_run_workflow_tn/{sample}.ssa.vcf.gz.tbi", "types": ["TN"]}, - "Results/{sample}/SV/{sample}_manta_TN.ssa.all.vcf.gz": {"name": "_results_manta_tn_vcf_all", "file": "cnv_sv/manta_run_workflow_tn/{sample}.ssa.include.all.vcf.gz", "types": ["TN"]}, - "Results/{sample}/SV/{sample}_manta_TN.ssa.all.vcf.gz.tbi": {"name": "_results_manta_tn_tbi_all", "file": "cnv_sv/manta_run_workflow_tn/{sample}.ssa.include.all.vcf.gz.tbi", "types": ["TN"]}, - "Results/{sample}/SV/{sample}_manta_TN.ssa.aml.vcf.gz": {"name": "_results_manta_tn_vcf_aml", "file": "cnv_sv/manta_run_workflow_tn/{sample}.ssa.include.aml.vcf.gz", "types": ["TN"]}, - "Results/{sample}/SV/{sample}_manta_TN.ssa.aml.vcf.gz.tbi": {"name": "_results_manta_tn_tbi_aml", "file": "cnv_sv/manta_run_workflow_tn/{sample}.ssa.include.aml.vcf.gz.tbi", "types": ["TN"]}, - "Results/{sample}/SV/{sample}_manta_TN.del.tsv": {"name": "_results_manta_tsv_del", "file": "tsv_files/{sample}_manta_tn.del.tsv", "types": ["TN"]}, - "Results/{sample}/SV/{sample}_manta_TN.ins.tsv": {"name": "_results_manta_tsv_ins", "file": "tsv_files/{sample}_manta_tn.ins.tsv", "types": ["TN"]}, - "Results/{sample}/SV/{sample}_manta_TN.dup.tsv": {"name": "_results_manta_tsv_dup", "file": "tsv_files/{sample}_manta_tn.dup.tsv", "types": ["TN"]}, - "Results/{sample}/SV/{sample}_manta_TN.bnd.tsv": {"name": "_results_manta_tsv_bnd", "file": "tsv_files/{sample}_manta_tn.tsv", "types": ["TN"]}, - "Results/{sample}/SV/{sample}_manta_TN.bnd.all.tsv": {"name": "_results_manta_tsv_bnd_all", "file": "tsv_files/{sample}_manta_tn.all.tsv", "types": ["TN"]}, - "Results/{sample}/SV/{sample}_manta_TN.bnd.aml.tsv": {"name": "_results_manta_tsv_bnd_aml", "file": "tsv_files/{sample}_manta_tn.aml.tsv", "types": ["TN"]}, - "Archive/{sample}_{flowcell}_{lane}_{barcode}_{type}.spring": {"name": 
"_archives_spring", "file": "compression/spring/{sample}_{flowcell}_{lane}_{barcode}_{type}.spring", "types": ["T", "N", "R"]}, - - "Results/{sample}/CNV/SVDB/{sample}_T.pathology.svdb_query.vcf.gz": {"name": "_results_svdb_vcf", "file": "cnv_sv/svdb_query/{sample}_T.pathology.svdb_query.vcf.gz", "types": ["T"]}, - "Results/{sample}/CNV/SVDB/{sample}_T.pathology.svdb_query.vcf.gz.tbi": {"name": "_results_svdb_tbi", "file": "cnv_sv/svdb_query/{sample}_T.pathology.svdb_query.vcf.gz.tbi", "types": ["T"]} -} diff --git a/config/output_files.yaml b/config/output_files.yaml new file mode 100644 index 0000000..a154e18 --- /dev/null +++ b/config/output_files.yaml @@ -0,0 +1,374 @@ +directory: ./Results + +files: + - name: Spring compression + input: "compression/spring/{sample}_{flowcell}_{lane}_{barcode}_{type}.spring" + output: Archive/{sample}_{flowcell}_{lane}_{barcode}_{type}.spring + types: ["T", "N", "R"] + + - name: MultiQC DNA + input: "qc/multiqc/multiqc_DNA.html" + output: multiqc_TN.html + types: ["T", "N"] + + - name: Cram T and N + input: compression/crumble/{sample}_{type}.crumble.cram + output: "{sample}/Cram/{sample}_{type}.crumble.cram" + types: ["T", "N"] + + - name: Crai T and N + input: compression/crumble/{sample}_{type}.crumble.cram.crai + output: "{sample}/Cram/{sample}_{type}.crumble.cram.crai" + types: ["T", "N"] + + # Tumor + # SNV indels + - name: Parabricks T vcf + input: "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.vcf.gz" + output: "{sample}/SNV_indels/{sample}_T.vep.vcf.gz" + types: ["T"] + + - name: Parabricks T tbi + input: "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.vcf.gz.tbi" + output: "{sample}/SNV_indels/{sample}_T.vep.vcf.gz.tbi" + types: ["T"] + + - name: Parabricks T vcf ALL subsample + input: "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.include.all.vcf.gz" + output: "{sample}/SNV_indels/{sample}_T.vep.all.vcf.gz" + types: ["T"] + + - name: Parabricks T tbi ALL subsample + input: 
"parabricks/pbrun_mutectcaller_t/{sample}_T.vep.include.all.vcf.gz.tbi" + output: "{sample}/SNV_indels/{sample}_T.vep.all.vcf.gz.tbi" + types: ["T"] + + - name: Parabricks T tsv ALL subsample + input: tsv_files/{sample}_mutectcaller_t.all.tsv + output: "{sample}/SNV_indels/{sample}_mutectcaller_T.all.tsv" + types: ["T"] + + - name: Parabricks T vcf AML subsample + input: "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.include.aml.vcf.gz" + output: "{sample}/SNV_indels/{sample}_T.vep.aml.vcf.gz" + types: ["T"] + + - name: Parabricks T tbi AML subsample + input: "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.include.aml.vcf.gz.tbi" + output: "{sample}/SNV_indels/{sample}_T.vep.aml.vcf.gz.tbi" + types: ["T"] + + - name: Parabricks T tsv AML subsample + input: tsv_files/{sample}_mutectcaller_t.aml.tsv + output: "{sample}/SNV_indels/{sample}_mutectcaller_T.aml.tsv" + types: ["T"] + + - name: Parabricks T vcf TM subsample + input: "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.include.tm.vcf.gz" + output: "{sample}/SNV_indels/{sample}_T.vep.tm.vcf.gz" + types: ["T"] + + - name: Parabricks T tbi TM subsample + input: "parabricks/pbrun_mutectcaller_t/{sample}_T.vep.include.tm.vcf.gz.tbi" + output: "{sample}/SNV_indels/{sample}_T.vep.tm.vcf.gz.tbi" + types: ["T"] + + - name: Parabricks T xlsx + input: "export_to_xlsx/t/{sample}_T.snvs.xlsx" + output: "{sample}/SNV_indels/{sample}_T.xlsx" + types: ["T"] + + - name: Pindel vcf + input: "cnv_sv/pindel_vcf/{sample}_T.no_tc.vcf" + output: "{sample}/SNV_indels/{sample}.pindel.vcf" + types: ["T"] + + # CNV + - name: CNVkit vcf + input: "cnv_sv/cnvkit_vcf/{sample}_T.pathology.vcf.gz" + output: "{sample}/CNV/vcfs/{sample}_T.cnvkit.pathology.vcf.gz" + types: ["T"] + + - name: CNVkit tbi + input: "cnv_sv/cnvkit_vcf/{sample}_T.pathology.vcf.gz.tbi" + output: "{sample}/CNV/vcfs/{sample}_T.cnvkit.pathology.vcf.gz.tbi" + types: ["T"] + + - name: GATK vcf + input: "cnv_sv/gatk_vcf/{sample}_T.pathology.vcf.gz" + output: 
"{sample}/CNV/vcfs/{sample}_T.gatk.pathology.vcf.gz" + types: ["T"] + + - name: GATK tbi + input: "cnv_sv/gatk_vcf/{sample}_T.pathology.vcf.gz.tbi" + output: "{sample}/CNV/vcfs/{sample}_T.gatk.pathology.vcf.gz.tbi" + types: ["T"] + + - name: CNVkit xlsx + input: "cnv_sv/cnvkit_table/{sample}_T.CNV.xlsx" + output: "{sample}/CNV/{sample}_T.CNVkit.xlsx" + types: ["T"] + + - name: CNVkit plot whole + input: "cnv_sv/cnvkit_scatter/{sample}_T.png" + output: "{sample}/CNV/{sample}_T.png" + types: ["T"] + + - name: CNVkit plot per chr + input: "cnv_sv/cnvkit_scatter/{sample}_T_chr{chr}.png" + output: "{sample}/CNV/{sample}_T_chr{chr}.png" + types: ["T"] + + - name: CNV report per chr + input: "reports/cnv_html_report/{sample}_T.pathology.chr{chr}.cnv_report.html" + output: "{sample}/CNV/cnv_html_report/{sample}_T.pathology.chr{chr}.cnv_report.html" + types: ["T"] + + - name: CNV svdb vcf + input: "cnv_sv/svdb_query/{sample}_T.pathology.svdb_query.vcf.gz" + output: "{sample}/CNV/{sample}_T.pathology.cnv-calls.vcf.gz" + types: ["T"] + + - name: CNV svdb tbi + input: "cnv_sv/svdb_query/{sample}_T.pathology.svdb_query.vcf.gz.tbi" + output: "{sample}/CNV/{sample}_T.pathology.cnv-calls.vcf.gz.tbi" + types: ["T"] + + # DNA fusions + - name: DUX4 igh read count + input: "reports/dux_read_counts/{sample}_T.dux4_igh.txt" + output: "{sample}/DNA_fusions/{sample}_T.dux4_igh_read_count.txt" + types: ["T"] + + - name: DUX4 ERG read count + input: "reports/dux_read_counts/{sample}_T.dux4_erg.txt" + output: "{sample}/DNA_fusions/{sample}_T.dux4_erg_read_count.txt" + types: ["T"] + + # SVs + - name: Manta T vcf + input: "cnv_sv/manta_run_workflow_t/{sample}.ssa.vcf.gz" + output: "{sample}/SV/{sample}_manta_T.ssa.vcf.gz" + types: ["T"] + + - name: Manta T tbi + input: "cnv_sv/manta_run_workflow_t/{sample}.ssa.vcf.gz.tbi" + output: "{sample}/SV/{sample}_manta_T.ssa.vcf.gz.tbi" + types: ["T"] + + - name: Manta T vcf ALL subsample + input: 
"cnv_sv/manta_run_workflow_t/{sample}.ssa.include.all.vcf.gz" + output: "{sample}/SV/{sample}_manta_T.ssa.all.vcf.gz" + types: ["T"] + + - name: Manta T tbi ALL subsample + input: "cnv_sv/manta_run_workflow_t/{sample}.ssa.include.all.vcf.gz.tbi" + output: "{sample}/SV/{sample}_manta_T.ssa.all.vcf.gz.tbi" + types: ["T"] + + - name: Manta T vcf AML subsample + input: "cnv_sv/manta_run_workflow_t/{sample}.ssa.include.aml.vcf.gz" + output: "{sample}/SV/{sample}_manta_T.ssa.aml.vcf.gz" + types: ["T"] + + - name: Manta T tbi AML subsample + input: "cnv_sv/manta_run_workflow_t/{sample}.ssa.include.aml.vcf.gz.tbi" + output: "{sample}/SV/{sample}_manta_T.ssa.aml.vcf.gz.tbi" + types: ["T"] + + - name: Manta T tsv del subsample + input: "tsv_files/{sample}_manta_t.del.tsv" + output: "{sample}/SV/{sample}_manta_T.del.tsv" + types: ["T"] + + - name: Manta T tsv ins subsample + input: "tsv_files/{sample}_manta_t.ins.tsv" + output: "{sample}/SV/{sample}_manta_T.ins.tsv" + types: ["T"] + + - name: Manta T tsv dup subsample + input: "tsv_files/{sample}_manta_t.dup.tsv" + output: "{sample}/SV/{sample}_manta_T.dup.tsv" + types: ["T"] + + - name: Manta T tsv bnd subsample + input: "tsv_files/{sample}_manta_t.tsv" + output: "{sample}/SV/{sample}_manta_T.bnd.tsv" + types: ["T"] + + - name: Manta T tsv bnd-all subsample + input: "tsv_files/{sample}_manta_t.all.tsv" + output: "{sample}/SV/{sample}_manta_T.bnd.all.tsv" + types: ["T"] + + - name: Manta T tsv bnd-aml subsample + input: "tsv_files/{sample}_manta_t.aml.tsv" + output: "{sample}/SV/{sample}_manta_T.bnd.aml.tsv" + types: ["T"] + + # Matched samples TN + # SNV indels + - name: Parabricks TN vcf + input: "parabricks/pbrun_mutectcaller_tn/{sample}.vep.vcf.gz" + output: "{sample}/SNV_indels/{sample}_TN.vep.vcf.gz" + types: ["TN"] + + - name: Parabricks TN tbi + input: "parabricks/pbrun_mutectcaller_tn/{sample}.vep.vcf.gz.tbi" + output: "{sample}/SNV_indels/{sample}_TN.vep.vcf.gz.tbi" + types: ["TN"] + + - name: Parabricks TN vcf ALL 
subsample + input: "parabricks/pbrun_mutectcaller_tn/{sample}.vep.include.all.vcf.gz" + output: "{sample}/SNV_indels/{sample}_TN.vep.all.vcf.gz" + types: ["TN"] + + - name: Parabricks TN tbi ALL subsample + input: "parabricks/pbrun_mutectcaller_tn/{sample}.vep.include.all.vcf.gz.tbi" + output: "{sample}/SNV_indels/{sample}_TN.vep.all.vcf.gz.tbi" + types: ["TN"] + + - name: Parabricks TN tsv ALL subsample + input: "tsv_files/{sample}_mutectcaller_tn.all.tsv" + output: "{sample}/SNV_indels/{sample}_mutectcaller_TN.all.tsv" + types: ["TN"] + + - name: Parabricks TN vcf AML subsample + input: parabricks/pbrun_mutectcaller_tn/{sample}.vep.include.aml.vcf.gz + output: "{sample}/SNV_indels/{sample}_TN.vep.aml.vcf.gz" + types: ["TN"] + + - name: Parabricks TN tbi AML subsample + input: parabricks/pbrun_mutectcaller_tn/{sample}.vep.include.aml.vcf.gz.tbi + output: "{sample}/SNV_indels/{sample}_TN.vep.aml.vcf.gz.tbi" + types: ["TN"] + + - name: Parabricks TN tsv AML subsample + input: "tsv_files/{sample}_mutectcaller_tn.aml.tsv" + output: "{sample}/SNV_indels/{sample}_mutectcaller_TN.aml.tsv" + types: ["TN"] + + - name: Parabricks TN vcf TM subsample + input: parabricks/pbrun_mutectcaller_tn/{sample}.vep.include.tm.vcf.gz + output: "{sample}/SNV_indels/{sample}_TN.vep.tm.vcf.gz" + types: ["TN"] + + - name: Parabricks TN tbi TM subsample + input: parabricks/pbrun_mutectcaller_tn/{sample}.vep.include.tm.vcf.gz.tbi + output: "{sample}/SNV_indels/{sample}_TN.vep.tm.vcf.gz.tbi" + types: ["TN"] + + - name: Parabricks TN xlsx + input: "export_to_xlsx/tn/{sample}.snvs.xlsx" + output: "{sample}/SNV_indels/{sample}_TN.xlsx" + types: ["TN"] + + # SVs + - name: Manta TN vcf + input: "cnv_sv/manta_run_workflow_tn/{sample}.ssa.vcf.gz" + output: "{sample}/SV/{sample}_manta_TN.ssa.vcf.gz" + types: ["TN"] + + - name: Manta TN tbi + input: "cnv_sv/manta_run_workflow_tn/{sample}.ssa.vcf.gz.tbi" + output: "{sample}/SV/{sample}_manta_TN.ssa.vcf.gz.tbi" + types: ["TN"] + + - name: Manta TN vcf ALL 
subsample + input: "cnv_sv/manta_run_workflow_tn/{sample}.ssa.include.all.vcf.gz" + output: "{sample}/SV/{sample}_manta_TN.ssa.all.vcf.gz" + types: ["TN"] + + - name: Manta TN tbi ALL subsample + input: "cnv_sv/manta_run_workflow_tn/{sample}.ssa.include.all.vcf.gz.tbi" + output: "{sample}/SV/{sample}_manta_TN.ssa.all.vcf.gz.tbi" + types: ["TN"] + + - name: Manta TN vcf AML subsample + input: "cnv_sv/manta_run_workflow_tn/{sample}.ssa.include.aml.vcf.gz" + output: "{sample}/SV/{sample}_manta_TN.ssa.aml.vcf.gz" + types: ["TN"] + + - name: Manta TN tbi AML subsample + input: "cnv_sv/manta_run_workflow_tn/{sample}.ssa.include.aml.vcf.gz.tbi" + output: "{sample}/SV/{sample}_manta_TN.ssa.aml.vcf.gz.tbi" + types: ["TN"] + + - name: Manta TN tsv del subsample + input: "tsv_files/{sample}_manta_tn.del.tsv" + output: "{sample}/SV/{sample}_manta_TN.del.tsv" + types: ["TN"] + + - name: Manta TN tsv ins subsample + input: "tsv_files/{sample}_manta_tn.ins.tsv" + output: "{sample}/SV/{sample}_manta_TN.ins.tsv" + types: ["TN"] + + - name: Manta TN tsv dup subsample + input: "tsv_files/{sample}_manta_tn.dup.tsv" + output: "{sample}/SV/{sample}_manta_TN.dup.tsv" + types: ["TN"] + + - name: Manta TN tsv bnd subsample + input: "tsv_files/{sample}_manta_tn.tsv" + output: "{sample}/SV/{sample}_manta_TN.bnd.tsv" + types: ["TN"] + + - name: Manta TN tsv bnd-all subsample + input: "tsv_files/{sample}_manta_tn.all.tsv" + output: "{sample}/SV/{sample}_manta_TN.bnd.all.tsv" + types: ["TN"] + + - name: Manta TN tsv bnd-aml subsample + input: "tsv_files/{sample}_manta_tn.aml.tsv" + output: "{sample}/SV/{sample}_manta_TN.bnd.aml.tsv" + types: ["TN"] + + + # RNA + - name: MultiQC RNA + input: "qc/multiqc/multiqc_RNA.html" + output: multiqc_R.html + types: ["R"] + + # alignment + - name: RNA bam + input: "alignment/star/{sample}_R.bam" + output: "{sample}/Cram/{sample}_R.bam" + types: ["R"] + + - name: RNA bai + input: "alignment/star/{sample}_R.bam.bai" + output: 
"{sample}/Cram/{sample}_R.bam.bai" + types: ["R"] + + # Fusions + - name: Arriba tsv + input: "fusions/arriba/{sample}_R.fusions.tsv" + output: "{sample}/RNA_fusions/{sample}_R.arriba.tsv" + types: ["R"] + + - name: Arriba pdf + input: fusions/arriba_draw_fusion/{sample}_R.pdf + output: "{sample}/RNA_fusions/{sample}_R.arriba.plot.pdf" + types: ["R"] + + - name: Star-fusion tsv + input: fusions/star_fusion/{sample}_R/star-fusion.fusion_predictions.tsv + output: "{sample}/RNA_fusions/{sample}_R.star_fusion.tsv" + types: ["R"] + + - name: Fusioncather tsv + input: fusions/fusioncatcher/{sample}_R/final-list_candidate-fusion-genes.txt + output: "{sample}/RNA_fusions/{sample}_R.fusioncatcher.tsv" + types: ["R"] + + - name: Fusioncatcher dux4-igh counts + input: fusions/fusioncatcher/{sample}_R/dux4-igh_counts.txt + output: "{sample}/RNA_fusions/{sample}_R.fusioncatcher_dux4-igh_counts.txt" + types: ["R"] + + - name: Fusioncatcher dux4-igh calls + input: fusions/fusioncatcher/{sample}_R/dux4-igh_hits.txt + output: "{sample}/RNA_fusions/{sample}_R.fusioncatcher_dux4-igh_filtered-results.txt" + types: ["R"] diff --git a/workflow/Snakefile b/workflow/Snakefile index 1ab64f2..27511d3 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -16,21 +16,22 @@ include: "rules/manta_to_tsv.smk" include: "rules/peddy_create_ped.smk" +ruleorder: annotation_simple_sv_annotation_tn > misc_bgzip ruleorder: cnv_sv_manta_run_workflow_tn > misc_tabix ruleorder: cnv_sv_manta_run_workflow_tn > misc_bgzip ruleorder: cnv_sv_manta_run_workflow_t > misc_tabix ruleorder: cnv_sv_manta_run_workflow_t > misc_bgzip -ruleorder: _results_manta_tn_tbi > misc_tabix -ruleorder: _results_manta_t_tbi > misc_tabix -ruleorder: _results_cnvkit_tbi > misc_tabix -ruleorder: _results_vcf_tn > misc_bgzip -ruleorder: _results_gatk_vcf > misc_bgzip -ruleorder: _results_gatk_vcf_tbi > misc_tabix -ruleorder: annotation_simple_sv_annotation_tn > misc_bgzip -ruleorder: _results_pindel_vcf > misc_bgzip ruleorder: 
gatk_model_segments > cnv_sv_gatk_model_segments -ruleorder: _results_svdb_vcf > misc_bgzip -ruleorder: _results_svdb_tbi > misc_tabix + +ruleorder: copy_parabricks_tn_vcf > misc_bgzip +ruleorder: copy_manta_tn_tbi > misc_tabix +ruleorder: copy_manta_t_tbi > misc_tabix +ruleorder: copy_cnvkit_tbi > misc_tabix +ruleorder: copy_gatk_vcf > misc_bgzip +ruleorder: copy_gatk_tbi > misc_tabix +ruleorder: copy_pindel_vcf > misc_bgzip +ruleorder: copy_cnv_svdb_vcf > misc_bgzip +ruleorder: copy_cnv_svdb_tbi > misc_tabix aligner = config.get("aligner", None) @@ -43,31 +44,30 @@ elif aligner == "bwa_gpu": include: "rules/mutectcaller_to_tsv.smk" include: "rules/sample_order_multiqc.smk" - ruleorder: _results_manta_tn_tbi_all > misc_tabix - ruleorder: _results_manta_tn_tbi_aml > misc_tabix - ruleorder: _results_manta_t_tbi_all > misc_tabix - ruleorder: _results_manta_t_tbi_aml > misc_tabix - ruleorder: _results_tbi_tn > misc_tabix - ruleorder: _results_tbi_all > misc_tabix - ruleorder: _results_tbi_aml > misc_tabix - ruleorder: _results_crai > misc_samtools_index - ruleorder: _results_vcf_t > misc_bgzip - ruleorder: _results_tbi_t > misc_tabix - ruleorder: _results_tbi_tn > misc_tabix - ruleorder: _results_vcf_aml > misc_bgzip - ruleorder: _results_vcf_aml_t > misc_bgzip - ruleorder: _results_tbi_aml > misc_tabix - ruleorder: _results_tbi_aml_t > misc_tabix - ruleorder: _results_vcf_all > misc_bgzip - ruleorder: _results_tbi_all > misc_tabix - ruleorder: _results_vcf_all_t > misc_bgzip - ruleorder: _results_tbi_all_t > misc_tabix - ruleorder: _results_vcf_tm > misc_bgzip - ruleorder: _results_tbi_tm > misc_tabix - ruleorder: _results_vcf_tm_t > misc_bgzip - ruleorder: _results_tbi_tm_t > misc_tabix ruleorder: parabricks_pbrun_fq2bam > alignment_samtools_index - ruleorder: _results_star_crai > alignment_samtools_index + ruleorder: copy_crai_t_and_n > misc_samtools_index + ruleorder: copy_rna_bai > alignment_samtools_index + + ruleorder: copy_parabricks_tn_tbi > misc_tabix + 
ruleorder: copy_parabricks_tn_vcf_all_subsample > misc_bgzip + ruleorder: copy_parabricks_tn_tbi_all_subsample > misc_tabix + ruleorder: copy_parabricks_tn_vcf_aml_subsample > misc_bgzip + ruleorder: copy_parabricks_tn_tbi_aml_subsample > misc_tabix + ruleorder: copy_parabricks_tn_vcf_tm_subsample > misc_bgzip + ruleorder: copy_parabricks_tn_tbi_tm_subsample > misc_tabix + ruleorder: copy_manta_tn_tbi_all_subsample > misc_tabix + ruleorder: copy_manta_tn_tbi_aml_subsample > misc_tabix + + ruleorder: copy_parabricks_t_vcf > misc_bgzip + ruleorder: copy_parabricks_t_tbi > misc_tabix + ruleorder: copy_parabricks_t_vcf_all_subsample > misc_bgzip + ruleorder: copy_parabricks_t_tbi_all_subsample > misc_tabix + ruleorder: copy_parabricks_t_vcf_aml_subsample > misc_bgzip + ruleorder: copy_parabricks_t_tbi_aml_subsample > misc_tabix + ruleorder: copy_parabricks_t_vcf_tm_subsample > misc_bgzip + ruleorder: copy_parabricks_t_tbi_tm_subsample > misc_tabix + ruleorder: copy_manta_t_tbi_all_subsample > misc_tabix + ruleorder: copy_manta_t_tbi_aml_subsample > misc_tabix elif aligner == "bwa_sentieon": @@ -77,7 +77,7 @@ elif aligner == "bwa_sentieon": rule all: input: - unpack(compile_output_list), + unpack(compile_output_file_list), module annotation: @@ -187,7 +187,6 @@ use rule star from alignment as alignment_star with: log_final=temp("alignment/star/{sample}_{type}.Log.final.out"), - use rule samtools_index from alignment as alignment_samtools_index diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 05fb024..36ddd7d 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -93,7 +93,7 @@ validate(units, schema="../schemas/units.schema.yaml") ### Read output_files for cp rules with open(config["output"]) as output: - output_json = json.load(output) + output_spec = yaml.safe_load(output.read()) ### Set wildcard constraints @@ -228,15 +228,18 @@ def get_json_for_merge_cnv_json(wildcards): return 
["reports/cnv_html_report/{sample}_{type}.{caller}.{tc_method}.json".format(caller=c, **wildcards) for c in callers] -def compile_output_list(wildcards): +def compile_output_file_list(wildcards): + outdir = pathlib.Path(output_spec["directory"]) output_files = [] - types = type_generator(set([unit.type for unit in units.itertuples()])) + output_fullpath = [] + chromosome_numbers = ["X", "Y"] chromosome_numbers.extend(range(1, 23)) - for output in output_json: + for filedef in output_spec["files"]: + # add all output that is not TN output_files += set( [ - output.format( + filedef["output"].format( sample=sample, type=unit_type, chr=chromosome_number, @@ -247,16 +250,18 @@ def compile_output_list(wildcards): for chromosome_number in chromosome_numbers for sample in get_samples(samples) for unit_type in get_unit_types(units, sample) - if unit_type in set(output_json[output]["types"]) + if unit_type in set(filedef["types"]) for flowcell in set([u.flowcell for u in units.loc[(sample, unit_type)].dropna().itertuples()]) for barcode in set([u.barcode for u in units.loc[(sample, unit_type)].dropna().itertuples()]) for lane in set([u.lane for u in units.loc[(sample, unit_type)].dropna().itertuples()]) ] ) - for output in output_json: + + # Iterate all files again and add all TN files for samples that have both T and N in units + for filedef in output_spec["files"]: output_files += set( [ - output.format( + filedef["output"].format( sample=sample, type=unit_type, chr=chromosome_number, @@ -264,45 +269,58 @@ def compile_output_list(wildcards): for chromosome_number in chromosome_numbers for sample in get_samples(samples) for unit_type in type_generator(get_unit_types(units, sample)) - if unit_type in set(output_json[output]["types"]) and unit_type == "TN" + if unit_type in set(filedef["types"]) and unit_type == "TN" ] ) - return list(set(output_files)) - - -def generate_copy_code(workflow, output_json): - code = "" - for result, values in output_json.items(): - if 
values["file"] is not None: - input_file = values["file"] - output_file = result - rule_name = values["name"] - mem_mb = config.get("_copy", {}).get("mem_mb", config["default_resources"]["mem_mb"]) - mem_per_cpu = config.get("_copy", {}).get("mem_mb", config["default_resources"]["mem_mb"]) - partition = config.get("_copy", {}).get("partition", config["default_resources"]["partition"]) - threads = config.get("_copy", {}).get("threads", config["default_resources"]["threads"]) - time = config.get("_copy", {}).get("time", config["default_resources"]["time"]) - copy_container = config.get("_copy", {}).get("container", config["default_container"]) - result_file = os.path.basename(output_file) - code += f'@workflow.rule(name="{rule_name}")\n' - code += f'@workflow.input("{input_file}")\n' - code += f'@workflow.output("{output_file}")\n' - if "{chr}" in output_file: - code += f'@workflow.log("logs/{rule_name}_{result_file}_chr{{chr}}.log")\n' - else: - code += f'@workflow.log("logs/{rule_name}_{result_file}.log")\n' - code += f'@workflow.container("{copy_container}")\n' - code += f'@workflow.resources(time = "{time}", threads = {threads}, mem_mb = {mem_mb}, mem_per_cpu = {mem_per_cpu}, partition = "{partition}")\n' - code += '@workflow.shellcmd("cp {input} {output}")\n\n' - code += "@workflow.run\n" - code += ( - f"def __rule_{rule_name}(input, output, params, wildcards, threads, resources, log, version, rule, " - "conda_env, container_img, singularity_args, use_singularity, env_modules, bench_record, jobid, is_shell, " - "bench_iteration, cleanup_scripts, shadow_dir, edit_notebook, conda_base_path, basedir, runtime_sourcecache_path, " - "__is_snakemake_rule_func=True):\n" - '\tshell ( "(cp --preserve=timestamps {input[0]} {output[0]}) &> {log}" , bench_record=bench_record, bench_iteration=bench_iteration)\n\n' - ) - exec(compile(code, "result_to_copy", "exec"), workflow.globals) - - -generate_copy_code(workflow, output_json) + # Add directory to beginning of each 
outputfile + for op in output_files: + output_fullpath.append(outdir / Path(op)) + + return list(set(output_fullpath)) + + +def generate_copy_rules(output_spec): + output_directory = pathlib.Path(output_spec["directory"]) + rulestrings = [] + + for f in output_spec["files"]: + if f["input"] is None: + continue + + rule_name = "copy_{}".format("_".join(re.sub(r"[\"'-.,]", "", f["name"].strip().lower()).split())) + input_file = pathlib.Path(f["input"]) + output_file = output_directory / pathlib.Path(f["output"]) + + mem_mb = config.get("_copy", {}).get("mem_mb", config["default_resources"]["mem_mb"]) + mem_per_cpu = config.get("_copy", {}).get("mem_per_cpu", config["default_resources"]["mem_per_cpu"]) + partition = config.get("_copy", {}).get("partition", config["default_resources"]["partition"]) + threads = config.get("_copy", {}).get("threads", config["default_resources"]["threads"]) + time = config.get("_copy", {}).get("time", config["default_resources"]["time"]) + copy_container = config.get("_copy", {}).get("container", config["default_container"]) + + rule_code = "\n".join( + [ + f'@workflow.rule(name="{rule_name}")', + f'@workflow.input("{input_file}")', + f'@workflow.output("{output_file}")', + f'@workflow.log("logs/{rule_name}_{output_file.name}.log")', + f'@workflow.container("{copy_container}")', + f'@workflow.resources(time="{time}", threads={threads}, mem_mb="{mem_mb}", ' + f'mem_per_cpu={mem_per_cpu}, partition="{partition}")', + '@workflow.shellcmd("cp --preserve=timestamps -r {input} {output}")', + "@workflow.run\n", + f"def __rule_{rule_name}(input, output, params, wildcards, threads, resources, " + "log, version, rule, conda_env, container_img, singularity_args, use_singularity, " + "env_modules, bench_record, jobid, is_shell, bench_iteration, cleanup_scripts, " + "shadow_dir, edit_notebook, conda_base_path, basedir, runtime_sourcecache_path, " + "__is_snakemake_rule_func=True):", + '\tshell("(cp --preserve=timestamps -r {input[0]} {output[0]}) &> 
{log}", bench_record=bench_record, ' + "bench_iteration=bench_iteration)\n\n", + ] + ) + rulestrings.append(rule_code) + + exec(compile("\n".join(rulestrings), "copy_result_files", "exec"), workflow.globals) + + +generate_copy_rules(output_spec) From a6291041d00d90d53cd84aa730df0fe1034de7db Mon Sep 17 00:00:00 2001 From: Arielle R Munters Date: Thu, 24 Oct 2024 10:51:58 +0200 Subject: [PATCH 5/7] fix: add ruleorder and update multiqc rna config --- config/multiqc_rna_config.yaml | 12 ++++++------ workflow/Snakefile | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/config/multiqc_rna_config.yaml b/config/multiqc_rna_config.yaml index d0374b7..71a8c43 100644 --- a/config/multiqc_rna_config.yaml +++ b/config/multiqc_rna_config.yaml @@ -67,19 +67,19 @@ custom_table_header_config: suffix: "" ZERO_CVG_TARGETS_PCT: suffix: "" - star-total_reads: + total_reads: suffix: "" title: "Total readpairs [M]" - star-mapped_percent: + mapped_percent: suffix: "" title: "Aligned [%]" - star-uniquely_mapped_percent: + uniquely_mapped_percent: suffix: "" title: "Uniq aligned [%]" - star-multimapped: + multimapped: suffix: "" title: "Multimapped [M]" - sortmerna-rRNA_pct: + rRNA_pct: suffix: "" title: "rRNA [%]" @@ -128,4 +128,4 @@ table_columns_visible: # MEAN_READ_LENGTH: # title: "Mean Read Length" # description: "Mean read length from Picard Alignment Summary Metrics" -# format: "{:.1f}" \ No newline at end of file +# format: "{:.1f}" diff --git a/workflow/Snakefile b/workflow/Snakefile index 27511d3..3ce43c6 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -21,6 +21,7 @@ ruleorder: cnv_sv_manta_run_workflow_tn > misc_tabix ruleorder: cnv_sv_manta_run_workflow_tn > misc_bgzip ruleorder: cnv_sv_manta_run_workflow_t > misc_tabix ruleorder: cnv_sv_manta_run_workflow_t > misc_bgzip +ruleorder: fix_af > filtering_filter_vcf ruleorder: gatk_model_segments > cnv_sv_gatk_model_segments ruleorder: copy_parabricks_tn_vcf > misc_bgzip From 
41ed4c547fc2069bbe95aa31bb71faf1373840f2 Mon Sep 17 00:00:00 2001 From: Arielle R Munters Date: Thu, 24 Oct 2024 11:10:44 +0200 Subject: [PATCH 6/7] fix: update requirements to hydra genetics 3.0.0 --- requirements.txt | 66 ++++++++---------------------------------------- 1 file changed, 10 insertions(+), 56 deletions(-) diff --git a/requirements.txt b/requirements.txt index 55a725a..0df967a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,56 +1,10 @@ -appdirs==1.4.4 -attrs==23.1.0 -certifi==2023.7.22 -charset-normalizer==3.3.2 -click==8.1.7 -colorama==0.4.6 -commonmark==0.9.1 -ConfigArgParse==1.7 -connection-pool==0.0.3 -datrie==0.8.2 -docutils==0.20.1 -dpath==2.1.6 -drmaa==0.7.9 -fastjsonschema==2.18.1 -gitdb==4.0.11 -GitPython==3.1.40 -hydra-genetics==1.3.0 -idna==3.4 -Jinja2==3.0.1 -jsonschema==4.19.2 -jsonschema-specifications==2023.7.1 -jupyter-core==5.5.0 -MarkupSafe==2.1.3 -nbformat==5.9.2 -networkx==3.2.1 -numpy==1.26.1 -pandas==1.5.2 -pip==20.2.4 -plac==1.4.1 -platformdirs==3.11.0 -psutil==5.9.6 -PuLP==2.7.0 -pyaml==23.9.7 -Pygments==2.16.1 -pysam==0.22.0 -python-dateutil==2.8.2 -pytz==2023.3.post1 -PyYAML==6.0.1 -referencing==0.30.2 -requests==2.31.0 -reretry==0.11.8 -rich==10.9.0 -rpds-py==0.12.0 -setuptools==50.3.2 -six==1.16.0 -smart-open==6.4.0 -smmap==5.0.1 -snakemake==7.19.1 -stopit==1.1.2 -tabulate==0.8.10 -throttler==1.2.2 -toposort==1.10 -traitlets==5.13.0 -urllib3==2.0.7 -wrapt==1.15.0 -yte==1.5.1 +pulp<2.8 +hydra-genetics==3.0.0 +pandas>=1.3.1 +snakemake~=7.32 +singularity==3.0.0 +jinja2==3.0.1 +networkx +pyyaml +drmaa==0.7.9 # if using drmaa scheduler +smart_open<7.0.0 From d8c349fa310e84a373a3f401c797913b9f52561f Mon Sep 17 00:00:00 2001 From: Arielle R Munters Date: Thu, 24 Oct 2024 11:28:49 +0200 Subject: [PATCH 7/7] style: edit pycodestyle errors --- workflow/scripts/peddy_create_ped.py | 5 ++++- workflow/scripts/sample_order_multiqc.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git 
a/workflow/scripts/peddy_create_ped.py b/workflow/scripts/peddy_create_ped.py index d4fd5d7..cbd8581 100644 --- a/workflow/scripts/peddy_create_ped.py +++ b/workflow/scripts/peddy_create_ped.py @@ -19,4 +19,7 @@ else: sex = "0" with open("qc/peddy/" + line[header_line.index("sample")] + ".peddy.fam", "w+") as pedfile: - pedfile.write("\t".join([line[header_line.index("sample")], line[header_line.index("sample")] + "_T", "0", "0", sex, "-9"]) + "\n") + pedfile.write( + "\t".join([line[header_line.index("sample")], line[header_line.index("sample")] + "_T", "0", "0", sex, "-9"]) + + "\n" + ) diff --git a/workflow/scripts/sample_order_multiqc.py b/workflow/scripts/sample_order_multiqc.py index 8b084d1..6dcf840 100644 --- a/workflow/scripts/sample_order_multiqc.py +++ b/workflow/scripts/sample_order_multiqc.py @@ -15,7 +15,7 @@ # In case of missing S-index in fastq1-filename set s_index to 99 (last) try: s_index = int(s_pattern.search(fastq).group(1)) - except: + except AttributeError: s_index = 99 # If same sample sequenced twice use latest runs s_index for old_sample, old_s, old_lab, old_type in sample_order_duplicates: