From a4fd5647a0f197266a3a32a430e1e1a202bf4663 Mon Sep 17 00:00:00 2001
From: jonca79 <54137490+jonca79@users.noreply.github.com>
Date: Fri, 18 Oct 2024 11:11:39 +0200
Subject: [PATCH 01/14] fix: jumble output is dependent on design bed name

---
 config/output_reference_files.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/output_reference_files.yaml b/config/output_reference_files.yaml
index 3afc91e3..9315171a 100644
--- a/config/output_reference_files.yaml
+++ b/config/output_reference_files.yaml
@@ -5,7 +5,7 @@ files:
     types:
       - N
   - name: jumble_pon
-    input: references/jumble_reference/design.bed.reference.RDS
+    input: references/jumble_reference/pool1_pool2.sort.merged.padded20.cnv200.hg19.split_fusion_genes.reannotated.230222.bed.reference.RDS
     output: result/jumble.PoN.RDS
     types:
       - N

From 64db79b22dd09eedb38a091b38b93e7b78d6fea6 Mon Sep 17 00:00:00 2001
From: jonca79 <54137490+jonca79@users.noreply.github.com>
Date: Fri, 18 Oct 2024 12:43:40 +0200
Subject: [PATCH 02/14] feat: make jumble reference name based on config

---
 workflow/rules/common_references.smk | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/workflow/rules/common_references.smk b/workflow/rules/common_references.smk
index 82747999..82e7219f 100644
--- a/workflow/rules/common_references.smk
+++ b/workflow/rules/common_references.smk
@@ -54,11 +54,12 @@ def compile_output_list(wildcards):
     for filedef in output_spec["files"]:
         output_files += set(
             [
-                filedef["output"].format(sample=sample, type=unit_type, caller=caller)
+                filedef["output"].format(sample=sample, type=unit_type, caller=caller, design=design)
                 for sample in get_samples(samples)
                 for unit_type in get_unit_types(units, sample)
                 if unit_type in set(filedef["types"]).intersection(types)
                 for caller in config["bcbio_variation_recall_ensemble"]["callers"]
+                for design in config["reference"]["design_bed"]
             ]
         )
     return list(set(output_files))

From 39e9d8f1964f0153be1372614e78b57a933c57af Mon Sep 17 00:00:00 2001
From: jonca79 <54137490+jonca79@users.noreply.github.com>
Date: Fri, 18 Oct 2024 12:44:54 +0200
Subject: [PATCH 03/14] fix: rm hardcoded jumble reference name

---
 config/output_reference_files.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/output_reference_files.yaml b/config/output_reference_files.yaml
index 9315171a..988a610e 100644
--- a/config/output_reference_files.yaml
+++ b/config/output_reference_files.yaml
@@ -5,7 +5,7 @@ files:
     types:
       - N
   - name: jumble_pon
-    input: references/jumble_reference/pool1_pool2.sort.merged.padded20.cnv200.hg19.split_fusion_genes.reannotated.230222.bed.reference.RDS
+    input: references/jumble_reference/{design}.reference.RDS
     output: result/jumble.PoN.RDS
     types:
       - N

From 67c4be7805a34b21120cfbf4661d245071624daa Mon Sep 17 00:00:00 2001
From: jonca79 <54137490+jonca79@users.noreply.github.com>
Date: Fri, 18 Oct 2024 12:50:27 +0200
Subject: [PATCH 04/14] fix: only use file name and not path

---
 workflow/rules/common_references.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/rules/common_references.smk b/workflow/rules/common_references.smk
index 82e7219f..509ec16e 100644
--- a/workflow/rules/common_references.smk
+++ b/workflow/rules/common_references.smk
@@ -59,7 +59,7 @@ def compile_output_list(wildcards):
                 for unit_type in get_unit_types(units, sample)
                 if unit_type in set(filedef["types"]).intersection(types)
                 for caller in config["bcbio_variation_recall_ensemble"]["callers"]
-                for design in config["reference"]["design_bed"]
+                for design in config["reference"]["design_bed"].split("/")[-1]
             ]
         )
     return list(set(output_files))

From 35e4514931c62486f11f86287bb5e0afea06b8ec Mon Sep 17 00:00:00 2001
From: jonca79 <54137490+jonca79@users.noreply.github.com>
Date: Fri, 18 Oct 2024 12:51:20 +0200
Subject: [PATCH 05/14] fix: add design wildcard to output

---
 config/output_reference_files.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/output_reference_files.yaml b/config/output_reference_files.yaml
index 988a610e..6c1b5d96 100644
--- a/config/output_reference_files.yaml
+++ b/config/output_reference_files.yaml
@@ -6,7 +6,7 @@ files:
       - N
   - name: jumble_pon
     input: references/jumble_reference/{design}.reference.RDS
-    output: result/jumble.PoN.RDS
+    output: result/jumble.{design}.PoN.RDS
     types:
       - N
   - name: gatk_pon

From 9c5d57016e38db16fb08e27a389c5b53d394e317 Mon Sep 17 00:00:00 2001
From: jonca79 <54137490+jonca79@users.noreply.github.com>
Date: Fri, 18 Oct 2024 13:20:02 +0200
Subject: [PATCH 06/14] fix: design name as input to rule all

---
 workflow/rules/common_references.smk | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/workflow/rules/common_references.smk b/workflow/rules/common_references.smk
index 509ec16e..7b757665 100644
--- a/workflow/rules/common_references.smk
+++ b/workflow/rules/common_references.smk
@@ -54,12 +54,11 @@ def compile_output_list(wildcards):
     for filedef in output_spec["files"]:
         output_files += set(
             [
-                filedef["output"].format(sample=sample, type=unit_type, caller=caller, design=design)
+                filedef["output"].format(sample=sample, type=unit_type, caller=caller, design=config["reference"]["design_bed"].split("/")[-1])
                 for sample in get_samples(samples)
                 for unit_type in get_unit_types(units, sample)
                 if unit_type in set(filedef["types"]).intersection(types)
                 for caller in config["bcbio_variation_recall_ensemble"]["callers"]
-                for design in config["reference"]["design_bed"].split("/")[-1]
             ]
         )
     return list(set(output_files))

From 1b5e076e0e100b58a48a945c9044af102c0076d8 Mon Sep 17 00:00:00 2001
From: jonca79 <54137490+jonca79@users.noreply.github.com>
Date: Fri, 18 Oct 2024 13:24:47 +0200
Subject: [PATCH 07/14] style: snakefmt

---
 workflow/rules/common_references.smk | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/workflow/rules/common_references.smk b/workflow/rules/common_references.smk
index 7b757665..ecd1d2f5 100644
--- a/workflow/rules/common_references.smk
+++ b/workflow/rules/common_references.smk
@@ -54,7 +54,9 @@ def compile_output_list(wildcards):
     for filedef in output_spec["files"]:
         output_files += set(
             [
-                filedef["output"].format(sample=sample, type=unit_type, caller=caller, design=config["reference"]["design_bed"].split("/")[-1])
+                filedef["output"].format(
+                    sample=sample, type=unit_type, caller=caller, design=config["reference"]["design_bed"].split("/")[-1]
+                )
                 for sample in get_samples(samples)
                 for unit_type in get_unit_types(units, sample)
                 if unit_type in set(filedef["types"]).intersection(types)

From 68d253a359117ba7728d9c7217d6100fb621ad0a Mon Sep 17 00:00:00 2001
From: jonca79 <54137490+jonca79@users.noreply.github.com>
Date: Mon, 21 Oct 2024 12:56:02 +0200
Subject: [PATCH 08/14] fix(report_fusion): fix div by zero

---
 workflow/scripts/report_fusions.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/workflow/scripts/report_fusions.py b/workflow/scripts/report_fusions.py
index 2a560545..0dbe3c10 100644
--- a/workflow/scripts/report_fusions.py
+++ b/workflow/scripts/report_fusions.py
@@ -190,16 +190,16 @@
         if int(Junction_read_count) < housekeeping_genes[gene2][0]:
             continue
     # Min AF for frequent FP gene fusions and housekeeping gene
-    if (gene1 in artefact_gene_dict and gene2 in artefact_gene_dict[gene1]):
+    if (gene1 in artefact_gene_dict and gene2 in artefact_gene_dict[gene1] and artefact_gene_dict[gene1][gene2][3] > 0):
         if int(Junction_read_count) / artefact_gene_dict[gene1][gene2][3] < artefact_gene_dict[gene1][gene2][2]:
             continue
-    if (gene2 in artefact_gene_dict and gene1 in artefact_gene_dict[gene2]):
+    if (gene2 in artefact_gene_dict and gene1 in artefact_gene_dict[gene2] and artefact_gene_dict[gene2][gene1][3] > 0):
         if int(Junction_read_count) / artefact_gene_dict[gene2][gene1][3] < artefact_gene_dict[gene2][gene1][2]:
             continue
-    if gene1 in housekeeping_genes:
+    if gene1 in housekeeping_genes and housekeeping_genes[gene1][3] > 0:
         if int(Junction_read_count) / housekeeping_genes[gene1][3] < housekeeping_genes[gene1][2]:
             continue
-    if gene2 in housekeeping_genes:
+    if gene2 in housekeeping_genes and housekeeping_genes[gene2][3] > 0:
         if int(Junction_read_count) / housekeeping_genes[gene2][3] < housekeeping_genes[gene2][2]:
             continue
     breakpoint1 = lline[7][:-2]
@@ -282,16 +282,16 @@
         if int(Spanning_reads_unique) < housekeeping_genes[gene2][1]:
             continue
     # Min AF for frequent FP gene fusions and housekeeping gene
-    if (gene1 in artefact_gene_dict and gene2 in artefact_gene_dict[gene1]):
+    if (gene1 in artefact_gene_dict and gene2 in artefact_gene_dict[gene1] and artefact_gene_dict[gene1][gene2][3] > 0):
         if int(Spanning_reads_unique) / artefact_gene_dict[gene1][gene2][3] < artefact_gene_dict[gene1][gene2][2]:
             continue
-    if (gene2 in artefact_gene_dict and gene1 in artefact_gene_dict[gene2]):
+    if (gene2 in artefact_gene_dict and gene1 in artefact_gene_dict[gene2] and artefact_gene_dict[gene2][gene1][3] > 0):
         if int(Spanning_reads_unique) / artefact_gene_dict[gene2][gene1][3] < artefact_gene_dict[gene2][gene1][2]:
             continue
-    if gene1 in housekeeping_genes:
+    if gene1 in housekeeping_genes and housekeeping_genes[gene1][3] > 0:
         if int(Spanning_reads_unique) / housekeeping_genes[gene1][3] < housekeeping_genes[gene1][2]:
             continue
-    if gene2 in housekeeping_genes:
+    if gene2 in housekeeping_genes and housekeeping_genes[gene2][3] > 0:
         if int(Spanning_reads_unique) / housekeeping_genes[gene2][3] < housekeeping_genes[gene2][2]:
             continue
     # Flag fusions annotated that are fusions with very high probability

From 996891dcc486e4a578f6bc6f8777395bb175720c Mon Sep 17 00:00:00 2001
From: jonca79 <54137490+jonca79@users.noreply.github.com>
Date: Tue, 22 Oct 2024 08:46:07 +0200
Subject: [PATCH 09/14] fix(cnvkit_batch): fix input variable name to match
 wrapper

---
 workflow/Snakefile_references.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/Snakefile_references.smk b/workflow/Snakefile_references.smk
index cfed69cd..89df70b8 100644
--- a/workflow/Snakefile_references.smk
+++ b/workflow/Snakefile_references.smk
@@ -54,7 +54,7 @@ use rule cnvkit_batch from cnv_sv as cnv_sv_cnvkit_batch with:
     input:
         bam="alignment/samtools_merge_bam/{sample}_{type}.bam",
         bai="alignment/samtools_merge_bam/{sample}_{type}.bam.bai",
-        cnv_reference="references/cnvkit_build_normal_reference/cnvkit.PoN.cnn",
+        reference="references/cnvkit_build_normal_reference/cnvkit.PoN.cnn",
 
 
 use rule background_annotation from annotation as annotation_background_annotation with:

From 76f2cc8dbe418adb069feae59e89f10ab5a2c588 Mon Sep 17 00:00:00 2001
From: jonca79 <jonas.almlof@igp.uu.se>
Date: Tue, 22 Oct 2024 14:13:25 +0200
Subject: [PATCH 10/14] fix: rm multiple sample lines in MultiQC_RNA

---
 config/reports/multiqc_config_dna.yaml | 10 ++++++++++
 config/reports/multiqc_config_rna.yaml | 19 ++++++++++++++++---
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/config/reports/multiqc_config_dna.yaml b/config/reports/multiqc_config_dna.yaml
index 10752422..9f8233b4 100644
--- a/config/reports/multiqc_config_dna.yaml
+++ b/config/reports/multiqc_config_dna.yaml
@@ -1,3 +1,13 @@
+title: "Clinical Genomics MultiQC Report"
+subtitle: "Reference used: GRCh37"
+intro_text: "The MultiQC DNA report summarises analysis results from GMS560 panel data that has been analysed by the Twist Solid pipeline (https://github.com/genomic-medicine-sweden/Twist_Solid)."
+
+report_header_info:
+  - Contact E-mail: "igp-klinsek-bioinfo@lists.uu.se"
+  - Application Type: "Bioinformatic analysis of GMS560 panel for solid cancers"
+
+show_analysis_paths: True
+
 #decimalPoint_format: ','
 extra_fn_clean_exts: ##from this until end
     - '.duplication_metrics'
diff --git a/config/reports/multiqc_config_rna.yaml b/config/reports/multiqc_config_rna.yaml
index 3f59c443..8a4d9a9f 100644
--- a/config/reports/multiqc_config_rna.yaml
+++ b/config/reports/multiqc_config_rna.yaml
@@ -1,10 +1,23 @@
+title: "Clinical Genomics MultiQC Report"
+subtitle: "Reference used: GRCh37"
+intro_text: "The MultiQC RNA report summarises analysis results from GMS560 panel data that has been analysed by the Twist Solid pipeline (https://github.com/genomic-medicine-sweden/Twist_Solid)."
+
+report_header_info:
+  - Contact E-mail: "igp-klinsek-bioinfo@lists.uu.se"
+  - Application Type: "Bioinformatic analysis of GMS560 panel for solid cancers"
+
+show_analysis_paths: True
+
+
 #decimalPoint_format: ','
 extra_fn_clean_exts: ##from this until end
     - '.duplication_metrics'
     - '.HsMetrics'
     - '.alignment_summary_metrics'
+    # - type: regex
+    #   pattern: '_fastq[12]'
     - type: regex
-      pattern: '_fastq[12]'
+      pattern: '_S[0-9]'
 #extra_fn_clean_trim:   #if found in beginning or end
 #fn_ignore_dirs:
 #fn_ignore_files:
@@ -33,8 +46,8 @@ table_columns_visible:
   "Samtools: stats":
     error_rate: False
     non-primary_alignments: False
-    reads_mapped: False
-    reads_mapped_percent: False
+    reads_mapped: True
+    reads_mapped_percent: True
     reads_properly_paired_percent: False
     reads_MQ0_percent: False
     raw_total_sequences: False

From 024ebcf8b6f6976a0ddc886cf53c19757be80275 Mon Sep 17 00:00:00 2001
From: jonca79 <jonas.almlof@igp.uu.se>
Date: Tue, 22 Oct 2024 14:15:38 +0200
Subject: [PATCH 11/14] fix: rm multiple sample lines in MultiQC_RNA

---
 config/reports/multiqc_config_rna.yaml | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/config/reports/multiqc_config_rna.yaml b/config/reports/multiqc_config_rna.yaml
index 8a4d9a9f..de8e342d 100644
--- a/config/reports/multiqc_config_rna.yaml
+++ b/config/reports/multiqc_config_rna.yaml
@@ -14,13 +14,13 @@ extra_fn_clean_exts: ##from this until end
     - '.duplication_metrics'
     - '.HsMetrics'
     - '.alignment_summary_metrics'
-    # - type: regex
-    #   pattern: '_fastq[12]'
     - type: regex
-      pattern: '_S[0-9]'
+      pattern: '_fastq[12]'
 #extra_fn_clean_trim:   #if found in beginning or end
 #fn_ignore_dirs:
-#fn_ignore_files:
+fn_ignore_files:
+    - type: regex
+      pattern: '_S[0-9]'
 
 use_filename_as_sample_name:
   - picard/hsmetrics

From 0470ecbf13dc09bc72a42b543cdda562048cd404 Mon Sep 17 00:00:00 2001
From: jonca79 <jonas.almlof@igp.uu.se>
Date: Tue, 22 Oct 2024 15:56:50 +0200
Subject: [PATCH 12/14] fix: rm multiple sample lines in MultiQC_RNA

---
 config/reports/multiqc_config_rna.yaml | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/config/reports/multiqc_config_rna.yaml b/config/reports/multiqc_config_rna.yaml
index de8e342d..26f998cc 100644
--- a/config/reports/multiqc_config_rna.yaml
+++ b/config/reports/multiqc_config_rna.yaml
@@ -16,11 +16,13 @@ extra_fn_clean_exts: ##from this until end
     - '.alignment_summary_metrics'
     - type: regex
       pattern: '_fastq[12]'
-#extra_fn_clean_trim:   #if found in beginning or end
-#fn_ignore_dirs:
-fn_ignore_files:
     - type: regex
       pattern: '_S[0-9]'
+    - type: regex
+      pattern: '_R[12]'
+#extra_fn_clean_trim:   #if found in beginning or end
+#fn_ignore_dirs:
+#fn_ignore_files:
 
 use_filename_as_sample_name:
   - picard/hsmetrics

From 94f445ffb7e9e46dd916370a1fa5f0117bd1506c Mon Sep 17 00:00:00 2001
From: jonca79 <jonas.almlof@igp.uu.se>
Date: Tue, 22 Oct 2024 16:13:42 +0200
Subject: [PATCH 13/14] fix: rm multiple sample lines in MultiQC_RNA

---
 config/reports/multiqc_config_rna.yaml | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/config/reports/multiqc_config_rna.yaml b/config/reports/multiqc_config_rna.yaml
index 26f998cc..22cca865 100644
--- a/config/reports/multiqc_config_rna.yaml
+++ b/config/reports/multiqc_config_rna.yaml
@@ -14,12 +14,8 @@ extra_fn_clean_exts: ##from this until end
     - '.duplication_metrics'
     - '.HsMetrics'
     - '.alignment_summary_metrics'
-    - type: regex
-      pattern: '_fastq[12]'
-    - type: regex
-      pattern: '_S[0-9]'
-    - type: regex
-      pattern: '_R[12]'
+    - type: regex_keep
+      pattern: '[0-9A-Z-]+'
 #extra_fn_clean_trim:   #if found in beginning or end
 #fn_ignore_dirs:
 #fn_ignore_files:

From 733ac6996c70175c8604e1ba55245b0bcc6b67fb Mon Sep 17 00:00:00 2001
From: jonca79 <54137490+jonca79@users.noreply.github.com>
Date: Wed, 23 Oct 2024 11:23:15 +0200
Subject: [PATCH 14/14] fix: sample mixup in percentage

---
 workflow/scripts/sample_mixup_check.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/scripts/sample_mixup_check.py b/workflow/scripts/sample_mixup_check.py
index 46048cf3..3e2db10b 100644
--- a/workflow/scripts/sample_mixup_check.py
+++ b/workflow/scripts/sample_mixup_check.py
@@ -65,7 +65,7 @@ def read_vcf(vcf_filename, vcf_dict, samples):
         if rna_samples[rna_sample][dna_sample] > best_gt_match:
             best_dna_sample = dna_sample
             best_gt_match = rna_samples[rna_sample][dna_sample]
-    p_match = round(best_gt_match / 42.0, 1)
+    p_match = round(best_gt_match * 100 / 42.0, 1)
     report.write(f"{rna_sample}\t{best_dna_sample}\t{best_gt_match}\t{p_match}%\t")
     if p_match > match_cutoff:
         report.write(f"yes\n")