clinical-genomics-uppsala · elleira · Oct 24, 2024 · Oct 15, 2024 · Oct 16, 2024 · Oct 17, 2024
diff --git a/.tests/integration/config.yaml b/.tests/integration/config.yaml
@@ -1,4 +1,4 @@
-output: "../../config/output_files.json"
+output: "../../config/output_files.yaml"
 
 reference:
   design_bed: "reference/homo_sapiens.wgs.bed"

diff --git a/.tests/integration/input/HD829-T_S3_R1.fq.gz b/.tests/integration/input/HD829-T_S3_R1.fq.gz
diff --git a/.tests/integration/input/HD829-T_S3_R2.fq.gz b/.tests/integration/input/HD829-T_S3_R2.fq.gz
diff --git a/.tests/integration/samples.tsv b/.tests/integration/samples.tsv
@@ -1,2 +1,3 @@
 sample	tumor_content	sex
-NA12878	1.0	O
+NA12878	1.0	K
+HD829	1.0	O
diff --git a/.tests/integration/units.tsv b/.tests/integration/units.tsv
@@ -2,3 +2,4 @@ sample	type	platform	machine	flowcell	lane	barcode	fastq1	fastq2	adapter
 NA12878	T	illumina	NovaSeq	1FLOWCELL	L001	ACGGAACA	input/NA12878-T_S2_R1.fq.gz	input/NA12878-T_S2_R2.fq.gz	AAAA,TTTT
 NA12878	N	illumina	NovaSeq	1FLOWCELL	L001	ACGGAACA	input/NA12878-N_S1_R1.fq.gz	input/NA12878-N_S1_R2.fq.gz	CCCC,GGGG
 NA12878	R	illumina	NovaSeq	1FLOWCELL	L001	ACGGAACA	input/NA12878-R_S10_R1.fq.gz	input/NA12878-R_S10_R2.fq.gz	CCCC,GGGG
+HD829	T	illumina	NovaSeq	1FLOWCELL	L001	ACGGAACA	input/HD829-T_S3_R1.fq.gz	input/HD829-T_S3_R2.fq.gz	AAAA,TTTT
diff --git a/config/config.yaml b/config/config.yaml
@@ -1,7 +1,7 @@
 resources: "resources.yaml"
 samples: "samples.tsv"
 units: "units.tsv"
-output: "config/output_files.json"
+output: "config/output_files.yaml"
 
 aligner: "bwa_gpu" # bwa_gpu or bwa_sentieon
 
@@ -14,7 +14,7 @@ modules:
   fusions: "12f8354"
   parabricks: "v1.1.0"
   prealignment: "v1.1.0"
-  qc: "v0.3.0"
+  qc: "53c3a82" # previously "v0.3.0"
   reports: "7c8b8c5"
   misc: "v0.1.0"
   sentieon: "b002d39"
@@ -164,7 +164,7 @@ mosdepth_bed:
   design_bed: ""
 
 multiqc:
-  container: "docker://hydragenetics/multiqc:1.11"
+  container: "docker://hydragenetics/multiqc:1.21"
   reports:
     DNA:
       config: "config/multiqc_dna_config.yaml"
@@ -214,6 +214,8 @@ multiqc:
         - "prealignment/sortmerna/{sample}_{type}.rrna.log"
         - "qc/multiqc/RNA_number.table.tsv"
         - "qc/picard_collect_alignment_summary_metrics/{sample}_{type}.alignment_summary_metrics.txt"
+        - "alignment/star/{sample}_{type}.Log.final.out"
+        - "alignment/star/{sample}_{type}.ReadsPerGene.out.tab"
 
 pbrun_fq2bam:
   container: "docker://nvcr.io/nvidia/clara/clara-parabricks:4.0.0-1"

diff --git a/config/multiqc_dna_config.yaml b/config/multiqc_dna_config.yaml
@@ -1,25 +1,47 @@
 decimalPoint_format: ','
 extra_fn_clean_exts: ##from this until end
     - '.duplication_metrics'
+    - type: regex
+      pattern: '^HG[0-9]+-[A-Za-z0-9-]+_'
     - type: regex
       pattern: '_fastq[12]'
-extra_fn_clean_trim:
-  - 'Sample_WA-3560_'
+
+table_sample_merge:  # group label -> sample-name suffix (literal or regex) removed to enable grouping
+  "R1": "_R1_001"
+  "R2": "_R2_001"
+  "L008":  # remove the trailing _S<n>_L008 so rows can be grouped
+    - type: "regex"
+      pattern: "_S[0-9]{1,2}_L008"
+  "L007":  # remove the trailing _S<n>_L007 so rows can be grouped
+    - type: "regex"
+      pattern: "_S[0-9]{1,2}_L007"
+  "L006":  # remove the trailing _S<n>_L006 so rows can be grouped
+    - type: "regex"
+      pattern: "_S[0-9]{1,2}_L006"
+  "L005":  # remove the trailing _S<n>_L005 so rows can be grouped
+    - type: "regex"
+      pattern: "_S[0-9]{1,2}_L005"
+  "L004":  # remove the trailing _S<n>_L004 so rows can be grouped
+    - type: "regex"
+      pattern: "_S[0-9]{1,2}_L004"
+  "L003":  # remove the trailing _S<n>_L003 so rows can be grouped
+    - type: "regex"
+      pattern: "_S[0-9]{1,2}_L003"
+  "L002":  # remove the trailing _S<n>_L002 so rows can be grouped
+    - type: "regex"
+      pattern: "_S[0-9]{1,2}_L002"
+  "L001":  # remove the trailing _S<n>_L001 so rows can be grouped
+    - type: "regex"
+      pattern: "_S[0-9]{1,2}_L001"
 
 report_header_info:
   - Contact E-mail: "[email protected]"
   - Application Type: "TrueSeq PCR Free WGS"
   - Project Type: "Whole Genome"
-
-custom content:
-  order:
-    - fastqc
-    - mosdepth
-    - fastp
-    - peddy
-    - samtools
-    - picard
 
+sp:
+  dna_number_table:
+    fn: "*DNA_number.table.tsv"
 
 custom_data:
   dna_number_table:
@@ -32,11 +54,45 @@ custom_data:
           title: "DNA number"
           description: "DNA number based on SampleSheet"
 
-sp:
-  dna_number_table:
-    fn: "*DNA_number.table.tsv"
+# mosdepth custom thresholds
+mosdepth_config:
+  general_stats_coverage:
+    - 1
+    - 5
+    - 10
+    - 15
+    - 20
+    - 30
+    - 50
 
+# Remove suffix in general stats
+custom_table_header_config:
+  general_stats_table:
+    raw_total_sequences:
+      suffix: ""
+      title: "Total seqs [M]"
+    reads_mapped:
+      suffix: ""
+      title: "Reads mapped [M]"
+    reads_mapped_percent:
+      suffix: ""
+    reads_properly_paired_percent:
+      suffix: ""
+    median_coverage:
+      suffix: ""
+    10_x_pc:
+      suffix: ""
+    30_x_pc:
+      suffix: ""
+    50_x_pc:
+      suffix: ""
+    PERCENT_DUPLICATION:
+      suffix: ""
+      title: "Duplication [%]"
+    summed_mean:
+      suffix: ""
 
+# General stats column visibility
 table_columns_visible:
   FastQC:
     percent_duplicates: False
@@ -45,7 +101,7 @@ table_columns_visible:
     percent_fails: False
     total_sequences: False
   fastp:
-    pct_adapter: True
+    pct_adapter: False
     pct_surviving: False
     after_filtering_gc_content: False
     filtering_result_passed_filter_reads: False
@@ -68,41 +124,33 @@ table_columns_visible:
     sex_het_ratio: False
     error_sex_check: True
     predicted_sex_sex_check: True
-  Picard:
-    PCT_PF_READS_ALIGNED: False
+  "Picard: HsMetrics":
+    FOLD_ENRICHMENT: False
+    MEDIAN_TARGET_COVERAGE: False
+    PCT_TARGET_BASES_30X: False
+    ZERO_CVG_TARGETS_PCT: False
+  "Picard: InsertSizeMetrics":
     summed_median: False
     summed_mean: True
+  "Picard: Mark Duplicates":
     PERCENT_DUPLICATION: True
+  "Picard: WgsMetrics":
     MEDIAN_COVERAGE: False
     MEAN_COVERAGE: False
     SD_COVERAGE: False
     PCT_30X: False
-    PCT_TARGET_BASES_30X: False
-    FOLD_ENRICHMENT: False
-    TOTAL_READS: True
-  Samtools:
+  "Samtools: stats":
     error_rate: False
     non-primary_alignments: False
     reads_mapped: False
     reads_mapped_percent: True
     reads_properly_paired_percent: True
     reads_MQ0_percent: False
-    raw_total_sequences: True
-
-# mosdepth custom thresholds
-mosdepth_config:
-  general_stats_coverage:
-    - 1
-    - 5
-    - 10
-    - 15
-    - 20
-    - 30
-    - 50
+    raw_total_sequences: True # previously from Picard
 
-# Patriks plug in, addera egna columner till general stats
+# Custom columns to general stats
 multiqc_cgs:
-  Picard:
+  "Picard: HsMetrics":
     FOLD_80_BASE_PENALTY:
       title: "Fold80"
       description: "Fold80 penalty from picard hs metrics"
@@ -121,25 +169,17 @@ multiqc_cgs:
       max: 100
       scale: "RdYlGn-rev"
       format: "{:.2%}"
-  Samtools:
+  "Samtools: stats":
     average_quality:
       title: "Average Quality"
       description: "Ratio between the sum of base qualities and total length from Samtools stats"
       min: 0
       max: 60
       scale: "RdYlGn"
-  mosdepth:
-     20_x_pc: #Cant get it to work
-        title: "20x percent"
-        description: "Fraction of genome with at least 20X coverage"
-        max: 100
-        min: 0
-        suffix: "%"
-        scale: "RdYlGn"
 
 # Galler alla kolumner oberoende pa module!
 table_columns_placement:
-  dna_number_table:
+  "Custom content: dna_number_table":
     dna_number: 300
   mosdepth:
     median_coverage: 601
@@ -150,7 +190,7 @@ table_columns_placement:
     20_x_pc: 604
     30_x_pc: 605
     50_x_pc: 606
-  Samtools:
+  "Samtools: stats":
     raw_total_sequences: 500
     reads_mapped: 501
     reads_mapped_percent: 502
@@ -166,19 +206,15 @@ table_columns_placement:
     error_sex_check: 701
     predicted_sex_sex_check: 702
     family_id: 703
-  Picard:
-    TOTAL_READS: 500
-    PCT_SELECTED_BASES: 801
-    FOLD_80_BASE_PENALTY: 802
-    PCT_PF_READS_ALIGNED: 888
-    summed_median: 888
-    PERCENT_DUPLICATION: 803
-    summed_mean: 804
-    STANDARD_DEVIATION: 805
-    ZERO_CVG_TARGETS_PCT: 888
-    MEDIAN_COVERAGE: 888
-    MEAN_COVERAGE: 888
-    SD_COVERAGE: 888
-    PCT_30X: 888
-    PCT_TARGET_BASES_30X: 888
+  "Picard: HsMetrics":
     FOLD_ENRICHMENT: 888
+    MEDIAN_TARGET_COVERAGE: 888
+    PCT_TARGET_BASES_30X: 888
+    FOLD_80_BASE_PENALTY: 801
+    PCT_SELECTED_BASES: 800
+    ZERO_CVG_TARGETS_PCT: 805
+  "Picard: InsertSizeMetrics":
+    summed_median: 803
+    summed_mean: 803
+  "Picard: Mark Duplicates":
+    PERCENT_DUPLICATION: 802