Skip to content

Commit

Permalink
Merge branch 'develop' into update_xlsx
Browse files Browse the repository at this point in the history
  • Loading branch information
elleira committed Oct 24, 2024
2 parents 86eb32c + cd98f4c commit 69fcc9c
Show file tree
Hide file tree
Showing 15 changed files with 703 additions and 313 deletions.
2 changes: 1 addition & 1 deletion .tests/integration/config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
output: "../../config/output_files.json"
output: "../../config/output_files.yaml"

reference:
design_bed: "reference/homo_sapiens.wgs.bed"
Expand Down
Empty file.
Empty file.
3 changes: 2 additions & 1 deletion .tests/integration/samples.tsv
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
sample tumor_content sex
NA12878 1.0 O
NA12878 1.0 K
HD829 1.0 O
1 change: 1 addition & 0 deletions .tests/integration/units.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ sample type platform machine flowcell lane barcode fastq1 fastq2 adapter
NA12878 T illumina NovaSeq 1FLOWCELL L001 ACGGAACA input/NA12878-T_S2_R1.fq.gz input/NA12878-T_S2_R2.fq.gz AAAA,TTTT
NA12878 N illumina NovaSeq 1FLOWCELL L001 ACGGAACA input/NA12878-N_S1_R1.fq.gz input/NA12878-N_S1_R2.fq.gz CCCC,GGGG
NA12878 R illumina NovaSeq 1FLOWCELL L001 ACGGAACA input/NA12878-R_S10_R1.fq.gz input/NA12878-R_S10_R2.fq.gz CCCC,GGGG
HD829 T illumina NovaSeq 1FLOWCELL L001 ACGGAACA input/HD829-T_S3_R1.fq.gz input/HD829-T_S3_R2.fq.gz AAAA,TTTT
8 changes: 5 additions & 3 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
resources: "resources.yaml"
samples: "samples.tsv"
units: "units.tsv"
output: "config/output_files.json"
output: "config/output_files.yaml"

aligner: "bwa_gpu" # bwa_gpu or bwa_sentieon

Expand All @@ -14,7 +14,7 @@ modules:
fusions: "12f8354"
parabricks: "v1.1.0"
prealignment: "v1.1.0"
qc: "v0.3.0"
qc: "53c3a82" #"v0.3.0"
reports: "7c8b8c5"
snv_indels: "v0.6.0"
misc: "v0.1.0"
Expand Down Expand Up @@ -167,7 +167,7 @@ mosdepth_bed:
design_bed: ""

multiqc:
container: "docker://hydragenetics/multiqc:1.11"
container: "docker://hydragenetics/multiqc:1.21"
reports:
DNA:
config: "config/multiqc_dna_config.yaml"
Expand Down Expand Up @@ -217,6 +217,8 @@ multiqc:
- "prealignment/sortmerna/{sample}_{type}.rrna.log"
- "qc/multiqc/RNA_number.table.tsv"
- "qc/picard_collect_alignment_summary_metrics/{sample}_{type}.alignment_summary_metrics.txt"
- "alignment/star/{sample}_{type}.Log.final.out"
- "alignment/star/{sample}_{type}.ReadsPerGene.out.tab"

pbrun_fq2bam:
container: "docker://nvcr.io/nvidia/clara/clara-parabricks:4.0.0-1"
Expand Down
158 changes: 97 additions & 61 deletions config/multiqc_dna_config.yaml
Original file line number Diff line number Diff line change
@@ -1,25 +1,47 @@
decimalPoint_format: ','
extra_fn_clean_exts: ##from this until end
- '.duplication_metrics'
- type: regex
pattern: '^HG[0-9]+-[A-Za-z0-9-]+_'
- type: regex
pattern: '_fastq[12]'
extra_fn_clean_trim:
- 'Sample_WA-3560_'

# Sample grouping for MultiQC tables: each key is the label of a sub-row and
# each value is the text (plain string or regex) removed from the sample name
# so that rows sharing the remaining name are grouped together.
table_sample_merge:
  "R1": "_R1_001"
  "R2": "_R2_001"
  # Lanes: remove the "_S<n>_L00X" ending to enable grouping per sample.
  # Every lane uses the same "_S[0-9]{1,2}_L00X" shape; L005 previously lacked
  # the "S", so lane-5 names such as "_S3_L005" would not have matched.
  "L008":
    - type: "regex"
      pattern: "_S[0-9]{1,2}_L008"
  "L007":
    - type: "regex"
      pattern: "_S[0-9]{1,2}_L007"
  "L006":
    - type: "regex"
      pattern: "_S[0-9]{1,2}_L006"
  "L005":
    - type: "regex"
      pattern: "_S[0-9]{1,2}_L005"
  "L004":
    - type: "regex"
      pattern: "_S[0-9]{1,2}_L004"
  "L003":
    - type: "regex"
      pattern: "_S[0-9]{1,2}_L003"
  "L002":
    - type: "regex"
      pattern: "_S[0-9]{1,2}_L002"
  "L001":
    - type: "regex"
      pattern: "_S[0-9]{1,2}_L001"

# Key/value pairs listed in the header section of the MultiQC report.
report_header_info:
  - Contact E-mail: "[email protected]"
  # "TruSeq" is the Illumina product spelling (was "TrueSeq").
  - Application Type: "TruSeq PCR Free WGS"
  - Project Type: "Whole Genome"

custom content:
order:
- fastqc
- mosdepth
- fastp
- peddy
- samtools
- picard

sp:
dna_number_table:
fn: "*DNA_number.table.tsv"

custom_data:
dna_number_table:
Expand All @@ -32,11 +54,45 @@ custom_data:
title: "DNA number"
description: "DNA number based on SampleSheet"

sp:
dna_number_table:
fn: "*DNA_number.table.tsv"
# mosdepth custom thresholds
mosdepth_config:
general_stats_coverage:
- 1
- 5
- 10
- 15
- 20
- 30
- 50

# Remove suffix in general stats
custom_table_header_config:
general_stats_table:
raw_total_sequences:
suffix: ""
title: "Total seqs [M]"
reads_mapped:
suffix: ""
title: "Reads mapped [M]"
reads_mapped_percent:
suffix: ""
reads_properly_paired_percent:
suffix: ""
median_coverage:
suffix: ""
10_x_pc:
suffix: ""
30_x_pc:
suffix: ""
50_x_pc:
suffix: ""
PERCENT_DUPLICATION:
suffix: ""
title: "Duplication [%]"
summed_mean:
suffix: ""

# General stats column visibility
table_columns_visible:
FastQC:
percent_duplicates: False
Expand All @@ -45,7 +101,7 @@ table_columns_visible:
percent_fails: False
total_sequences: False
fastp:
pct_adapter: True
pct_adapter: False
pct_surviving: False
after_filtering_gc_content: False
filtering_result_passed_filter_reads: False
Expand All @@ -68,41 +124,33 @@ table_columns_visible:
sex_het_ratio: False
error_sex_check: True
predicted_sex_sex_check: True
Picard:
PCT_PF_READS_ALIGNED: False
"Picard: HsMetrics":
FOLD_ENRICHMENT: False
MEDIAN_TARGET_COVERAGE: False
PCT_TARGET_BASES_30X: False
ZERO_CVG_TARGETS_PCT: False
"Picard: InsertSizeMetrics":
summed_median: False
summed_mean: True
"Picard: Mark Duplicates":
PERCENT_DUPLICATION: True
"Picard: WgsMetrics":
MEDIAN_COVERAGE: False
MEAN_COVERAGE: False
SD_COVERAGE: False
PCT_30X: False
PCT_TARGET_BASES_30X: False
FOLD_ENRICHMENT: False
TOTAL_READS: True
Samtools:
"Samtools: stats":
error_rate: False
non-primary_alignments: False
reads_mapped: False
reads_mapped_percent: True
reads_properly_paired_percent: True
reads_MQ0_percent: False
raw_total_sequences: True

# mosdepth custom thresholds
mosdepth_config:
general_stats_coverage:
- 1
- 5
- 10
- 15
- 20
- 30
- 50
raw_total_sequences: True  # previously from picard

# Patrik's plugin: add custom columns to general stats
# Custom columns to general stats
multiqc_cgs:
Picard:
"Picard: HsMetrics":
FOLD_80_BASE_PENALTY:
title: "Fold80"
description: "Fold80 penalty from picard hs metrics"
Expand All @@ -121,25 +169,17 @@ multiqc_cgs:
max: 100
scale: "RdYlGn-rev"
format: "{:.2%}"
Samtools:
"Samtools: stats":
average_quality:
title: "Average Quality"
description: "Ratio between the sum of base qualities and total length from Samtools stats"
min: 0
max: 60
scale: "RdYlGn"
mosdepth:
20_x_pc: #Cant get it to work
title: "20x percent"
description: "Fraction of genome with at least 20X coverage"
max: 100
min: 0
suffix: "%"
scale: "RdYlGn"

# Applies to all columns regardless of module!
table_columns_placement:
dna_number_table:
"Custom content: dna_number_table":
dna_number: 300
mosdepth:
median_coverage: 601
Expand All @@ -150,7 +190,7 @@ table_columns_placement:
20_x_pc: 604
30_x_pc: 605
50_x_pc: 606
Samtools:
"Samtools: stats":
raw_total_sequences: 500
reads_mapped: 501
reads_mapped_percent: 502
Expand All @@ -166,19 +206,15 @@ table_columns_placement:
error_sex_check: 701
predicted_sex_sex_check: 702
family_id: 703
Picard:
TOTAL_READS: 500
PCT_SELECTED_BASES: 801
FOLD_80_BASE_PENALTY: 802
PCT_PF_READS_ALIGNED: 888
summed_median: 888
PERCENT_DUPLICATION: 803
summed_mean: 804
STANDARD_DEVIATION: 805
ZERO_CVG_TARGETS_PCT: 888
MEDIAN_COVERAGE: 888
MEAN_COVERAGE: 888
SD_COVERAGE: 888
PCT_30X: 888
PCT_TARGET_BASES_30X: 888
"Picard: HsMetrics":
FOLD_ENRICHMENT: 888
MEDIAN_TARGET_COVERAGE: 888
PCT_TARGET_BASES_30X: 888
FOLD_80_BASE_PENALTY: 801
PCT_SELECTED_BASES: 800
ZERO_CVG_TARGETS_PCT: 805
"Picard: InsertSizeMetrics":
summed_median: 803
summed_mean: 803
"Picard: Mark Duplicates":
PERCENT_DUPLICATION: 802
Loading

0 comments on commit 69fcc9c

Please sign in to comment.