Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: dev to update_xlsx #67

Merged
merged 9 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .tests/integration/config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
output: "../../config/output_files.json"
output: "../../config/output_files.yaml"

reference:
design_bed: "reference/homo_sapiens.wgs.bed"
Expand Down
Empty file.
Empty file.
3 changes: 2 additions & 1 deletion .tests/integration/samples.tsv
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
sample tumor_content sex
NA12878 1.0 O
NA12878 1.0 K
HD829 1.0 O
1 change: 1 addition & 0 deletions .tests/integration/units.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ sample type platform machine flowcell lane barcode fastq1 fastq2 adapter
NA12878 T illumina NovaSeq 1FLOWCELL L001 ACGGAACA input/NA12878-T_S2_R1.fq.gz input/NA12878-T_S2_R2.fq.gz AAAA,TTTT
NA12878 N illumina NovaSeq 1FLOWCELL L001 ACGGAACA input/NA12878-N_S1_R1.fq.gz input/NA12878-N_S1_R2.fq.gz CCCC,GGGG
NA12878 R illumina NovaSeq 1FLOWCELL L001 ACGGAACA input/NA12878-R_S10_R1.fq.gz input/NA12878-R_S10_R2.fq.gz CCCC,GGGG
HD829 T illumina NovaSeq 1FLOWCELL L001 ACGGAACA input/HD829-T_S3_R1.fq.gz input/HD829-T_S3_R2.fq.gz AAAA,TTTT
8 changes: 5 additions & 3 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
resources: "resources.yaml"
samples: "samples.tsv"
units: "units.tsv"
output: "config/output_files.json"
output: "config/output_files.yaml"

aligner: "bwa_gpu" # bwa_gpu or bwa_sentieon

Expand All @@ -14,7 +14,7 @@ modules:
fusions: "12f8354"
parabricks: "v1.1.0"
prealignment: "v1.1.0"
qc: "v0.3.0"
qc: "53c3a82" #"v0.3.0"
reports: "7c8b8c5"
misc: "v0.1.0"
sentieon: "b002d39"
Expand Down Expand Up @@ -164,7 +164,7 @@ mosdepth_bed:
design_bed: ""

multiqc:
container: "docker://hydragenetics/multiqc:1.11"
container: "docker://hydragenetics/multiqc:1.21"
reports:
DNA:
config: "config/multiqc_dna_config.yaml"
Expand Down Expand Up @@ -214,6 +214,8 @@ multiqc:
- "prealignment/sortmerna/{sample}_{type}.rrna.log"
- "qc/multiqc/RNA_number.table.tsv"
- "qc/picard_collect_alignment_summary_metrics/{sample}_{type}.alignment_summary_metrics.txt"
- "alignment/star/{sample}_{type}.Log.final.out"
- "alignment/star/{sample}_{type}.ReadsPerGene.out.tab"

pbrun_fq2bam:
container: "docker://nvcr.io/nvidia/clara/clara-parabricks:4.0.0-1"
Expand Down
158 changes: 97 additions & 61 deletions config/multiqc_dna_config.yaml
Original file line number Diff line number Diff line change
@@ -1,25 +1,47 @@
decimalPoint_format: ','
extra_fn_clean_exts: ##from this until end
- '.duplication_metrics'
- type: regex
pattern: '^HG[0-9]+-[A-Za-z0-9-]+_'
- type: regex
pattern: '_fastq[12]'
extra_fn_clean_trim:
- 'Sample_WA-3560_'

table_sample_merge:
"R1": "_R1_001"
"R2": "_R2_001"
"L008": # to remove SX_L00X from ending to enable grouping
- type: "regex"
pattern: "_S[0-9]{1,2}_L008"
"L007": # to remove SX_L00X from ending to enable grouping
- type: "regex"
pattern: "_S[0-9]{1,2}_L007"
"L006": # to remove SX_L00X from ending to enable grouping
- type: "regex"
pattern: "_S[0-9]{1,2}_L006"
"L005": # to remove SX_L00X from ending to enable grouping
- type: "regex"
pattern: "_[0-9]{1,2}_L005"
"L004": # to remove SX_L00X from ending to enable grouping
- type: "regex"
pattern: "_S[0-9]{1,2}_L004"
"L003": # to remove SX_L00X from ending to enable grouping
- type: "regex"
pattern: "_S[0-9]{1,2}_L003"
"L002": # to remove SX_L00X from ending to enable grouping
- type: "regex"
pattern: "_S[0-9]{1,2}_L002"
"L001": # to remove SX_L00X from ending to enable grouping
- type: "regex"
pattern: "_S[0-9]{1,2}_L001"

report_header_info:
- Contact E-mail: "[email protected]"
- Application Type: "TrueSeq PCR Free WGS"
- Project Type: "Whole Genome"

custom content:
order:
- fastqc
- mosdepth
- fastp
- peddy
- samtools
- picard

sp:
dna_number_table:
fn: "*DNA_number.table.tsv"

custom_data:
dna_number_table:
Expand All @@ -32,11 +54,45 @@ custom_data:
title: "DNA number"
description: "DNA number based on SampleSheet"

sp:
dna_number_table:
fn: "*DNA_number.table.tsv"
# mosdepth custom thresholds
mosdepth_config:
general_stats_coverage:
- 1
- 5
- 10
- 15
- 20
- 30
- 50

# Remove suffix in general stats
custom_table_header_config:
general_stats_table:
raw_total_sequences:
suffix: ""
title: "Total seqs [M]"
reads_mapped:
suffix: ""
title: "Reads mapped [M]"
reads_mapped_percent:
suffix: ""
reads_properly_paired_percent:
suffix: ""
median_coverage:
suffix: ""
10_x_pc:
suffix: ""
30_x_pc:
suffix: ""
50_x_pc:
suffix: ""
PERCENT_DUPLICATION:
suffix: ""
title: "Duplication [%]"
summed_mean:
suffix: ""

# General stats column visibility
table_columns_visible:
FastQC:
percent_duplicates: False
Expand All @@ -45,7 +101,7 @@ table_columns_visible:
percent_fails: False
total_sequences: False
fastp:
pct_adapter: True
pct_adapter: False
pct_surviving: False
after_filtering_gc_content: False
filtering_result_passed_filter_reads: False
Expand All @@ -68,41 +124,33 @@ table_columns_visible:
sex_het_ratio: False
error_sex_check: True
predicted_sex_sex_check: True
Picard:
PCT_PF_READS_ALIGNED: False
"Picard: HsMetrics":
FOLD_ENRICHMENT: False
MEDIAN_TARGET_COVERAGE: False
PCT_TARGET_BASES_30X: False
ZERO_CVG_TARGETS_PCT: False
"Picard: InsertSizeMetrics":
summed_median: False
summed_mean: True
"Picard: Mark Duplicates":
PERCENT_DUPLICATION: True
"Picard: WgsMetrics":
MEDIAN_COVERAGE: False
MEAN_COVERAGE: False
SD_COVERAGE: False
PCT_30X: False
PCT_TARGET_BASES_30X: False
FOLD_ENRICHMENT: False
TOTAL_READS: True
Samtools:
"Samtools: stats":
error_rate: False
non-primary_alignments: False
reads_mapped: False
reads_mapped_percent: True
reads_properly_paired_percent: True
reads_MQ0_percent: False
raw_total_sequences: True

# mosdepth custom thresholds
mosdepth_config:
general_stats_coverage:
- 1
- 5
- 10
- 15
- 20
- 30
- 50
raw_total_sequences: True #tidigare from picard

# Patriks plug in, addera egna columner till general stats
# Custom columns to general stats
multiqc_cgs:
Picard:
"Picard: HsMetrics":
FOLD_80_BASE_PENALTY:
title: "Fold80"
description: "Fold80 penalty from picard hs metrics"
Expand All @@ -121,25 +169,17 @@ multiqc_cgs:
max: 100
scale: "RdYlGn-rev"
format: "{:.2%}"
Samtools:
"Samtools: stats":
average_quality:
title: "Average Quality"
description: "Ratio between the sum of base qualities and total length from Samtools stats"
min: 0
max: 60
scale: "RdYlGn"
mosdepth:
20_x_pc: #Cant get it to work
title: "20x percent"
description: "Fraction of genome with at least 20X coverage"
max: 100
min: 0
suffix: "%"
scale: "RdYlGn"

# Galler alla kolumner oberoende pa module!
table_columns_placement:
dna_number_table:
"Custom content: dna_number_table":
dna_number: 300
mosdepth:
median_coverage: 601
Expand All @@ -150,7 +190,7 @@ table_columns_placement:
20_x_pc: 604
30_x_pc: 605
50_x_pc: 606
Samtools:
"Samtools: stats":
raw_total_sequences: 500
reads_mapped: 501
reads_mapped_percent: 502
Expand All @@ -166,19 +206,15 @@ table_columns_placement:
error_sex_check: 701
predicted_sex_sex_check: 702
family_id: 703
Picard:
TOTAL_READS: 500
PCT_SELECTED_BASES: 801
FOLD_80_BASE_PENALTY: 802
PCT_PF_READS_ALIGNED: 888
summed_median: 888
PERCENT_DUPLICATION: 803
summed_mean: 804
STANDARD_DEVIATION: 805
ZERO_CVG_TARGETS_PCT: 888
MEDIAN_COVERAGE: 888
MEAN_COVERAGE: 888
SD_COVERAGE: 888
PCT_30X: 888
PCT_TARGET_BASES_30X: 888
"Picard: HsMetrics":
FOLD_ENRICHMENT: 888
MEDIAN_TARGET_COVERAGE: 888
PCT_TARGET_BASES_30X: 888
FOLD_80_BASE_PENALTY: 801
PCT_SELECTED_BASES: 800
ZERO_CVG_TARGETS_PCT: 805
"Picard: InsertSizeMetrics":
summed_median: 803
summed_mean: 803
"Picard: Mark Duplicates":
PERCENT_DUPLICATION: 802
Loading
Loading