From 3782c40a953b384973e4869d6f23e8805003d248 Mon Sep 17 00:00:00 2001 From: yeising <161250010+yeising@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:38:33 +0200 Subject: [PATCH] feat: qm report (#95) * fix: typo * feat: include qualimap report in snakemake report * refactor: removed compress rules * style: changed names to better reflect their purpose * refactor: removed localrule declaration from rules that no longer exist * fix: profile uses new rule names * style: report viewability and renamed rule parameters to match their rules * style: changed l2fc threshold and improved naming * fix: config keys for test files, formatting * fix: input path * Update workflow/rules/qc.smk no string detour, just taking config from config dir Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> * style: samstats -> bamstats in output directory * fix: accommodating for previous change in the all rule --------- Co-authored-by: Christian Meesters Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Co-authored-by: cmeesters --- .test/config-simple/config.yml | 6 +- config/Mainz-MogonNHR/config.yml | 8 +-- workflow/profile/Mainz-MogonNHR/config.yaml | 8 +-- .../report/nanoplot_all_samples_report.rst | 2 +- workflow/report/qualimap.rst | 1 + workflow/rules/alignmod.smk | 16 ++--- workflow/rules/commons.smk | 12 ++-- workflow/rules/diffexp.smk | 18 +++++ workflow/rules/qc.smk | 70 ++++++++----------- workflow/rules/quantification.smk | 2 +- 10 files changed, 79 insertions(+), 64 deletions(-) create mode 100644 workflow/report/qualimap.rst diff --git a/.test/config-simple/config.yml b/.test/config-simple/config.yml index de42b32..f624cfd 100644 --- a/.test/config-simple/config.yml +++ b/.test/config-simple/config.yml @@ -39,10 +39,10 @@ maximum_secondary: 100 secondary_score_ratio: 1.0 # Samtools view opts, "-b" creates BAM from SAM. 
-sview_opts: "-b" +samtobam_opts: "-b" # Samtools sort opts, -ssort_opts: "" +bamsort_opts: "" # Salmon library type salmon_libtype: "U" @@ -51,7 +51,7 @@ salmon_libtype: "U" # QC options # Samtools stats opts -sstats_opts: "" +bamstats_opts: "" # Count filtering options - customize these according to your experimental design: diff --git a/config/Mainz-MogonNHR/config.yml b/config/Mainz-MogonNHR/config.yml index fdb08c8..0680df4 100644 --- a/config/Mainz-MogonNHR/config.yml +++ b/config/Mainz-MogonNHR/config.yml @@ -37,10 +37,10 @@ maximum_secondary: 100 secondary_score_ratio: 1.0 # Samtools view opts, "-b" creates BAM from SAM. -sview_opts: "-b" +samtobam_opts: "-b" # Samtools sort opts, -ssort_opts: "" +bamsort_opts: "" # Salmon library type salmon_libtype: "U" @@ -49,7 +49,7 @@ salmon_libtype: "U" # QC options # Samtools stats opts -sstats_opts: "" +bamstats_opts: "" # Count filtering options - customize these according to your experimental design: @@ -71,7 +71,7 @@ design_factors: - "condition" # # The (log2) log fold change under the null hypothesis. (default: 0). -lfc_null: 0.1 +lfc_null: 1 # # The alternative hypothesis for computing wald p-values. 
By default, # the normal Wald test assesses deviation of the estimated log fold diff --git a/workflow/profile/Mainz-MogonNHR/config.yaml b/workflow/profile/Mainz-MogonNHR/config.yaml index a8fc6e9..4fd98ec 100644 --- a/workflow/profile/Mainz-MogonNHR/config.yaml +++ b/workflow/profile/Mainz-MogonNHR/config.yaml @@ -39,22 +39,22 @@ set-resources: mem_mb_per_cpu: 1800 runtime: "1h" - sam_sort: + bam_sort: cpus_per_task: 4 mem_mb_per_cpu: 7200 runtime: "2h" - sam_view: + sam_to_bam: cpus_per_task: 1 mem_mb_per_cpu: 1800 runtime: "1h" - sam_index: + bam_index: cpus_per_task: 8 mem_mb_per_cpu: 1800 runtime: "30m" - sam_stats: + bam_stats: cpus_per_task: 8 mem_mb_per_cpu: 1800 runtime: "30m" diff --git a/workflow/report/nanoplot_all_samples_report.rst b/workflow/report/nanoplot_all_samples_report.rst index a68d88e..d2bef6b 100644 --- a/workflow/report/nanoplot_all_samples_report.rst +++ b/workflow/report/nanoplot_all_samples_report.rst @@ -1 +1 @@ -Full `NanoPlot, `_ sequencing quality report for total samples . \ No newline at end of file +Full `NanoPlot, `_ sequencing quality report for total samples. \ No newline at end of file diff --git a/workflow/report/qualimap.rst b/workflow/report/qualimap.rst new file mode 100644 index 0000000..9252fc5 --- /dev/null +++ b/workflow/report/qualimap.rst @@ -0,0 +1 @@ +Full `QualiMap, `_ BAMQC alignment quality report for total samples. 
\ No newline at end of file diff --git a/workflow/rules/alignmod.smk b/workflow/rules/alignmod.smk index c257d02..8702ea3 100644 --- a/workflow/rules/alignmod.smk +++ b/workflow/rules/alignmod.smk @@ -1,24 +1,24 @@ -rule sam_view: +rule sam_to_bam: input: sam="alignments/{sample}.sam", output: - "sorted_alignments/{sample}.bam", + "alignments/{sample}.bam", log: - "logs/samtools/samview_{sample}.log", + "logs/samtools/samtobam_{sample}.log", params: - extra=f'{config["sview_opts"]}', + extra=f'{config["samtobam_opts"]}', wrapper: "v3.13.4/bio/samtools/view" -rule sam_sort: +rule bam_sort: input: - sam="alignments/{sample}.sam", + bam="alignments/{sample}.bam", output: "sorted_alignments/{sample}_sorted.bam", log: - "logs/samtools/samsort_{sample}.log", + "logs/samtools/bamsort_{sample}.log", params: - extra=f'{config["ssort_opts"]}', + extra=f'{config["bamsort_opts"]}', wrapper: "v3.13.4/bio/samtools/sort" diff --git a/workflow/rules/commons.smk b/workflow/rules/commons.smk index b4f98fa..ca6813e 100644 --- a/workflow/rules/commons.smk +++ b/workflow/rules/commons.smk @@ -65,10 +65,14 @@ def aggregate_input(samples): def rule_all_input(): all_input = list() all_input.append("versions.txt") - all_input.extend(expand("QC/NanoPlot/{sample}.tar.gz", sample=samples["sample"])) - all_input.append("QC/NanoPlot/all_samples.tar.gz") - all_input.extend(expand("QC/samstats/{sample}.txt", sample=samples["sample"])) - all_input.extend(expand("QC/qualimap/{sample}.tar.gz", sample=samples["sample"])) + all_input.extend( + expand("NanoPlot/{sample}/NanoPlot-report.html", sample=samples["sample"]) + ) + all_input.append("NanoPlot/all_samples/NanoPlot-report.html") + all_input.extend(expand("QC/bamstats/{sample}.txt", sample=samples["sample"])) + all_input.extend( + expand("qualimap/{sample}/qualimapReport.html", sample=samples["sample"]) + ) all_input.extend( expand("counts/{sample}_salmon/quant.sf", sample=samples["sample"]) ) diff --git a/workflow/rules/diffexp.smk 
b/workflow/rules/diffexp.smk index a89e867..4303835 100644 --- a/workflow/rules/diffexp.smk +++ b/workflow/rules/diffexp.smk @@ -6,33 +6,51 @@ rule de_analysis: "de_analysis/dispersion_graph.svg", category="Results", caption="../report/dispersion_graph.rst", + labels={ + "figure": "Dispersion graph", + }, ), ma_graph=report( "de_analysis/ma_graph.svg", category="Results", caption="../report/ma_graph.rst", + labels={ + "figure": "MA plot", + }, ), de_heatmap=report( "de_analysis/heatmap.svg", category="Results", caption="../report/heatmap.rst", + labels={ + "figure": "Gene heatmap", + }, ), correlation_matrix=report( "de_analysis/correlation_matrix.svg", category="Results", caption="../report/correlation_matrix.rst", + labels={ + "figure": "Correlation matrix", + }, ), normalized_counts="de_analysis/normalized_counts.csv", de_top_heatmap=report( "de_analysis/heatmap_top.svg", category="Results", caption="../report/heatmap_top.rst", + labels={ + "figure": "Top gene heatmap", + }, ), lfc_analysis="de_analysis/lfc_analysis.csv", volcano_plot=report( "de_analysis/volcano_plot.svg", category="Results", caption="../report/volcano_plot.rst", + labels={ + "figure": "Volcano plot", + }, ), params: samples=samples, diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index 4ad25e9..4c00190 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -2,9 +2,7 @@ import os localrules: - compress_nplot, - compress_nplot_all, - compress_map_qc, + qm_report, configfile: "config/config.yml" @@ -20,7 +18,12 @@ rule plot_samples: scatter=report( "NanoPlot/{sample}/NanoPlot-report.html", category="Quality control", + subcategory="NanoPlot", caption="../report/nanoplot_sample_report.rst", + labels={ + "model": "NanoPlot", + "figure": "{sample}", + }, ), params: outdir=lambda wildcards: f"NanoPlot/{wildcards.sample}", @@ -42,7 +45,12 @@ rule plot_all_samples: scatter=report( "NanoPlot/all_samples/NanoPlot-report.html", category="Quality control", + subcategory="NanoPlot", 
caption="../report/nanoplot_all_samples_report.rst", + labels={ + "model": "NanoPlot", + "figure": "All samples", + }, ), # This parameter is in line with the Snakemake docs 8.20.3 guideline on how to avoid having parameters as output prefixes params: @@ -56,32 +64,6 @@ rule plot_all_samples: "--fastq {input} --outdir {params.outdir} 2> {log}" -rule compress_nplot: - input: - samples=rules.plot_samples.output, - output: - "QC/NanoPlot/{sample}.tar.gz", - log: - "logs/NanoPlot/compress_{sample}.log", - conda: - "../envs/base.yml" - script: - "../scripts/make_archive.py" - - -rule compress_nplot_all: - input: - all_samples=rules.plot_all_samples.output, - output: - "QC/NanoPlot/all_samples.tar.gz", - log: - "logs/NanoPlot/compress_all_samples.log", - conda: - "../envs/base.yml" - script: - "../scripts/make_archive.py" - - rule map_qc: input: bam="sorted_alignments/{sample}_sorted.bam", @@ -93,27 +75,37 @@ rule map_qc: "v4.4.0/bio/qualimap/bamqc" -rule compress_map_qc: +# this is a dummy rule to create input for the report because the QualiMap wrapper only accepts directories as valid output +rule qm_report: input: map_qc=rules.map_qc.output, output: - "QC/qualimap/{sample}.tar.gz", + qm_report=report( + "qualimap/{sample}/qualimapReport.html", + category="Quality control", + subcategory="QualiMap", + caption="../report/qualimap.rst", + labels={ + "model": "QualiMap", + "figure": "{sample}", + }, + ), log: - "logs/qualimap/compress_{sample}.log", + "logs/qualimap/{sample}_report.log", conda: "../envs/base.yml" - script: - "../scripts/make_archive.py" + shell: + "cp -a QC/qualimap/{wildcards.sample} qualimap/ 2> {log}" -rule sam_stats: +rule bam_stats: input: - bam="sorted_alignments/{sample}.bam", + bam="alignments/{sample}.bam", output: - "QC/samstats/{sample}.txt", + "QC/bamstats/{sample}.txt", log: - "logs/samtools/samstats_{sample}.log", + "logs/samtools/bamstats_{sample}.log", params: - extra=f'{config["sstats_opts"]}', + extra=config["bamstats_opts"], wrapper: 
"v3.13.4/bio/samtools/stats" diff --git a/workflow/rules/quantification.smk b/workflow/rules/quantification.smk index dda3a4e..7b30ccb 100644 --- a/workflow/rules/quantification.smk +++ b/workflow/rules/quantification.smk @@ -4,7 +4,7 @@ localrules: rule count_reads: input: - bam="sorted_alignments/{sample}.bam", + bam="alignments/{sample}.bam", trs="transcriptome/transcriptome.fa", output: tsv="counts/{sample}_salmon/quant.sf",