Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

New output directory structure #222

Open
wants to merge 7 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .nf-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ lint:
- manifest.homePage
- validation.help.afterText
- validation.summary.afterText
subworkflow_changes: false
nf_core_version: 3.0.2
repository_type: pipeline
template:
Expand All @@ -42,4 +43,4 @@ template:
skip_features:
- fastqc
- is_nfcore
version: 1.9.0dev
version: 2.0.0dev
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v1.9.0dev
## v2.0.0dev

### New features

1. Added the `--min_callable_coverage` parameter to state what the lowest coverage should be for a region to be classified as callable.
2. Added the [`elprep` caller](https://github.com/ExaScience/elprep) as an alternative to the haplotypecaller.

### Changes

1. Added the `--squash-ploidy` argument to the RTG vcfeval process.
2. Update to nf-core v3.0.1
3. Completely reworked the output directory structure to a more sensible structure. The pipeline can now be run on the same output directory every time and will incrementally add files to the correct family folder. See the [output documentation](https://nf-cmgg.github.io/germline/latest/output/) for more info.
4. Migrated to the new workflow output definitions.
5. Bumped the minimal Nextflow version to 24.10.0.

### Fixes

Expand Down
1 change: 0 additions & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ params {
validate = true
add_ped = true
vep_chunk_size = 10000
project = "test"
normalize = true
updio = true
automap = true
Expand Down
48 changes: 23 additions & 25 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ params.vcfanno_config = getGenomeAttribute('vcfanno_config', params.genome
include { GERMLINE } from './workflows/germline'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_cmgg_germline_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_cmgg_germline_pipeline'
include { getWorkflowVersion } from './subworkflows/nf-core/utils_nfcore_pipeline'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -213,14 +214,14 @@ workflow {

// Filtering out input GVCFs from the output publishing fixes an issue in the current implementation of
// the workflow output definitions: https://github.com/nextflow-io/nextflow/issues/5480
ch_gvcfs_out = GERMLINE.out.gvcfs.filter { _meta, gvcf, _tbi -> gvcf.startsWith(workflow.workDir) }
def ch_gvcfs_out = GERMLINE.out.gvcfs.filter { _meta, gvcf, _tbi -> gvcf.startsWith(workflow.workDir) }

publish:
ch_gvcfs_out >> 'gvcfs'
GERMLINE.out.genomicsdb >> 'genomicsdb'
GERMLINE.out.single_beds >> 'single_beds'
GERMLINE.out.validation >> 'validation'
GERMLINE.out.gvcf_reports >> 'gvcf_reports'
GERMLINE.out.genomicsdb >> 'genomicsdb'
GERMLINE.out.vcfs >> 'vcfs'
GERMLINE.out.gemini >> 'gemini'
GERMLINE.out.peds >> 'peds'
Expand All @@ -231,61 +232,58 @@ workflow {
GERMLINE.out.multiqc_report >> 'multiqc'
}

def project_name = params.project ?: workflow.runName
def final_prefix = params.skip_date_project ? "${project_name}" : "${new Date().format("yyyy-MM-dd")}_${project_name}"

output {
'gvcfs' {
path { meta, gvcf, _tbi -> { file ->
if(file == gvcf.name) {
return "${meta.id}/${meta.id}.${meta.caller}.g.vcf.gz"
return "${meta.family}/${meta.id}_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.g.vcf.gz"
}
return "${meta.id}/${meta.id}.${meta.caller}.g.vcf.gz.tbi"
return "${meta.family}/${meta.id}_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.g.vcf.gz.tbi"
} }
}
'genomicsdb' {
enabled (params.output_genomicsdb || params.only_merge)
path { meta, genomicsdb ->
{ file -> "${final_prefix}/${meta.family}/${meta.id}_${meta.caller}_genomicsdb"}
}
}
'single_beds' {
path { meta, _bed -> { _file -> "${meta.id}/${meta.id}.bed" } }
path { meta, _bed -> { _file -> "${meta.family}/${meta.id}_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.bed" } }
}
'validation' {
path { meta, _report -> { file -> "${meta.id}/validation/${meta.caller}/${file}" } }
path { meta, _report -> { file -> "${meta.family}/${meta.id}_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/validation/${meta.caller}/${file}" } }
}
'gvcf_reports' {
path { meta, _report -> { _file -> "${meta.id}/reports/${meta.id}.${meta.caller}.bcftools_stats.txt" }}
path { meta, _report -> { _file -> "${meta.family}/${meta.id}_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.bcftools_stats.txt" }}
}
'genomicsdb' {
enabled (params.output_genomicsdb || params.only_merge)
path { meta, _genomicsdb ->
{ _file -> "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}_${meta.caller}_genomicsdb"}
}
}
'vcfs' {
path { meta, vcf, _tbi -> { file ->
if(file == vcf.name) {
return "${final_prefix}/${meta.family}/${meta.id}.${meta.caller}.vcf.gz"
return "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.vcf.gz"
}
return "${final_prefix}/${meta.family}/${meta.id}.${meta.caller}.vcf.gz.tbi"
return "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.vcf.gz.tbi"
} }
}
'gemini' {
path { meta, _db -> { _file -> "${final_prefix}/${meta.family}/${meta.id}.${meta.caller}.db"}}
path { meta, _db -> { _file -> "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.db"}}
}
'peds' {
path { meta, _ped -> { _file -> "${final_prefix}/${meta.family}/${meta.id}.${meta.caller}.ped"}}
path { meta, _ped -> { _file -> "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.ped"}}
}
'joint_beds' {
path { meta, _bed -> { _file -> "${final_prefix}/${meta.family}/${meta.id}.${meta.caller}.bed"}}
path { meta, _bed -> { _file -> "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.bed"}}
}
'final_reports' {
path { meta, _report -> { file -> "${final_prefix}/${meta.family}/reports/${file}"}}
path { meta, _report -> { file -> "${meta.family}/qc_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${file}"}}
}
'automap' {
path { meta, _automap -> { file -> "${final_prefix}/${meta.family}/automap/${meta.caller}"}}
path { meta, _automap -> { _file -> "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/automap/${meta.caller}"}}
}
'updio' {
path { meta, _updio -> { file -> "${final_prefix}/${meta.family}/updio/${meta.caller}"}}
path { meta, _updio -> { _file -> "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/updio/${meta.caller}"}}
}
'multiqc' {
path { _report -> { _file -> "multiqc/multiqc_report.html"}}
path { _report -> { _file -> "${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/multiqc_report.html"}}
}
}

Expand Down
2 changes: 1 addition & 1 deletion modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@
"nf-core": {
"utils_nextflow_pipeline": {
"branch": "master",
"git_sha": "1b6b9a3338d011367137808b49b923515080e3ba",
"git_sha": "56372688d8979092cafbe0c5c3895b491166ca1c",
"installed_by": ["subworkflows"]
},
"utils_nfcore_pipeline": {
Expand Down
29 changes: 14 additions & 15 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ params {
add_ped = false
validate = false
roi = null
project = null
skip_date_project = false
only_call = false
only_merge = false
output_genomicsdb = false
Expand Down Expand Up @@ -271,9 +269,20 @@ set -C # No clobber - prevent output redirection from overwriting files.
// Disable process selector warnings by default. Use debug profile to enable warnings.
nextflow.enable.configProcessNamesValidation = false

manifest {
name = 'nf-cmgg/germline'
author = """nvnieuwk"""
homePage = 'https://github.com/nf-cmgg/germline'
description = """A nextflow pipeline for calling and annotating small germline variants from short DNA reads for WES and WGS data"""
mainScript = 'main.nf'
nextflowVersion = '!>=24.10.0'
version = '2.0.0dev'
doi = ''
}

timeline {
enabled = true
file = "${params.outdir}/pipeline_info/execution_timeline_${new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')}.html"
file = "${params.outdir}/v${manifest.version.replace('.', '_')}_${new Date().format("yyyy_MM_dd")}/execution_timeline_${new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')}.html"
}
report {
enabled = true
Expand All @@ -288,20 +297,9 @@ dag {
file = timeline.file.replace("execution_timeline", "pipeline_dag")
}

manifest {
name = 'nf-cmgg/germline'
author = """nvnieuwk"""
homePage = 'https://github.com/nf-cmgg/germline'
description = """A nextflow pipeline for calling and annotating small germline variants from short DNA reads for WES and WGS data"""
mainScript = 'main.nf'
nextflowVersion = '!>=24.10.0'
version = '1.9.0dev'
doi = ''
}

// Nextflow plugins
plugins {
id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet
id 'nf-schema@2.2.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet
}

validation {
Expand All @@ -324,6 +322,7 @@ validation {
}
summary {
beforeText = validation.help.beforeText
hideParams = ["genomes"]
}
}

Expand Down
24 changes: 5 additions & 19 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@
"description": "Path to the elsites file. This is used when `elprep` is part of the callers.",
"fa_icon": "far fa-file-code"
},
"genomes": {
"type": "object",
"hidden": true,
"description": "Object for genomes"
},
"genomes_base": {
"type": "string",
"default": "/references/",
Expand Down Expand Up @@ -210,15 +215,6 @@
"description": "Don't run mosdepth in fast-mode",
"help_text": "This is advised if you need exact coverage BED files as output."
},
"project": {
"type": "string",
"description": "The name of the project.",
"help_text": "This will be used to specify the name of the final output files folder in the output directory."
},
"skip_date_project": {
"type": "boolean",
"description": "Don't add the current date to the output project folder."
},
"roi": {
"type": "string",
"description": "Path to the default ROI (regions of interest) BED file to be used for WES analysis.",
Expand Down Expand Up @@ -392,16 +388,6 @@
"description": "Less common options for the pipeline, typically set in a config file.",
"help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
"properties": {
"help": {
"type": ["boolean", "string"],
"description": "Display help text. Give a parameter name to this option to see the detailed help of that parameter.",
"fa_icon": "fas fa-question-circle"
},
"help_full": {
"type": "boolean",
"description": "See the full help message of all parameters.",
"fa_icon": "fas fa-question-circle"
},
"version": {
"type": "boolean",
"description": "Display version and exit.",
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/utils_cmgg_germline_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION {
)

// Output the samplesheet
file(input).copyTo("${outdir}/samplesheet.csv")
file(input).copyTo("${outdir}/v${workflow.manifest.version.replace('.', '_')}_${new Date().format("yyyy_MM_dd")}/samplesheet.csv")

emit:
samplesheet = WATCHPATH_HANDLING.out.samplesheet
Expand Down
16 changes: 12 additions & 4 deletions subworkflows/nf-core/utils_nextflow_pipeline/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 30 additions & 9 deletions tests/pipeline/callers/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,19 @@ nextflow_pipeline {
then {
assertAll(
{ assert workflow.success },
{ assert !workflow.stdout },
{ assert snapshot(
workflow.stdout,
path("${outputDir}")
.list()
.findAll { !it.toString().endsWith("pipeline_info") }
.collect { getRecursiveFileNames(it, "${outputDir}") }
.flatten()
).match("vardict") }
.findAll {
!(it.contains("/execution_") || it.contains("/params_") || it.contains("/pipeline_"))
}
.collect {
it.replace(getDynamicOutputName(), "<version>_<date>")
}
).match() }
)
}

Expand All @@ -45,14 +50,19 @@ nextflow_pipeline {
then {
assertAll(
{ assert workflow.success },
{ assert !workflow.stdout },
{ assert snapshot(
workflow.stdout,
path("${outputDir}")
.list()
.findAll { !it.toString().endsWith("pipeline_info") }
.collect { getRecursiveFileNames(it, "${outputDir}") }
.flatten()
).match("haplotypecaller") }
.findAll {
!(it.contains("/execution_") || it.contains("/params_") || it.contains("/pipeline_"))
}
.collect {
it.replace(getDynamicOutputName(), "<version>_<date>")
}
).match() }
)
}

Expand All @@ -70,14 +80,19 @@ nextflow_pipeline {
then {
assertAll(
{ assert workflow.success },
{ assert !workflow.stdout },
{ assert snapshot(
workflow.stdout,
path("${outputDir}")
.list()
.findAll { !it.toString().endsWith("pipeline_info") }
.collect { getRecursiveFileNames(it, "${outputDir}") }
.flatten()
).match("vardict + haplotypecaller") }
.findAll {
!(it.contains("/execution_") || it.contains("/params_") || it.contains("/pipeline_"))
}
.collect {
it.replace(getDynamicOutputName(), "<version>_<date>")
}
).match() }
)
}

Expand All @@ -91,3 +106,9 @@ def getRecursiveFileNames(fileOrDir, outputDir) {
}
return fileOrDir.toString().replace("${outputDir}/", "")
}

// Builds the dated, versioned output-folder name the pipeline generates,
// e.g. "v2_0_0dev_2024_11_05", so snapshot tests can normalise it away.
// The version is read from the pipeline's .nf-core.yml template section.
def getDynamicOutputName() {
    def pipelineConfig = new groovy.yaml.YamlSlurper().parseText(file(".nf-core.yml").text)
    // Dots in the version are replaced by underscores to match the folder naming scheme
    def versionTag = pipelineConfig.template.version.replace('.', '_')
    def today = new java.text.SimpleDateFormat("yyyy_MM_dd").format(new Date())
    return "v${versionTag}_${today}" as String
}
Loading
Loading