Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

New output directory structure #222

Open
wants to merge 7 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .nf-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ lint:
- manifest.homePage
- validation.help.afterText
- validation.summary.afterText
subworkflow_changes: false
nf_core_version: 3.0.2
repository_type: pipeline
template:
Expand All @@ -42,4 +43,4 @@ template:
skip_features:
- fastqc
- is_nfcore
version: 1.9.0dev
version: 2.0.0dev
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v1.9.0dev
## v2.0.0dev

### New features

1. Added the `--min_callable_coverage` parameter to state what the lowest coverage should be for a region to be classified as callable.
2. Added the [`elprep` caller](https://github.com/ExaScience/elprep) as an alternative to the haplotypecaller.

### Changes

1. Added the `--squash-ploidy` argument to the RTG vcfeval process.
2. Update to nf-core v3.0.1
3. Completely reworked the output directory structure to a more sensible structure. The pipeline can now be run on the same output directory every time and will incrementally add files to the correct family folder. See the [output documentation](https://nf-cmgg.github.io/germline/latest/output/) for more info.
4. Migrated to the new workflow output definitions.
5. Bumped the minimal Nextflow version to 24.10.0.

### Fixes

Expand Down
1 change: 0 additions & 1 deletion conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ params {
validate = true
add_ped = true
vep_chunk_size = 10000
project = "test"
normalize = true
updio = true
automap = true
Expand Down
48 changes: 23 additions & 25 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ params.vcfanno_config = getGenomeAttribute('vcfanno_config', params.genome
include { GERMLINE } from './workflows/germline'
include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_cmgg_germline_pipeline'
include { PIPELINE_COMPLETION } from './subworkflows/local/utils_cmgg_germline_pipeline'
include { getWorkflowVersion } from './subworkflows/nf-core/utils_nfcore_pipeline'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -213,14 +214,14 @@ workflow {

// Filtering out input GVCFs from the output publishing fixes an issue in the current implementation of
// the workflow output definitions: https://github.com/nextflow-io/nextflow/issues/5480
ch_gvcfs_out = GERMLINE.out.gvcfs.filter { _meta, gvcf, _tbi -> gvcf.startsWith(workflow.workDir) }
def ch_gvcfs_out = GERMLINE.out.gvcfs.filter { _meta, gvcf, _tbi -> gvcf.startsWith(workflow.workDir) }

publish:
ch_gvcfs_out >> 'gvcfs'
GERMLINE.out.genomicsdb >> 'genomicsdb'
GERMLINE.out.single_beds >> 'single_beds'
GERMLINE.out.validation >> 'validation'
GERMLINE.out.gvcf_reports >> 'gvcf_reports'
GERMLINE.out.genomicsdb >> 'genomicsdb'
GERMLINE.out.vcfs >> 'vcfs'
GERMLINE.out.gemini >> 'gemini'
GERMLINE.out.peds >> 'peds'
Expand All @@ -231,61 +232,58 @@ workflow {
GERMLINE.out.multiqc_report >> 'multiqc'
}

def project_name = params.project ?: workflow.runName
def final_prefix = params.skip_date_project ? "${project_name}" : "${new Date().format("yyyy-MM-dd")}_${project_name}"

output {
'gvcfs' {
path { meta, gvcf, _tbi -> { file ->
if(file == gvcf.name) {
return "${meta.id}/${meta.id}.${meta.caller}.g.vcf.gz"
return "${meta.family}/${meta.id}_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.g.vcf.gz"
}
return "${meta.id}/${meta.id}.${meta.caller}.g.vcf.gz.tbi"
return "${meta.family}/${meta.id}_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.g.vcf.gz.tbi"
} }
}
'genomicsdb' {
enabled (params.output_genomicsdb || params.only_merge)
path { meta, genomicsdb ->
{ file -> "${final_prefix}/${meta.family}/${meta.id}_${meta.caller}_genomicsdb"}
}
}
'single_beds' {
path { meta, _bed -> { _file -> "${meta.id}/${meta.id}.bed" } }
path { meta, _bed -> { _file -> "${meta.family}/${meta.id}_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.bed" } }
}
'validation' {
path { meta, _report -> { file -> "${meta.id}/validation/${meta.caller}/${file}" } }
path { meta, _report -> { file -> "${meta.family}/${meta.id}_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/validation/${meta.caller}/${file}" } }
}
'gvcf_reports' {
path { meta, _report -> { _file -> "${meta.id}/reports/${meta.id}.${meta.caller}.bcftools_stats.txt" }}
path { meta, _report -> { _file -> "${meta.family}/${meta.id}_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.bcftools_stats.txt" }}
}
'genomicsdb' {
enabled (params.output_genomicsdb || params.only_merge)
path { meta, _genomicsdb ->
{ _file -> "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}_${meta.caller}_genomicsdb"}
}
}
'vcfs' {
path { meta, vcf, _tbi -> { file ->
if(file == vcf.name) {
return "${final_prefix}/${meta.family}/${meta.id}.${meta.caller}.vcf.gz"
return "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.vcf.gz"
}
return "${final_prefix}/${meta.family}/${meta.id}.${meta.caller}.vcf.gz.tbi"
return "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.vcf.gz.tbi"
} }
}
'gemini' {
path { meta, _db -> { _file -> "${final_prefix}/${meta.family}/${meta.id}.${meta.caller}.db"}}
path { meta, _db -> { _file -> "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.db"}}
}
'peds' {
path { meta, _ped -> { _file -> "${final_prefix}/${meta.family}/${meta.id}.${meta.caller}.ped"}}
path { meta, _ped -> { _file -> "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.ped"}}
}
'joint_beds' {
path { meta, _bed -> { _file -> "${final_prefix}/${meta.family}/${meta.id}.${meta.caller}.bed"}}
path { meta, _bed -> { _file -> "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${meta.id}.${meta.caller}.bed"}}
}
'final_reports' {
path { meta, _report -> { file -> "${final_prefix}/${meta.family}/reports/${file}"}}
path { meta, _report -> { file -> "${meta.family}/qc_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/${file}"}}
}
'automap' {
path { meta, _automap -> { file -> "${final_prefix}/${meta.family}/automap/${meta.caller}"}}
path { meta, _automap -> { _file -> "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/automap/${meta.caller}"}}
}
'updio' {
path { meta, _updio -> { file -> "${final_prefix}/${meta.family}/updio/${meta.caller}"}}
path { meta, _updio -> { _file -> "${meta.family}/output_${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/updio/${meta.caller}"}}
}
'multiqc' {
path { _report -> { _file -> "multiqc/multiqc_report.html"}}
path { _report -> { _file -> "${getWorkflowVersion().replace(".", "_")}_${new Date().format("yyyy_MM_dd")}/multiqc_report.html"}}
}
}

Expand Down
2 changes: 1 addition & 1 deletion modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@
"nf-core": {
"utils_nextflow_pipeline": {
"branch": "master",
"git_sha": "1b6b9a3338d011367137808b49b923515080e3ba",
"git_sha": "56372688d8979092cafbe0c5c3895b491166ca1c",
"installed_by": ["subworkflows"]
},
"utils_nfcore_pipeline": {
Expand Down
29 changes: 14 additions & 15 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ params {
add_ped = false
validate = false
roi = null
project = null
skip_date_project = false
only_call = false
only_merge = false
output_genomicsdb = false
Expand Down Expand Up @@ -271,9 +269,20 @@ set -C # No clobber - prevent output redirection from overwriting files.
// Disable process selector warnings by default. Use debug profile to enable warnings.
nextflow.enable.configProcessNamesValidation = false

manifest {
name = 'nf-cmgg/germline'
author = """nvnieuwk"""
homePage = 'https://github.com/nf-cmgg/germline'
description = """A nextflow pipeline for calling and annotating small germline variants from short DNA reads for WES and WGS data"""
mainScript = 'main.nf'
nextflowVersion = '!>=24.10.0'
version = '2.0.0dev'
doi = ''
}

timeline {
enabled = true
file = "${params.outdir}/pipeline_info/execution_timeline_${new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')}.html"
file = "${params.outdir}/v${manifest.version.replace('.', '_')}_${new Date().format("yyyy_MM_dd")}/execution_timeline_${new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')}.html"
}
report {
enabled = true
Expand All @@ -288,20 +297,9 @@ dag {
file = timeline.file.replace("execution_timeline", "pipeline_dag")
}

manifest {
name = 'nf-cmgg/germline'
author = """nvnieuwk"""
homePage = 'https://github.com/nf-cmgg/germline'
description = """A nextflow pipeline for calling and annotating small germline variants from short DNA reads for WES and WGS data"""
mainScript = 'main.nf'
nextflowVersion = '!>=24.10.0'
version = '1.9.0dev'
doi = ''
}

// Nextflow plugins
plugins {
id 'nf-schema@2.1.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet
id 'nf-schema@2.2.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet
}

validation {
Expand All @@ -324,6 +322,7 @@ validation {
}
summary {
beforeText = validation.help.beforeText
hideParams = ["genomes"]
}
}

Expand Down
24 changes: 5 additions & 19 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@
"description": "Path to the elsites file. This is used when `elprep` is part of the callers.",
"fa_icon": "far fa-file-code"
},
"genomes": {
"type": "object",
"hidden": true,
"description": "Object for genomes"
},
"genomes_base": {
"type": "string",
"default": "/references/",
Expand Down Expand Up @@ -210,15 +215,6 @@
"description": "Don't run mosdepth in fast-mode",
"help_text": "This is advised if you need exact coverage BED files as output."
},
"project": {
"type": "string",
"description": "The name of the project.",
"help_text": "This will be used to specify the name of the final output files folder in the output directory."
},
"skip_date_project": {
"type": "boolean",
"description": "Don't add the current date to the output project folder."
},
"roi": {
"type": "string",
"description": "Path to the default ROI (regions of interest) BED file to be used for WES analysis.",
Expand Down Expand Up @@ -392,16 +388,6 @@
"description": "Less common options for the pipeline, typically set in a config file.",
"help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
"properties": {
"help": {
"type": ["boolean", "string"],
"description": "Display help text. Give a parameter name to this option to see the detailed help of that parameter.",
"fa_icon": "fas fa-question-circle"
},
"help_full": {
"type": "boolean",
"description": "See the full help message of all parameters.",
"fa_icon": "fas fa-question-circle"
},
"version": {
"type": "boolean",
"description": "Display version and exit.",
Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/utils_cmgg_germline_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ workflow PIPELINE_INITIALISATION {
)

// Output the samplesheet
file(input).copyTo("${outdir}/samplesheet.csv")
file(input).copyTo("${outdir}/v${workflow.manifest.version.replace('.', '_')}_${new Date().format("yyyy_MM_dd")}/samplesheet.csv")

emit:
samplesheet = WATCHPATH_HANDLING.out.samplesheet
Expand Down
16 changes: 12 additions & 4 deletions subworkflows/nf-core/utils_nextflow_pipeline/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 30 additions & 9 deletions tests/pipeline/callers/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,19 @@ nextflow_pipeline {
then {
assertAll(
{ assert workflow.success },
{ assert !workflow.stdout },
{ assert snapshot(
workflow.stdout,
path("${outputDir}")
.list()
.findAll { !it.toString().endsWith("pipeline_info") }
.collect { getRecursiveFileNames(it, "${outputDir}") }
.flatten()
).match("vardict") }
.findAll {
!(it.contains("/execution_") || it.contains("/params_") || it.contains("/pipeline_"))
}
.collect {
it.replace(getDynamicOutputName(), "<version>_<date>")
}
).match() }
)
}

Expand All @@ -45,14 +50,19 @@ nextflow_pipeline {
then {
assertAll(
{ assert workflow.success },
{ assert !workflow.stdout },
{ assert snapshot(
workflow.stdout,
path("${outputDir}")
.list()
.findAll { !it.toString().endsWith("pipeline_info") }
.collect { getRecursiveFileNames(it, "${outputDir}") }
.flatten()
).match("haplotypecaller") }
.findAll {
!(it.contains("/execution_") || it.contains("/params_") || it.contains("/pipeline_"))
}
.collect {
it.replace(getDynamicOutputName(), "<version>_<date>")
}
).match() }
)
}

Expand All @@ -70,14 +80,19 @@ nextflow_pipeline {
then {
assertAll(
{ assert workflow.success },
{ assert !workflow.stdout },
{ assert snapshot(
workflow.stdout,
path("${outputDir}")
.list()
.findAll { !it.toString().endsWith("pipeline_info") }
.collect { getRecursiveFileNames(it, "${outputDir}") }
.flatten()
).match("vardict + haplotypecaller") }
.findAll {
!(it.contains("/execution_") || it.contains("/params_") || it.contains("/pipeline_"))
}
.collect {
it.replace(getDynamicOutputName(), "<version>_<date>")
}
).match() }
)
}

Expand All @@ -91,3 +106,9 @@ def getRecursiveFileNames(fileOrDir, outputDir) {
}
return fileOrDir.toString().replace("${outputDir}/", "")
}

// Builds the dated, versioned output-folder name the pipeline generates,
// e.g. "v2_0_0dev_2024_11_05", so snapshot tests can normalise it away.
// The version is read from the pipeline's .nf-core.yml template section.
def getDynamicOutputName() {
    def pipelineConfig = new groovy.yaml.YamlSlurper().parseText(file(".nf-core.yml").text)
    // Dots in the version are replaced by underscores to match the folder naming scheme
    def versionTag = pipelineConfig.template.version.replace('.', '_')
    def today = new java.text.SimpleDateFormat("yyyy_MM_dd").format(new Date())
    return "v${versionTag}_${today}" as String
}
Loading
Loading