diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index a2ac81b..bc2c9b2 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -2,53 +2,53 @@ name: Bug report description: Report something that is broken or incorrect labels: bug body: -- type: textarea - id: description - attributes: - label: Description of the bug - description: A clear and concise description of what the bug is. - validations: - required: true -- type: textarea - id: command_used - attributes: - label: Command used and terminal output - description: Steps to reproduce the behaviour. Please paste the command you used - to launch the pipeline and the output from your terminal. - render: console - placeholder: '$ nextflow run ... - - - Some output where something broke - - ' -- type: textarea - id: files - attributes: - label: Relevant files - description: 'Please drag and drop the relevant files here. Create a `.zip` archive - if the extension is not allowed. - - Your verbose log file `.nextflow.log` is often useful _(this is a hidden file - in the directory where you launched the pipeline)_ as well as custom Nextflow - configuration files. - - ' -- type: textarea - id: system - attributes: - label: System information - description: '* Nextflow version _(eg. 23.04.0)_ - - * Hardware _(eg. HPC, Desktop, Cloud)_ - - * Executor _(eg. slurm, local, awsbatch)_ - - * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, - or Apptainer)_ - - * OS _(eg. CentOS Linux, macOS, Linux Mint)_ - - * Version of CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq _(eg. 1.1, 1.5, 1.8.2)_ - - ' + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. 
+ validations: + required: true + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used + to launch the pipeline and the output from your terminal. + render: console + placeholder: "$ nextflow run ... + + + Some output where something broke + + " + - type: textarea + id: files + attributes: + label: Relevant files + description: "Please drag and drop the relevant files here. Create a `.zip` archive + if the extension is not allowed. + + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file + in the directory where you launched the pipeline)_ as well as custom Nextflow + configuration files. + + " + - type: textarea + id: system + attributes: + label: System information + description: "* Nextflow version _(eg. 23.04.0)_ + + * Hardware _(eg. HPC, Desktop, Cloud)_ + + * Executor _(eg. slurm, local, awsbatch)_ + + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud, + or Apptainer)_ + + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + + * Version of CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq _(eg. 
1.1, 1.5, 1.8.2)_ + + " diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cb1baef..c01c468 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,9 +35,16 @@ jobs: with: version: "${{ matrix.NXF_VER }}" + - name: Install nf-test + run: | + conda install -c bioconda nf-test + - name: Run pipeline with test data - # TODO nf-core: You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + $CONDA/bin/nf-test test --junitxml=default.xml + + - name: Publish Test Report + uses: mikepenz/action-junit-report@v3 + if: always() # always run even if the previous step fails + with: + report_paths: "default.xml" diff --git a/.gitignore b/.gitignore index 5124c9a..2eef655 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ results/ testing/ testing* *.pyc +.nf-test* diff --git a/.nf-core.yml b/.nf-core.yml index 6f1e631..161076a 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,21 +1,24 @@ repository_type: pipeline lint: files_exist: - - CODE_OF_CONDUCT.md - - assets/nf-core-nf-cmgg-qdnaseq_logo_light.png - - docs/images/nf-core-nf-cmgg-qdnaseq_logo_light.png - - docs/images/nf-core-nf-cmgg-qdnaseq_logo_dark.png - - .github/ISSUE_TEMPLATE/config.yml - - .github/workflows/awstest.yml - - .github/workflows/awsfulltest.yml + - CODE_OF_CONDUCT.md + - assets/nf-core-nf-cmgg-qdnaseq_logo_light.png + - docs/images/nf-core-nf-cmgg-qdnaseq_logo_light.png + - docs/images/nf-core-nf-cmgg-qdnaseq_logo_dark.png + - .github/ISSUE_TEMPLATE/config.yml + - .github/workflows/awstest.yml + - .github/workflows/awsfulltest.yml + - lib/WorkflowNf-cmgg-qdnaseq.groovy files_unchanged: - - CODE_OF_CONDUCT.md - - assets/nf-core-nf-cmgg-qdnaseq_logo_light.png - - docs/images/nf-core-nf-cmgg-qdnaseq_logo_light.png - - 
docs/images/nf-core-nf-cmgg-qdnaseq_logo_dark.png - - .github/ISSUE_TEMPLATE/bug_report.yml + - CODE_OF_CONDUCT.md + - assets/nf-core-nf-cmgg-qdnaseq_logo_light.png + - docs/images/nf-core-nf-cmgg-qdnaseq_logo_light.png + - docs/images/nf-core-nf-cmgg-qdnaseq_logo_dark.png + - .github/ISSUE_TEMPLATE/bug_report.yml + - docs/README.md nextflow_config: - - manifest.name - - manifest.homePage + - manifest.name + - manifest.homePage multiqc_config: - - report_comment + - report_comment + pipeline_name_conventions: false diff --git a/.prettierignore b/.prettierignore index 437d763..25cc1b4 100644 --- a/.prettierignore +++ b/.prettierignore @@ -10,3 +10,5 @@ testing/ testing* *.pyc bin/ +.nf-test* +null diff --git a/CHANGELOG.md b/CHANGELOG.md index c40a9e5..7bb9d56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v1.0dev - [date] +## v0.1.0 - Dancing Panda - [4 July 2023] Initial release of CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq, created with the [nf-core](https://nf-co.re/) template. diff --git a/README.md b/README.md index 4552461..585bcd3 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,3 @@ -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) - [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) @@ -8,20 +6,13 @@ ## Introduction -**CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq** is a bioinformatics pipeline that ... 
- - - - - +**CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq** is a bioinformatics pipeline for creating qDNAseq annotations -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +1. Trim FASTQ files to read lengths of 50 with Trim Galore +2. Align the reads with BWA (aln and samse/sampe) +3. Create a mappability WIG file with GenMap +4. Convert the WIG to BigWig with UCSC WigToBigWig +5. Create the annotations using a custom R script ## Usage @@ -30,9 +21,6 @@ > to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) > with `-profile test` before running the workflow on actual data. - - Now, you can run the pipeline using: - - ```bash nextflow run CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq \ -profile \ + --genome \ --input samplesheet.csv \ --outdir ``` @@ -68,19 +53,12 @@ CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq was originally written by nvnieuwk We thank the following people for their extensive assistance in the development of this pipeline: - - ## Contributions and Support If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). ## Citations - - - - - An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).
diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index d6f18ca..2cec2cc 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,8 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq Methods Description" section_href: "https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq" plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline -## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

Data was processed using CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index d7c057d..84b4062 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,5 +1,5 @@ report_comment: > - This report has been generated by the CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq + This report has been generated by the CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq analysis pipeline. report_section_order: "CenterForMedicalGeneticsGhent-nf-cmgg-qdnaseq-methods-description": diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab..36d2407 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,3 @@ sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +test1,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R2.fastq.gz +test2,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz, diff --git a/assets/schema_input.json b/assets/schema_input.json index 3b28156..c9356ab 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -9,28 +9,21 @@ "properties": { "sample": { "type": "string", - "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "meta": ["id"] }, "fastq_1": { "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$", - "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + "format": "file-path", + "exists": true, + "pattern": "^.*\\.fastq(\\.gz)?$" }, "fastq_2": { - "errorMessage": "FastQ file for reads 2 cannot contain spaces 
and must have extension '.fq.gz' or '.fastq.gz'", - "anyOf": [ - { - "type": "string", - "pattern": "^\\S+\\.f(ast)?q\\.gz$" - }, - { - "type": "string", - "maxLength": 0 - } - ] + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^.*\\.fastq(\\.gz)?$" } }, - "required": ["sample", "fastq_1"] + "required": ["fastq_1", "sample"] } } diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index 4a758fe..0000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,259 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".fq.gz", - ".fastq.gz", - ) - - def __init__( - self, - sample_col="sample", - first_col="fastq_1", - second_col="fastq_2", - single_col="single_end", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). - first_col (str): The name of the column that contains the first (or only) - FASTQ file path (default "fastq_1"). - second_col (str): The name of the column that contains the second (if any) - FASTQ file path (default "fastq_2"). - single_col (str): The name of the new column that will be inserted and - records whether the sample contains single- or paired-end sequencing - reads (default "single_end"). 
- - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._first_col = first_col - self._second_col = second_col - self._single_col = single_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row and insert the read pairing status. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_first(row) - self._validate_second(row) - self._validate_pair(row) - self._seen.add((row[self._sample_col], row[self._first_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_first(self, row): - """Assert that the first FASTQ entry is non-empty and has the right format.""" - if len(row[self._first_col]) <= 0: - raise AssertionError("At least the first FASTQ file is required.") - self._validate_fastq_format(row[self._first_col]) - - def _validate_second(self, row): - """Assert that the second FASTQ entry has the right format if it exists.""" - if len(row[self._second_col]) > 0: - self._validate_fastq_format(row[self._second_col]) - - def _validate_pair(self, row): - """Assert that read pairs have the same file extension. 
Report pair status.""" - if row[self._first_col] and row[self._second_col]: - row[self._single_col] = False - first_col_suffix = Path(row[self._first_col]).suffixes[-2:] - second_col_suffix = Path(row[self._second_col]).suffixes[-2:] - if first_col_suffix != second_col_suffix: - raise AssertionError("FASTQ pairs must have the same file extensions.") - else: - row[self._single_col] = True - - def _validate_fastq_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The FASTQ file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and FASTQ filename is unique. - - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample name and FASTQ must be unique.") - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. 
_text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by nf-core pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - an additional column which records whether one or two FASTQ reads were found. - - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `viral recon samplesheet`_:: - - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - - .. _viral recon samplesheet: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv - - """ - required_columns = {"sample", "fastq_1", "fastq_2"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. 
- checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - header.insert(1, "single_end") - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/conf/base.config b/conf/base.config index c41d939..3287100 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: 
Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } @@ -19,13 +18,6 @@ process { maxRetries = 1 maxErrors = '-1' - // Process-specific resource requirements - // NOTE - Please try and re-use the labels below as much as possible. - // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. - // If possible, it would be nice to keep the same label naming convention when - // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. - // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } diff --git a/conf/modules.config b/conf/modules.config index da58a5d..e9df84b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -10,26 +10,42 @@ ---------------------------------------------------------------------------------------- */ +def date = new Date().format("yyyy-MM-dd") + +if(!params.annotation_genome) { + params.annotation_genome = params.genome +} + process { - publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - ] + withName: TRIMGALORE { + ext.args = "--hardtrim5 50" + } - withName: SAMPLESHEET_CHECK { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, + withName: BWA_ALN { + ext.args = "-n 2 -q 40" + } + + withName: GAWK { + ext.suffix = "sizes" + ext.args2 = '\'{print $1"\t"$2}\'' + } + + withName: GENMAP_MAP { + ext.args = '--wig --length 50 -E 2' + ext.prefix = {"${meta.id}_map"} + } + + withName: CREATE_ANNOTATIONS { + publishDir = [ + overwrite: true, + enabled: true, mode: params.publish_dir_mode, + path: "${params.outdir}/annotations-${date}", saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: FASTQC { - ext.args = '--quiet' - } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, @@ -38,4 +54,14 @@ process { ] } + withName: MULTIQC { + publishDir = [ + overwrite: true, + enabled: true, + mode: params.publish_dir_mode, + path: "${params.outdir}/multiqc_reports", + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } diff --git a/conf/nf_test.config b/conf/nf_test.config new file mode 100644 index 0000000..963e210 --- /dev/null +++ b/conf/nf_test.config @@ -0,0 +1,34 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests with nf-test +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq -profile nf_test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + input = "${params.baseDir}/tests/inputs/samplesheet.csv" + outdir = "${params.outputDir}" + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = "${projectDir}/assets/samplesheet.csv" + fasta = params.test_data["homo_sapiens"]["genome"]["genome_fasta"] + fai = params.test_data["homo_sapiens"]["genome"]["genome_fasta_fai"] + genomes_ignore = true + + // Genome references + genome = 'hg38' + bin_sizes = "10,5" +} diff --git a/conf/test.config b/conf/test.config index 95681be..fd57a5b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,10 +20,12 @@ params { max_time = '6.h' // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = "${projectDir}/assets/samplesheet.csv" + fasta = params.test_data["homo_sapiens"]["genome"]["genome_fasta"] + fai = params.test_data["homo_sapiens"]["genome"]["genome_fasta_fai"] + genomes_ignore = true // Genome references - genome = 'R64-1-1' + genome = 'hg38' + bin_sizes = "10,5" } diff --git a/conf/test_full.config b/conf/test_full.config index e9e680e..dcd15bf 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,8 +15,6 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on 
nf-core/test-datasets or directly in repositories, e.g. SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' // Genome references diff --git a/docs/README.md b/docs/README.md index 9472c8c..02321c7 100644 --- a/docs/README.md +++ b/docs/README.md @@ -6,3 +6,5 @@ The CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq documentation is split into th - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. - [Output](output.md) - An overview of the different results produced by the pipeline and how to interpret them. +- [Parameters](parameters.md) + - An overview of the pipeline parameters. diff --git a/docs/output.md b/docs/output.md index 5a63a1c..40cd5c4 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,37 +6,24 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -- [FastQC](#fastqc) - Raw read QC +- Create annotations - Annotations for all bins supplied via the --bin_sizes parameter - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution -### FastQC +### Create annotations
Output files -- `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics. - - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `annotations-YYYY-MM-DD/` + - `<genome>.<bin_size>kbp.rda`: the annotation file for the bin size `bin_size` for the genome `genome`
-[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). - -![MultiQC - FastQC sequence counts plot](images/mqc_fastqc_counts.png) - -![MultiQC - FastQC mean quality scores plot](images/mqc_fastqc_quality.png) - -![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) - -> **NB:** The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They may contain adapter sequence and potentially regions with low quality. - ### MultiQC
diff --git a/docs/parameters.md b/docs/parameters.md new file mode 100644 index 0000000..0630c53 --- /dev/null +++ b/docs/parameters.md @@ -0,0 +1,78 @@ +# CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq pipeline parameters + +A nextflow pipeline for creating bin annotations for qDNAseq + +## Input/output options + +Define where the pipeline should find input data and save output data. + +| Parameter | Description | Type | Default | Required | Hidden | +| --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------------------------ | -------- | ------ | +| `input` | Path to comma-separated file containing information about the samples in the experiment.
<details><summary>Help</summary><small>You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.</small></details>
| `string` | | True | | +| `outdir` | The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure. | `string` | | True | | +| `email` | Email address for completion summary.
<details><summary>Help</summary><small>Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.</small></details>
| `string` | | | | +| `multiqc_title` | MultiQC report title. Printed as page header, used for filename if not otherwise specified. | `string` | | | | +| `bin_sizes` | A comma-separated list of bin sizes to create annotations from | `string` | 1000,500,50,30,15,10,5,1 | | | + +## Reference genome options + +Reference genome related files and options required for the workflow. + +| Parameter | Description | Type | Default | Required | Hidden | +| ------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | +| `genome` | Name of the genome. | `string` | | True | | +| `annotation_genome` | The name of the genome used to create the annotations. This will default to the value supplied with --genome. | `string` | None | | | +| `fasta` | Path to FASTA genome file.
<details><summary>Help</summary><small>This parameter is _mandatory_ if `--genome` is not specified.</small></details>
| `string` | | | | +| `fai` | Path to FASTA genome index file. | `string` | | | | +| `bwa` | The BWA index. | `string` | | | | +| `blacklist` | The blacklist BED file. | `string` | | | | +| `igenomes_base` | Directory / URL base for iGenomes references. | `string` | | | True | +| `igenomes_ignore` | Do not load the iGenomes reference config.
<details><summary>Help</summary><small>Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.</small></details>
| `boolean` | | | True | +| `genomes_base` | Directory / URL base for CMGG references. | `string` | None | | | +| `genomes_ignore` | Do not load the CMGG reference config. | `boolean` | | | | +| `cmgg_config_base` | The path to the CMGG configs | `string` | /conf/ | | | + +## Institutional config options + +Parameters used to describe centralised config profiles. These should not be edited. + +| Parameter | Description | Type | Default | Required | Hidden | +| ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | -------------------------------------------------------- | -------- | ------ | +| `custom_config_version` | Git commit id for Institutional configs. | `string` | master | | True | +| `custom_config_base` | Base directory for Institutional configs.
HelpIf you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.
| `string` | https://raw.githubusercontent.com/nf-core/configs/master | | True | +| `config_profile_name` | Institutional config name. | `string` | | | True | +| `config_profile_description` | Institutional config description. | `string` | | | True | +| `config_profile_contact` | Institutional config contact information. | `string` | | | True | +| `config_profile_url` | Institutional config URL link. | `string` | | | True | + +## Max job request options + +Set the top limit for requested resources for any single job. + +| Parameter | Description | Type | Default | Required | Hidden | +| ------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- | ------- | -------- | ------ | +| `max_cpus` | Maximum number of CPUs that can be requested for any single job.
HelpUse to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`
| `integer` | 16 | | True | +| `max_memory` | Maximum amount of memory that can be requested for any single job.
HelpUse to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`
| `string` | 128.GB | | True | +| `max_time` | Maximum amount of time that can be requested for any single job.
HelpUse to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`
| `string` | 240.h | | True | + +## Generic options + +Less common options for the pipeline, typically set in a config file. + +| Parameter | Description | Type | Default | Required | Hidden | +| ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------- | ------- | -------- | ------ | +| `help` | Display help text. | `boolean` | | | True | +| `version` | Display version and exit. | `boolean` | | | True | +| `publish_dir_mode` | Method used to save pipeline results to output directory.
HelpThe Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.
| `string` | copy | | True | +| `email_on_fail` | Email address for completion summary, only when pipeline fails.
HelpAn email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.
| `string` | | | True | +| `plaintext_email` | Send plain-text email instead of HTML. | `boolean` | | | True | +| `max_multiqc_email_size` | File size limit when attaching MultiQC reports to summary emails. | `string` | 25.MB | | True | +| `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True | +| `hook_url` | Incoming hook URL for messaging service
HelpIncoming hook URL for messaging service. Currently, MS Teams and Slack are supported.
| `string` | | | True | +| `multiqc_config` | Custom config file to supply to MultiQC. | `string` | | | True | +| `multiqc_logo` | Custom logo file to supply to MultiQC. File name must also be set in the MultiQC config file | `string` | | | True | +| `multiqc_methods_description` | Custom MultiQC yaml file containing HTML including a methods description. | `string` | | | | +| `validate_params` | Boolean whether to validate parameters against the schema at runtime | `boolean` | True | | True | +| `validationShowHiddenParams` | Show all params when using `--help`
HelpBy default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters.
| `boolean` | | | True | +| `validationFailUnrecognisedParams` | Validation of parameters fails when an unrecognised parameter is found.
HelpBy default, when an unrecognised parameter is found, it returns a warning.
| `boolean` | | | True | +| `validationLenientMode` | Validation of parameters in lenient mode.
HelpAllows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode).
| `boolean` | | | True | diff --git a/docs/usage.md b/docs/usage.md index d975320..b7a0840 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -4,8 +4,6 @@ ## Introduction - - ## Samplesheet input You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 949ebb4..ddc3372 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -11,9 +11,6 @@ class WorkflowMain { // public static String citation(workflow) { return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // TODO nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + diff --git a/lib/WorkflowNf-cmgg-qdnaseq.groovy b/lib/WorkflowQdnaseq.groovy similarity index 79% rename from lib/WorkflowNf-cmgg-qdnaseq.groovy rename to lib/WorkflowQdnaseq.groovy index 12fd22d..a2e9b1e 100755 --- a/lib/WorkflowNf-cmgg-qdnaseq.groovy +++ b/lib/WorkflowQdnaseq.groovy @@ -5,7 +5,7 @@ import nextflow.Nextflow import groovy.text.SimpleTemplateEngine -class WorkflowNf-cmgg-qdnaseq { +class WorkflowQdnaseq { // // Check and validate parameters @@ -53,12 +53,8 @@ class WorkflowNf-cmgg-qdnaseq { public static String toolCitationText(params) { - // TODO Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ "Tools used in the workflow included:", - "FastQC (Andrews 2010),", "MultiQC (Ewels et al. 2016)", "." 
].join(' ').trim() @@ -68,11 +64,7 @@ class WorkflowNf-cmgg-qdnaseq { public static String toolBibliographyText(params) { - // TODO Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " ].join(' ').trim() @@ -93,11 +85,6 @@ class WorkflowNf-cmgg-qdnaseq { meta["tool_citations"] = "" meta["tool_bibliography"] = "" - // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - //meta["tool_bibliography"] = toolBibliographyText(params) - - def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() diff --git a/main.nf b/main.nf index ee870f3..70e38b5 100644 --- a/main.nf +++ b/main.nf @@ -15,7 +15,9 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.fai = WorkflowMain.getGenomeAttribute(params, 'fai') +params.bwa = WorkflowMain.getGenomeAttribute(params, 'bwa') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -47,13 +49,13 @@ WorkflowMain.initialise(workflow, params, log) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { NF-CMGG-QDNASEQ } from './workflows/nf-cmgg-qdnaseq' +include { QDNASEQ } from './workflows/qdnaseq' // // WORKFLOW: Run main CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq analysis pipeline // -workflow CENTERFORMEDICALGENETICSGHENT_NF-CMGG-QDNASEQ { - NF-CMGG-QDNASEQ () +workflow CENTERFORMEDICALGENETICSGHENT_QDNASEQ { + QDNASEQ () } /* @@ -67,7 +69,7 @@ workflow CENTERFORMEDICALGENETICSGHENT_NF-CMGG-QDNASEQ { // See: https://github.com/nf-core/rnaseq/issues/619 // workflow { - CENTERFORMEDICALGENETICSGHENT_NF-CMGG-QDNASEQ () + CENTERFORMEDICALGENETICSGHENT_QDNASEQ () } /* diff --git a/modules.json b/modules.json index 1e6448c..a72f268 100644 --- a/modules.json +++ b/modules.json @@ -5,19 +5,88 @@ "https://github.com/nf-core/modules.git": { 
"modules": { "nf-core": { + "bwa/aln": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"], + "patch": "modules/nf-core/bwa/aln/bwa-aln.diff" + }, + "bwa/index": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "bwa/sampe": { + "branch": "master", + "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "installed_by": ["modules"], + "patch": "modules/nf-core/bwa/sampe/bwa-sampe.diff" + }, + "bwa/samse": { + "branch": "master", + "git_sha": "603ecbd9f45300c9788f197d2a15a005685b4220", + "installed_by": ["modules"], + "patch": "modules/nf-core/bwa/samse/bwa-samse.diff" + }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "76cc4938c1f6ea5c7d83fed1eeffc146787f9543", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, - "fastqc": { + "gawk": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "genmap/index": { + "branch": "master", + "git_sha": "2c4fc7bfe72e3a504e403486085f7f047a4f41c1", + "installed_by": ["modules"] + }, + "genmap/map": { + "branch": "master", + "git_sha": "2c4fc7bfe72e3a504e403486085f7f047a4f41c1", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "f2d63bd5b68925f98f572eed70993d205cc694b7", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/convert": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", + "installed_by": ["modules"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"], + "patch": 
"modules/nf-core/samtools/index/samtools-index.diff" + }, + "tabix/bgzip": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "trimgalore": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "ucsc/wigtobigwig": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, + "untar": { + "branch": "master", + "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", "installed_by": ["modules"] } } diff --git a/modules/local/create_annotations/Dockerfile b/modules/local/create_annotations/Dockerfile new file mode 100644 index 0000000..b8244d6 --- /dev/null +++ b/modules/local/create_annotations/Dockerfile @@ -0,0 +1,14 @@ +FROM mambaorg/micromamba:1.4-focal + +LABEL version="0.0.1" maintainer="Nicolas Vannieuwkerke " + +RUN micromamba install -y --name base -c conda-forge -c bioconda -c defaults \ + bioconductor-qdnaseq==1.34.0 \ + bioconductor-biobase==2.58.0 \ + ucsc-bigwigaverageoverbed==377 \ + r-biocmanager==1.30.21 \ + r-xml==3.99_0.14 \ + r-restfulr==0.0.15 \ + bioconductor-rtracklayer==1.58.0 \ + r-r.cache==0.16.0 \ + && micromamba clean --all --yes diff --git a/modules/local/create_annotations/main.nf b/modules/local/create_annotations/main.nf new file mode 100644 index 0000000..6a46ee6 --- /dev/null +++ b/modules/local/create_annotations/main.nf @@ -0,0 +1,36 @@ +process CREATE_ANNOTATIONS { + tag "$bin_size" + label 'process_single' + + container "quay.io/cmgg/qdnaseq:0.0.1" + + input: + val(bin_size) + tuple val(meta), path(bams, stageAs:"bams/*"), path(bais, stageAs:"bams/*") + tuple val(meta2), path(bigwig) + tuple val(meta3), path(blacklist) + + output: + tuple val(meta), path("*.rda"), emit: annotation + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + template "create_annotations.R" + + stub: + def prefix = 
task.ext.prefix ?: "${params.annotation_genome}.${bin_size}kbp" + + """ + touch ${prefix}.rda + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bioconductor-qdnaseq: 1.34.0 + bioconductor-biobase: 2.58.0 + ucsc-bigwigaverageoverbed: 377 + END_VERSIONS + """ +} diff --git a/modules/local/create_annotations/templates/create_annotations.R b/modules/local/create_annotations/templates/create_annotations.R new file mode 100644 index 0000000..505f916 --- /dev/null +++ b/modules/local/create_annotations/templates/create_annotations.R @@ -0,0 +1,54 @@ +#!/usr/bin/env Rscript + +# load required packages +library(Biobase) +library(BiocManager) +library(QDNAseq) +library(future) + +BiocManager::install("BSgenome.Hsapiens.UCSC.${params.annotation_genome}") +library(BSgenome.Hsapiens.UCSC.${params.annotation_genome}) + +binsize <- ${bin_size} + +bins <- createBins(bsgenome=BSgenome.Hsapiens.UCSC.${params.annotation_genome}, binSize=binsize) +bins\$mappability <- calculateMappability( + bins, + bigWigFile="${bigwig}", + bigWigAverageOverBed="bigWigAverageOverBed" +) + +bins\$blacklist <- calculateBlacklist(bins, bedFiles=c("${blacklist}")) + +bins\$residual <- NA +bins\$use <- bins\$bases > 0 + +# +tg <- binReadCounts(bins, path="bams") + +bins\$residual <- iterateResiduals(tg) + +bins <- AnnotatedDataFrame(bins, + varMetadata=data.frame( + labelDescription=c( + "Chromosome name", + "Base pair start position", + "Base pair end position", + "Percentage of non-N nucleotides (of full bin size)", + "Percentage of C and G nucleotides (of non-N nucleotides)", + "Average mappability of 50mers with a maximum of 2 mismatches", + "Percent overlap with ENCODE blacklisted regions", + "Median loess residual from 1000 Genomes (50mers)", + "Whether the bin should be used in subsequent analysis steps" + ), + row.names=colnames(bins) + ) +) + +save(bins, file=paste0("${params.annotation_genome}.${bin_size}kbp.rda"), compress='xz') + +sink("versions.yml") +cat("\\"${task.process}\\":\n") 
+cat(" bioconductor-qdnaseq: 1.34.0\n") +cat(" bioconductor-biobase: 2.58.0\n") +cat(" ucsc-bigwigaverageoverbed: 377\n") diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 21f164c..0000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,31 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.8.3" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.8.3' : - 'biocontainers/python:3.8.3' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/nf-core/bwa/aln/bwa-aln.diff b/modules/nf-core/bwa/aln/bwa-aln.diff new file mode 100644 index 0000000..6ccaf21 --- /dev/null +++ b/modules/nf-core/bwa/aln/bwa-aln.diff @@ -0,0 +1,34 @@ +Changes in module 'nf-core/bwa/aln' +--- modules/nf-core/bwa/aln/main.nf ++++ modules/nf-core/bwa/aln/main.nf +@@ -62,4 +62,28 @@ + END_VERSIONS + """ + } ++ ++ stub: ++ def prefix = task.ext.prefix ?: "${meta.id}" ++ ++ if (meta.single_end) { ++ """ ++ touch ${prefix}.sai ++ ++ cat <<-END_VERSIONS > versions.yml ++ "${task.process}": ++ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') ++ END_VERSIONS ++ """ ++ } else { ++ """ ++ touch ${prefix}.1.sai ++ touch ${prefix}.2.sai ++ ++ cat <<-END_VERSIONS > versions.yml ++ "${task.process}": ++ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') ++ END_VERSIONS ++ """ ++ } + 
} + +************************************************************ diff --git a/modules/nf-core/bwa/aln/main.nf b/modules/nf-core/bwa/aln/main.nf new file mode 100644 index 0000000..bc7a363 --- /dev/null +++ b/modules/nf-core/bwa/aln/main.nf @@ -0,0 +1,89 @@ +process BWA_ALN { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::bwa=0.7.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--h5bf99c6_8' : + 'biocontainers/bwa:0.7.17--h5bf99c6_8' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(index) + + output: + tuple val(meta), path("*.sai"), emit: sai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + if (meta.single_end) { + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa aln \\ + $args \\ + -t $task.cpus \\ + -f ${prefix}.sai \\ + \$INDEX \\ + ${reads} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ + } else { + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa aln \\ + $args \\ + -t $task.cpus \\ + -f ${prefix}.1.sai \\ + \$INDEX \\ + ${reads[0]} + + bwa aln \\ + $args \\ + -t $task.cpus \\ + -f ${prefix}.2.sai \\ + \$INDEX \\ + ${reads[1]} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + if (meta.single_end) { + """ + touch ${prefix}.sai + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ + } else { + """ + touch ${prefix}.1.sai + touch ${prefix}.2.sai + + cat <<-END_VERSIONS 
> versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/bwa/aln/meta.yml b/modules/nf-core/bwa/aln/meta.yml new file mode 100644 index 0000000..f4cc8f3 --- /dev/null +++ b/modules/nf-core/bwa/aln/meta.yml @@ -0,0 +1,59 @@ +name: bwa_aln +description: Find SA coordinates of the input reads for bwa short-read mapping +keywords: + - bwa + - aln + - short-read + - align + - reference + - fasta + - map + - fastq +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://bio-bwa.sourceforge.net/ + doi: "10.1093/bioinformatics/btp324" + licence: ["GPL-3.0-or-later"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - sai: + type: file + description: Single or paired SA coordinate files + pattern: "*.sai" + +authors: + - "@jfy133" diff --git a/modules/nf-core/bwa/index/main.nf b/modules/nf-core/bwa/index/main.nf new file mode 100644 index 0000000..8d2e56d --- /dev/null +++ b/modules/nf-core/bwa/index/main.nf @@ -0,0 +1,51 @@ +process BWA_INDEX { + tag "$fasta" + label 'process_single' + + conda "bioconda::bwa=0.7.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : + 'biocontainers/bwa:0.7.17--hed695b0_7' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path(bwa) , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bwa + bwa \\ + index \\ + $args \\ + -p bwa/${fasta.baseName} \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ + + stub: + """ + mkdir bwa + + touch bwa/genome.amb + touch bwa/genome.ann + touch bwa/genome.bwt + touch bwa/genome.pac + touch bwa/genome.sa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwa/index/meta.yml b/modules/nf-core/bwa/index/meta.yml new file mode 100644 index 0000000..2c6cfcd --- /dev/null +++ b/modules/nf-core/bwa/index/meta.yml @@ -0,0 +1,42 @@ +name: bwa_index +description: Create BWA index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as 
the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "*.{amb,ann,bwt,pac,sa}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/bwa/sampe/bwa-sampe.diff b/modules/nf-core/bwa/sampe/bwa-sampe.diff new file mode 100644 index 0000000..a245279 --- /dev/null +++ b/modules/nf-core/bwa/sampe/bwa-sampe.diff @@ -0,0 +1,23 @@ +Changes in module 'nf-core/bwa/sampe' +--- modules/nf-core/bwa/sampe/main.nf ++++ modules/nf-core/bwa/sampe/main.nf +@@ -39,4 +39,17 @@ + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ ++ ++ stub: ++ def prefix = task.ext.prefix ?: "${meta.id}" ++ ++ """ ++ touch ${prefix}.bam ++ ++ cat <<-END_VERSIONS > versions.yml ++ "${task.process}": ++ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') ++ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') ++ END_VERSIONS ++ """ + } + +************************************************************ diff --git a/modules/nf-core/bwa/sampe/main.nf b/modules/nf-core/bwa/sampe/main.nf new file mode 100644 index 0000000..d3eb613 --- /dev/null +++ b/modules/nf-core/bwa/sampe/main.nf @@ -0,0 +1,55 @@ +process BWA_SAMPE { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 
'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' : + 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }" + + input: + tuple val(meta), path(reads), path(sai) + tuple val(meta2), path(index) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def read_group = meta.read_group ? "-r ${meta.read_group}" : "" + + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa sampe \\ + $args \\ + $read_group \\ + \$INDEX \\ + $sai \\ + $reads | samtools sort -@ ${task.cpus} -O bam - > ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwa/sampe/meta.yml b/modules/nf-core/bwa/sampe/meta.yml new file mode 100644 index 0000000..0cefb96 --- /dev/null +++ b/modules/nf-core/bwa/sampe/meta.yml @@ -0,0 +1,63 @@ +name: bwa_sampe +description: Convert paired-end bwa SA coordinate files to SAM format +keywords: + - bwa + - aln + - short-read + - align + - reference + - fasta + - map + - sam + - bam +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, 
such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://bio-bwa.sourceforge.net/ + doi: "10.1093/bioinformatics/btp324" + licence: ["GPL-3.0-or-later"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: FASTQ files specified alongside meta in input channel. + pattern: "*.{fastq,fq}.gz" + - sai: + type: file + description: SAI file specified alongside meta and reads in input channel. + pattern: "*.sai" + - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - index: + type: directory + description: Directory containing BWA index files (amb,ann,bwt,pac,sa) from BWA_INDEX + pattern: "bwa/" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: BAM file + pattern: "*.bam" + +authors: + - "@jfy133" diff --git a/modules/nf-core/bwa/samse/bwa-samse.diff b/modules/nf-core/bwa/samse/bwa-samse.diff new file mode 100644 index 0000000..0f8c8c8 --- /dev/null +++ b/modules/nf-core/bwa/samse/bwa-samse.diff @@ -0,0 +1,23 @@ +Changes in module 'nf-core/bwa/samse' +--- modules/nf-core/bwa/samse/main.nf ++++ modules/nf-core/bwa/samse/main.nf +@@ -39,4 +39,17 @@ + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ ++ ++ stub: ++ def prefix = task.ext.prefix ?: "${meta.id}" ++ ++ """ ++ touch ${prefix}.bam ++ ++ cat <<-END_VERSIONS > versions.yml ++ "${task.process}": ++ bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') ++ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') ++ END_VERSIONS ++ """ + } + 
+************************************************************ diff --git a/modules/nf-core/bwa/samse/main.nf b/modules/nf-core/bwa/samse/main.nf new file mode 100644 index 0000000..10457a2 --- /dev/null +++ b/modules/nf-core/bwa/samse/main.nf @@ -0,0 +1,55 @@ +process BWA_SAMSE { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::bwa=0.7.17 bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' : + 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:219b6c272b25e7e642ae3ff0bf0c5c81a5135ab4-0' }" + + input: + tuple val(meta), path(reads), path(sai) + tuple val(meta2), path(index) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def read_group = meta.read_group ? 
"-r ${meta.read_group}" : "" + + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa samse \\ + $args \\ + $read_group \\ + \$INDEX \\ + $sai \\ + $reads | samtools sort -@ ${task.cpus - 1} -O bam - > ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwa/samse/meta.yml b/modules/nf-core/bwa/samse/meta.yml new file mode 100644 index 0000000..1deab21 --- /dev/null +++ b/modules/nf-core/bwa/samse/meta.yml @@ -0,0 +1,64 @@ +name: bwa_samse +description: Convert bwa SA coordinate file to SAM format +keywords: + - bwa + - aln + - short-read + - align + - reference + - fasta + - map + - sam + - bam + +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://bio-bwa.sourceforge.net/ + doi: "10.1093/bioinformatics/btp324" + licence: ["GPL-3.0-or-later"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: FASTQ files specified alongside meta in input channel. + pattern: "*.{fastq,fq}.gz" + - sai: + type: file + description: SAI file specified alongside meta and reads in input channel. + pattern: "*.sai" + - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. 
[ id:'test', single_end:false ] + - index: + type: directory + description: Directory containing BWA index files (amb,ann,bwt,pac,sa) from BWA_INDEX + pattern: "bwa/" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bam: + type: file + description: BAM file + pattern: "*.bam" + +authors: + - "@jfy133" diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 800a609..ebc8727 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -5,7 +5,7 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index e55b8d4..da03340 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -4,11 +4,10 @@ """Provide functions to merge multiple versions.yml files.""" +import yaml import platform from textwrap import dedent -import yaml - def _make_versions_html(versions): """Generate a tabular HTML output of all versions for MultiQC.""" diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf deleted file mode 100644 index 9ae5838..0000000 --- a/modules/nf-core/fastqc/main.nf +++ /dev/null @@ -1,51 +0,0 @@ -process FASTQC { - tag "$meta.id" - 
label 'process_medium' - - conda "bioconda::fastqc=0.11.9" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - // Make list of old name and new name pairs to use for renaming in the bash while loop - def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } - def rename_to = old_new_pairs*.join(' ').join(' ') - def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') - """ - printf "%s %s\\n" $rename_to | while read old_name new_name; do - [ -f "\${new_name}" ] || ln -s \$old_name \$new_name - done - fastqc $args --threads $task.cpus $renamed_files - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml deleted file mode 100644 index 4da5bb5..0000000 --- a/modules/nf-core/fastqc/meta.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: fastqc -description: Run FastQC on sequenced reads -keywords: - - quality control - - qc - - adapters - - fastq -tools: - - fastqc: - description: | - FastQC gives general 
quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ["GPL-2.0-only"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" diff --git a/modules/nf-core/gawk/main.nf b/modules/nf-core/gawk/main.nf new file mode 100644 index 0000000..9ca20e8 --- /dev/null +++ b/modules/nf-core/gawk/main.nf @@ -0,0 +1,54 @@ +process GAWK { + tag "$meta.id" + label 'process_single' + + conda "anaconda::gawk=5.1.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : + 'biocontainers/gawk:5.1.0' }" + + input: + tuple val(meta), path(input) + path(program_file) + + output: + tuple val(meta), path("${prefix}.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' // args is used for the main arguments of the tool + def args2 = task.ext.args2 ?: '' // args2 is used to specify a program when no program file has been given + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.getExtension()}" // when ext.suffix is unset, reuse the input file's extension (method call, not a property) + + program = program_file ? "-f ${program_file}" : "${args2}" + + """ + awk \\ + ${args} \\ + ${program} \\ + ${input} \\ + > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "${input.getExtension()}" // same fallback as in the script section + + """ + touch ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gawk/meta.yml b/modules/nf-core/gawk/meta.yml new file mode 100644 index 0000000..6f392ca --- /dev/null +++ b/modules/nf-core/gawk/meta.yml @@ -0,0 +1,52 @@ +name: "gawk" +description: | + If you are like many computer users, you would frequently like to make changes in various text files + wherever certain patterns appear, or extract data from parts of certain lines while discarding the rest. + The job is easy with awk, especially the GNU implementation gawk. 
+keywords: + - gawk + - awk + - txt + - text + - file parsing +tools: + - "gawk": + description: "GNU awk" + homepage: "https://www.gnu.org/software/gawk/" + documentation: "https://www.gnu.org/software/gawk/manual/" + tool_dev_url: "https://www.gnu.org/prep/ftp.html" + + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: The input file - Specify the logic that needs to be executed on this file on the `ext.args2` or in the program file + pattern: "*" + - program_file: + type: file + description: Optional file containing logic for awk to execute. If you don't wish to use a file, you can use `ext.args2` to specify the logic. + pattern: "*" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - output: + type: file + description: The output file - specify the name of this file using `ext.prefix` and the extension using `ext.suffix` + pattern: "*" + +authors: + - "@nvnieuwk" diff --git a/modules/nf-core/genmap/index/main.nf b/modules/nf-core/genmap/index/main.nf new file mode 100644 index 0000000..fa44511 --- /dev/null +++ b/modules/nf-core/genmap/index/main.nf @@ -0,0 +1,48 @@ +process GENMAP_INDEX { + tag "$meta.id" + label 'process_high' + + conda "bioconda::genmap=1.3.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/genmap:1.3.0--h1b792b2_1' : + 'biocontainers/genmap:1.3.0--h1b792b2_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("${prefix}") , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "$meta.id" + + """ + genmap \\ + index \\ + --fasta-file ${fasta} \\ + --index ${prefix} \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genmap: \$(genmap --version 2>&1 | sed 's/GenMap version: //; s/SeqAn.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "$meta.id" + + """ + touch ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genmap: \$(genmap --version 2>&1 | sed 's/GenMap version: //; s/SeqAn.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/genmap/index/meta.yml b/modules/nf-core/genmap/index/meta.yml new file mode 100644 index 0000000..e7f2f36 --- /dev/null +++ b/modules/nf-core/genmap/index/meta.yml @@ -0,0 +1,43 @@ +name: genmap_index +description: create index file for genmap +keywords: + - index + - mappability + - fasta +tools: + - genmap: + description: Ultra-fast computation of genome mappability. + homepage: https://github.com/cpockrandt/genmap + documentation: https://github.com/cpockrandt/genmap + tool_dev_url: https://github.com/cpockrandt/genmap + doi: "10.1093/bioinformatics/btaa222" + licence: ["BSD-3-Clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: fasta file to index + pattern: "*.{fasta,fa,fna}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - index: + type: directory + description: Genmap index directory + +authors: + - "@jianhong" + - "@nvnieuwk" diff --git a/modules/nf-core/genmap/map/main.nf b/modules/nf-core/genmap/map/main.nf new file mode 100644 index 0000000..99692d2 --- /dev/null +++ b/modules/nf-core/genmap/map/main.nf @@ -0,0 +1,66 @@ +process GENMAP_MAP { + tag "$meta.id" + label 'process_high' + + conda "bioconda::genmap=1.3.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/genmap:1.3.0--h1b792b2_1' : + 'biocontainers/genmap:1.3.0--h1b792b2_1' }" + + input: + tuple val(meta), path(index) + tuple val(meta2), path(regions) + + output: + tuple val(meta), path("*.wig") , optional:true, emit: wig + tuple val(meta), path("*.bedgraph") , optional:true, emit: bedgraph + tuple val(meta), path("*.txt") , optional:true, emit: txt + tuple val(meta), path("*.csv") , optional:true, emit: csv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "$meta.id" + def bed = regions ? "--selection ${regions}" : "" + + if ("$index" == "${prefix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ + genmap \\ + map \\ + ${args} \\ + ${bed} \\ + --threads ${task.cpus} \\ + --index ${index} \\ + --output ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genmap: \$(genmap --version 2>&1 | sed 's/GenMap version: //; s/SeqAn.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "$meta.id" + def token_args = args.tokenize(" ") + def wig = token_args.contains("-w") || token_args.contains("--wig") ? 
"touch ${prefix}.wig" : "" + def bg = token_args.contains("-bg") || token_args.contains("--bedgraph") ? "touch ${prefix}.bedgraph" : "" + def txt = token_args.contains("-t") || token_args.contains("--txt") ? "touch ${prefix}.txt" : "" + def csv = token_args.contains("-d") || token_args.contains("--csv") ? "touch ${prefix}.csv" : "" + + if ("$index" == "${prefix}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + """ + ${wig} + ${bg} + ${txt} + ${csv} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genmap: \$(genmap --version 2>&1 | sed 's/GenMap version: //; s/SeqAn.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/genmap/map/meta.yml b/modules/nf-core/genmap/map/meta.yml new file mode 100644 index 0000000..9db0222 --- /dev/null +++ b/modules/nf-core/genmap/map/meta.yml @@ -0,0 +1,67 @@ +name: genmap_map +description: create mappability files for a genome +keywords: + - mappability + - index + - fasta + - bedgraph + - csv + - wig +tools: + - genmap: + description: Ultra-fast computation of genome mappability. + homepage: https://github.com/cpockrandt/genmap + documentation: https://github.com/cpockrandt/genmap + tool_dev_url: https://github.com/cpockrandt/genmap + doi: "10.1093/bioinformatics/btaa222" + licence: ["BSD-3-Clause"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - index: + type: directory + description: index directory + - meta2: + type: map + description: | + Groovy Map containing regions information + e.g. [ id:'test', single_end:false ] + - regions: + type: file + description: optional - a bed file with regions to define the mappability off + pattern: "*.bed" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - wig: + type: file + description: genmap wig mappability file + pattern: "*.wig" + - bedgraph: + type: file + description: genmap bedgraph mappability file + pattern: "*.bedgraph" + - txt: + type: file + description: genmap text mappability file + pattern: "*.txt" + - csv: + type: file + description: genmap csv mappability file + pattern: "*.csv" + +authors: + - "@jianhong" + - "@nvnieuwk" diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 4b60474..1fc387b 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -4,7 +4,7 @@ process MULTIQC { conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/modules/nf-core/samtools/convert/main.nf b/modules/nf-core/samtools/convert/main.nf new file mode 100644 index 0000000..29722ba --- /dev/null +++ b/modules/nf-core/samtools/convert/main.nf @@ -0,0 +1,42 @@ +process SAMTOOLS_CONVERT { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input), path(index) + path fasta + path fai + + output: + tuple val(meta), path("*.{cram,bam}"), path("*.{crai,bai}") , emit: alignment_index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def output_extension = input.getExtension() == "bam" ? "cram" : "bam" + + """ + samtools view \\ + --threads ${task.cpus} \\ + --reference ${fasta} \\ + $args \\ + $input \\ + -o ${prefix}.${output_extension} + + samtools index -@${task.cpus} ${prefix}.${output_extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/convert/meta.yml b/modules/nf-core/samtools/convert/meta.yml new file mode 100644 index 0000000..866c228 --- /dev/null +++ b/modules/nf-core/samtools/convert/meta.yml @@ -0,0 +1,52 @@ +name: samtools_convert +description: convert and then index CRAM -> BAM or BAM -> CRAM file +keywords: + - view + - index + - bam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file + pattern: "*.{bam,cram}" + - index: + type: file + description: BAM/CRAM index file + pattern: "*.{bai,crai}" + - fasta: + type: file + description: Reference file to create the CRAM file + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - alignment_index: + type: file + description: filtered/converted BAM/CRAM file + index + pattern: "*{.bam/cram,.bai/crai}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 0000000..59ed308 --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,50 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) + + output: + tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + faidx \\ + $fasta \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? 
"touch ${match[0][1]}" : '' + """ + ${fastacmd} + touch ${fasta}.fai + + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 0000000..957b25e --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,57 @@ +name: samtools_faidx +description: Index FASTA file +keywords: + - index + - fasta + - faidx +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@phue" diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 0000000..75d1c29 --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,45 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "bioconda::samtools=1.17" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : + 'biocontainers/samtools:1.17--h00cdaf9_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.{bai,crai}") , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${input}.bai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 0000000..8bd2fa6 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,53 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is 
a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/samtools-index.diff b/modules/nf-core/samtools/index/samtools-index.diff new file mode 100644 index 0000000..bba3e83 --- /dev/null +++ b/modules/nf-core/samtools/index/samtools-index.diff @@ -0,0 +1,26 @@ +Changes in module 'nf-core/samtools/index' +--- modules/nf-core/samtools/index/main.nf ++++ modules/nf-core/samtools/index/main.nf +@@ -11,10 +11,8 @@ + tuple val(meta), path(input) + + output: +- tuple val(meta), path("*.bai") , optional:true, emit: bai +- tuple val(meta), path("*.csi") , optional:true, emit: csi +- tuple val(meta), path("*.crai"), optional:true, emit: crai +- path "versions.yml" , emit: versions ++ tuple val(meta), path("*.{bai,crai}") , emit: index ++ path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when +@@ -37,7 +35,6 @@ + stub: + """ + touch ${input}.bai +- 
touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + +************************************************************ diff --git a/modules/nf-core/tabix/bgzip/main.nf b/modules/nf-core/tabix/bgzip/main.nf new file mode 100644 index 0000000..8c47d9e --- /dev/null +++ b/modules/nf-core/tabix/bgzip/main.nf @@ -0,0 +1,54 @@ +process TABIX_BGZIP { + tag "$meta.id" + label 'process_single' + + conda "bioconda::tabix=1.11" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : + 'biocontainers/tabix:1.11--hdfd78af_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("${output}") , emit: output + tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + extension = in_bgzip ? input.getBaseName().tokenize(".")[-1] : input.getExtension() + output = in_bgzip ? "${prefix}.${extension}" : "${prefix}.${extension}.gz" + command = in_bgzip ? '-d' : '' + // Name the index according to $prefix, unless a name has been requested (=~ searches anywhere in args; String.matches() would only accept args that consist of the flag alone) + if (((args =~ "(^| )-i\\b") || (args =~ "(^| )--index(\$| )")) && !(args =~ "(^| )-I\\b") && !(args =~ "(^| )--index-name\\b")) { + args = args + " -I ${output}.gzi" + } + """ + bgzip $command -c $args -@${task.cpus} $input > ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + output = in_bgzip ? 
input.getBaseName() : "${prefix}.${input.getExtension()}.gz" + + """ + touch ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/bgzip/meta.yml b/modules/nf-core/tabix/bgzip/meta.yml new file mode 100644 index 0000000..c3ea210 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/meta.yml @@ -0,0 +1,47 @@ +name: tabix_bgzip +description: Compresses/decompresses files +keywords: + - compress + - decompress + - bgzip + - tabix +tools: + - bgzip: + description: | + Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. + homepage: https://www.htslib.org/doc/tabix.html + documentation: http://www.htslib.org/doc/bgzip.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: file to compress or to decompress +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - output: + type: file + description: Output compressed/decompressed file + pattern: "*." + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" diff --git a/modules/nf-core/trimgalore/main.nf b/modules/nf-core/trimgalore/main.nf new file mode 100644 index 0000000..dcb77ae --- /dev/null +++ b/modules/nf-core/trimgalore/main.nf @@ -0,0 +1,75 @@ +process TRIMGALORE { + tag "$meta.id" + label 'process_high' + + conda "bioconda::trim-galore=0.6.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/trim-galore:0.6.7--hdfd78af_0' : + 'biocontainers/trim-galore:0.6.7--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*{3prime,5prime,trimmed,val}*.fq.gz"), emit: reads + tuple val(meta), path("*report.txt") , emit: log , optional: true + tuple val(meta), path("*unpaired*.fq.gz") , emit: unpaired, optional: true + tuple val(meta), path("*.html") , emit: html , optional: true + tuple val(meta), path("*.zip") , emit: zip , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + // Calculate number of --cores for TrimGalore based on value of task.cpus + // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019 + // See: https://github.com/nf-core/atacseq/pull/65 + def cores = 1 + if (task.cpus) { + cores = (task.cpus as int) - 4 + if (meta.single_end) cores = (task.cpus as int) - 3 + if (cores < 1) cores = 1 + if (cores > 8) cores = 8 + } + + // Added soft-links to original fastqs for consistent naming in MultiQC + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + def args_list = args.split("\\s(?=--)").toList() + args_list.removeAll { it.toLowerCase().contains('_r2 ') } + """ + [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz + trim_galore \\ + ${args_list.join(' ')} \\ + --cores $cores \\ + --gzip \\ + ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ + } else { + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz + [ ! 
-f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + trim_galore \\ + $args \\ + --cores $cores \\ + --paired \\ + --gzip \\ + ${prefix}_1.fastq.gz \\ + ${prefix}_2.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/trimgalore/meta.yml b/modules/nf-core/trimgalore/meta.yml new file mode 100644 index 0000000..f84c4d7 --- /dev/null +++ b/modules/nf-core/trimgalore/meta.yml @@ -0,0 +1,64 @@ +name: trimgalore +description: Trim FastQ files using Trim Galore! +keywords: + - trimming + - adapters + - sequencing adapters + - fastq +tools: + - trimgalore: + description: | + A wrapper tool around Cutadapt and FastQC to consistently apply quality + and adapter trimming to FastQ files, with some extra functionality for + MspI-digested RRBS-type (Reduced Representation Bisulfite-Seq) libraries. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ + documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input adapter trimmed FastQ files of size 1 and 2 for + single-end and paired-end data, respectively. 
+ pattern: "*{3prime,5prime,trimmed,val}*.fq.gz" + - unpaired: + type: file + description: | + FastQ files containing unpaired reads from read 1 or read 2 + pattern: "*unpaired*.fq.gz" + - html: + type: file + description: FastQC report (optional) + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive (optional) + pattern: "*_{fastqc.zip}" + - log: + type: file + description: Trim Galore! trimming report + pattern: "*_{report.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/ucsc/wigtobigwig/main.nf b/modules/nf-core/ucsc/wigtobigwig/main.nf new file mode 100644 index 0000000..493cf57 --- /dev/null +++ b/modules/nf-core/ucsc/wigtobigwig/main.nf @@ -0,0 +1,50 @@ +process UCSC_WIGTOBIGWIG { + tag "$meta.id" + label 'process_single' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "bioconda::ucsc-wigtobigwig=377" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ucsc-wigtobigwig:377--h0b8a92a_2' : + 'biocontainers/ucsc-wigtobigwig:377--h0b8a92a_2' }" + + input: + tuple val(meta), path(wig) + path sizes + + output: + tuple val(meta), path("*.bw"), emit: bw + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + wigToBigWig \\ + $args \\ + $wig \\ + $sizes \\ + ${prefix}.bw + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ucsc: $VERSION + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + touch ${prefix}.bw + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ucsc: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/ucsc/wigtobigwig/meta.yml b/modules/nf-core/ucsc/wigtobigwig/meta.yml new file mode 100644 index 0000000..a597fde --- /dev/null +++ b/modules/nf-core/ucsc/wigtobigwig/meta.yml @@ -0,0 +1,40 @@ +name: ucsc_wigtobigwig +description: Convert ascii format wig file to binary big wig format +keywords: + - wig + - bigwig +tools: + - ucsc: + description: | + Convert ascii format wig file (in fixedStep, variableStep + or bedGraph format) to binary big wig format + homepage: http://www.genome.ucsc.edu/goldenPath/help/bigWig.html + licence: ["varies; see http://genome.ucsc.edu/license"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - wig: + type: file + description: wig file + pattern: "*.{wig}" + - chromsizes: + type: file + description: chromosome sizes file + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bw: + type: file + description: bigwig file + pattern: "*.{bw}" + +authors: + - "@jianhong" + - "@projectoriented" diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 0000000..8cd1856 --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,63 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "conda-forge::sed=4.7 bioconda::grep=3.4 conda-forge::tar=1.34" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir $prefix + touch ${prefix}/file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 0000000..db241a6 --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,41 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - untar: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/nextflow.config b/nextflow.config index 814851c..e252e55 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,14 +9,19 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options input = null + blacklist = null + bin_sizes = "1000,500,50,30,15,10,5,1" + annotation_genome = null + // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' - igenomes_ignore = false - + igenomes_base = null //'s3://ngi-igenomes/igenomes' + igenomes_ignore = true + genomes_base = null //"/references/" + genomes_ignore = false + cmgg_config_base = "/conf/" // MultiQC options multiqc_config = null @@ -54,7 +59,7 @@ params { // Schema validation default options validationFailUnrecognisedParams = false validationLenientMode = false - validationSchemaIgnoreParams = 'genomes' + validationSchemaIgnoreParams = 'genomes,test_data' validationShowHiddenParams = false validate_params = true @@ -70,6 +75,9 @@ try { System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } +// Include test data config +includeConfig 'https://raw.githubusercontent.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/main/nf-cmgg-testdata.config' + // Load CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq custom profiles from different institutions. // Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! 
// try { @@ -168,6 +176,7 @@ profiles { } test { includeConfig 'conf/test.config' } test_full { includeConfig 'conf/test_full.config' } + nf_test { includeConfig 'conf/nf_test.config' } } // Set default registry for Docker and Podman independent of -profile @@ -185,6 +194,8 @@ plugins { // Load igenomes.config if required if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' +} else if (!params.genomes_ignore){ + includeConfig "https://raw.githubusercontent.com/CenterForMedicalGeneticsGhent/nf-cmgg-configs/main/conf/Hsapiens/genomes.config" } else { params.genomes = [:] } @@ -227,7 +238,7 @@ manifest { description = """A nextflow pipeline for creating bin annotations for qDNAseq""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.0dev' + version = '0.1.0' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index f1c5e04..8c57e12 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -18,6 +18,7 @@ "exists": true, "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", + "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.", "fa_icon": "fas fa-file-csv" @@ -39,6 +40,11 @@ "type": "string", "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", "fa_icon": "fas fa-file-signature" + }, + "bin_sizes": { + "type": "string", + "default": "1000,500,50,30,15,10,5,1", + "description": "A comma-separated list of bin sizes to create annotations from" } } }, @@ -47,28 +53,56 @@ "type": "object", "fa_icon": "fas fa-dna", "description": "Reference genome related files and options required for the workflow.", + "required": ["genome"], "properties": { "genome": { "type": "string", - "description": "Name of iGenomes reference.", - "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "description": "Name of the genome.", + "fa_icon": "fas fa-book" + }, + "annotation_genome": { + "type": "string", + "default": null, + "description": "The name of the genome used to create the annotations. This will default to the value supplied with --genome." }, "fasta": { "type": "string", "format": "file-path", "exists": true, "mimetype": "text/plain", - "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "pattern": "^\\S+\\.fn?a(sta)?$", "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--save_reference` to save BWA index for future runs.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified.", + "fa_icon": "far fa-file-code" + }, + "fai": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.fai$", + "description": "Path to FASTA genome index file.", + "fa_icon": "far fa-file-code" + }, + "bwa": { + "type": "string", + "format": "path", + "mimetype": "text/plain", + "description": "The BWA index.", + "fa_icon": "far fa-file-code" + }, + "blacklist": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.bed(\\.gz)?$", + "description": "The blacklist BED file.", "fa_icon": "far fa-file-code" }, "igenomes_base": { "type": "string", "format": "directory-path", "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes", "fa_icon": "fas fa-cloud-download-alt", "hidden": true }, @@ -78,6 +112,20 @@ "fa_icon": "fas fa-ban", "hidden": true, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + }, + "genomes_base": { + "type": "string", + "default": "None", + "description": "Directory / URL base for CMGG references." + }, + "genomes_ignore": { + "type": "boolean", + "description": "Do not load the CMGG reference config." 
+ }, + "cmgg_config_base": { + "type": "string", + "default": "/conf/", + "description": "The path to the CMGG configs" } } }, @@ -175,14 +223,12 @@ "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "version": { "type": "boolean", "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", - "default": false, "hidden": true }, "publish_dir_mode": { @@ -206,7 +252,6 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "default": false, "hidden": true }, "max_multiqc_email_size": { @@ -221,7 +266,6 @@ "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "default": false, "hidden": true }, "hook_url": { @@ -260,7 +304,6 @@ "type": "boolean", "fa_icon": "far fa-eye-slash", "description": "Show all params when using `--help`", - "default": false, "hidden": true, "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." }, @@ -268,7 +311,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters fails when an unrecognised parameter is found.", - "default": false, "hidden": true, "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." }, @@ -276,7 +318,6 @@ "type": "boolean", "fa_icon": "far fa-check-circle", "description": "Validation of parameters in lenient more.", - "default": false, "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." 
} diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..75570e9 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,8 @@ +config { + + testsDir "tests" + workDir ".nf-test" + configFile "tests/nextflow.config" + profile "nf_test,docker" + +} diff --git a/subworkflows/local/fasta_mappability_genmap/main.nf b/subworkflows/local/fasta_mappability_genmap/main.nf new file mode 100644 index 0000000..6969074 --- /dev/null +++ b/subworkflows/local/fasta_mappability_genmap/main.nf @@ -0,0 +1,48 @@ +// +// Get the mappability bigwig file +// + +include { GAWK } from '../../../modules/nf-core/gawk/main' +include { GENMAP_INDEX } from '../../../modules/nf-core/genmap/index/main' +include { GENMAP_MAP } from '../../../modules/nf-core/genmap/map/main' +include { UCSC_WIGTOBIGWIG } from '../../../modules/nf-core/ucsc/wigtobigwig/main' + +workflow FASTA_MAPPABILITY_GENMAP { + + take: + ch_fasta // channel: [ val(meta), path(fasta) ] + ch_fai // channel: [ val(meta), path(fai) ] + + main: + + ch_versions = Channel.empty() + + GAWK( + ch_fai, + [] + ) + ch_versions = ch_versions.mix(GAWK.out.versions) + + GENMAP_INDEX( + ch_fasta + ) + ch_versions = ch_versions.mix(GENMAP_INDEX.out.versions) + + GENMAP_MAP( + GENMAP_INDEX.out.index, + [[],[]] + ) + ch_versions = ch_versions.mix(GENMAP_MAP.out.versions) + + UCSC_WIGTOBIGWIG( + GENMAP_MAP.out.wig, + GAWK.out.output.map { it[1] } + ) + ch_versions = ch_versions.mix(UCSC_WIGTOBIGWIG.out.versions) + + emit: + bigwig = UCSC_WIGTOBIGWIG.out.bw.collect() + + versions = ch_versions + +} \ No newline at end of file diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf deleted file mode 100644 index 0aecf87..0000000 --- a/subworkflows/local/input_check.nf +++ /dev/null @@ -1,44 +0,0 @@ -// -// Check input samplesheet and get read channels -// - -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' - -workflow INPUT_CHECK { - take: - samplesheet // file: 
/path/to/samplesheet.csv - - main: - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channel(it) } - .set { reads } - - emit: - reads // channel: [ val(meta), [ reads ] ] - versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] -} - -// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channel(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() - - // add path(s) of the fastq file(s) to the meta map - def fastq_meta = [] - if (!file(row.fastq_1).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" - } - if (meta.single_end) { - fastq_meta = [ meta, [ file(row.fastq_1) ] ] - } else { - if (!file(row.fastq_2).exists()) { - exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" - } - fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] - } - return fastq_meta -} diff --git a/subworkflows/local/prep_alignments/main.nf b/subworkflows/local/prep_alignments/main.nf new file mode 100644 index 0000000..01afd5e --- /dev/null +++ b/subworkflows/local/prep_alignments/main.nf @@ -0,0 +1,75 @@ +// +// Prepare the alignment files +// + +include { TRIMGALORE } from '../../../modules/nf-core/trimgalore/main' +include { BWA_ALN } from '../../../modules/nf-core/bwa/aln/main' +include { BWA_SAMSE } from '../../../modules/nf-core/bwa/samse/main' +include { BWA_SAMPE } from '../../../modules/nf-core/bwa/sampe/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' + +workflow PREP_ALIGNMENTS { + + take: + ch_fastq // channel: [ val(meta), path(fastq_1), path(fastq_2)] + ch_bwa_index // channel: [ val(meta2), path(index) ] + + main: + + ch_versions = Channel.empty() + + TRIMGALORE( + ch_fastq + ) + ch_versions = ch_versions.mix(TRIMGALORE.out.versions.first()) + + BWA_ALN( + 
TRIMGALORE.out.reads, + ch_bwa_index + ) + ch_versions = ch_versions.mix(BWA_ALN.out.versions.first()) + + TRIMGALORE.out.reads // pair each .sai with the trimmed reads BWA_ALN was run on, not the raw input FASTQs + .join(BWA_ALN.out.sai, failOnDuplicate:true, failOnMismatch:true) + .branch { meta, reads, sai -> + single_end: meta.single_end + paired_end: !meta.single_end + } + .set { ch_sai } + + BWA_SAMSE( + ch_sai.single_end, + ch_bwa_index + ) + ch_versions = ch_versions.mix(BWA_SAMSE.out.versions.first()) + + BWA_SAMPE( + ch_sai.paired_end, + ch_bwa_index + ) + ch_versions = ch_versions.mix(BWA_SAMPE.out.versions.first()) + + BWA_SAMPE.out.bam + .mix(BWA_SAMSE.out.bam) + .set { ch_bams } + + SAMTOOLS_INDEX( + ch_bams + ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + ch_bams + .join(SAMTOOLS_INDEX.out.index) + .map { meta, bam, bai -> + [ [id:"bams"], bam, bai] // replace the per-sample meta with one shared key so groupTuple gathers every sample + } + .groupTuple() + .collect() + .set { ch_bams_out } + + emit: + bams = ch_bams_out // [ val(meta), path(bam), path(bai) ] + + versions = ch_versions + +} \ No newline at end of file diff --git a/tests/inputs/samplesheet.csv b/tests/inputs/samplesheet.csv new file mode 100644 index 0000000..36d2407 --- /dev/null +++ b/tests/inputs/samplesheet.csv @@ -0,0 +1,3 @@ +sample,fastq_1,fastq_2 +test1,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R2.fastq.gz +test2,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz, diff --git a/tests/main.nf.test b/tests/main.nf.test new file mode 100644 index 0000000..2ece124 --- /dev/null +++ b/tests/main.nf.test @@ -0,0 +1,24 @@ +nextflow_pipeline { + + name "Test Workflow main.nf" + script "main.nf" + options "-stub" + + test("Should run without failures") { + + expect { + def date = new java.text.SimpleDateFormat("yyyy-MM-dd").format(new 
Date()) + assertAll( + { assert workflow.success }, + { assert snapshot( + path("${outputDir}/annotations-${date}/hg38.5kbp.rda"), + path("${outputDir}/annotations-${date}/hg38.10kbp.rda"), + file("${outputDir}/multiqc_reports/multiqc_report.html").exists() + ).match() + } + ) + } + + } + +} diff --git a/tests/main.nf.test.snap b/tests/main.nf.test.snap new file mode 100644 index 0000000..085b2d7 --- /dev/null +++ b/tests/main.nf.test.snap @@ -0,0 +1,10 @@ +{ + "Should run without failures": { + "content": [ + "hg38.5kbp.rda:md5,d41d8cd98f00b204e9800998ecf8427e", + "hg38.10kbp.rda:md5,d41d8cd98f00b204e9800998ecf8427e", + true + ], + "timestamp": "2023-07-04T09:02:29+0000" + } +} \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000..c19b1ad --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,5 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ diff --git a/workflows/nf-cmgg-qdnaseq.nf b/workflows/nf-cmgg-qdnaseq.nf deleted file mode 100644 index 9e0538d..0000000 --- a/workflows/nf-cmgg-qdnaseq.nf +++ /dev/null @@ -1,133 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -WorkflowNf-cmgg-qdnaseq.initialise(params, log) - -/* 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// -include { INPUT_CHECK } from '../subworkflows/local/input_check' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// Info required for completion email and summary -def multiqc_report = [] - -workflow NF-CMGG-QDNASEQ { - - ch_versions = 
Channel.empty() - - // - // SUBWORKFLOW: Read in samplesheet, validate and stage input files - // - INPUT_CHECK ( - file(params.input) - ) - ch_versions = ch_versions.mix(INPUT_CHECK.out.versions) - // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") - // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ - // ! There is currently no tooling to help you write a sample sheet schema - - // - // MODULE: Run FastQC - // - FASTQC ( - INPUT_CHECK.out.reads - ) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) - - // - // MODULE: MultiQC - // - workflow_summary = WorkflowNf-cmgg-qdnaseq.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowNf-cmgg-qdnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = Channel.value(methods_description) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() - ) - multiqc_report = MULTIQC.out.report.toList() -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - 
-workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ diff --git a/workflows/qdnaseq.nf b/workflows/qdnaseq.nf new file mode 100644 index 0000000..27806b6 --- /dev/null +++ b/workflows/qdnaseq.nf @@ -0,0 +1,224 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + PRINT PARAMS SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' + +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) + +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation + +WorkflowQdnaseq.initialise(params, log) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    IMPORT LOCAL MODULES/SUBWORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { FASTA_MAPPABILITY_GENMAP    } from '../subworkflows/local/fasta_mappability_genmap/main'
include { PREP_ALIGNMENTS             } from '../subworkflows/local/prep_alignments/main'
include { CREATE_ANNOTATIONS          } from '../modules/local/create_annotations/main'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    IMPORT NF-CORE MODULES/SUBWORKFLOWS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

include { SAMTOOLS_FAIDX              } from '../modules/nf-core/samtools/faidx/main'
include { TABIX_BGZIP                 } from '../modules/nf-core/tabix/bgzip/main'
include { BWA_INDEX                   } from '../modules/nf-core/bwa/index/main'
include { UNTAR                       } from '../modules/nf-core/untar/main'
include { MULTIQC                     } from '../modules/nf-core/multiqc/main'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    RUN MAIN WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Info required for completion email and summary
def multiqc_report = []

//
// WORKFLOW: QDNASEQ
//
// Prepares the reference inputs (FASTA index, blacklist BED, BWA index), reads the
// samplesheet, runs PREP_ALIGNMENTS and FASTA_MAPPABILITY_GENMAP, feeds their outputs
// to CREATE_ANNOTATIONS for every requested bin size, and finally collates software
// versions and a MultiQC report.
//
// Parameters read here: fasta, fai, blacklist, annotation_genome, bwa, bin_sizes.
//
workflow QDNASEQ {

    ch_versions = Channel.empty()

    // Reference FASTA as a [meta, path] tuple; collect() turns it into a value channel
    ch_fasta = Channel.fromPath(params.fasta).map { [[id:'reference'], it] }.collect()

    // FASTA index: build it unless one was supplied with --fai
    if(!params.fai) {
        SAMTOOLS_FAIDX(
            ch_fasta
        )
        ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions)

        SAMTOOLS_FAIDX.out.fai.collect().set { ch_fai }
    } else {
        ch_fai = Channel.fromPath(params.fai).map { [[id:'reference'], it] }.collect()
    }

    // Blacklist BED: fall back to the Boyle-Lab list for the annotation genome when
    // --blacklist is not given
    if(!params.blacklist) {
        encode_url = "https://github.com/Boyle-Lab/Blacklist/raw/master/lists"
        blacklist = file("${encode_url}/${params.annotation_genome}-blacklist.v2.bed.gz")
        if(!blacklist.exists()) {
            exit 1, "Cannot find a blacklist file for ${params.annotation_genome}. Please supply one with the --blacklist option. (Also mind that the pipeline expects short notations of the --annotation_genome (e.g. hg38 instead of GRCh38))"
        }
        ch_blacklist_input = Channel.of([[id:"blacklist_${params.annotation_genome}"], blacklist])
    } else {
        ch_blacklist_input = Channel.of([[id:"blacklist_${params.annotation_genome}"], file(params.blacklist, checkIfExists:true)])
    }

    // Route the blacklist by file extension. The extension is evaluated inline so that
    // no undeclared (script-global) variable is assigned inside the closure.
    ch_blacklist_input
        .branch { meta, bed ->
            no_gz: bed.getExtension() != "gz"
            gz: bed.getExtension() == "gz"
        }
        .set { ch_gz_input }

    // Only ".gz" blacklists go through TABIX_BGZIP
    // NOTE(review): presumably this decompresses the file - confirm against the module
    TABIX_BGZIP(
        ch_gz_input.gz
    )
    ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions)

    ch_gz_input.no_gz
        .mix(TABIX_BGZIP.out.output)
        .collect()
        .set { ch_blacklist }

    // BWA index: build it unless one was supplied with --bwa
    if(!params.bwa) {
        BWA_INDEX(
            ch_fasta
        )
        ch_versions = ch_versions.mix(BWA_INDEX.out.versions)

        BWA_INDEX.out.index.set { ch_bwa_index }
    } else {
        // Channel.value emits the [meta, path] tuple as ONE item. The deprecated
        // Channel.from would flatten the list and emit the meta map and the path
        // separately. A value channel also matches ch_fasta/ch_fai above.
        ch_bwa_index_in = Channel.value([[id:"reference"], file(params.bwa, checkIfExists:true)])
        // String.endsWith is camel-cased; "endswith" does not exist in Groovy
        if(params.bwa.endsWith("tar.gz")) {
            UNTAR(
                ch_bwa_index_in
            )
            ch_versions = ch_versions.mix(UNTAR.out.versions)

            UNTAR.out.untar.set { ch_bwa_index }
        } else {
            ch_bwa_index_in.set { ch_bwa_index }
        }
    }

    // Samplesheet: add single_end to the meta map and emit either [meta, fastq_1]
    // or [meta, [fastq_1, fastq_2]] depending on whether a second FASTQ is present
    Channel.fromSamplesheet("input", immutable_meta:false)
        .map { meta, fastq_1, fastq_2 ->
            // "def" keeps the variable local to this closure invocation
            def new_meta = meta + [single_end: !fastq_2]
            fastq_2 ? [ new_meta, [fastq_1, fastq_2] ] : [ new_meta, fastq_1 ]
        }
        .set { ch_fastq }

    //
    // Prepare the alignment files
    //

    PREP_ALIGNMENTS(
        ch_fastq,
        ch_bwa_index
    )
    ch_versions = ch_versions.mix(PREP_ALIGNMENTS.out.versions)

    //
    // Define the mappability of the reference FASTA
    //

    FASTA_MAPPABILITY_GENMAP(
        ch_fasta,
        ch_fai
    )
    ch_versions = ch_versions.mix(FASTA_MAPPABILITY_GENMAP.out.versions)

    //
    // Create the qdnaseq annotations
    //

    CREATE_ANNOTATIONS(
        // toString() guards against a single numeric bin size being parsed as a number,
        // which has no tokenize() method
        Channel.fromList(params.bin_sizes.toString().tokenize(",")),
        PREP_ALIGNMENTS.out.bams,
        FASTA_MAPPABILITY_GENMAP.out.bigwig,
        ch_blacklist
    )
    ch_versions = ch_versions.mix(CREATE_ANNOTATIONS.out.versions.first())

    //
    // Dump software versions
    //

    CUSTOM_DUMPSOFTWAREVERSIONS (
        ch_versions.unique().collectFile(name: 'collated_versions.yml')
    )

    //
    // MODULE: MultiQC
    //
    workflow_summary    = WorkflowQdnaseq.paramsSummaryMultiqc(workflow, summary_params)
    ch_workflow_summary = Channel.value(workflow_summary)

    methods_description    = WorkflowQdnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
    ch_methods_description = Channel.value(methods_description)

    ch_multiqc_files = Channel.empty()
    ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
    ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
    ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())

    MULTIQC (
        ch_multiqc_files.collect(),
        ch_multiqc_config.toList(),
        ch_multiqc_custom_config.toList(),
        ch_multiqc_logo.toList()
    )
    // Stored in the script-level variable so the completion handler can pass it on
    multiqc_report = MULTIQC.out.report.toList()
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    COMPLETION EMAIL AND SUMMARY
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

//
// Completion handler: runs once the pipeline has finished.
// Calls the NfcoreTemplate helpers in a fixed order: email, summary, IM notification.
//
workflow.onComplete {

    // NfcoreTemplate.email is only called when one of the two email parameters is set
    def email_requested = params.email || params.email_on_fail
    if (email_requested) {
        NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
    }

    // The summary is produced unconditionally
    NfcoreTemplate.summary(workflow, params, log)

    // NfcoreTemplate.IM_notification is only called when a hook URL is configured
    if (params.hook_url) {
        NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
    }
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    THE END
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/