diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3670946..349e712 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -4,7 +4,6 @@ on:
push:
branches:
- dev
- - template_update
pull_request:
release:
types: [published]
diff --git a/.nf-core.yml b/.nf-core.yml
index 9480b62..16838f3 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -1,18 +1,12 @@
repository_type: pipeline
nf_core_version: "2.14.1"
-pipeline_todos: false
-
lint:
template_strings: False # "Jinja string found in" bin/create_regex.py and bin/seurat_qc.R
files_unchanged:
- - CODE_OF_CONDUCT.md
- - .github/CONTRIBUTING.md
- .github/workflows/linting.yml
- lib/NfcoreTemplate.groovy
- docs/images/nf-core-scnanoseq_logo_dark.png
pipeline_todos:
- README.md
- main.nf
- multiqc_config:
- - report_comment
diff --git a/.prettierignore b/.prettierignore
index b32659e..437d763 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -10,4 +10,3 @@ testing/
testing*
*.pyc
bin/
-docs/output.md
diff --git a/CITATIONS.md b/CITATIONS.md
index 3e3c488..3e9f38a 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -10,9 +10,9 @@
## Pipeline tools
-- [BLAZE](https://www.biorxiv.org/content/10.1101/2022.08.16.504056v1)
+- [BLAZE](https://pubmed.ncbi.nlm.nih.gov/37024980/)
- > You Y, Prawer Y D, De Paoli-Iseppi R, Hunt C P, Parish C L, Shim H, Clark M B. Identification of cell barcodes from long-read single-cell RNA-seq with BLAZE. bioRxiv 2022 Aug .08.16.504056; doi: 10.1101/2022.08.16.504056.
+ > You Y, Prawer YDJ, De Paoli-Iseppi R, Hunt CPJ, Parish CL, Shim H, Clark MB. Identification of cell barcodes from long-read single-cell RNA-seq with BLAZE. Genome Biol. 2023 Apr 6;24(1):66. doi: 10.1186/s13059-023-02907-y. PMID: 37024980; PMCID: PMC10077662.
- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
diff --git a/README.md b/README.md
index 0ec4c7e..aa91c96 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@
## Introduction
-**nf-core/scnanoseq** is a bioinformatics best-practice analysis pipeline for 10X Genomics single-cell/nuclei RNA-seq for data derived from Oxford Nanopore Q20+ chemistry ([R10.4 flow cells (>Q20)](https://nanoporetech.com/about-us/news/oxford-nanopore-announces-technology-updates-nanopore-community-meeting)). Due to the expectation of >Q20 quality, the input data for the pipeline is not dependent on Illumina paired data. Please note `scnanoseq` can also process Oxford data with older chemistry, but we encourage usage of the Q20+ chemistry.
+**nf-core/scnanoseq** is a bioinformatics best-practice analysis pipeline for 10X Genomics single-cell/nuclei RNA-seq for data derived from Oxford Nanopore Q20+ chemistry ([R10.4 flow cells (>Q20)](https://nanoporetech.com/about-us/news/oxford-nanopore-announces-technology-updates-nanopore-community-meeting)). Due to the expectation of >Q20 quality, the input data for the pipeline is not dependent on Illumina paired data. **Please note `scnanoseq` can also process Oxford data with older chemistry, but we encourage usage of the Q20+ chemistry when possible**.
The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!
@@ -30,8 +30,8 @@ On release, automated continuous integration tests run the pipeline on a full-si
![scnanoseq diagram](assets/scnanoseq_diagram.png)
1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot), [`NanoComp`](https://github.com/wdecoster/nanocomp) and [`ToulligQC`](https://github.com/GenomiqueENS/toulligQC))
-2. Unzip and split FastQ ([`gunzip`](https://linux.die.net/man/1/gunzip))
- 1. Optional: Split fastq for faster processing ([`split`](https://linux.die.net/man/1/split))
+2. Unzip and split FASTQ ([`pigz`](https://github.com/madler/pigz))
+ 1. Optional: Split FASTQ for faster processing ([`split`](https://linux.die.net/man/1/split))
3. Trim and filter reads. ([`Nanofilt`](https://github.com/wdecoster/nanofilt))
4. Post trim QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), [`NanoPlot`](https://github.com/wdecoster/NanoPlot) and [`ToulligQC`](https://github.com/GenomiqueENS/toulligQC))
5. Barcode detection using a custom whitelist or 10X whitelist. [`BLAZE`](https://github.com/shimlab/BLAZE)
@@ -43,7 +43,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
9. Alignment ([`minimap2`](https://github.com/lh3/minimap2))
10. Post-alignment filtering of mapped reads and gathering mapping QC ([`SAMtools`](http://www.htslib.org/doc/samtools.html))
11. Post-alignment QC in unfiltered BAM files ([`NanoComp`](https://github.com/wdecoster/nanocomp), [`RSeQC`](https://rseqc.sourceforge.net/))
-12. Barcode tagging with read quality, BC, BC quality, UMI, and UMI quality (custom script `./bin/tag_barcodes.py`)
+12. Barcode (BC) tagging with read quality, BC quality, UMI, and UMI quality (custom script `./bin/tag_barcodes.py`)
13. UMI-based deduplication [`UMI-tools`](https://github.com/CGATOxford/UMI-tools)
14. Gene and transcript level matrices generation. [`IsoQuant`](https://github.com/ablab/IsoQuant)
15. Preliminary matrix QC ([`Seurat`](https://github.com/satijalab/seurat))
@@ -56,9 +56,7 @@ On release, automated continuous integration tests run the pipeline on a full-si
First, prepare a samplesheet with your input data that looks as follows:
-`samplesheet.csv`:
-
-```csv
+```csv title="samplesheet.csv"
sample,fastq,cell_count
CONTROL_REP1,AEG588A1_S1.fastq.gz,5000
CONTROL_REP1,AEG588A1_S2.fastq.gz,5000
@@ -86,24 +84,29 @@ For more details and further functionality, please refer to the [usage documenta
## Pipeline output
+This pipeline produces feature-barcode matrices at both the gene and transcript level and can be configured to retain introns within the counts themselves. These feature-barcode matrices can be ingested directly by most packages used for downstream analyses, such as `Seurat`. Additionally, the pipeline produces a number of quality control metrics to ensure that the samples processed meet the metrics expected for single-cell/nuclei data.
+
To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/scnanoseq/results) tab on the nf-core website pipeline page.
-For more details about the output files and reports, please refer to the
+For more details about the full set of output files and reports, please refer to the
[output documentation](https://nf-co.re/scnanoseq/output).
-This pipeline produces feature barcode matrices at both the gene and transcript level and can retain introns within the counts themselves. These files are able to be ingested directly by most packages used for downstream analyses such as `Seurat`. In addition the pipeline produces a number of quality control metrics to ensure that the samples processed meet expected metrics for single-cell/nuclei data.
-
## Troubleshooting
-If you experience any issues, please make sure to submit an issue above. However, some resolutions for common issues will be noted below:
+If you experience any issues, please make sure to reach out on the [#scnanoseq Slack channel](https://nfcore.slack.com/archives/C03TUE2K6NS) or [open an issue on our GitHub repository](https://github.com/nf-core/scnanoseq/issues/new/choose). Resolutions for some common issues are noted below:
-- Due to the nature of the data this pipeline analyzes, some tools can experience increased runtimes. For some of the custom tools made for this pipeline (`preextract_fastq.py` and `correct_barcodes.py`), we have leveraged the splitting that is done via the `split_amount` param to decrease their overall runtimes. The `split_amount` parameter will split the input fastqs into a number of fastq files that each have a number of lines based on the value used for this parameter. As a result, it is important not to set this parameter to be too low as it would cause the creation of a large number of files the pipeline will be processed. While this value can be highly dependent on the data, a good starting point for an analysis would be to set this value to `500000`. If you find that `PREEXTRACT_FASTQ` and `CORRECT_BARCODES` are still taking long amounts of time to run, it would be worth reducing this parameter to `200000` or `100000`, but keeping the value on the order of hundred of thousands or tens of thousands should help with with keeping the total number of processes minimal.
-- One issue that has been observed is a recurrent node failure on slurm clusters that does seem to be related to submission of nextflow jobs. This issue is not related to this pipeline itself, but rather to nextflow itself. Our reserach computing are currently working on a resolution. But we have two methods that appear to help overcome should this issue arise:
- 1. The first is to create a custom config that increases the memory request for the job that failed. This may take a couple attempts to find the correct requests, but we have noted that there does appear to be a memory issue occassionally with this errors.
- 2. The second resolution is to request an interactive session with a decent amount of time and memory and cpus in order to run the pipeline on the single node. Note that this will take time as there will be minimal parallelization, but this does seem to resolve the issue.
-- We acknowledge that analyzing PromethION is a common use case for this pipeline. Currently, the pipeline has been developed with defaults to analyze GridION and average sized PromethION data. For cases, where jobs have failed due for larger PromethION datasets, the defaults have been overwritten by a custom configuation file (provided by the `-c` Nextflow option) where resources were increased (substantially in some cases). Below are some of the overrides we have used, while these amounts may not work on every dataset, these will hopefully at least note which processes will need to have their resources increased:
+- Due to the nature of the data this pipeline analyzes, some tools can experience increased runtimes. For some of the custom tools made for this pipeline (`preextract_fastq.py` and `correct_barcodes.py`), we have leveraged the splitting that is done via the `split_amount` parameter to decrease their overall runtimes. The `split_amount` parameter splits each input FASTQ into a number of smaller FASTQ files, each containing at most the number of lines given by this parameter. As a result, it is important not to set this parameter too low, as that would create a large number of files for the pipeline to process. While the ideal value can be highly dependent on the data, a good starting point for an analysis is `500000` (since a FASTQ record spans four lines, this corresponds to 125,000 reads per chunk). If you find that `PREEXTRACT_FASTQ` and `CORRECT_BARCODES` are still taking a long time to run, it is worth reducing this parameter to `200000` or `100000`, but keeping the value on the order of hundreds of thousands or tens of thousands will help keep the total number of processes minimal. An example of setting this parameter to `500000` is shown below:
+```yml title="params.yml"
+split_amount: 500000
```
+- We have seen a recurrent node failure on SLURM clusters that appears to be related to the submission of Nextflow jobs. This issue is not related to this pipeline per se, but rather to Nextflow itself. We are currently working on a resolution, but in the meantime two methods appear to help overcome this issue should it arise:
+ 1. Provide a custom config that increases the memory request for the job that failed. This may take a couple of attempts to find the correct requests, but we have noted that these errors do occasionally appear to be memory-related.
+ 2. Request an interactive session with a decent amount of time, memory, and CPUs in order to run the pipeline on a single node. Note that this will take longer as there will be minimal parallelization, but it does seem to resolve the issue.
+- We acknowledge that analyzing PromethION data is a common use case for this pipeline. Currently, the pipeline has been developed with defaults suited to GridION and average-sized PromethION data. For cases where jobs have failed on larger PromethION datasets, the defaults can be overwritten by a custom configuration file (provided via the `-c` Nextflow option) in which resources are increased (substantially in some cases). Below are some of the overrides we have used; while these amounts may not work on every dataset, they should at least indicate which processes may need their resources increased:
+
+```groovy title="custom.config"
+
process
{
withName: '.*:.*FASTQC.*'
@@ -126,6 +129,14 @@ process
}
}
+process
+{
+ withName: '.*:TAG_BARCODES'
+ {
+ memory = '60.GB'
+ }
+}
+
process
{
withName: '.*:SAMTOOLS_SORT'
@@ -146,8 +157,8 @@ process
{
withName: '.*:ISOQUANT'
{
- cpus = 40
- time = '135.h'
+ cpus = 30
+ memory = '85.GB'
}
}
```
diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml
index 690a26e..7109425 100644
--- a/assets/multiqc_config.yml
+++ b/assets/multiqc_config.yml
@@ -1,7 +1,5 @@
report_comment: >
- This report has been generated by the nf-core/scnanoseq
- analysis pipeline. For information about how to interpret these results, please see the
- documentation.
+ This report has been generated by the nf-core/scnanoseq analysis pipeline. For information about how to interpret these results, please see the documentation.
report_section_order:
"nf-core-scnanoseq-methods-description":
diff --git a/assets/schema_input.json b/assets/schema_input.json
index 3ac4ad6..7a755bc 100644
--- a/assets/schema_input.json
+++ b/assets/schema_input.json
@@ -21,7 +21,7 @@
"errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
},
"cell_count": {
- "type": "string"
+ "type": "integer"
}
},
"required": ["sample", "fastq", "cell_count"]
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
deleted file mode 100755
index 7cb50c0..0000000
--- a/bin/check_samplesheet.py
+++ /dev/null
@@ -1,250 +0,0 @@
-#!/usr/bin/env python
-
-
-"""Provide a command line tool to validate and transform tabular samplesheets."""
-
-
-import argparse
-import csv
-import logging
-import sys
-from collections import Counter
-from pathlib import Path
-
-logger = logging.getLogger()
-
-
-class RowChecker:
- """
- Define a service that can validate and transform each given row.
-
- Attributes:
- modified (list): A list of dicts, where each dict corresponds to a previously
- validated and transformed row. The order of rows is maintained.
-
- """
-
- VALID_FORMATS = (
- ".fq.gz",
- ".fastq.gz",
- )
-
- def __init__(
- self,
- sample_col="sample",
- first_col="fastq",
- second_col="cell_count",
- single_col="single_end",
- **kwargs,
- ):
- """
- Initialize the row checker with the expected column names.
-
- Args:
- sample_col (str): The name of the column that contains the sample name
- (default "sample").
- first_col (str): The name of the column that contains the first (or only)
- FASTQ file path (default "fastq").
- second_col (str): The name of the column that contains the expected cell
- count for the sample (default "cell_count").
- single_col (str): The name of the new column that will be inserted and
- records whether the sample contains single- or paired-end sequencing
- reads (default "single_end").
-
- """
- super().__init__(**kwargs)
- self._sample_col = sample_col
- self._first_col = first_col
- self._second_col = second_col
- self._single_col = single_col
- self._seen = set()
- self.modified = []
-
- def validate_and_transform(self, row):
- """
- Perform all validations on the given row and insert the read pairing status.
-
- Args:
- row (dict): A mapping from column headers (keys) to elements of that row
- (values).
-
- """
- self._validate_sample(row)
- self._validate_first(row)
- self._validate_second(row)
- self._set_single_end(row)
- self._seen.add((row[self._sample_col], row[self._first_col]))
- self.modified.append(row)
-
- def _validate_sample(self, row):
- """Assert that the sample name exists and convert spaces to underscores."""
- if len(row[self._sample_col]) <= 0:
- raise AssertionError("Sample input is required.")
- # Sanitize samples slightly.
- row[self._sample_col] = row[self._sample_col].replace(" ", "_")
-
- def _validate_first(self, row):
- """Assert that the first FASTQ entry is non-empty and has the right format."""
- if len(row[self._first_col]) <= 0:
- raise AssertionError("The FASTQ file is required.")
- self._validate_fastq_format(row[self._first_col])
-
- def _validate_second(self, row):
- """Assert that the cell count entry exists."""
- if len(row[self._second_col]) <= 0:
- raise AssertionError("The cell count is required.")
-
- def _set_single_end(self, row):
- """Assert that read pairs have the same file extension. Report pair status."""
- row[self._single_col] = True
-
- def _validate_fastq_format(self, filename):
- """Assert that a given filename has one of the expected FASTQ extensions."""
- if not any(filename.endswith(extension) for extension in self.VALID_FORMATS):
- raise AssertionError(
- f"The FASTQ file has an unrecognized extension: {filename}\n"
- f"It should be one of: {', '.join(self.VALID_FORMATS)}"
- )
-
- def validate_unique_samples(self):
- """
- Assert that the combination of sample name and FASTQ filename is unique.
-
- In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the
- number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment.
-
- """
- if len(self._seen) != len(self.modified):
- raise AssertionError("The pair of sample name and FASTQ must be unique.")
- seen = Counter()
- for row in self.modified:
- sample = row[self._sample_col]
- seen[sample] += 1
- row[self._sample_col] = f"{sample}_T{seen[sample]}"
-
-
-def read_head(handle, num_lines=10):
- """Read the specified number of lines from the current position in the file."""
- lines = []
- for idx, line in enumerate(handle):
- if idx == num_lines:
- break
- lines.append(line)
- return "".join(lines)
-
-
-def sniff_format(handle):
- """
- Detect the tabular format.
-
- Args:
- handle (text file): A handle to a `text file`_ object. The read position is
- expected to be at the beginning (index 0).
-
- Returns:
- csv.Dialect: The detected tabular format.
-
- .. _text file:
- https://docs.python.org/3/glossary.html#term-text-file
-
- """
- peek = read_head(handle)
- handle.seek(0)
- sniffer = csv.Sniffer()
- dialect = sniffer.sniff(peek)
- return dialect
-
-
-def check_samplesheet(file_in, file_out):
- """
- Check that the tabular samplesheet has the structure expected by nf-core pipelines.
-
- Validate the general shape of the table, expected columns, and each row. Also add
- an additional column which records whether one or two FASTQ reads were found.
-
- Args:
- file_in (pathlib.Path): The given tabular samplesheet. The format can be either
- CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``.
- file_out (pathlib.Path): Where the validated and transformed samplesheet should
- be created; always in CSV format.
-
- Example:
- This function checks that the samplesheet follows the following structure,
- see also the `viral recon samplesheet`_::
-
- sample,fastq,cell_count
- SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,1000
-
- .. _viral recon samplesheet:
- https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv
-
- """
- required_columns = {"sample", "fastq", "cell_count"}
- # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`.
- with file_in.open(newline="") as in_handle:
- reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle))
- # Validate the existence of the expected header columns.
- if not required_columns.issubset(reader.fieldnames):
- req_cols = ", ".join(required_columns)
- logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.")
- sys.exit(1)
- # Validate each row.
- checker = RowChecker()
- for i, row in enumerate(reader):
- try:
- checker.validate_and_transform(row)
- except AssertionError as error:
- logger.critical(f"{str(error)} On line {i + 2}.")
- sys.exit(1)
- checker.validate_unique_samples()
- header = list(reader.fieldnames)
- header.insert(1, "single_end")
- # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`.
- with file_out.open(mode="w", newline="") as out_handle:
- writer = csv.DictWriter(out_handle, header, delimiter=",")
- writer.writeheader()
- for row in checker.modified:
- writer.writerow(row)
-
-
-def parse_args(argv=None):
- """Define and immediately parse command line arguments."""
- parser = argparse.ArgumentParser(
- description="Validate and transform a tabular samplesheet.",
- epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv",
- )
- parser.add_argument(
- "file_in",
- metavar="FILE_IN",
- type=Path,
- help="Tabular input samplesheet in CSV or TSV format.",
- )
- parser.add_argument(
- "file_out",
- metavar="FILE_OUT",
- type=Path,
- help="Transformed output samplesheet in CSV format.",
- )
- parser.add_argument(
- "-l",
- "--log-level",
- help="The desired log level (default WARNING).",
- choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"),
- default="WARNING",
- )
- return parser.parse_args(argv)
-
-
-def main(argv=None):
- """Coordinate argument parsing and program execution."""
- args = parse_args(argv)
- logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s")
- if not args.file_in.is_file():
- logger.error(f"The given input file {args.file_in} was not found!")
- sys.exit(2)
- args.file_out.parent.mkdir(parents=True, exist_ok=True)
- check_samplesheet(args.file_in, args.file_out)
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/conf/base.config b/conf/base.config
index ba3d12f..546de6b 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -48,7 +48,7 @@ process {
time = { check_max( 60.h * task.attempt, 'time' ) }
}
withLabel:process_high_memory {
- memory = { check_max( 250.GB * task.attempt, 'memory' ) }
+ memory = { check_max( 200.GB * task.attempt, 'memory' ) }
}
withLabel:error_ignore {
errorStrategy = 'ignore'
diff --git a/conf/modules.config b/conf/modules.config
index e5bbe08..cc4fc8c 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -62,7 +62,6 @@ if (!params.skip_qc && !params.skip_fastqc) {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
-
}
}
@@ -94,7 +93,6 @@ if (!params.skip_qc && !params.skip_nanoplot) {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
-
}
}
@@ -154,7 +152,6 @@ if (!params.skip_qc && !params.skip_toulligqc) {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
-
}
}
@@ -193,28 +190,21 @@ if (!params.skip_qc){
}
}
- process {
- withName:'.*:BAM_SORT_STATS_SAMTOOLS_DEDUP:BAM_STATS_SAMTOOLS:.*' {
- ext.prefix = { "${meta.id}.dedup.sorted" }
- publishDir = [
- path: { "${params.outdir}/${meta.id}/bam/dedup" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
- }
-
- process {
- withName:'.*:BAM_SORT_STATS_SAMTOOLS_SPLIT:BAM_STATS_SAMTOOLS:.*' {
- ext.prefix = { "${meta.id}.sorted" }
- publishDir = [
- enabled: false
- ]
+ if (!params.skip_dedup){
+ process {
+ withName:'.*:BAM_SORT_STATS_SAMTOOLS_DEDUP:BAM_STATS_SAMTOOLS:.*' {
+ ext.prefix = { "${meta.id}.dedup.sorted" }
+ publishDir = [
+ path: { "${params.outdir}/${meta.id}/bam/dedup" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
}
}
}
-if (!params.skip_qc) {
+if (!params.skip_qc && !params.skip_rseqc) {
process {
withName:'.*:RSEQC_READDISTRIBUTION' {
@@ -224,13 +214,11 @@ if (!params.skip_qc) {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
-
}
-
}
// READ COUNTS
-if (!params.skip_qc) {
+if (!params.skip_qc && !params.skip_fastqc) {
process {
withName:'.*:READ_COUNTS' {
@@ -240,9 +228,7 @@ if (!params.skip_qc) {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
-
}
-
}
/////////////////////
@@ -258,17 +244,6 @@ process {
}
}
-// PAFTOOLS
-process {
- withName: '.*:PAFTOOLS' {
- publishDir = [
- path: { "${params.outdir}/references/paftools" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
- }
-}
-
// MINIMAP2_INDEX
if (!params.skip_save_minimap2_index) {
process {
@@ -320,9 +295,9 @@ process {
}
}
-// GUNZIP
+// PIGZ_UNCOMPRESS
process {
- withName: '.*:GUNZIP.*' {
+ withName: '.*:PIGZ_UNCOMPRESS.*' {
publishDir = [
enabled: false
]
@@ -348,7 +323,7 @@ if (params.split_amount > 0) {
process {
withName: '.*:CAT_CAT_PREEXTRACT' {
- ext.prefix = { "${meta.id}.putative_bc_umi.tsv" }
+ ext.prefix = { "${meta.id}_filtered.fastq" }
publishDir = [
enabled: false
]
@@ -365,7 +340,7 @@ if (params.split_amount > 0) {
}
process {
- withName: '.*:ZIP_TRIM' {
+ withName: '.*:PIGZ_COMPRESS' {
publishDir = [
path: { "${params.outdir}/${meta.id}/fastq/extracted" },
mode: params.publish_dir_mode,
@@ -373,7 +348,6 @@ if (params.split_amount > 0) {
]
}
}
-
}
// FASTQ TRIMMING
@@ -396,20 +370,23 @@ if (!params.skip_trimming) {
]
}
}
-
}
// NANOFILT
if ( !params.skip_trimming ){
process {
withName:'.*:NANOFILT' {
+ ext.args = {
+ [
+ params.min_length ? "--length ${params.min_length}" : "",
+ params.min_q_score ? "--quality ${params.min_q_score}" : ""
+ ].join(' ').trim()
+ }
publishDir = [
enabled: false
]
}
}
-
-
}
}
@@ -468,16 +445,6 @@ process {
// BAM PROCESSING //
////////////////////
-// SAMTOOLS_VIEW_BAM
-process {
- withName:'.*:SAMTOOLS_VIEW_BAM' {
- ext.args = "-h --output-fmt bam"
- publishDir = [
- enabled: false
- ]
- }
-}
-
// SAMTOOLS_VIEW_FILTER
process {
withName:'.*:SAMTOOLS_VIEW_FILTER' {
@@ -531,23 +498,25 @@ process {
}
}
-process {
- withName:'.*:BAM_SORT_STATS_SAMTOOLS_DEDUP:SAMTOOLS_SORT' {
- ext.prefix = { "${meta.id}.dedup.sorted" }
- publishDir = [
- path: { "${params.outdir}/${meta.id}/bam/dedup" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
- ]
+if (!params.skip_dedup){
+ process {
+ withName:'.*:BAM_SORT_STATS_SAMTOOLS_DEDUP:SAMTOOLS_SORT' {
+ ext.prefix = { "${meta.id}.dedup.sorted" }
+ publishDir = [
+ path: { "${params.outdir}/${meta.id}/bam/dedup" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+ ]
+ }
}
-}
-process {
- withName:'.*:BAM_SORT_STATS_SAMTOOLS_SPLIT:.*' {
- ext.prefix = { "${meta.id}.sorted" }
- publishDir = [
- enabled: false
- ]
+ process {
+ withName:'.*:BAM_SORT_STATS_SAMTOOLS_SPLIT:.*' {
+ ext.prefix = { "${meta.id}.sorted" }
+ publishDir = [
+ enabled: false
+ ]
+ }
}
}
@@ -594,11 +563,13 @@ process {
}
}
-process {
- withName:'.*:SAMTOOLS_MERGE'{
- publishDir = [
- enabled: false
- ]
+if (!params.skip_dedup){
+ process {
+ withName:'.*:SAMTOOLS_MERGE'{
+ publishDir = [
+ enabled: false
+ ]
+ }
}
}
@@ -631,30 +602,32 @@ process {
// UMI DEDUPLICATION //
///////////////////////
-process {
- withName: '.*:BAMTOOLS_SPLIT' {
- ext.args = {
- [
- "-reference"
- ].join(' ').trim()
+if (!params.skip_dedup){
+ process {
+ withName: '.*:BAMTOOLS_SPLIT' {
+ ext.args = {
+ [
+ "-reference"
+ ].join(' ').trim()
+ }
+ publishDir = [
+ enabled: false
+ ]
}
- publishDir = [
- enabled: false
- ]
}
-}
-process {
- withName: '.*:UMITOOLS_DEDUP' {
- ext.args = {
- [
- '--per-cell'
- ].join(' ').trim()
+ process {
+ withName: '.*:UMITOOLS_DEDUP' {
+ ext.args = {
+ [
+ '--per-cell'
+ ].join(' ').trim()
+ }
+ ext.prefix = { "${meta.id}.dedup" }
+ publishDir = [
+ enabled: false
+ ]
}
- ext.prefix = { "${meta.id}.dedup" }
- publishDir = [
- enabled: false
- ]
}
}
@@ -662,14 +635,13 @@ process {
// ISOQUANT //
//////////////
-// NOTE: with_inconsistent will include introns within the counts, where as unique_only will only include exons
process {
withName: '.*:ISOQUANT' {
ext.args = {
[
"--complete_genedb",
params.stranded == "forward" ? "--stranded forward" : params.stranded == "reverse" ? "--stranded reverse" : "--stranded none",
- params.retain_introns ? "--gene_quantification all " : "--gene_quantification with_inconsistent ",
+ params.retain_introns ? "--gene_quantification all " : "--gene_quantification unique_inconsistent ",
params.retain_introns ? "--transcript_quantification all " : "--transcript_quantification unique_only ",
"--splice_correction_strategy default_ont ",
"--model_construction_strategy sensitive_ont "
@@ -708,8 +680,6 @@ if (!params.skip_qc && !params.skip_seurat) {
}
}
- //***TODO***: don't forget filtered outputs [here or in main workflow, need to check]
- // so outputs will need to be changed
process {
withName: '.*:COMBINE_SEURAT_STATS_GENE' {
ext.args = "-o gene.corrected.tsv -f gene"
@@ -733,9 +703,10 @@ if (!params.skip_qc && !params.skip_seurat) {
// MULTIQC //
/////////////
-if (!params.skip_qc) {
+if (!params.skip_qc && !params.skip_multiqc) {
process {
withName: '.*:MULTIQC_FINALQC' {
+ ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
publishDir = [
path: { "${params.outdir}/batch_qcs/multiqc/final_qc" },
mode: params.publish_dir_mode,
@@ -746,6 +717,7 @@ if (!params.skip_qc) {
process {
withName: '.*:MULTIQC_RAWQC' {
+ ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
publishDir = [
path: { "${params.outdir}/batch_qcs/multiqc/raw_qc" },
mode: params.publish_dir_mode,
diff --git a/conf/test_full.config b/conf/test_full.config
index b714c41..0ed950c 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -1,6 +1,6 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Nextflow config file for running minimal tests
+ Nextflow config file for running full-size tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a full-size pipeline test.
diff --git a/docs/images/read_counts.png b/docs/images/read_counts.png
new file mode 100644
index 0000000..a2fd4b0
Binary files /dev/null and b/docs/images/read_counts.png differ
diff --git a/docs/images/seurat.png b/docs/images/seurat.png
index e547b9e..34ce451 100644
Binary files a/docs/images/seurat.png and b/docs/images/seurat.png differ
diff --git a/docs/output.md b/docs/output.md
index 7bf21c6..24bc378 100644
--- a/docs/output.md
+++ b/docs/output.md
@@ -32,11 +32,14 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- [Nanoplot](#nanoplot) - Long Read FASTQ QC
- [ToulligQC](#toulligqc) - Long Read FASTQ QC
- [RSeQC](#rseqc) - Various RNA-seq QC metrics
+ - [Read Counts](#read-counts) - Read Counts QC
- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution
## Preprocessing
+
### Nanofilt
+
Output files
@@ -47,10 +50,12 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
-[Nanofilt](https://github.com/wdecoster/nanocomp) is a tool used for filtering and trimming of long read sequencing data.
+[Nanofilt](https://github.com/wdecoster/nanofilt) is a tool used for filtering and trimming of long read sequencing data.
## Barcode Calling
+
### BLAZE
+
Output files
@@ -59,31 +64,33 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- `blaze/*.bc_count.txt` : This is a file containing each barcode and the counts of how many reads support it.
- `blaze/*.knee_plot.png` : The knee plot detailing the ranking of each barcode.
- `blaze/*.putative_bc.csv` : This file contains the naively detected barcode for each read.
- - `blaze/*.whitelist.csv` : This is the detected "true" barcodes for the dataset.
+ - `blaze/*.whitelist.csv` : This is a list of the "true" barcodes detected for a sample. The number of lines in this file should roughly match the number of cells expected for the sample.
![BLAZE - knee plot](images/blaze.png)
-[BLAZE](https://github.com/shimlab/BLAZE) enables the accurate identification of barcodes and UMIs from Nanopore reads.
+[BLAZE](https://github.com/shimlab/BLAZE) enables the accurate identification of barcodes and UMIs from Nanopore reads. The files produced by BLAZE can be used to assess the quality of the barcode calling and the data.
+
+The knee plot provided by BLAZE ranks every barcode detected in a sample from highest to lowest read count. A "cliff-and-knee" shape (as in the example above) is indicative of good quality, while deviations from this shape can indicate concerns with the data, such as low barcode counts. The `*.bc_count.txt` file can be used alongside this figure to show every barcode and its abundance in the dataset.
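+
+As a minimal sketch, the number of barcodes in the whitelist can be compared against the expected cell count (the file name and count below are placeholders):
+
+```python
+# Count the barcodes in the BLAZE whitelist (stands in for blaze/*.whitelist.csv)
+expected_cells = 5000  # e.g. the cell_count column from the samplesheet
+
+with open("sample.whitelist.csv") as handle:
+    n_barcodes = sum(1 for _ in handle)
+
+print(f"{n_barcodes} barcodes detected (expected ~{expected_cells})")
+```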
## Alignment
+
### Minimap2
Output files
- `/`
- - `bam/`
- - `original/`
- - `*.sorted.bam` : The mapped and sorted bam.
- - `*.sorted.bam.bai` : The bam index for the mapped and sorted bam.
-
+  - `bam/`
+    - `original/`
+      - `*.sorted.bam` : The mapped and sorted bam.
+      - `*.sorted.bam.bai` : The bam index for the mapped and sorted bam.
+
-[Minimap2](https://github.com/lh3/minimap2) is a versatile sequence alignment program that aligns DNA or mRNA sequences against a large reference database. Minimap2 is optimized for large, noisy reads making it a staple for alignment of nanopore reads
+[Minimap2](https://github.com/lh3/minimap2) is a versatile sequence alignment program that aligns DNA or mRNA sequences against a large reference database. Minimap2 is optimized for large, noisy reads making it a staple for alignment of nanopore reads.
## Alignment Post-processing
+
### Samtools
+
Output files
@@ -92,25 +99,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
- `mapped_only/`
- `*.sorted.bam` : The bam containing only reads that were able to be mapped.
- `*.sorted.bam.bai` : The bam index for the bam containing only reads that were able to be mapped.
- - `qc/`
- - `samtools/`
- - `minimap/`
- - `*.minimap.flagstat` : The flagstat file for the bam obtained from minimap.
- - `*.minimap.idxstats` : The idxstats file for the bam obtained from minimap.
- - `*.minimap.stats` : The stats file for the bam obtained from minimap.
- - `mapped_only/`
- - `*.mapped_only.flagstat` : The flagstat file for the bam containing only mapped reads.
- - `*.mapped_only.idxstats` : The idxstats file for the bam containing only mapped reads.
- - `*.mapped_only.stats` : The stats file for the bam containing only mapped reads.
- - `corrected/`
- - `*.corrected.flagstat` : The flagstat file for the bam containing corrected barcodes.
- - `*.corrected.idxstats` : The idxstat file for the bam containing corrected barcodes.
- - `*.corrected.stats` : The stat file for the bam containing corrected barcodes.
- - `dedup/`
- - `*.dedup.flagstat` : The flagstat file for the bam containing deduplicated umis.
- - `*.dedup.idxstats` : The idxstats file for the bam containing deduplicated umis.
- - `*.dedup.stats` : The stats file for the bam containing deduplicated umis.
-
+  - `qc/`
+    - `samtools/`
+      - `minimap/`
+        - `*.minimap.flagstat` : The flagstat file for the bam obtained from minimap.
+        - `*.minimap.idxstats` : The idxstats file for the bam obtained from minimap.
+        - `*.minimap.stats` : The stats file for the bam obtained from minimap.
+      - `mapped_only/`
+        - `*.mapped_only.flagstat` : The flagstat file for the bam containing only mapped reads.
+        - `*.mapped_only.idxstats` : The idxstats file for the bam containing only mapped reads.
+        - `*.mapped_only.stats` : The stats file for the bam containing only mapped reads.
+      - `corrected/`
+        - `*.corrected.flagstat` : The flagstat file for the bam containing corrected barcodes.
+        - `*.corrected.idxstats` : The idxstats file for the bam containing corrected barcodes.
+        - `*.corrected.stats` : The stats file for the bam containing corrected barcodes.
+      - `dedup/`
+        - `*.dedup.flagstat` : The flagstat file for the bam containing deduplicated UMIs.
+        - `*.dedup.idxstats` : The idxstats file for the bam containing deduplicated UMIs.
+        - `*.dedup.stats` : The stats file for the bam containing deduplicated UMIs.
+
![MultiQC - samtools idxstats](images/samtools_idxstats.png)
![MultiQC - samtools stats](images/samtools_stats.png)
@@ -118,6 +108,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
[Samtools](https://www.htslib.org/) is a suite of programs for reading, writing, editing, indexing, and viewing files that are in SAM, BAM, or CRAM format
### Barcode Tagging
+
Output files
@@ -137,9 +128,10 @@ UMI tag = "UR"
UMI quality tag = "UY"
```
-Please see "Barcode Correction" below for metadata added post-correction.
+Please see [Barcode Correction](#barcode-correction) below for metadata added post-correction.
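+
+As a hedged sketch (assuming `pysam` is installed; the BAM path is a placeholder, and only the UMI tags documented above are used), the tags can be inspected as follows:
+
+```python
+import pysam
+
+# Open a barcode-tagged BAM produced by the tagging step (path is illustrative)
+with pysam.AlignmentFile("sample.tagged.bam", "rb") as bam:
+    for read in bam.fetch(until_eof=True):
+        if read.has_tag("UR"):
+            # "UR" holds the UMI sequence and "UY" its base qualities
+            print(read.query_name, read.get_tag("UR"), read.get_tag("UY"))
+```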
### Barcode Correction
+
Output files
@@ -154,6 +146,7 @@ Please see "Barcode Correction" below for metadata added post-correction.
Barcode correction is a custom script that uses the whitelist generated by BLAZE in order to correct barcodes that are not on the whitelist into a whitelisted barcode. During this step, an additional BAM tag is added, `CB`, to indicate a barcode sequence that is error-corrected.
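+
+For intuition, a simplified sketch of whitelist-based correction is shown below (the pipeline's `correct_barcodes.py` is more involved; the helper names here are illustrative):
+
+```python
+def hamming(a: str, b: str) -> int:
+    """Number of mismatched positions between two equal-length barcodes."""
+    return sum(x != y for x, y in zip(a, b))
+
+def correct_barcode(barcode: str, whitelist: set, max_dist: int = 1):
+    """Return the whitelisted barcode if it matches unambiguously, else None."""
+    if barcode in whitelist:
+        return barcode
+    hits = [wl for wl in whitelist if hamming(barcode, wl) <= max_dist]
+    return hits[0] if len(hits) == 1 else None  # ambiguous or no match -> uncorrected
+```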
### UMI Deduplication
+
Output files
@@ -168,7 +161,9 @@ Barcode correction is a custom script that uses the whitelist generated by BLAZE
[UMI-Tools](https://umi-tools.readthedocs.io/en/latest/reference/dedup.html) deduplicate reads based on the mapping co-ordinate and the UMI attached to the read. The identification of duplicate reads is performed in an error-aware manner by building networks of related UMIs
## Feature-Barcode Quantification
+
### IsoQuant
+
Output files
@@ -179,28 +174,26 @@ Barcode correction is a custom script that uses the whitelist generated by BLAZE
-[IsoQuant](https://github.com/ablab/IsoQuant) is a tool for the genome-based analysis of long RNA reads, such as PacBio or Oxford Nanopores. IsoQuant allows to reconstruct and quantify transcript models with high precision and decent recall. If the reference annotation is given, IsoQuant also assigns reads to the annotated isoforms based on their intron and exon structure. IsoQuant further performs annotated gene, isoform, exon and intron quantification
+[IsoQuant](https://github.com/ablab/IsoQuant) is a tool for the genome-based analysis of long RNA reads, such as PacBio or Oxford Nanopore. IsoQuant can reconstruct and quantify transcript models with high precision and decent recall. If a reference annotation is given, IsoQuant also assigns reads to the annotated isoforms based on their intron and exon structure. IsoQuant further performs annotated gene, isoform, exon and intron quantification. The outputs of IsoQuant can be important for downstream analysis with tools specialized in single-cell/nuclei analysis (e.g. `Seurat`).
### Seurat
+
Output files
- `/`
- - `qc/`
- - `gene/`
- - `*.csv`: A file containing statistics about the cell-read distribution for genes.
- - `*.png`: A series of qc images to determine the quality of the gene quantification.
- - `transcript/`
- - `*.csv`: A file containing statistics about the cell-read distribution for transcript.
- - `*.png`: A series of qc images to determine the quality of the transcript quantification.
-
+  - `qc/`
+    - `gene/`
+      - `*.csv`: A file containing statistics about the cell-read distribution for genes.
+      - `*.png`: A series of qc images to determine the quality of the gene quantification.
+    - `transcript/`
+      - `*.csv`: A file containing statistics about the cell-read distribution for transcripts.
+      - `*.png`: A series of qc images to determine the quality of the transcript quantification.
+
![MultiQC - seurat](images/seurat.png)
+_High level statistics are provided in the MultiQC report, as shown in this image. These provide an overview of the quality of the data in order to assess if the results are suitable for tertiary analysis._
[Seurat](https://satijalab.org/seurat/) is an R package designed for QC, analysis, and exploration of single-cell RNA-seq data.
## Other steps
+
### UCSC
+
Output files
@@ -213,7 +206,9 @@ Barcode correction is a custom script that uses the whitelist generated by BLAZE
[`ucsc-gtftogenepred` and `ucsc-genepredtobed`](https://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/) are stand-alone applications developed by UCSC which, together, convert a GTF file to the BED file format.
## Quality Control
+
### FastQC
+
Output files
@@ -259,6 +254,7 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m
[Nanocomp](https://github.com/wdecoster/nanocomp) compares multiple runs of long read sequencing data and alignments. It creates violin plots or box plots of length, quality and percent identity and creates dynamic, overlaying read length histograms and a cumulative yield plot
### Nanoplot
+
Output files
@@ -280,6 +276,7 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m
[Nanoplot](https://github.com/wdecoster/NanoPlot) is a plotting tool for long read sequencing data and alignments.
### ToulligQC
+
Output files
@@ -313,7 +310,22 @@ The FastQC plots displayed in the MultiQC report shows _untrimmed_ reads. They m
![RSeQC](images/rseqc.png)
-[RSeQC](https://rseqc.sourceforge.net/) package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data
+The [RSeQC](https://rseqc.sourceforge.net/) package provides a number of useful modules that can comprehensively evaluate high-throughput sequencing data, especially RNA-seq data.
+
+### Read Counts
+
+
+Output files
+
+- `batch_qcs/`
+ - `read_counts/`
+    - `read_counts.csv`: This file contains the read counts for each sample at various points in the pipeline. Each row is a different sample, and the columns give the number of reads the sample contained at that point in the pipeline.
+
+
+
+![Read Counts](images/read_counts.png)
+
+This is a custom script written in Bash. Its purpose is to report the number of reads that are filtered out at each step of the pipeline that filters reads, such as barcode detection, barcode correction, and alignment. Elevated levels of filtering can be indicative of quality concerns.
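+
+As a quick sketch (assuming `pandas` is available; the exact column names depend on which pipeline steps were run), per-step retention can be derived from this file:
+
+```python
+import pandas as pd
+
+# Load the per-sample read counts produced by the pipeline
+counts = pd.read_csv("read_counts.csv")
+
+# Numeric columns track read counts at successive steps; dividing by the
+# first (raw) column highlights steps with elevated filtering.
+numeric = counts.select_dtypes("number")
+retention = numeric.div(numeric.iloc[:, 0], axis=0)
+print(retention.round(3))
+```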
### MultiQC
diff --git a/docs/usage.md b/docs/usage.md
index 0146bf0..3ad83da 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -16,7 +16,7 @@ You will need to create a samplesheet with information about the samples you wou
The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across replicates 1 and 4 (`REP1` and `REP4` respectively):
-```console
+```csv title="samplesheet.csv"
sample,fastq,cell_count
CONTROL_REP1,AEG588A1_S1.fastq.gz,5000
CONTROL_REP1,AEG588A1_S2.fastq.gz,5000
@@ -33,7 +33,7 @@ CONTROL_REP4,AEG588A4_S3.fastq.gz,5000
The example `samplesheet.csv` below contains a single FASTQ file per biological replicate with sample specific cell counts.
-```console
+```csv title="samplesheet.csv"
sample,fastq,cell_count
CONTROL_REP1,AEG588A1_S1.fastq.gz,5000
CONTROL_REP2,AEG588A2_S1.fastq.gz,6000
@@ -88,12 +88,12 @@ The above pipeline run specified with a params file in yaml format:
nextflow run nf-core/scnanoseq -profile -params-file params.yaml
```
-with `params.yaml` containing:
+with:
-```yaml
+```yaml title="params.yaml"
input: "./samplesheet.csv"
outdir: "./results/"
-genome: "/path/to/genome.fa"
+fasta: "/path/to/genome.fa"
gtf: "/path/to/genome.gtf"
barcode_format: "10X_3v3"
<...>
@@ -182,7 +182,7 @@ Specify the path to a specific config file (this is a core Nextflow command). Se
Whilst the default requirements set within the pipeline will hopefully work for most people with GridION and average size PromethION data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped.
-To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. We have also provided a pipeline specific example of a custom configuration file in the Introduction page or the pipeline's GitHub `README` page.
+To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) sections of the nf-core website. We have also provided a pipeline-specific example of a custom configuration file on the [Introduction page](https://nf-co.re/scnanoseq/latest/#troubleshooting).
### Custom Containers
diff --git a/main.nf b/main.nf
index 648ec36..814ee12 100644
--- a/main.nf
+++ b/main.nf
@@ -100,7 +100,6 @@ workflow {
NFCORE_SCNANOSEQ.out.multiqc_report
)
- //NFCORE_SCNANOSEQ()
}
/*
diff --git a/modules.json b/modules.json
index 6f0b053..b3ecaa7 100644
--- a/modules.json
+++ b/modules.json
@@ -30,22 +30,45 @@
"git_sha": "f4ae1d942bd50c5c0b9bd2de1393ce38315ba57c",
"installed_by": ["modules"]
},
- "gunzip": {
+ "minimap2/align": {
"branch": "master",
- "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208",
+ "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306",
"installed_by": ["modules"]
},
+ "minimap2/index": {
+ "branch": "master",
+ "git_sha": "72e277acfd9e61a9f1368eafb4a9e83f5bcaa9f5",
+ "installed_by": ["modules"],
+ "patch": "modules/nf-core/minimap2/index/minimap2-index.diff"
+ },
"multiqc": {
"branch": "master",
"git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a",
"installed_by": ["modules"]
},
+ "nanocomp": {
+ "branch": "master",
+ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5",
+ "installed_by": ["modules"],
+ "patch": "modules/nf-core/nanocomp/nanocomp.diff"
+ },
"nanoplot": {
"branch": "master",
"git_sha": "a31407dfaf0cb0d04768d5cb439fc6f4523a6981",
"installed_by": ["modules"],
"patch": "modules/nf-core/nanoplot/nanoplot.diff"
},
+ "pigz/compress": {
+ "branch": "master",
+ "git_sha": "c00055a0b13d622b4f1f51a8e5be31deaf99ded7",
+ "installed_by": ["modules"]
+ },
+ "pigz/uncompress": {
+ "branch": "master",
+ "git_sha": "c00055a0b13d622b4f1f51a8e5be31deaf99ded7",
+ "installed_by": ["modules"],
+ "patch": "modules/nf-core/pigz/uncompress/pigz-uncompress.diff"
+ },
"rseqc/readdistribution": {
"branch": "master",
"git_sha": "6c7d8f1d6247655e4bc4d97f37b68b2461f645f6",
diff --git a/modules/local/correct_barcodes.nf b/modules/local/correct_barcodes.nf
index 6ed2756..55596e2 100644
--- a/modules/local/correct_barcodes.nf
+++ b/modules/local/correct_barcodes.nf
@@ -13,7 +13,7 @@ process CORRECT_BARCODES {
output:
tuple val(meta), path("*.corrected_bc_umi.tsv"), emit: corrected_bc_info
- path "versions.yml" , emit: versions
+ path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
@@ -25,10 +25,10 @@ process CORRECT_BARCODES {
"""
correct_barcodes.py \\
${args} \\
- -i ${bc_info} \\
- -o ${prefix}.corrected_bc_umi.tsv \\
- -w ${whitelist} \\
- -b ${bc_count_file} \\
+ --infile ${bc_info} \\
+ --outfile ${prefix}.corrected_bc_umi.tsv \\
+ --whitelist ${whitelist} \\
+ --barcode_count ${bc_count_file} \\
--skip_header
cat <<-END_VERSIONS > versions.yml
diff --git a/modules/local/isoquant.nf b/modules/local/isoquant.nf
index 2de8a6e..650b1c4 100644
--- a/modules/local/isoquant.nf
+++ b/modules/local/isoquant.nf
@@ -1,12 +1,11 @@
process ISOQUANT {
tag "$meta.id"
label 'process_high'
- label 'process_high_memory'
- conda "bioconda::isoquant=3.3.1"
+ conda "bioconda::isoquant=3.5.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/isoquant:3.3.1--hdfd78af_0' :
- 'biocontainers/isoquant:3.3.1--hdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/isoquant:3.5.0--hdfd78af_0' :
+ 'biocontainers/isoquant:3.5.0--hdfd78af_0' }"
input:
tuple val(meta), path(bam), path(bai)
@@ -41,7 +40,7 @@ process ISOQUANT {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- isoquant: \$(isoquant.py -v 2>&1)
+ isoquant: \$(isoquant.py -v | sed 's#IsoQuant ##')
END_VERSIONS
"""
} else {
@@ -62,7 +61,7 @@ process ISOQUANT {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- isoquant: \$(isoquant.py -v 2>&1)
+ isoquant: \$(isoquant.py -v | sed 's#IsoQuant ##')
END_VERSIONS
"""
diff --git a/modules/local/minimap2_index.nf b/modules/local/minimap2_index.nf
deleted file mode 100644
index 9d3e7bc..0000000
--- a/modules/local/minimap2_index.nf
+++ /dev/null
@@ -1,40 +0,0 @@
-process MINIMAP2_INDEX {
- tag "$fasta"
- label 'process_medium'
-
- conda "bioconda::minimap2=2.24"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/minimap2:2.24--h5bf99c6_0':
- 'biocontainers/minimap2:2.24--h5bf99c6_0' }"
-
- input:
- path fasta
- path bed
-
- output:
- path "*.mmi" , emit: index
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- // TODO: see if there is a better way of including additional
- // input (e.g.: bed / junctions), so we can use the module in nf-core rather than local
-
- script:
- def args = task.ext.args ?: ''
- def junctions = "--junc-bed ${bed}"
- """
- minimap2 \\
- $args \\
- $junctions \\
- -t $task.cpus \\
- -d ${fasta}.mmi \\
- $fasta
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- minimap2: \$(minimap2 --version 2>&1)
- END_VERSIONS
- """
-}
diff --git a/modules/local/nanocomp.nf b/modules/local/nanocomp.nf
deleted file mode 100644
index 48de180..0000000
--- a/modules/local/nanocomp.nf
+++ /dev/null
@@ -1,38 +0,0 @@
-process NANOCOMP {
- label 'process_high'
-
- conda "bioconda::nanocomp=1.20.0"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/nanocomp:1.20.0--pyhdfd78af_0':
- 'biocontainers/nanocomp:1.20.0--pyhdfd78af_0' }"
-
- input:
- path(ont_files)
- path(idx_files)
-
- output:
- path "*.html" , emit: html
- path "*.txt" , emit: txt
- path "*.log" , emit: log
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
- def input_files = ("$ont_files".contains(".fastq.gz") || "$ont_files".contains(".fq.gz")) ? "--fastq ${ont_files}" :
- ("$ont_files".contains(".bam")) ? "--bam ${ont_files}" : ''
-
- """
- NanoComp \\
- $args \\
- -t $task.cpus \\
- $input_files
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- nanocomp: \$(echo \$(NanoComp --version 2>&1) | sed 's/^.*NanoComp //; s/ .*\$//')
- END_VERSIONS
- """
-}
diff --git a/modules/local/paftools.nf b/modules/local/paftools.nf
deleted file mode 100644
index 9555ac3..0000000
--- a/modules/local/paftools.nf
+++ /dev/null
@@ -1,35 +0,0 @@
-process PAFTOOLS {
- tag "$gtf"
- label 'process_low'
-
- conda "bioconda::minimap2=2.24"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/minimap2:2.24--h5bf99c6_0':
- 'biocontainers/minimap2:2.24--h5bf99c6_0' }"
-
- input:
- path gtf
-
- output:
- path "*.bed" , emit: bed
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
-
- """
- paftools.js gff2bed \\
- $args \\
- $gtf > ${gtf.baseName}.bed
-
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- minimap2: \$(minimap2 --version 2>&1)
- END_VERSIONS
- """
-}
-
diff --git a/modules/local/pigz.nf b/modules/local/pigz.nf
deleted file mode 100644
index fc4c534..0000000
--- a/modules/local/pigz.nf
+++ /dev/null
@@ -1,37 +0,0 @@
-process PIGZ {
- tag "$meta.id"
- label 'process_low'
-
- conda "conda-forge::pigz=2.3.4"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/pigz:2.3.4':
- 'biocontainers/pigz:2.3.4' }"
-
- input:
- tuple val(meta), path(unzipped_file)
- val addl_prefix
-
- output:
- tuple val(meta), path("*.gz"), emit: archive
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}_${addl_prefix}"
- """
- cat ${unzipped_file} > ${prefix}.fastq
- pigz \\
- $args \\
- -f \\
- -p $task.cpus \\
- ${prefix}.fastq
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- pigz: \$(echo \$(pigz -V 2>&1) | sed 's/pigz //g' )
- END_VERSIONS
- """
-}
diff --git a/modules/local/read_counts.nf b/modules/local/read_counts.nf
index b3fdd36..bebca0b 100644
--- a/modules/local/read_counts.nf
+++ b/modules/local/read_counts.nf
@@ -13,7 +13,8 @@ process READ_COUNTS {
path correct_tsv
output:
- path "read_counts.csv" , emit: read_counts
+ path "read_counts.csv" , emit: read_counts
+ path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
@@ -26,5 +27,10 @@ process READ_COUNTS {
$args \\
--input ./ \\
--output read_counts.csv
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ perl: \$(perl --version | head -n2 | tail -n1 | sed -n 's/.*(v\\([^)]*\\)).*/\\1/p')
+ END_VERSIONS
"""
}
diff --git a/modules/local/split_file.nf b/modules/local/split_file.nf
index fe7ede3..0caac3c 100644
--- a/modules/local/split_file.nf
+++ b/modules/local/split_file.nf
@@ -13,9 +13,8 @@ process SPLIT_FILE {
val split_amount
output:
- // TODO: Make this more generalizable. Gunzip probably a good example
tuple val(meta), path("*$file_ext"), emit: split_files
- path "versions.yml" , emit: versions
+ path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf
deleted file mode 100644
index 468a6f2..0000000
--- a/modules/nf-core/gunzip/main.nf
+++ /dev/null
@@ -1,48 +0,0 @@
-process GUNZIP {
- tag "$archive"
- label 'process_single'
-
- conda "${moduleDir}/environment.yml"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
- 'nf-core/ubuntu:20.04' }"
-
- input:
- tuple val(meta), path(archive)
-
- output:
- tuple val(meta), path("$gunzip"), emit: gunzip
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- script:
- def args = task.ext.args ?: ''
- gunzip = archive.toString() - '.gz'
- """
- # Not calling gunzip itself because it creates files
- # with the original group ownership rather than the
- # default one for that user / the work directory
- gzip \\
- -cd \\
- $args \\
- $archive \\
- > $gunzip
-
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
- END_VERSIONS
- """
-
- stub:
- gunzip = archive.toString() - '.gz'
- """
- touch $gunzip
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//')
- END_VERSIONS
- """
-}
diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml
deleted file mode 100644
index 231034f..0000000
--- a/modules/nf-core/gunzip/meta.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-name: gunzip
-description: Compresses and decompresses files.
-keywords:
- - gunzip
- - compression
- - decompression
-tools:
- - gunzip:
- description: |
- gzip is a file format and a software application used for file compression and decompression.
- documentation: https://www.gnu.org/software/gzip/manual/gzip.html
- licence: ["GPL-3.0-or-later"]
-input:
- - meta:
- type: map
- description: |
- Optional groovy Map containing meta information
- e.g. [ id:'test', single_end:false ]
- - archive:
- type: file
- description: File to be compressed/uncompressed
- pattern: "*.*"
-output:
- - gunzip:
- type: file
- description: Compressed/uncompressed file
- pattern: "*.*"
- - versions:
- type: file
- description: File containing software versions
- pattern: "versions.yml"
-authors:
- - "@joseespinosa"
- - "@drpatelh"
- - "@jfy133"
-maintainers:
- - "@joseespinosa"
- - "@drpatelh"
- - "@jfy133"
diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test
deleted file mode 100644
index 6406008..0000000
--- a/modules/nf-core/gunzip/tests/main.nf.test
+++ /dev/null
@@ -1,36 +0,0 @@
-nextflow_process {
-
- name "Test Process GUNZIP"
- script "../main.nf"
- process "GUNZIP"
- tag "gunzip"
- tag "modules_nfcore"
- tag "modules"
-
- test("Should run without failures") {
-
- when {
- params {
- outdir = "$outputDir"
- }
- process {
- """
- input[0] = Channel.of([
- [],
- file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
- ]
- )
- """
- }
- }
-
- then {
- assertAll(
- { assert process.success },
- { assert snapshot(process.out).match() }
- )
- }
-
- }
-
-}
diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap
deleted file mode 100644
index 720fd9f..0000000
--- a/modules/nf-core/gunzip/tests/main.nf.test.snap
+++ /dev/null
@@ -1,31 +0,0 @@
-{
- "Should run without failures": {
- "content": [
- {
- "0": [
- [
- [
-
- ],
- "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
- ]
- ],
- "1": [
- "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
- ],
- "gunzip": [
- [
- [
-
- ],
- "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
- ]
- ],
- "versions": [
- "versions.yml:md5,54376d32aca20e937a4ec26dac228e84"
- ]
- }
- ],
- "timestamp": "2023-10-17T15:35:37.690477896"
- }
-}
\ No newline at end of file
diff --git a/modules/nf-core/gunzip/tests/tags.yml b/modules/nf-core/gunzip/tests/tags.yml
deleted file mode 100644
index fd3f691..0000000
--- a/modules/nf-core/gunzip/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-gunzip:
- - modules/nf-core/gunzip/**
diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml
new file mode 100644
index 0000000..41e8fe9
--- /dev/null
+++ b/modules/nf-core/minimap2/align/environment.yml
@@ -0,0 +1,11 @@
+name: minimap2_align
+
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+
+dependencies:
+ - bioconda::htslib=1.20
+ - bioconda::minimap2=2.28
+ - bioconda::samtools=1.20
diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf
new file mode 100644
index 0000000..d82dc14
--- /dev/null
+++ b/modules/nf-core/minimap2/align/main.nf
@@ -0,0 +1,81 @@
+process MINIMAP2_ALIGN {
+ tag "$meta.id"
+ label 'process_high'
+
+ // Note: the versions here need to match the versions used in the mulled container below and minimap2/index
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' :
+ 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }"
+
+ input:
+ tuple val(meta), path(reads)
+ tuple val(meta2), path(reference)
+ val bam_format
+ val bam_index_extension
+ val cigar_paf_format
+ val cigar_bam
+
+ output:
+ tuple val(meta), path("*.paf") , optional: true, emit: paf
+ tuple val(meta), path("*.bam") , optional: true, emit: bam
+ tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def args2 = task.ext.args2 ?: ''
+ def args3 = task.ext.args3 ?: ''
+ def args4 = task.ext.args4 ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam"
+ def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf"
+ def cigar_paf = cigar_paf_format && !bam_format ? "-c" : ''
+ def set_cigar_bam = cigar_bam && bam_format ? "-L" : ''
+ def bam_input = "${reads.extension}".matches('sam|bam|cram')
+ def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : ''
+ def query = bam_input ? "-" : reads
+ def target = reference ?: (bam_input ? error("BAM input requires reference") : reads)
+
+ """
+ $samtools_reset_fastq \\
+ minimap2 \\
+ $args \\
+ -t $task.cpus \\
+ $target \\
+ $query \\
+ $cigar_paf \\
+ $set_cigar_bam \\
+ $bam_output
+
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ minimap2: \$(minimap2 --version 2>&1)
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf"
+ def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : ""
+ def bam_input = "${reads.extension}".matches('sam|bam|cram')
+ def target = reference ?: (bam_input ? error("BAM input requires reference") : reads)
+
+ """
+ touch $output_file
+ ${bam_index}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ minimap2: \$(minimap2 --version 2>&1)
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml
new file mode 100644
index 0000000..8996f88
--- /dev/null
+++ b/modules/nf-core/minimap2/align/meta.yml
@@ -0,0 +1,84 @@
+name: minimap2_align
+description: A versatile pairwise aligner for genomic and spliced nucleotide sequences
+keywords:
+ - align
+ - fasta
+ - fastq
+ - genome
+ - paf
+ - reference
+tools:
+ - minimap2:
+ description: |
+ A versatile pairwise aligner for genomic and spliced nucleotide sequences.
+ homepage: https://github.com/lh3/minimap2
+ documentation: https://github.com/lh3/minimap2#uguide
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FASTA or FASTQ files of size 1 and 2 for single-end
+ and paired-end data, respectively.
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test_ref']
+ - reference:
+ type: file
+ description: |
+ Reference database in FASTA format.
+ - bam_format:
+ type: boolean
+ description: Specify that output should be in BAM format
+ - bam_index_extension:
+ type: string
+ description: BAM alignment index extension (e.g. "bai")
+ - cigar_paf_format:
+ type: boolean
+ description: Specify that output CIGAR should be in PAF format
+ - cigar_bam:
+ type: boolean
+ description: |
+ Write CIGAR with >65535 ops at the CG tag. This is recommended when
+ doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations)
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - paf:
+ type: file
+ description: Alignment in PAF format
+ pattern: "*.paf"
+ - bam:
+ type: file
+ description: Alignment in BAM format
+ pattern: "*.bam"
+ - index:
+ type: file
+ description: BAM alignment index
+ pattern: "*.bam.*"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@heuermh"
+ - "@sofstam"
+ - "@sateeshperi"
+ - "@jfy133"
+ - "@fellen31"
+maintainers:
+ - "@heuermh"
+ - "@sofstam"
+ - "@sateeshperi"
+ - "@jfy133"
+ - "@fellen31"
diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test
new file mode 100644
index 0000000..4072c17
--- /dev/null
+++ b/modules/nf-core/minimap2/align/tests/main.nf.test
@@ -0,0 +1,441 @@
+nextflow_process {
+
+ name "Test Process MINIMAP2_ALIGN"
+ script "../main.nf"
+ process "MINIMAP2_ALIGN"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "minimap2"
+ tag "minimap2/align"
+
+ test("sarscov2 - fastq, fasta, true, [], false, false") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = []
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ bam(process.out.bam[0][1]).getHeader(),
+ bam(process.out.bam[0][1]).getReadsMD5(),
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fastq, fasta, true, 'bai', false, false") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = 'bai'
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ bam(process.out.bam[0][1]).getHeader(),
+ bam(process.out.bam[0][1]).getReadsMD5(),
+ file(process.out.index[0][1]).name,
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [fastq1, fastq2], fasta, true, false, false") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = []
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ bam(process.out.bam[0][1]).getHeader(),
+ bam(process.out.bam[0][1]).getReadsMD5(),
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fastq, [], true, false, false") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ []
+ ]
+ input[2] = true
+ input[3] = []
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ bam(process.out.bam[0][1]).getHeader(),
+ bam(process.out.bam[0][1]).getReadsMD5(),
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam, fasta, true, [], false, false") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = []
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ bam(process.out.bam[0][1]).getHeader(),
+ bam(process.out.bam[0][1]).getReadsMD5(),
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam, fasta, true, 'bai', false, false") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = 'bai'
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ bam(process.out.bam[0][1]).getHeader(),
+ bam(process.out.bam[0][1]).getReadsMD5(),
+ file(process.out.index[0][1]).name,
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam, [], true, false, false") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ []
+ ]
+ input[2] = true
+ input[3] = []
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.failed }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fastq, fasta, true, [], false, false - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = []
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fastq, fasta, true, 'bai', false, false - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = 'bai'
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fastq, fasta, false, [], false, false - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = false
+ input[3] = []
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam, fasta, true, [], false, false - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = []
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam, fasta, true, 'bai', false, false - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = 'bai'
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam, [], true, false, false - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ []
+ ]
+ input[2] = true
+ input[3] = []
+ input[4] = false
+ input[5] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.failed }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap
new file mode 100644
index 0000000..12264a8
--- /dev/null
+++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap
@@ -0,0 +1,476 @@
+{
+ "sarscov2 - bam, fasta, true, 'bai', false, false": {
+ "content": [
+ [
+ "@HD\tVN:1.6\tSO:coordinate",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index"
+ ],
+ "5d426b9a5f5b2c54f1d7f1e4c238ae94",
+ "test.bam.bai",
+ [
+ "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-25T09:03:00.827260362"
+ },
+ "sarscov2 - bam, fasta, true, 'bai', false, false - stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "index": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "paf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-23T11:21:37.92353539"
+ },
+ "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "index": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "paf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-03T11:29:44.669021368"
+ },
+ "sarscov2 - fastq, fasta, false, [], false, false - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ],
+ "bam": [
+
+ ],
+ "index": [
+
+ ],
+ "paf": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-03T11:15:52.738781039"
+ },
+ "sarscov2 - fastq, fasta, true, [], false, false - stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "index": [
+
+ ],
+ "paf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-03T11:15:23.033808223"
+ },
+ "sarscov2 - [fastq1, fastq2], fasta, true, false, false": {
+ "content": [
+ [
+ "@HD\tVN:1.6\tSO:coordinate",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam"
+ ],
+ "1bc392244f228bf52cf0b5a8f6a654c9",
+ [
+ "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-23T11:18:18.964586894"
+ },
+ "sarscov2 - fastq, fasta, true, [], false, false": {
+ "content": [
+ [
+ "@HD\tVN:1.6\tSO:coordinate",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam"
+ ],
+ "f194745c0ccfcb2a9c0aee094a08750",
+ [
+ "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-23T11:17:48.667488325"
+ },
+ "sarscov2 - fastq, fasta, true, 'bai', false, false": {
+ "content": [
+ [
+ "@HD\tVN:1.6\tSO:coordinate",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index"
+ ],
+ "f194745c0ccfcb2a9c0aee094a08750",
+ "test.bam.bai",
+ [
+ "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-23T11:18:02.517416733"
+ },
+ "sarscov2 - bam, fasta, true, [], false, false": {
+ "content": [
+ [
+ "@HD\tVN:1.6\tSO:coordinate",
+ "@SQ\tSN:MT192765.1\tLN:29829",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam"
+ ],
+ "5d426b9a5f5b2c54f1d7f1e4c238ae94",
+ [
+ "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-25T09:02:49.64829488"
+ },
+ "sarscov2 - bam, fasta, true, [], false, false - stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "index": [
+
+ ],
+ "paf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-23T11:21:22.162291795"
+ },
+ "sarscov2 - fastq, [], true, false, false": {
+ "content": [
+ [
+ "@HD\tVN:1.6\tSO:coordinate",
+ "@SQ\tSN:ERR5069949.2151832\tLN:150",
+ "@SQ\tSN:ERR5069949.576388\tLN:77",
+ "@SQ\tSN:ERR5069949.501486\tLN:146",
+ "@SQ\tSN:ERR5069949.1331889\tLN:132",
+ "@SQ\tSN:ERR5069949.2161340\tLN:80",
+ "@SQ\tSN:ERR5069949.973930\tLN:79",
+ "@SQ\tSN:ERR5069949.2417063\tLN:150",
+ "@SQ\tSN:ERR5069949.376959\tLN:151",
+ "@SQ\tSN:ERR5069949.1088785\tLN:149",
+ "@SQ\tSN:ERR5069949.1066259\tLN:147",
+ "@SQ\tSN:ERR5069949.2832676\tLN:139",
+ "@SQ\tSN:ERR5069949.2953930\tLN:151",
+ "@SQ\tSN:ERR5069949.324865\tLN:151",
+ "@SQ\tSN:ERR5069949.2185111\tLN:150",
+ "@SQ\tSN:ERR5069949.937422\tLN:151",
+ "@SQ\tSN:ERR5069949.2431709\tLN:150",
+ "@SQ\tSN:ERR5069949.1246538\tLN:148",
+ "@SQ\tSN:ERR5069949.1189252\tLN:98",
+ "@SQ\tSN:ERR5069949.2216307\tLN:147",
+ "@SQ\tSN:ERR5069949.3273002\tLN:148",
+ "@SQ\tSN:ERR5069949.3277445\tLN:151",
+ "@SQ\tSN:ERR5069949.3022231\tLN:147",
+ "@SQ\tSN:ERR5069949.184542\tLN:151",
+ "@SQ\tSN:ERR5069949.540529\tLN:149",
+ "@SQ\tSN:ERR5069949.686090\tLN:150",
+ "@SQ\tSN:ERR5069949.2787556\tLN:106",
+ "@SQ\tSN:ERR5069949.2650879\tLN:150",
+ "@SQ\tSN:ERR5069949.2064910\tLN:149",
+ "@SQ\tSN:ERR5069949.2328704\tLN:150",
+ "@SQ\tSN:ERR5069949.1067032\tLN:150",
+ "@SQ\tSN:ERR5069949.3338256\tLN:151",
+ "@SQ\tSN:ERR5069949.1412839\tLN:147",
+ "@SQ\tSN:ERR5069949.1538968\tLN:150",
+ "@SQ\tSN:ERR5069949.147998\tLN:94",
+ "@SQ\tSN:ERR5069949.366975\tLN:106",
+ "@SQ\tSN:ERR5069949.1372331\tLN:151",
+ "@SQ\tSN:ERR5069949.1709367\tLN:129",
+ "@SQ\tSN:ERR5069949.2388984\tLN:150",
+ "@SQ\tSN:ERR5069949.1132353\tLN:150",
+ "@SQ\tSN:ERR5069949.1151736\tLN:151",
+ "@SQ\tSN:ERR5069949.479807\tLN:150",
+ "@SQ\tSN:ERR5069949.2176303\tLN:151",
+ "@SQ\tSN:ERR5069949.2772897\tLN:151",
+ "@SQ\tSN:ERR5069949.1020777\tLN:122",
+ "@SQ\tSN:ERR5069949.465452\tLN:151",
+ "@SQ\tSN:ERR5069949.1704586\tLN:149",
+ "@SQ\tSN:ERR5069949.1258508\tLN:151",
+ "@SQ\tSN:ERR5069949.986441\tLN:119",
+ "@SQ\tSN:ERR5069949.2674295\tLN:148",
+ "@SQ\tSN:ERR5069949.885966\tLN:79",
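+    // "<prefix>.bam##idx##<prefix>.bam.bai" plus --write-index makes samtools sort emit the BAM and its index in one pass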
+ "@SQ\tSN:ERR5069949.2342766\tLN:151",
+ "@SQ\tSN:ERR5069949.3122970\tLN:127",
+ "@SQ\tSN:ERR5069949.3279513\tLN:72",
+ "@SQ\tSN:ERR5069949.309410\tLN:151",
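+    // SAM/BAM/CRAM input is streamed back to FASTQ (samtools reset | samtools fastq) and fed to minimap2 on stdin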
+ "@SQ\tSN:ERR5069949.532979\tLN:149",
+ "@SQ\tSN:ERR5069949.2888794\tLN:151",
+ "@SQ\tSN:ERR5069949.2205229\tLN:150",
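+    // with no reference supplied, reads are aligned against themselves; BAM input always requires a reference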
+ "@SQ\tSN:ERR5069949.786562\tLN:151",
+ "@SQ\tSN:ERR5069949.919671\tLN:151",
+ "@SQ\tSN:ERR5069949.1328186\tLN:151",
+ "@SQ\tSN:ERR5069949.870926\tLN:149",
+ "@SQ\tSN:ERR5069949.2257580\tLN:151",
+ "@SQ\tSN:ERR5069949.3249622\tLN:77",
+ "@SQ\tSN:ERR5069949.611123\tLN:125",
+ "@SQ\tSN:ERR5069949.651338\tLN:142",
+ "@SQ\tSN:ERR5069949.169513\tLN:92",
+ "@SQ\tSN:ERR5069949.155944\tLN:150",
+ "@SQ\tSN:ERR5069949.2033605\tLN:150",
+ "@SQ\tSN:ERR5069949.2730382\tLN:142",
+ "@SQ\tSN:ERR5069949.2125592\tLN:150",
+ "@SQ\tSN:ERR5069949.1062611\tLN:151",
+ "@SQ\tSN:ERR5069949.1778133\tLN:151",
+ "@SQ\tSN:ERR5069949.3057020\tLN:95",
+ "@SQ\tSN:ERR5069949.2972968\tLN:141",
+ "@SQ\tSN:ERR5069949.2734474\tLN:149",
+ "@SQ\tSN:ERR5069949.856527\tLN:151",
+ "@SQ\tSN:ERR5069949.2098070\tLN:151",
+ "@SQ\tSN:ERR5069949.1552198\tLN:150",
+ "@SQ\tSN:ERR5069949.2385514\tLN:150",
+ "@SQ\tSN:ERR5069949.2270078\tLN:151",
+ "@SQ\tSN:ERR5069949.114870\tLN:150",
+ "@SQ\tSN:ERR5069949.2668880\tLN:147",
+ "@SQ\tSN:ERR5069949.257821\tLN:139",
+ "@SQ\tSN:ERR5069949.2243023\tLN:150",
+ "@SQ\tSN:ERR5069949.2605155\tLN:146",
+ "@SQ\tSN:ERR5069949.1340552\tLN:151",
+ "@SQ\tSN:ERR5069949.1561137\tLN:150",
+ "@SQ\tSN:ERR5069949.2361683\tLN:149",
+ "@SQ\tSN:ERR5069949.2521353\tLN:150",
+ "@SQ\tSN:ERR5069949.1261808\tLN:149",
+ "@SQ\tSN:ERR5069949.2734873\tLN:98",
+ "@SQ\tSN:ERR5069949.3017828\tLN:107",
+ "@SQ\tSN:ERR5069949.573706\tLN:150",
+ "@SQ\tSN:ERR5069949.1980512\tLN:151",
+ "@SQ\tSN:ERR5069949.1014693\tLN:150",
+ "@SQ\tSN:ERR5069949.3184655\tLN:150",
+ "@SQ\tSN:ERR5069949.29668\tLN:89",
+ "@SQ\tSN:ERR5069949.3258358\tLN:151",
+ "@SQ\tSN:ERR5069949.1476386\tLN:151",
+ "@SQ\tSN:ERR5069949.2415814\tLN:150",
+ "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz",
+ "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam"
+ ],
+ "16c1c651f8ec67383bcdee3c55aed94f",
+ [
+ "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-07-23T11:18:34.246998277"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/minimap2/align/tests/tags.yml b/modules/nf-core/minimap2/align/tests/tags.yml
new file mode 100644
index 0000000..39dba37
--- /dev/null
+++ b/modules/nf-core/minimap2/align/tests/tags.yml
@@ -0,0 +1,2 @@
+minimap2/align:
+ - "modules/nf-core/minimap2/align/**"
diff --git a/modules/nf-core/minimap2/index/environment.yml b/modules/nf-core/minimap2/index/environment.yml
new file mode 100644
index 0000000..8a912a1
--- /dev/null
+++ b/modules/nf-core/minimap2/index/environment.yml
@@ -0,0 +1,7 @@
+name: minimap2_index
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::minimap2=2.28
diff --git a/modules/local/minimap2_align.nf b/modules/nf-core/minimap2/index/main.nf
similarity index 52%
rename from modules/local/minimap2_align.nf
rename to modules/nf-core/minimap2/index/main.nf
index 11f6936..618b905 100644
--- a/modules/local/minimap2_align.nf
+++ b/modules/nf-core/minimap2/index/main.nf
@@ -1,19 +1,17 @@
-process MINIMAP2_ALIGN {
- tag "$meta.id"
+process MINIMAP2_INDEX {
label 'process_medium'
- conda "bioconda::minimap2=2.24"
+ // Note: the versions here need to match the versions used in minimap2/align
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/minimap2:2.24--h5bf99c6_0':
- 'biocontainers/minimap2:2.24--h5bf99c6_0' }"
+ 'https://depot.galaxyproject.org/singularity/minimap2:2.28--he4a0461_0' :
+ 'biocontainers/minimap2:2.28--he4a0461_0' }"
input:
- tuple val(meta), path(fastq)
- path bed
- path reference
+ tuple val(meta), path(fasta)
output:
- tuple val(meta), path("*.sam"), emit: sam
+ tuple val(meta), path("*.mmi"), emit: index
path "versions.yml" , emit: versions
when:
@@ -21,15 +19,22 @@ process MINIMAP2_ALIGN {
script:
def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
- def junctions = "--junc-bed ${bed}"
"""
minimap2 \\
- $args \\
- $junctions \\
-t $task.cpus \\
- $reference \\
- $fastq > ${meta.id}.sam
+ -d ${fasta.baseName}.mmi \\
+ $args \\
+ $fasta
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ minimap2: \$(minimap2 --version 2>&1)
+ END_VERSIONS
+ """
+
+ stub:
+ """
+ touch ${fasta.baseName}.mmi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/nf-core/minimap2/index/meta.yml b/modules/nf-core/minimap2/index/meta.yml
new file mode 100644
index 0000000..1d29e3f
--- /dev/null
+++ b/modules/nf-core/minimap2/index/meta.yml
@@ -0,0 +1,43 @@
+name: minimap2_index
+description: Provides fasta index required by minimap2 alignment.
+keywords:
+ - index
+ - fasta
+ - reference
+tools:
+ - minimap2:
+ description: |
+ A versatile pairwise aligner for genomic and spliced nucleotide sequences.
+ homepage: https://github.com/lh3/minimap2
+ documentation: https://github.com/lh3/minimap2#uguide
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - fasta:
+ type: file
+ description: |
+ Reference database in FASTA format.
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - index:
+ type: file
+ description: Minimap2 fasta index.
+ pattern: "*.mmi"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@yuukiiwa"
+ - "@drpatelh"
+maintainers:
+ - "@yuukiiwa"
+ - "@drpatelh"
diff --git a/modules/nf-core/minimap2/index/minimap2-index.diff b/modules/nf-core/minimap2/index/minimap2-index.diff
new file mode 100644
index 0000000..7960910
--- /dev/null
+++ b/modules/nf-core/minimap2/index/minimap2-index.diff
@@ -0,0 +1,12 @@
+Changes in module 'nf-core/minimap2/index'
+--- modules/nf-core/minimap2/index/main.nf
++++ modules/nf-core/minimap2/index/main.nf
+@@ -1,5 +1,5 @@
+ process MINIMAP2_INDEX {
+- label 'process_low'
++ label 'process_medium'
+
+ // Note: the versions here need to match the versions used in minimap2/align
+ conda "${moduleDir}/environment.yml"
+
+************************************************************
diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test b/modules/nf-core/minimap2/index/tests/main.nf.test
new file mode 100644
index 0000000..97840ff
--- /dev/null
+++ b/modules/nf-core/minimap2/index/tests/main.nf.test
@@ -0,0 +1,32 @@
+nextflow_process {
+
+ name "Test Process MINIMAP2_INDEX"
+ script "../main.nf"
+ process "MINIMAP2_INDEX"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "minimap2"
+ tag "minimap2/index"
+
+ test("minimap2 index") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assert snapshot(process.out).match()
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test.snap b/modules/nf-core/minimap2/index/tests/main.nf.test.snap
new file mode 100644
index 0000000..0b09882
--- /dev/null
+++ b/modules/nf-core/minimap2/index/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+ "Should run without failures": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test_ref"
+ },
+ "genome.mmi:md5,72e450f12dc691e763c697463bdb1571"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,0fced0ee8015e7f50b82566e3db8f7b0"
+ ],
+ "index": [
+ [
+ {
+ "id": "test_ref"
+ },
+ "genome.mmi:md5,72e450f12dc691e763c697463bdb1571"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0fced0ee8015e7f50b82566e3db8f7b0"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T11:46:30.000058092"
+ },
+ "minimap2 index": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.mmi:md5,72e450f12dc691e763c697463bdb1571"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,2f8340380c6741e9261a284262a90bde"
+ ],
+ "index": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.mmi:md5,72e450f12dc691e763c697463bdb1571"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,2f8340380c6741e9261a284262a90bde"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-05T10:58:29.828187662"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/minimap2/index/tests/tags.yml b/modules/nf-core/minimap2/index/tests/tags.yml
new file mode 100644
index 0000000..e5ef8e1
--- /dev/null
+++ b/modules/nf-core/minimap2/index/tests/tags.yml
@@ -0,0 +1,2 @@
+minimap2/index:
+ - modules/nf-core/minimap2/index/**
diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/nanocomp/environment.yml
similarity index 60%
rename from modules/nf-core/gunzip/environment.yml
rename to modules/nf-core/nanocomp/environment.yml
index 25910b3..dc92191 100644
--- a/modules/nf-core/gunzip/environment.yml
+++ b/modules/nf-core/nanocomp/environment.yml
@@ -1,7 +1,7 @@
-name: gunzip
+name: nanocomp
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- - conda-forge::sed=4.7
+  - bioconda::nanocomp=1.21.0
diff --git a/modules/nf-core/nanocomp/main.nf b/modules/nf-core/nanocomp/main.nf
new file mode 100755
index 0000000..c2f5bcd
--- /dev/null
+++ b/modules/nf-core/nanocomp/main.nf
@@ -0,0 +1,140 @@
+process NANOCOMP {
+ label 'process_medium'
+ label 'process_high_memory'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/nanocomp:1.21.0--pyhdfd78af_0':
+ 'biocontainers/nanocomp:1.21.0--pyhdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(filelist)
+
+ output:
+ tuple val(meta), path("*NanoComp-report.html"), emit: report_html
+ tuple val(meta), path("*NanoComp_lengths_violin.html"), emit: lengths_violin_html
+ tuple val(meta), path("*NanoComp_log_length_violin.html"), emit: log_length_violin_html
+ tuple val(meta), path("*NanoComp_N50.html"), emit: n50_html
+ tuple val(meta), path("*NanoComp_number_of_reads.html"), emit: number_of_reads_html
+ tuple val(meta), path("*NanoComp_OverlayHistogram.html"), emit: overlay_histogram_html
+ tuple val(meta), path("*NanoComp_OverlayHistogram_Normalized.html"), emit: overlay_histogram_normalized_html
+ tuple val(meta), path("*NanoComp_OverlayLogHistogram.html"), emit: overlay_log_histogram_html
+ tuple val(meta), path("*NanoComp_OverlayLogHistogram_Normalized.html"), emit: overlay_log_histogram_normalized_html
+ tuple val(meta), path("*NanoComp_total_throughput.html"), emit: total_throughput_html
+ tuple val(meta), path("*NanoComp_quals_violin.html"), emit: quals_violin_html, optional: true
+ tuple val(meta), path("*NanoComp_OverlayHistogram_Identity.html"), emit: overlay_histogram_identity_html, optional: true
+ tuple val(meta), path("*NanoComp_OverlayHistogram_PhredScore.html"), emit: overlay_histogram_phredscore_html, optional: true
+ tuple val(meta), path("*NanoComp_percentIdentity_violin.html"), emit: percent_identity_violin_html, optional: true
+ tuple val(meta), path("*NanoComp_ActivePoresOverTime.html"), emit: active_pores_over_time_html, optional: true
+ tuple val(meta), path("*NanoComp_CumulativeYieldPlot_Gigabases.html"), emit: cumulative_yield_plot_gigabases_html, optional: true
+ tuple val(meta), path("*NanoComp_sequencing_speed_over_time.html"), emit: sequencing_speed_over_time_html, optional: true
+ tuple val(meta), path("*NanoStats.txt"), emit: stats_txt
+ path "versions.yml", emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ if (prefix == ""){
+ prefixflag = ""
+ } else {
+ prefixflag = "--prefix " + prefix
+ }
+
+ //determine input file type
+ filetypes = []
+ for (file in filelist){
+ tokenized_filename = file.getName().tokenize('.')
+ if (tokenized_filename.size() < 2){
+            throw new java.lang.IndexOutOfBoundsException("Every input file to NanoComp has to have a file extension.")
+ }
+
+ first_namepart = true
+ extension_found = false
+
+ for (namepart in tokenized_filename){
+ if (namepart == ""){
+ continue
+ }
+
+            // prevent the file name itself from being treated as an extension
+ if (first_namepart == true){
+ first_namepart = false
+ continue
+ }
+
+ if (["fq","fastq"].contains(namepart)){
+ filetypes.add("fastq")
+ extension_found = true
+ break
+ } else if (["fasta", "fna", "ffn", "faa", "frn", "fa"].contains(namepart)) {
+ filetypes.add("fasta")
+ extension_found = true
+ break
+ } else if (namepart == "bam") {
+ filetypes.add("bam")
+ extension_found = true
+ break
+ } else if (namepart == "txt") {
+ filetypes.add("summary")
+ extension_found = true
+ break
+ }
+ }
+
+ if (extension_found == false){
+ throw new java.lang.IllegalArgumentException("There was no suitable filetype found for " + file.getName() +
+ ". NanoComp only accepts fasta (fasta, fna, ffn, faa, frn, fa), fastq (fastq, fq), bam and Nanopore sequencing summary (txt).")
+ }
+ }
+
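+    // all inputs must resolve to a single filetype; NanoComp cannot mix input types in one run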
+ filetypes.unique()
+ if (filetypes.size() < 1){
+ throw new java.lang.IllegalArgumentException("There was no suitable filetype found in NanoComp input. Please use fasta, fastq, bam or Nanopore sequencing summary.")
+ }
+ if (filetypes.size() > 1){
+ throw new java.lang.IllegalArgumentException("You gave different filetypes to NanoComp. Please use only *one* of fasta, fastq, bam or Nanopore sequencing summary.")
+ }
+ filetype = filetypes[0]
+
+ """
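+    # pass every input under the single detected type flag, e.g. --fastq sample1.fq sample2.fq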
+ NanoComp \\
+ --$filetype $filelist \\
+ --threads $task.cpus \\
+ $prefixflag \\
+ $args
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+        nanocomp: \$(echo \$(NanoComp --version 2>&1) | sed 's/^.*NanoComp //; s/Using.*\$//' )
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch versions.yml
+ touch "${prefix}"NanoComp_lengths_violin.html
+ touch "${prefix}"NanoComp_log_length_violin.html
+ touch "${prefix}"NanoComp_N50.html
+ touch "${prefix}"NanoComp_number_of_reads.html
+ touch "${prefix}"NanoComp_OverlayHistogram.html
+ touch "${prefix}"NanoComp_OverlayHistogram_Normalized.html
+ touch "${prefix}"NanoComp_OverlayLogHistogram.html
+ touch "${prefix}"NanoComp_OverlayLogHistogram_Normalized.html
+ touch "${prefix}"NanoComp-report.html
+ touch "${prefix}"NanoComp_total_throughput.html
+ touch "${prefix}"NanoComp_quals_violin.html
+ touch "${prefix}"NanoComp_OverlayHistogram_Identity.html
+ touch "${prefix}"NanoComp_OverlayHistogram_PhredScore.html
+ touch "${prefix}"NanoComp_percentIdentity_violin.html
+ touch "${prefix}"NanoComp_ActivePoresOverTime.html
+ touch "${prefix}"NanoComp_CumulativeYieldPlot_Gigabases.html
+ touch "${prefix}"NanoComp_sequencing_speed_over_time.html
+ touch "${prefix}"NanoStats.txt
+ """
+}
diff --git a/modules/nf-core/nanocomp/meta.yml b/modules/nf-core/nanocomp/meta.yml
new file mode 100755
index 0000000..366c88e
--- /dev/null
+++ b/modules/nf-core/nanocomp/meta.yml
@@ -0,0 +1,106 @@
+name: "nanocomp"
+description: Compare multiple runs of long read sequencing data and alignments
+keywords:
+ - bam
+ - fasta
+ - fastq
+ - qc
+ - nanopore
+tools:
+ - "nanocomp":
+ description: "Compare multiple runs of long read sequencing data and alignments"
+ homepage: "https://github.com/wdecoster/nanocomp"
+ documentation: "https://github.com/wdecoster/nanocomp"
+ licence: "MIT License"
+input:
+ - meta:
+ type: map
+ description: Groovy Map containing sample information e.g. [ id:'test', single_end:false ]
+ - filelist:
+ type: file
+      description: List of all the files you want to compare; they must all be the same filetype (fastq, fasta, bam, or Nanopore sequencing summary)
+ pattern: "*.{fastq,fq,fna,ffn,faa,frn,fa,fasta,txt,bam}"
+output:
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - meta:
+ type: map
+ description: Groovy Map containing sample information e.g. [ id:'test', single_end:false ]
+ - report_html:
+ type: file
+ description: Summary of all collected statistics
+ pattern: "*NanoComp-report.html"
+ - lengths_violin_html:
+ type: file
+ description: Violin plot of the sequence lengths
+ pattern: "*NanoComp_lengths_violin.html"
+ - log_length_violin_html:
+ type: file
+ description: Violin plot of the sequence lengths, log function applied
+ pattern: "*NanoComp_log_length_violin.html"
+ - n50_html:
+ type: file
+ description: Bar plot of N50 sequence length per sample
+ pattern: "*NanoComp_N50.html"
+ - number_of_reads_html:
+ type: file
+ description: Bar plot of number of reads per sample
+ pattern: "*NanoComp_number_of_reads.html"
+ - overlay_histogram_html:
+ type: file
+ description: Histogram of all read lengths per sample
+ pattern: "*NanoComp_OverlayHistogram.html"
+ - overlay_histogram_normalized_html:
+ type: file
+ description: Normalized histogram of all read lengths per sample
+ pattern: "*NanoComp_OverlayHistogram_Normalized.html"
+ - overlay_log_histogram_html:
+ type: file
+ description: Histogram of all read lengths per sample, log function applied
+ pattern: "*NanoComp_OverlayLogHistogram.html"
+ - overlay_log_histogram_normalized_html:
+ type: file
+ description: Normalized histogram of all read lengths per sample, log function applied
+ pattern: "*NanoComp_OverlayLogHistogram_Normalized.html"
+ - total_throughput_html:
+ type: file
+ description: Barplot comparing throughput in bases
+ pattern: "*NanoComp_total_throughput.html"
+ - quals_violin_html:
+ type: file
+ description: Violin plot of base qualities, only for bam, fastq and sequencing summary input
+ pattern: "*NanoComp_quals_violin.html"
+ - overlay_histogram_identity_html:
+ type: file
+ description: Histogram of perfect reference identity, only for bam input
+ pattern: "*NanoComp_OverlayHistogram_Identity.html"
+ - overlay_histogram_phredscore_html:
+ type: file
+ description: Histogram of phred scores, only for bam input
+ pattern: "*NanoComp_OverlayHistogram_PhredScore.html"
+ - percent_identity_violin_html:
+ type: file
+ description: Violin plot comparing perfect reference identity, only for bam input
+ pattern: "*NanoComp_percentIdentity_violin.html"
+ - active_pores_over_time_html:
+ type: file
+ description: Scatter plot of active pores over time, only for sequencing summary input
+ pattern: "*NanoComp_ActivePoresOverTime.html"
+ - cumulative_yield_plot_gigabases_html:
+ type: file
+ description: Scatter plot of cumulative yield, only for sequencing summary input
+ pattern: "*NanoComp_CumulativeYieldPlot_Gigabases.html"
+ - sequencing_speed_over_time_html:
+ type: file
+ description: Scatter plot of sequencing speed over time, only for sequencing summary input
+ pattern: "*NanoComp_sequencing_speed_over_time.html"
+ - stats_txt:
+ type: file
+ description: txt file with basic statistics
+ pattern: "*NanoStats.txt"
+authors:
+ - "@paulwolk"
+maintainers:
+ - "@paulwolk"
diff --git a/modules/nf-core/nanocomp/nanocomp.diff b/modules/nf-core/nanocomp/nanocomp.diff
new file mode 100644
index 0000000..422cb25
--- /dev/null
+++ b/modules/nf-core/nanocomp/nanocomp.diff
@@ -0,0 +1,12 @@
+Changes in module 'nf-core/nanocomp'
+--- modules/nf-core/nanocomp/main.nf
++++ modules/nf-core/nanocomp/main.nf
+@@ -1,5 +1,6 @@
+ process NANOCOMP {
+ label 'process_medium'
++ label 'process_high_memory'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+
+************************************************************
diff --git a/modules/nf-core/pigz/compress/environment.yml b/modules/nf-core/pigz/compress/environment.yml
new file mode 100644
index 0000000..7551d18
--- /dev/null
+++ b/modules/nf-core/pigz/compress/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "pigz_compress"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "pigz=2.8"
diff --git a/modules/nf-core/pigz/compress/main.nf b/modules/nf-core/pigz/compress/main.nf
new file mode 100644
index 0000000..152e700
--- /dev/null
+++ b/modules/nf-core/pigz/compress/main.nf
@@ -0,0 +1,46 @@
+process PIGZ_COMPRESS {
+ tag "$meta.id"
+ label 'process_low'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/pigz:2.8':
+ 'biocontainers/pigz:2.8' }"
+
+ input:
+ tuple val(meta), path(raw_file)
+
+ output:
+ tuple val(meta), path("$archive"), emit: archive
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
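+    // note: 'archive' is assigned without 'def' so the output declaration path("$archive") can resolve it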
+ archive = raw_file.toString() + ".gz"
+ """
+ # Note: needs --stdout for pigz to avoid the following issue:
+ # pigz: skipping: ${raw_file} is a symbolic link
+ pigz --processes $task.cpus --stdout --force ${args} ${raw_file} > ${archive}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ archive = raw_file.toString() + ".gz"
+ """
+ touch ${archive}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ pigz:\$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/pigz/compress/meta.yml b/modules/nf-core/pigz/compress/meta.yml
new file mode 100644
index 0000000..42efd73
--- /dev/null
+++ b/modules/nf-core/pigz/compress/meta.yml
@@ -0,0 +1,47 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "pigz_compress"
+description: Compresses files with pigz.
+keywords:
+ - compress
+ - gzip
+ - parallelized
+tools:
+ - "pigz":
+ description: "Parallel implementation of the gzip algorithm."
+ homepage: "https://zlib.net/pigz/"
+ documentation: "https://zlib.net/pigz/pigz.pdf"
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1', single_end:false ]`
+
+ - raw_file:
+ type: file
+ description: File to be compressed
+ pattern: "*.*"
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1', single_end:false ]`
+
+ - archive:
+ type: file
+ description: The compressed file
+ pattern: "*.gz"
+
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+
+authors:
+ - "@leoisl"
+maintainers:
+ - "@leoisl"
diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test b/modules/nf-core/pigz/compress/tests/main.nf.test
new file mode 100644
index 0000000..b3cb25e
--- /dev/null
+++ b/modules/nf-core/pigz/compress/tests/main.nf.test
@@ -0,0 +1,53 @@
+nextflow_process {
+ name "Test Process PIGZ_COMPRESS"
+ script "../main.nf"
+ process "PIGZ_COMPRESS"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "pigz"
+ tag "pigz/compress"
+
+ test("sarscov2 - genome - fasta") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test'], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("sarscov2 - genome - fasta - stub") {
+ options "-stub-run"
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test'], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.archive[0][1]).name,
+ process.out.versions
+ ).match()
+ }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test.snap b/modules/nf-core/pigz/compress/tests/main.nf.test.snap
new file mode 100644
index 0000000..4d8df9f
--- /dev/null
+++ b/modules/nf-core/pigz/compress/tests/main.nf.test.snap
@@ -0,0 +1,48 @@
+{
+ "sarscov2 - genome - fasta": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad"
+ ],
+ "archive": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.fasta.gz:md5,6e9fe4042a72f2345f644f239272b7e6"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2023-12-11T22:39:53.350546"
+ },
+ "sarscov2 - genome - fasta - stub": {
+ "content": [
+ "genome.fasta.gz",
+ [
+ "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.3"
+ },
+ "timestamp": "2024-07-30T12:18:32.339508"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/pigz/compress/tests/tags.yml b/modules/nf-core/pigz/compress/tests/tags.yml
new file mode 100644
index 0000000..42c46bf
--- /dev/null
+++ b/modules/nf-core/pigz/compress/tests/tags.yml
@@ -0,0 +1,2 @@
+pigz/compress:
+ - "modules/nf-core/pigz/compress/**"
diff --git a/modules/nf-core/pigz/uncompress/main.nf b/modules/nf-core/pigz/uncompress/main.nf
new file mode 100644
index 0000000..c7528f8
--- /dev/null
+++ b/modules/nf-core/pigz/uncompress/main.nf
@@ -0,0 +1,48 @@
+process PIGZ_UNCOMPRESS {
+ label 'process_low'
+ //stageInMode 'copy' // this directive can be set in case the original input should be kept
+
+ conda "conda-forge::pigz"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/pigz:2.8':
+ 'biocontainers/pigz:2.8' }"
+
+ input:
+ tuple val(meta), path(zip)
+
+ output:
+ tuple val(meta), path("${uncompressed_filename}") , emit: file
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ uncompressed_filename = zip.toString() - '.gz'
+    // call unpigz with -f to follow symlinks and -k to keep the original input
+ """
+ unpigz \\
+ -p $task.cpus \\
+ -fk \\
+ $args \\
+ ${zip}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ uncompressed_filename = zip.toString() - '.gz'
+ """
+    touch ${uncompressed_filename}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/pigz/uncompress/meta.yml b/modules/nf-core/pigz/uncompress/meta.yml
new file mode 100644
index 0000000..c2d16cd
--- /dev/null
+++ b/modules/nf-core/pigz/uncompress/meta.yml
@@ -0,0 +1,42 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+name: "pigz_uncompress"
+description: Uncompresses files with pigz.
+keywords:
+ - uncompress
+ - gzip
+ - parallelized
+tools:
+ - "pigz":
+ description: "Parallel implementation of the gzip algorithm."
+ homepage: "https://zlib.net/pigz/"
+ documentation: "https://zlib.net/pigz/pigz.pdf"
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test']`
+ - zip:
+ type: file
+ description: Gzipped file
+      pattern: "*.gz"
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test']`
+ - file:
+ type: file
+      description: The uncompressed file
+ pattern: "*"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+
+authors:
+ - "@lrauschning"
diff --git a/modules/nf-core/pigz/uncompress/pigz-uncompress.diff b/modules/nf-core/pigz/uncompress/pigz-uncompress.diff
new file mode 100644
index 0000000..6736fca
--- /dev/null
+++ b/modules/nf-core/pigz/uncompress/pigz-uncompress.diff
@@ -0,0 +1,14 @@
+Changes in module 'nf-core/pigz/uncompress'
+--- modules/nf-core/pigz/uncompress/main.nf
++++ modules/nf-core/pigz/uncompress/main.nf
+@@ -42,7 +42,7 @@
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+- pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\w*//' ))
++        pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )
+ END_VERSIONS
+ """
+ }
+
+************************************************************
diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test b/modules/nf-core/pigz/uncompress/tests/main.nf.test
new file mode 100644
index 0000000..62ab27e
--- /dev/null
+++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test
@@ -0,0 +1,33 @@
+nextflow_process {
+
+ name "Test Process PIGZ_UNCOMPRESS"
+ script "modules/nf-core/pigz/uncompress/main.nf"
+ process "PIGZ_UNCOMPRESS"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "pigz"
+ tag "pigz/uncompress"
+
+ test("Should run without failures") {
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [ [ id:'test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assert snapshot(process.out).match()
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap b/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap
new file mode 100644
index 0000000..126dd7d
--- /dev/null
+++ b/modules/nf-core/pigz/uncompress/tests/main.nf.test.snap
@@ -0,0 +1,35 @@
+{
+ "Should run without failures": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683"
+ ],
+ "file": [
+ [
+ {
+ "id": "test"
+ },
+ "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,a2d5ce72baa8b303f25afb9cf094f683"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.03.0"
+ },
+ "timestamp": "2024-05-15T16:43:21.55056643"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/pigz/uncompress/tests/tags.yml b/modules/nf-core/pigz/uncompress/tests/tags.yml
new file mode 100644
index 0000000..6719a90
--- /dev/null
+++ b/modules/nf-core/pigz/uncompress/tests/tags.yml
@@ -0,0 +1,2 @@
+pigz/uncompress:
+ - modules/nf-core/pigz/uncompress/**
diff --git a/nextflow.config b/nextflow.config
index 6f36aa4..0fa07f7 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -22,7 +22,7 @@ params {
split_amount = 0
// Read Trimming Options
- min_length = 500
+ min_length = 1
min_q_score = 10
skip_trimming = false
@@ -38,7 +38,6 @@ params {
kmer_size = 14
// Analysis options
- analyze_uncorrected_bam = false
retain_introns = true
// Process Skipping options
@@ -55,10 +54,10 @@ params {
skip_multiqc = false
// MultiQC options
- multiqc_config = null
- multiqc_title = null
- multiqc_logo = null
- max_multiqc_email_size = '25.MB'
+ multiqc_config = null
+ multiqc_title = null
+ multiqc_logo = null
+ max_multiqc_email_size = '25.MB'
multiqc_methods_description = null
// Boilerplate options
@@ -276,7 +275,7 @@ manifest {
description = """Single-cell/nuclei pipeline for data derived from Oxford Nanopore"""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
- version = '0.1.0dev'
+ version = '1.0.0'
doi = ''
}
@@ -315,7 +314,3 @@ def check_max(obj, type) {
}
}
}
-
-conda {
- useMamba = true
-}
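Two of these config changes alter behaviour users may have relied on: the default `min_length` drops from 500 to 1, and the pipeline no longer forces mamba for conda environments. Both can be restored from the user side with a custom config; a minimal sketch (the file name `custom.config` is hypothetical):

```groovy
// custom.config -- user-side overrides restoring the pre-1.0.0 behaviour
params {
    min_length = 500    // re-impose the old read-length filter
}

conda {
    useMamba = true     // opt back in to mamba now that the pipeline no longer hard-codes it
}
```

Passed at runtime with `-c custom.config`, these settings take precedence over the defaults above.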
diff --git a/nextflow_schema.json b/nextflow_schema.json
index f9489a5..6133333 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -84,7 +84,8 @@
"description": "Do not load the iGenomes reference config.",
"fa_icon": "fas fa-ban",
"hidden": true,
- "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`."
+ "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.",
+ "default": true
}
},
"required": ["fasta", "gtf"]
@@ -98,7 +99,7 @@
"properties": {
"split_amount": {
"type": "integer",
- "description": "The amount of lines to split the fastq into (Default: 0)",
+ "description": "The amount of lines to split the fastq into for faster processing (Default: 0)",
"default": 0,
"fa_icon": "fas fa-cut"
}
@@ -112,7 +113,7 @@
"properties": {
"min_length": {
"type": "integer",
- "default": 500,
+ "default": 1,
"description": "Choose minimum read length.",
"fa_icon": "fas fa-cut"
},
@@ -184,11 +185,6 @@
"default": "",
"fa_icon": "fas fa-search",
"properties": {
- "analyze_uncorrected_bam": {
- "type": "boolean",
- "description": "Run downstream steps on the bam that contains reads that could not be corrected. Do not use this if no whitelist is provided.",
- "fa_icon": "fas fa-search"
- },
"retain_introns": {
"type": "boolean",
"default": true,
diff --git a/subworkflows/local/prepare_reference_files.nf b/subworkflows/local/prepare_reference_files.nf
index 191d2ce..66c0df5 100644
--- a/subworkflows/local/prepare_reference_files.nf
+++ b/subworkflows/local/prepare_reference_files.nf
@@ -2,9 +2,9 @@
// Creates gtfs that add introns as features
//
-include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip/main'
-include { GUNZIP as GUNZIP_GTF } from '../../modules/nf-core/gunzip/main'
-include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main'
+include { PIGZ_UNCOMPRESS as UNZIP_FASTA } from '../../modules/nf-core/pigz/uncompress/main'
+include { PIGZ_UNCOMPRESS as UNZIP_GTF } from '../../modules/nf-core/pigz/uncompress/main'
+include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main'
workflow PREPARE_REFERENCE_FILES {
take:
@@ -21,20 +21,20 @@ workflow PREPARE_REFERENCE_FILES {
//
ch_prepared_fasta = Channel.empty()
if (fasta.endsWith('.gz')){
- GUNZIP_FASTA( [ [:], fasta ])
- ch_prepared_fasta = GUNZIP_FASTA.out.gunzip
- ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions)
+ UNZIP_FASTA( [ [:], fasta ])
+ ch_prepared_fasta = UNZIP_FASTA.out.file
+ ch_versions = ch_versions.mix(UNZIP_FASTA.out.versions)
} else {
ch_prepared_fasta = [ [:], fasta ]
}
ch_prepared_gtf = Channel.empty()
if (gtf.endsWith('.gz')){
- GUNZIP_GTF( [ [:], gtf ])
- ch_prepared_gtf = GUNZIP_GTF.out.gunzip
- ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
+ UNZIP_GTF( [ [:], gtf ])
+ ch_prepared_gtf = UNZIP_GTF.out.file
+ ch_versions = ch_versions.mix(UNZIP_GTF.out.versions)
} else {
ch_prepared_gtf = [ [:], gtf]
}
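The swap from GUNZIP to PIGZ_UNCOMPRESS keeps the surrounding guard intact; only the module name and the output channel (`gunzip` → `file`) change. The general shape of the pattern, with `input_path` standing in for either `fasta` or `gtf` (names hypothetical):

```nextflow
// Decompress-only-if-needed pattern used for both reference files above
ch_prepared = Channel.empty()
if (input_path.endsWith('.gz')) {
    PIGZ_UNCOMPRESS( [ [:], input_path ] )             // empty meta map, as in the subworkflow
    ch_prepared = PIGZ_UNCOMPRESS.out.file
    ch_versions = ch_versions.mix(PIGZ_UNCOMPRESS.out.versions)
} else {
    ch_prepared = [ [:], input_path ]                  // already uncompressed; pass through
}
```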
diff --git a/subworkflows/nf-core/qcfastq_nanoplot_fastqc.nf b/subworkflows/nf-core/qcfastq_nanoplot_fastqc.nf
index dd644de..41ecaf0 100644
--- a/subworkflows/nf-core/qcfastq_nanoplot_fastqc.nf
+++ b/subworkflows/nf-core/qcfastq_nanoplot_fastqc.nf
@@ -1,6 +1,7 @@
/*
* FastQ QC with NanoPlot, ToulligQC and fastqc
- * subworkflow from nf-core/nanoseq
+ * subworkflow from nf-core/nanoseq with minor modifications
+ * (e.g. the addition of ToulligQC)
* author: @yuukiiwa
*/
diff --git a/workflows/scnanoseq.nf b/workflows/scnanoseq.nf
index c918a72..871b578 100644
--- a/workflows/scnanoseq.nf
+++ b/workflows/scnanoseq.nf
@@ -4,15 +4,7 @@
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-// This is if the user passes in direct regex
-def cell_barcode_pattern = ""
-// This is for if the user wants to do more human readable regex
-def cell_barcode_lengths = ""
-def umi_lengths = ""
-def blaze_whitelist = ""
-
-// TODO: Move this to a config file
if (params.barcode_format.equals("10X_3v3")) {
blaze_whitelist = file("$baseDir/assets/whitelist/3M-february-2018.zip")
}
@@ -31,11 +23,11 @@ if (params.whitelist) {
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
-ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
-ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty()
-ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
-ch_dummy_file = Channel.fromPath("$projectDir/assets/dummy_file.txt", checkIfExists: true)
+ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true)
+ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty()
+ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty()
+ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true)
+ch_dummy_file = Channel.fromPath("$projectDir/assets/dummy_file.txt", checkIfExists: true)
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -48,18 +40,12 @@ ch_dummy_file = Channel.fromPath("$projectDir/assets/dummy_file.txt", checkIfExi
//
include { NANOFILT } from "../modules/local/nanofilt"
-include { NANOCOMP as NANOCOMP_FASTQ } from "../modules/local/nanocomp"
-include { NANOCOMP as NANOCOMP_BAM } from "../modules/local/nanocomp"
include { SPLIT_FILE } from "../modules/local/split_file"
-include { SPLIT_FILE as SPLIT_FILE_BC_FASTQ } from "../modules/local/split_file"
-include { SPLIT_FILE as SPLIT_FILE_BC_CSV } from "../modules/local/split_file"
-include { PIGZ as ZIP_TRIM } from "../modules/local/pigz"
+include { SPLIT_FILE as SPLIT_FILE_BC_FASTQ } from "../modules/local/split_file"
+include { SPLIT_FILE as SPLIT_FILE_BC_CSV } from "../modules/local/split_file"
include { BLAZE } from "../modules/local/blaze"
include { PREEXTRACT_FASTQ } from "../modules/local/preextract_fastq.nf"
include { READ_COUNTS } from "../modules/local/read_counts.nf"
-include { PAFTOOLS } from "../modules/local/paftools"
-include { MINIMAP2_INDEX } from "../modules/local/minimap2_index"
-include { MINIMAP2_ALIGN } from "../modules/local/minimap2_align"
include { TAG_BARCODES } from "../modules/local/tag_barcodes"
include { CORRECT_BARCODES } from "../modules/local/correct_barcodes"
include { ISOQUANT } from "../modules/local/isoquant"
@@ -84,37 +70,41 @@ include { PREPARE_REFERENCE_FILES } from "../subworkflows/local/prepare_referenc
//
// MODULE: Installed directly from nf-core/modules
//
-include { GUNZIP } from "../modules/nf-core/gunzip/main"
-include { MULTIQC as MULTIQC_RAWQC } from '../modules/nf-core/multiqc/main'
-include { MULTIQC as MULTIQC_FINALQC } from '../modules/nf-core/multiqc/main'
-include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
-include { UMITOOLS_DEDUP } from '../modules/nf-core/umitools/dedup/main'
-include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_BAM } from "../modules/nf-core/samtools/view/main"
+include { PIGZ_UNCOMPRESS } from "../modules/nf-core/pigz/uncompress/main"
+include { PIGZ_COMPRESS } from "../modules/nf-core/pigz/compress/main"
+include { NANOCOMP as NANOCOMP_FASTQ } from "../modules/nf-core/nanocomp/main"
+include { NANOCOMP as NANOCOMP_BAM } from "../modules/nf-core/nanocomp/main"
+include { MULTIQC as MULTIQC_RAWQC } from "../modules/nf-core/multiqc/main"
+include { MULTIQC as MULTIQC_FINALQC } from "../modules/nf-core/multiqc/main"
+include { CUSTOM_DUMPSOFTWAREVERSIONS } from "../modules/nf-core/custom/dumpsoftwareversions/main"
+include { UMITOOLS_DEDUP } from "../modules/nf-core/umitools/dedup/main"
include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_FILTER } from "../modules/nf-core/samtools/view/main"
include { CAT_CAT } from "../modules/nf-core/cat/cat/main"
-include { CAT_CAT as CAT_CAT_PREEXTRACT } from "../modules/nf-core/cat/cat/main"
-include { CAT_CAT as CAT_CAT_BARCODE } from "../modules/nf-core/cat/cat/main"
-include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main'
-include { RSEQC_READDISTRIBUTION } from '../modules/nf-core/rseqc/readdistribution/main'
-include { BAMTOOLS_SPLIT } from '../modules/nf-core/bamtools/split/main'
-include { SAMTOOLS_MERGE } from '../modules/nf-core/samtools/merge/main'
-include { paramsSummaryMap } from 'plugin/nf-validation'
+include { CAT_CAT as CAT_CAT_PREEXTRACT } from "../modules/nf-core/cat/cat/main"
+include { CAT_CAT as CAT_CAT_BARCODE } from "../modules/nf-core/cat/cat/main"
+include { CAT_FASTQ } from "../modules/nf-core/cat/fastq/main"
+include { MINIMAP2_INDEX } from "../modules/nf-core/minimap2/index/main"
+include { MINIMAP2_ALIGN } from "../modules/nf-core/minimap2/align/main"
+include { RSEQC_READDISTRIBUTION } from "../modules/nf-core/rseqc/readdistribution/main"
+include { BAMTOOLS_SPLIT } from "../modules/nf-core/bamtools/split/main"
+include { SAMTOOLS_MERGE } from "../modules/nf-core/samtools/merge/main"
+include { paramsSummaryMap } from "plugin/nf-validation"
/*
* SUBWORKFLOW: Consisting entirely of nf-core/modules
*/
-include { QCFASTQ_NANOPLOT_FASTQC as FASTQC_NANOPLOT_PRE_TRIM } from '../subworkflows/nf-core/qcfastq_nanoplot_fastqc'
-include { QCFASTQ_NANOPLOT_FASTQC as FASTQC_NANOPLOT_POST_TRIM } from '../subworkflows/nf-core/qcfastq_nanoplot_fastqc'
-include { QCFASTQ_NANOPLOT_FASTQC as FASTQC_NANOPLOT_POST_EXTRACT } from '../subworkflows/nf-core/qcfastq_nanoplot_fastqc'
-include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_MINIMAP } from "../subworkflows/nf-core/bam_sort_stats_samtools/main"
-include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_FILTERED } from "../subworkflows/nf-core/bam_sort_stats_samtools/main"
-include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_TAGGED } from "../subworkflows/nf-core/bam_sort_stats_samtools/main"
+include { QCFASTQ_NANOPLOT_FASTQC as FASTQC_NANOPLOT_PRE_TRIM } from "../subworkflows/nf-core/qcfastq_nanoplot_fastqc"
+include { QCFASTQ_NANOPLOT_FASTQC as FASTQC_NANOPLOT_POST_TRIM } from "../subworkflows/nf-core/qcfastq_nanoplot_fastqc"
+include { QCFASTQ_NANOPLOT_FASTQC as FASTQC_NANOPLOT_POST_EXTRACT } from "../subworkflows/nf-core/qcfastq_nanoplot_fastqc"
+include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_MINIMAP } from "../subworkflows/nf-core/bam_sort_stats_samtools/main"
+include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_FILTERED } from "../subworkflows/nf-core/bam_sort_stats_samtools/main"
+include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_TAGGED } from "../subworkflows/nf-core/bam_sort_stats_samtools/main"
include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_CORRECTED } from "../subworkflows/nf-core/bam_sort_stats_samtools/main"
-include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_SPLIT } from "../subworkflows/nf-core/bam_sort_stats_samtools/main"
-include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_DEDUP } from "../subworkflows/nf-core/bam_sort_stats_samtools/main"
-include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
-include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
-include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_scnanoseq_pipeline'
+include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_SPLIT } from "../subworkflows/nf-core/bam_sort_stats_samtools/main"
+include { BAM_SORT_STATS_SAMTOOLS as BAM_SORT_STATS_SAMTOOLS_DEDUP } from "../subworkflows/nf-core/bam_sort_stats_samtools/main"
+include { paramsSummaryMultiqc } from "../subworkflows/nf-core/utils_nfcore_pipeline"
+include { softwareVersionsToYAML } from "../subworkflows/nf-core/utils_nfcore_pipeline"
+include { methodsDescriptionText } from "../subworkflows/local/utils_nfcore_scnanoseq_pipeline"
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -130,6 +120,7 @@ workflow SCNANOSEQ {
main:
ch_versions = Channel.empty()
+ ch_multiqc_report = Channel.empty()
//
// SUBWORKFLOW: Read in samplesheet, validate and stage input files
@@ -178,10 +169,16 @@ workflow SCNANOSEQ {
ch_nanocomp_fastq_txt = Channel.empty()
if (!params.skip_qc && !params.skip_fastq_nanocomp) {
- NANOCOMP_FASTQ ( ch_cat_fastq.collect{it[1]},
- ch_dummy_file )
- ch_nanocomp_fastq_html = NANOCOMP_FASTQ.out.html
- ch_nanocomp_fastq_txt = NANOCOMP_FASTQ.out.txt
+ NANOCOMP_FASTQ (
+ ch_cat_fastq
+ .collect{it[1]}
+ .map{
+ [ [ 'id': 'nanocomp_fastq.' ] , it ]
+ }
+ )
+
+ ch_nanocomp_fastq_html = NANOCOMP_FASTQ.out.report_html
+ ch_nanocomp_fastq_txt = NANOCOMP_FASTQ.out.stats_txt
ch_versions = ch_versions.mix( NANOCOMP_FASTQ.out.versions )
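The rewritten call reflects the move from the local NanoComp module to the nf-core one, which takes a `[ meta, files ]` tuple instead of a bare file list plus dummy file. The `.collect{it[1]}.map{...}` idiom that performs this wrapping, in isolation (channel contents hypothetical):

```nextflow
workflow {
    // Wrap a collected file list in a meta map, as tuple-based nf-core modules expect
    Channel.of( [ [id:'s1'], file('a.fastq') ], [ [id:'s2'], file('b.fastq') ] )
        .collect { it[1] }                                    // -> [ a.fastq, b.fastq ]
        .map { files -> [ [ id:'nanocomp_fastq.' ], files ] } // -> [ [id:...], [ a.fastq, b.fastq ] ]
        .view()
}
```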
@@ -194,7 +191,7 @@ workflow SCNANOSEQ {
PREPARE_REFERENCE_FILES ( "",
"",
params.fasta,
- params.gtf)
+ params.gtf )
fasta = PREPARE_REFERENCE_FILES.out.prepped_fasta
fai = PREPARE_REFERENCE_FILES.out.prepped_fai
@@ -203,19 +200,10 @@ workflow SCNANOSEQ {
ch_versions = ch_versions.mix( PREPARE_REFERENCE_FILES.out.versions )
- //
- // MODULE: Generate junction file - paftools
- //
-
- PAFTOOLS ( gtf.map { meta, gtf -> [gtf]} )
- ch_bed = PAFTOOLS.out.bed
- ch_versions = ch_versions.mix(PAFTOOLS.out.versions)
-
//
// MODULE: Generate bed file from input gtf for rseqc
//
- //TODO: This uses params.gtf instead of gtf in PAFTOOLS
// come back to this once intron work is finished (likely input will be fine)
ch_pred = Channel.empty()
ch_rseqc_bed = Channel.empty()
@@ -232,15 +220,14 @@ workflow SCNANOSEQ {
//
// MODULE: Unzip fastq
//
- GUNZIP( ch_cat_fastq )
- ch_unzipped_fastqs = GUNZIP.out.gunzip
- ch_versions = ch_versions.mix( GUNZIP.out.versions )
+ PIGZ_UNCOMPRESS( ch_cat_fastq )
+ ch_unzipped_fastqs = PIGZ_UNCOMPRESS.out.file
+ ch_versions = ch_versions.mix( PIGZ_UNCOMPRESS.out.versions )
//
// MODULE: Trim and filter reads
//
- //ch_zipped_reads = Channel.empty()
ch_fastqc_multiqc_postrim = Channel.empty()
ch_trimmed_reads_combined = Channel.empty()
@@ -294,7 +281,6 @@ workflow SCNANOSEQ {
ch_versions = ch_versions.mix(FASTQC_NANOPLOT_POST_TRIM.out.fastqc_version.first().ifEmpty(null))
}
} else {
- //ch_zipped_reads = ch_cat_fastq
ch_trimmed_reads_combined = ch_unzipped_fastqs
}
@@ -302,15 +288,13 @@ workflow SCNANOSEQ {
// MODULE: Generate whitelist
//
- BLAZE ( ch_trimmed_reads_combined, blaze_whitelist)
+ BLAZE ( ch_trimmed_reads_combined, blaze_whitelist )
ch_putative_bc = BLAZE.out.putative_bc
ch_gt_whitelist = BLAZE.out.whitelist
ch_whitelist_bc_count = BLAZE.out.bc_count
ch_versions = ch_versions.mix(BLAZE.out.versions)
- ch_multiqc_report = Channel.empty()
-
ch_split_bc_fastqs = ch_trimmed_reads_combined
ch_split_bc = ch_putative_bc
if (params.split_amount > 0) {
@@ -332,7 +316,7 @@ workflow SCNANOSEQ {
// MODULE: Extract barcodes
//
- PREEXTRACT_FASTQ( ch_split_bc_fastqs.join(ch_split_bc), params.barcode_format)
+ PREEXTRACT_FASTQ( ch_split_bc_fastqs.join(ch_split_bc), params.barcode_format )
ch_barcode_info = PREEXTRACT_FASTQ.out.barcode_info
ch_preextract_fastq = PREEXTRACT_FASTQ.out.extracted_fastq
@@ -367,9 +351,9 @@ workflow SCNANOSEQ {
//
// MODULE: Zip the reads
//
- ZIP_TRIM (ch_cat_preextract_fastq, "filtered" )
- ch_extracted_fastq = ZIP_TRIM.out.archive
- ch_versions = ch_versions.mix(ZIP_TRIM.out.versions)
+ PIGZ_COMPRESS (ch_cat_preextract_fastq )
+ ch_extracted_fastq = PIGZ_COMPRESS.out.archive
+ ch_versions = ch_versions.mix(PIGZ_COMPRESS.out.versions)
}
//
@@ -394,16 +378,18 @@ workflow SCNANOSEQ {
ch_corrected_bc_info.collect{it[1]})
ch_read_counts = READ_COUNTS.out.read_counts
+ ch_versions = ch_versions.mix(READ_COUNTS.out.versions)
}
}
//
// MINIMAP2_INDEX
//
+ ch_minimap_ref = fasta
if (!params.skip_save_minimap2_index) {
- MINIMAP2_INDEX ( fasta.map { meta, fasta -> [fasta]}, ch_bed)
- ch_minimap_index = MINIMAP2_INDEX.out.index
+ MINIMAP2_INDEX ( fasta )
+ ch_minimap_ref = MINIMAP2_INDEX.out.index
ch_versions = ch_versions.mix(MINIMAP2_INDEX.out.versions)
}
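An annotated restatement of the new indexing logic: `ch_minimap_ref` defaults to the FASTA and is only upgraded to the prebuilt index when indexing is enabled, which works because minimap2 accepts either a FASTA or a `.mmi` as its reference (the channel-content comments are my reading of the diff):

```nextflow
ch_minimap_ref = fasta                         // [ meta, genome.fa ] -- the fallback reference
if (!params.skip_save_minimap2_index) {
    MINIMAP2_INDEX ( fasta )                   // nf-core module, takes [ meta, fasta ]
    ch_minimap_ref = MINIMAP2_INDEX.out.index  // [ meta, genome.mmi ] replaces the fallback
    ch_versions = ch_versions.mix(MINIMAP2_INDEX.out.versions)
}
```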
@@ -411,25 +397,16 @@ workflow SCNANOSEQ {
// MINIMAP2_ALIGN
//
- if (!params.skip_save_minimap2_index) {
- ch_reference = ch_minimap_index.toList()
- } else {
- ch_reference = Channel.fromPath(fasta, checkIfExists: true).toList()
- }
- MINIMAP2_ALIGN ( ch_extracted_fastq, ch_bed, ch_reference )
+ MINIMAP2_ALIGN (
+ ch_extracted_fastq,
+ ch_minimap_ref,
+ true,
+ "bai",
+ "",
+ "" )
ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions)
- MINIMAP2_ALIGN.out.sam
- .combine( ch_dummy_file )
- .set { ch_minimap_sam }
-
- //
- // MODULE: Samtools view
- //
- SAMTOOLS_VIEW_BAM ( ch_minimap_sam, [[],[]], [] )
-
- ch_minimap_bam = SAMTOOLS_VIEW_BAM.out.bam
- ch_versions = ch_versions.mix(SAMTOOLS_VIEW_BAM.out.versions)
+ ch_minimap_bam = MINIMAP2_ALIGN.out.bam
// acquire only mapped reads from bam for downstream processing
// NOTE: some QC steps are performed on the full BAM
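The bare positional arguments in the new MINIMAP2_ALIGN call are easier to follow with the nf-core module's parameter names attached; the names below are my reading of the module signature at this version and should be treated as an assumption:

```nextflow
MINIMAP2_ALIGN (
    ch_extracted_fastq,  // tuple val(meta),  path(reads)
    ch_minimap_ref,      // tuple val(meta2), path(reference) -- FASTA or prebuilt .mmi
    true,                // bam_format: emit sorted BAM rather than PAF
    "bai",               // bam_index_extension: also produce a .bai index
    "",                  // cigar_paf_format: unused here
    "" )                 // cigar_bam: unused here
```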
@@ -479,11 +456,17 @@ workflow SCNANOSEQ {
if (!params.skip_qc && !params.skip_bam_nanocomp) {
- NANOCOMP_BAM ( ch_minimap_sorted_bam.collect{it[1]},
- ch_minimap_sorted_bai.collect{it[1]})
- ch_nanocomp_bam_html = NANOCOMP_BAM.out.html
- ch_nanocomp_bam_txt = NANOCOMP_BAM.out.txt
+ NANOCOMP_BAM (
+ ch_minimap_sorted_bam
+ .collect{it[1]}
+ .map{
+ [ [ 'id': 'nanocomp_bam.' ] , it ]
+ }
+ )
+
+ ch_nanocomp_bam_html = NANOCOMP_BAM.out.report_html
+ ch_nanocomp_bam_txt = NANOCOMP_BAM.out.stats_txt
ch_versions = ch_versions.mix( NANOCOMP_BAM.out.versions )
}
@@ -514,10 +497,10 @@ workflow SCNANOSEQ {
ch_tagged_sorted_idxstats = BAM_SORT_STATS_SAMTOOLS_TAGGED.out.idxstats
ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS_TAGGED.out.versions)
- // TODO: Rename the dedup_bam channel to be more descriptive
ch_dedup_sorted_bam = ch_tagged_sorted_bam
- ch_dedup_sorted_bam_bai = ch_tagged_sorted_bai
+ ch_dedup_sorted_bai = ch_tagged_sorted_bai
ch_dedup_sorted_flagstat = ch_tagged_sorted_flagstat
+ ch_dedup_sorted_idxstats = Channel.empty()
ch_dedup_log = Channel.empty()
if (!params.skip_dedup) {
@@ -642,9 +625,6 @@ workflow SCNANOSEQ {
ch_versions.unique().collectFile(name: 'collated_versions.yml')
)
- ch_multiqc_report = Channel.empty()
- ch_versions = Channel.empty()
-
if (!params.skip_qc && !params.skip_multiqc){
//
@@ -690,11 +670,11 @@ workflow SCNANOSEQ {
ch_multiqc_finalqc_files = ch_multiqc_finalqc_files.mix(ch_tagged_sorted_flagstat.collect{it[1]}.ifEmpty([]))
ch_multiqc_finalqc_files = ch_multiqc_finalqc_files.mix(ch_tagged_sorted_idxstats.collect{it[1]}.ifEmpty([]))
- ch_multiqc_finalqc_files = ch_multiqc_finalqc_files.mix(ch_dedup_sorted_flagstat.collect{it[1]}.ifEmpty([]))
- ch_multiqc_finalqc_files = ch_multiqc_finalqc_files.mix(ch_dedup_sorted_idxstats.collect{it[1]}.ifEmpty([]))
+ if (!params.skip_dedup) {
+ ch_multiqc_finalqc_files = ch_multiqc_finalqc_files.mix(ch_dedup_sorted_flagstat.collect{it[1]}.ifEmpty([]))
+ ch_multiqc_finalqc_files = ch_multiqc_finalqc_files.mix(ch_dedup_sorted_idxstats.collect{it[1]}.ifEmpty([]))
+ }
- // see issue #12 (too many files when split by chr)
- //ch_multiqc_finalqc_files = ch_multiqc_finalqc_files.mix(ch_dedup_log.collect{it[1]}.ifEmpty([]))
ch_multiqc_finalqc_files = ch_multiqc_finalqc_files.mix(ch_read_counts.collect().ifEmpty([]))
ch_multiqc_finalqc_files = ch_multiqc_finalqc_files.mix(ch_gene_stats_combined.collect().ifEmpty([]))
ch_multiqc_finalqc_files = ch_multiqc_finalqc_files.mix(ch_transcript_stats_combined.collect().ifEmpty([]))
@@ -711,7 +691,7 @@ workflow SCNANOSEQ {
emit:
multiqc_report = ch_multiqc_report.toList()
- versions = ch_versions
+ versions = ch_versions
}
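For completeness, a hedged sketch of how an entry workflow might consume the two emissions above; the include path and wrapper name are hypothetical, and the call assumes this version of SCNANOSEQ declares no `take:` inputs — adjust if it does:

```nextflow
include { SCNANOSEQ } from './workflows/scnanoseq'

// Hypothetical entry-point wiring of the workflow's emissions
workflow NFCORE_SCNANOSEQ {
    SCNANOSEQ()

    SCNANOSEQ.out.multiqc_report.view { "MultiQC report(s): $it" }

    SCNANOSEQ.out.versions
        .unique()
        .collectFile(name: 'software_versions.yml')
}
```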