Merge pull request #1 from CenterForMedicalGeneticsGhent/first-branch

Complete pipeline PR
nf-cmgg · Jul 4, 2023 · c02c8d0 · c02c8d0
2 parents 464356c + 76e7d38
commit c02c8d0
Show file tree

Hide file tree

Showing 76 changed files with 2,564 additions and 781 deletions.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -2,53 +2,53 @@ name: Bug report
 description: Report something that is broken or incorrect
 labels: bug
 body:
-- type: textarea
-  id: description
-  attributes:
-    label: Description of the bug
-    description: A clear and concise description of what the bug is.
-  validations:
-    required: true
-- type: textarea
-  id: command_used
-  attributes:
-    label: Command used and terminal output
-    description: Steps to reproduce the behaviour. Please paste the command you used
-      to launch the pipeline and the output from your terminal.
-    render: console
-    placeholder: '$ nextflow run ...
-
-
-      Some output where something broke
-
-      '
-- type: textarea
-  id: files
-  attributes:
-    label: Relevant files
-    description: 'Please drag and drop the relevant files here. Create a `.zip` archive
-      if the extension is not allowed.
-
-      Your verbose log file `.nextflow.log` is often useful _(this is a hidden file
-      in the directory where you launched the pipeline)_ as well as custom Nextflow
-      configuration files.
-
-      '
-- type: textarea
-  id: system
-  attributes:
-    label: System information
-    description: '* Nextflow version _(eg. 23.04.0)_
-
-      * Hardware _(eg. HPC, Desktop, Cloud)_
-
-      * Executor _(eg. slurm, local, awsbatch)_
-
-      * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud,
-      or Apptainer)_
-
-      * OS _(eg. CentOS Linux, macOS, Linux Mint)_
-
-      * Version of CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq _(eg. 1.1, 1.5, 1.8.2)_
-
-      '
+  - type: textarea
+    id: description
+    attributes:
+      label: Description of the bug
+      description: A clear and concise description of what the bug is.
+    validations:
+      required: true
+  - type: textarea
+    id: command_used
+    attributes:
+      label: Command used and terminal output
+      description: Steps to reproduce the behaviour. Please paste the command you used
+        to launch the pipeline and the output from your terminal.
+      render: console
+      placeholder: "$ nextflow run ...
+
+
+        Some output where something broke
+
+        "
+  - type: textarea
+    id: files
+    attributes:
+      label: Relevant files
+      description: "Please drag and drop the relevant files here. Create a `.zip` archive
+        if the extension is not allowed.
+
+        Your verbose log file `.nextflow.log` is often useful _(this is a hidden file
+        in the directory where you launched the pipeline)_ as well as custom Nextflow
+        configuration files.
+
+        "
+  - type: textarea
+    id: system
+    attributes:
+      label: System information
+      description: "* Nextflow version _(eg. 23.04.0)_
+
+        * Hardware _(eg. HPC, Desktop, Cloud)_
+
+        * Executor _(eg. slurm, local, awsbatch)_
+
+        * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter, Charliecloud,
+        or Apptainer)_
+
+        * OS _(eg. CentOS Linux, macOS, Linux Mint)_
+
+        * Version of CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq _(eg. 1.1, 1.5, 1.8.2)_
+
+        "
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -35,9 +35,16 @@ jobs:
         with:
           version: "${{ matrix.NXF_VER }}"
 
+      - name: Install nf-test
+        run: |
+          conda install -c bioconda nf-test
+
       - name: Run pipeline with test data
-        # TODO nf-core: You can customise CI pipeline run tests as required
-        # For example: adding multiple test runs with different parameters
-        # Remember that you can parallelise this by using strategy.matrix
         run: |
-          nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results
+          $CONDA/bin/nf-test test --junitxml=default.xml
+
+      - name: Publish Test Report
+        uses: mikepenz/action-junit-report@v3
+        if: always() # always run even if the previous step fails
+        with:
+          report_paths: "default.xml"
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,4 @@ results/
 testing/
 testing*
 *.pyc
+.nf-test*
diff --git a/.nf-core.yml b/.nf-core.yml
@@ -1,21 +1,24 @@
 repository_type: pipeline
 lint:
   files_exist:
-  - CODE_OF_CONDUCT.md
-  - assets/nf-core-nf-cmgg-qdnaseq_logo_light.png
-  - docs/images/nf-core-nf-cmgg-qdnaseq_logo_light.png
-  - docs/images/nf-core-nf-cmgg-qdnaseq_logo_dark.png
-  - .github/ISSUE_TEMPLATE/config.yml
-  - .github/workflows/awstest.yml
-  - .github/workflows/awsfulltest.yml
+    - CODE_OF_CONDUCT.md
+    - assets/nf-core-nf-cmgg-qdnaseq_logo_light.png
+    - docs/images/nf-core-nf-cmgg-qdnaseq_logo_light.png
+    - docs/images/nf-core-nf-cmgg-qdnaseq_logo_dark.png
+    - .github/ISSUE_TEMPLATE/config.yml
+    - .github/workflows/awstest.yml
+    - .github/workflows/awsfulltest.yml
+    - lib/WorkflowNf-cmgg-qdnaseq.groovy
   files_unchanged:
-  - CODE_OF_CONDUCT.md
-  - assets/nf-core-nf-cmgg-qdnaseq_logo_light.png
-  - docs/images/nf-core-nf-cmgg-qdnaseq_logo_light.png
-  - docs/images/nf-core-nf-cmgg-qdnaseq_logo_dark.png
-  - .github/ISSUE_TEMPLATE/bug_report.yml
+    - CODE_OF_CONDUCT.md
+    - assets/nf-core-nf-cmgg-qdnaseq_logo_light.png
+    - docs/images/nf-core-nf-cmgg-qdnaseq_logo_light.png
+    - docs/images/nf-core-nf-cmgg-qdnaseq_logo_dark.png
+    - .github/ISSUE_TEMPLATE/bug_report.yml
+    - docs/README.md
   nextflow_config:
-  - manifest.name
-  - manifest.homePage
+    - manifest.name
+    - manifest.homePage
   multiqc_config:
-  - report_comment
+    - report_comment
+  pipeline_name_conventions: false
diff --git a/.prettierignore b/.prettierignore
@@ -10,3 +10,5 @@ testing/
 testing*
 *.pyc
 bin/
+.nf-test*
+null
diff --git a/README.md b/README.md
@@ -1,5 +1,3 @@
-[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)
-
 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
@@ -8,20 +6,13 @@
 
 ## Introduction
 
-**CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq** is a bioinformatics pipeline that ...
-
-<!-- TODO nf-core:
-   Complete this sentence with a 2-3 sentence summary of what types of data the pipeline ingests, a brief overview of the
-   major pipeline sections and the types of output it produces. You're giving an overview to someone new
-   to nf-core here, in 15-20 seconds. For an example, see https://github.com/nf-core/rnaseq/blob/master/README.md#introduction
--->
-
-<!-- TODO nf-core: Include a figure that guides the user through the major workflow steps. Many nf-core
-     workflows use the "tube map" design for that. See https://nf-co.re/docs/contributing/design_guidelines#examples for examples.   -->
-<!-- TODO nf-core: Fill in short bullet-pointed list of the default steps in the pipeline -->
+**CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq** is a bioinformatics pipeline for creating QDNAseq annotations.
 
-1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
-2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))
+1. Trim the reads in the FASTQ files to a length of 50 with Trimmomatic
+2. Align the reads with BWA (aln and samse/sampe)
+3. Create a mappability WIG file with GenMap
+4. Convert the WIG to BigWig with UCSC WigToBigWig
+5. Create the annotations using a custom R script
 
 ## Usage
 
@@ -30,9 +21,6 @@
 > to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)
 > with `-profile test` before running the workflow on actual data.
 
-<!-- TODO nf-core: Describe the minimum required steps to execute the pipeline, e.g. how to prepare samplesheets.
-     Explain what rows and columns represent. For instance (please edit as appropriate):
-
 First, prepare a samplesheet with your input data that looks as follows:
 
 `samplesheet.csv`:
@@ -44,15 +32,12 @@ CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
 
 Each row represents a fastq file (single-end) or a pair of fastq files (paired end).
 
--->
-
 Now, you can run the pipeline using:
 
-<!-- TODO nf-core: update the following command to include all required parameters for a minimal example -->
-
 ```bash
 nextflow run CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq \
    -profile <docker/singularity/.../institute> \
+   --genome <genome> \
    --input samplesheet.csv \
    --outdir <OUTDIR>
 ```
@@ -68,19 +53,12 @@ CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq was originally written by nvnieuwk
 
 We thank the following people for their extensive assistance in the development of this pipeline:
 
-<!-- TODO nf-core: If applicable, make list of people who have also contributed -->
-
 ## Contributions and Support
 
 If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).
 
 ## Citations
 
-<!-- TODO nf-core: Add citation for pipeline after first release. Uncomment lines below and update Zenodo doi and badge at the top of this file. -->
-<!-- If you use  CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq for your analysis, please cite it using the following doi: [10.5281/zenodo.XXXXXX](https://doi.org/10.5281/zenodo.XXXXXX) -->
-
-<!-- TODO nf-core: Add bibliography of tools and data used in your pipeline -->
-
 An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
 
 This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).

diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
@@ -3,8 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag
 section_name: "CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq Methods Description"
 section_href: "https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq"
 plot_type: "html"
-## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline
-## You inject any metadata in the Nextflow '${workflow}' object
 data: |
   <h4>Methods</h4>
   <p>Data was processed using CenterForMedicalGeneticsGhent/nf-cmgg-qdnaseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels <em>et al.</em>, 2020</a>), utilising reproducible software environments from the Bioconda (<a href="https://doi.org/10.1038/s41592-018-0046-7">Grüning <em>et al.</em>, 2018</a>) and Biocontainers (<a href="https://doi.org/10.1093/bioinformatics/btx192">da Veiga Leprevost <em>et al.</em>, 2017</a>) projects.</p>

diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv
@@ -1,3 +1,3 @@
 sample,fastq_1,fastq_2
-SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz
-SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz,
+test1,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R2.fastq.gz
+test2,https://github.com/CenterForMedicalGeneticsGhent/nf-cmgg-test-datasets/raw/main/data/genomics/homo_sapiens/illumina/fastq/test_R1.fastq.gz,
diff --git a/assets/schema_input.json b/assets/schema_input.json
@@ -9,28 +9,21 @@
         "properties": {
             "sample": {
                 "type": "string",
-                "pattern": "^\\S+$",
-                "errorMessage": "Sample name must be provided and cannot contain spaces"
+                "meta": ["id"]
             },
             "fastq_1": {
                 "type": "string",
-                "pattern": "^\\S+\\.f(ast)?q\\.gz$",
-                "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'"
+                "format": "file-path",
+                "exists": true,
+                "pattern": "^.*\\.fastq(\\.gz)?$"
             },
             "fastq_2": {
-                "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'",
-                "anyOf": [
-                    {
-                        "type": "string",
-                        "pattern": "^\\S+\\.f(ast)?q\\.gz$"
-                    },
-                    {
-                        "type": "string",
-                        "maxLength": 0
-                    }
-                ]
+                "type": "string",
+                "format": "file-path",
+                "exists": true,
+                "pattern": "^.*\\.fastq(\\.gz)?$"
             }
         },
-        "required": ["sample", "fastq_1"]
+        "required": ["fastq_1", "sample"]
     }
 }